# Code example (代码实例)
import requests
import re
# User-Agent setting: request headers carrying a desktop Chrome User-Agent
# string; baidu() passes this dict as `headers=` to requests.get.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/69.0.3497.100 Safari/537.36"
    ),
}
# 定义一个方法
def baidu(company):
url = 'https://www.baidu.com/s?tn=news&rtt=1&bsst=1&cl=2&wd=' + company
res = requests.get(url, headers=headers).text
# 正则表达式的使用
p_href = '<h3 class="news-title_1YtI1"><a href="(.*?)"'
href = re.findall(p_href, res, re.S)
p_title = '<h3 class="news-title_1YtI1">.*?>(.*?)</a>'
title = re.findall(p_title, res, re.S)
p_date = '<span class="c-color-gray2 c-font-normal">(.*?)</span>'
date = re.findall(p_date, res)
p_source = '<span class="c-color-gray c-font-normal c-gap-right">(.*?)</span>'
source = re.findall(p_source, res)
for i in range(len(title)):
title[i] = title[i].strip(