import csv

import requests
from bs4 import BeautifulSoup

# CSV column headers / dict keys produced by the scraper.
titles = ('city', 'temp')
def pares_page(url):
    """Scrape one weather.com.cn text-forecast page.

    Parses the ``conMidtab`` block of the page at *url* and returns a list
    of ``{'city': ..., 'temp': ...}`` dicts, one per city row.  Returns an
    empty list if the expected markup is absent (layout change / error page).

    NOTE: the name is kept as-is for backward compatibility with existing
    callers; ``parse_page`` was presumably intended.
    """
    headers = {
        # Browser User-Agent: the site may block or alter responses to bare clients.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'
    }
    # timeout so a stalled server cannot hang the scraper forever
    response = requests.get(url, headers=headers, timeout=10)
    text = response.content.decode('utf-8')
    # html5lib is a lenient parser; this site's markup is not well-formed
    soup = BeautifulSoup(text, 'html5lib')
    conMidtab = soup.find('div', class_='conMidtab')
    if conMidtab is None:
        # Layout changed or an error page was served — skip instead of
        # crashing with AttributeError on conMidtab.find_all below.
        return []
    tables = conMidtab.find_all('table')
    lst = []
    for table in tables:
        # the first two rows of each table are column headers
        trs = table.find_all('tr')[2:]
        for index, tr in enumerate(trs):
            tds = tr.find_all('td')
            city_td = tds[0]
            if index == 0:
                # The first data row of a table carries the province name in
                # td[0]; the city name is shifted to td[1].
                city_td = tds[1]
            info = {}
            city = list(city_td.stripped_strings)[0]
            # second-to-last cell holds the temperature in this layout
            # (assumption from the original code — confirm against the site)
            temp_td = tds[-2]
            temp = list(temp_td.stripped_strings)[0]
            info['city'] = city
            info['temp'] = temp
            lst.append(info)
            print('city:', city, 'temp:', temp)
    return lst
def writeData(lst, filename='citytemp.csv', fieldnames=('city', 'temp')):
    """Write scraped rows to a CSV file.

    Args:
        lst: list of dicts with keys matching *fieldnames*.
        filename: output path (default kept for backward compatibility).
        fieldnames: CSV header / dict-key order; defaults to the module's
            original ('city', 'temp') columns.

    ``newline=''`` is required so csv handles line endings itself.
    """
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames)
        writer.writeheader()
        writer.writerows(lst)
def main():
    """Scrape several regional forecast pages and save all rows to one CSV.

    Aggregates the rows from every URL before writing, so the output file
    is produced once with all regions combined.
    """
    lst = []
    # Removed dead assignment `url = '...hb.shtml'` — it was immediately
    # overwritten by the loop variable below.
    urls = [
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    ]
    for url in urls:
        lst += pares_page(url)
    writeData(lst)


if __name__ == '__main__':
    main()