首頁 > 軟體

Python實戰實現爬取天氣資料並完成視覺化分析詳解

2022-06-16 14:01:39

實現需求:

從網上(隨便一個網址,我爬的網址會在評論區告訴大家,dddd)獲取某一年的歷史天氣資訊,包括每天最高氣溫、最低氣溫、天氣狀況、風向等,完成以下功能:

(1)將獲取的資料資訊儲存到csv格式的檔案中,檔案命名為”城市名稱.csv”,其中每行資料格式為“日期,最高溫,最低溫,天氣,風向”;

(2)在資料中增加“平均溫度”一列,其中:平均溫度=(最高溫+最低溫)/2,在同一張圖中繪製兩個城市一年平均氣溫走勢折線圖;

(3)統計兩個城市各類天氣的天數,並繪製條形圖進行對比,假設適合旅遊的城市指數由多雲天氣佔比0.3,晴天佔比0.4,陰天數佔比0.3,試比較兩個城市中哪個城市更適合旅遊;

(4)統計這兩個城市每個月的平均氣溫,繪製折線圖,並通過折線圖分析該城市的哪個月最適合旅遊;

(5)統計出這兩個城市一年中,平均氣溫在18~25度,風力小於5級的天數,並假設該類天氣數越多,城市就越適宜居住,判斷哪個城市更適合居住;

爬蟲程式碼:

import random
import time
from spider.data_storage import DataStorage
from spider.html_downloader import HtmlDownloader
from spider.html_parser import HtmlParser
class SpiderMain:
    """Crawl the twelve monthly weather-history pages of one year and store the rows."""

    # Column names handed to the storage component as the first (header) row.
    HEADER = ["日期", "最高氣溫", "最低氣溫", "天氣", "風向"]

    def __init__(self, base_url="XXXX", year=2021, max_delay=10):
        """
        :param base_url: URL prefix placed in front of "<year><month>.html"
        :param year: year whose twelve monthly pages are crawled
        :param max_delay: upper bound (seconds) of the random pause before each request
        """
        self.base_url = base_url
        self.year = year
        self.max_delay = max_delay
        self.html_downloader=HtmlDownloader()
        self.html_parser=HtmlParser()
        self.data_storage=DataStorage()

    def start(self):
        """
        Run the crawl: download every monthly page, parse it, then store everything.

        All months are collected under a single header row and stored with one
        call. Previously only January carried the header, so for every later
        month the first data row was handed to the storage component as if it
        were the column names.
        :return: None
        """
        rows = [list(self.HEADER)]
        for month in range(1, 13):
            # Sleep a random 0..max_delay seconds so the requests are not evenly spaced.
            time.sleep(random.randint(0, self.max_delay))
            # "{:02d}" zero-pads the month, giving e.g. 202101.html ... 202112.html.
            url = "{}{}{:02d}.html".format(self.base_url, self.year, month)
            html = self.html_downloader.download(url)
            rows.extend(self.html_parser.parser(html))
        self.data_storage.storage(rows)
# Script entry point: build the crawler and run it only when this file is executed directly.
if __name__=="__main__":
    main=SpiderMain()
    main.start()
import requests as requests
class HtmlDownloader:
    """Fetch web pages over HTTP with a browser-like User-Agent header."""

    def download(self,url,timeout=10):
        """
        Download the page at the given url.

        :param url: address of the page to fetch
        :param timeout: seconds to wait for the server before giving up;
                        without it a stalled connection would block forever
        :return: the response body decoded as UTF-8 text
        :raises requests.HTTPError: when the server answers with a 4xx/5xx status
        """
        headers = {"User-Agent":
                       "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0"}
        result = requests.get(url, headers=headers, timeout=timeout)
        # Fail here with a clear HTTP error instead of passing an error page to the parser.
        result.raise_for_status()
        return result.content.decode('utf-8')

此處需要注意:請將 User-Agent 換成自己的瀏覽器存取該網址時所送出的值。檢視方法很簡單:進入網站後,在網頁上按右鍵,點選「檢查」,就會出現這樣的介面:

然後只需再點選網路,再隨便點選一個請求,如下圖:

就可以進入如下圖,然後再複製,圖中User-Agent的內容就好了!

繼續:

from bs4 import BeautifulSoup
class HtmlParser:
    """Extract the per-day weather rows from a monthly history page."""

    def parser(self,html):
        """
        Parse the given html.

        Looks for <div class="tian_three"> inside <body>, takes its first <ul>,
        and turns every <li> into one row made of the text of the <div>
        elements it contains, with "℃" and spaces removed.
        :param html: page source as text
        :return: list of rows, each row a list of strings
        :raises ValueError: when the expected div/ul structure is not in the page
        """
        soup = BeautifulSoup(html, "html.parser")
        body = soup.body
        # The attrs filter must be a dict; the former {'class:', 'tian_three'} was a set literal.
        container = body.find('div', {'class': 'tian_three'}) if body is not None else None
        listing = container.find('ul') if container is not None else None
        if listing is None:
            raise ValueError("weather list (div.tian_three > ul) not found in page")

        weather = []
        for item in listing.find_all('li'):
            # get_text() also copes with nested markup, where .string would be None.
            cells = [
                cell.get_text().replace("℃", "").replace(" ", "")
                for cell in item.find_all("div")
            ]
            weather.append(cells)
        return weather
import pandas as pd
class DataStorage:
    """Append parsed weather rows to a CSV file."""

    def storage(self,weather,path=r"C:\Users\86183\Desktop\成都.csv"):
        """
        Store the rows in a CSV file (append mode).

        :param weather: list of rows; weather[0] supplies the column names and
                        the remaining rows are the data
        :param path: destination file. The default is a raw string: in a normal
                     literal the backslash-U sequence is read as a unicode
                     escape and the module does not compile.
        :return: None
        """
        if not weather:
            # Nothing was parsed, so there is not even a header row to write.
            return
        data = pd.DataFrame(columns=weather[0], data=weather[1:])
        data.to_csv(path, index=False, sep=",", mode="a")

注意,檔案儲存路徑要改成你們自己的哦!(Windows 路徑請寫成原始字串 r"..." 或改用正斜線 "/",否則路徑中的 \U 會被當成 Unicode 跳脫字元而導致語法錯誤。)

ok,爬取程式碼就到這,接下來是圖形化效果大致如下:

程式碼如下:

import pandas as pd
import matplotlib as mpl
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["font.sans-serif"] = ["SimHei"]  # set the sans-serif font to SimHei (the chart labels below are Chinese)
plt.rcParams["axes.unicode_minus"] = False  # avoids the garbled "-" minus sign in figures
def broken_line_chart(x, y1, y2):
    """Draw, save and display the two daily mean-temperature lines.

    :param x: x-axis values shared by both lines
    :param y1: values drawn as the cyan '瀘州' line
    :param y2: values drawn as the yellow '成都' line
    :return: None; the figure is saved as "平均氣溫走勢折線圖.png" and then shown
    """
    plt.figure(dpi=500, figsize=(10, 5))
    for values, colour, city in ((y1, 'cyan', '瀘州'), (y2, 'yellow', '成都')):
        plt.plot(x, values, color=colour, label=city)
    plt.title("瀘州-成都每日平均氣溫折線圖")

    # One major tick every 30 positions keeps the axis readable.
    axes = plt.gca()
    axes.xaxis.set_major_locator(mpl.ticker.MultipleLocator(30))
    # Tilt the tick labels by 30 degrees and shrink them.
    plt.xticks(rotation=30, fontsize=8)

    plt.xlabel("日期")
    plt.ylabel("溫度(℃)")
    plt.legend()
    plt.savefig("平均氣溫走勢折線圖.png")
    plt.show()
    plt.close()
# --- File locations: change these to match your own machine ------------------
# The Windows paths must be raw strings: in an ordinary literal the "\U" of
# "C:\Users" starts a unicode escape and the script fails to compile.
SOURCE_LUZHOU = r'C:\Users\86183\Desktop\瀘州.csv'
SOURCE_CHENGDU = r'C:\Users\86183\Desktop\成都.csv'
OUTPUT_LUZHOU = "D:/PythonProject/spider/瀘州.csv"
OUTPUT_CHENGDU = "D:/PythonProject/spider/成都.csv"


def _load_city(path):
    """Read one scraped city CSV, trim the date cell and add the "平均氣溫" column."""
    frame = pd.read_csv(path)
    # Keep only the first 10 characters of the date cell (drops the weekday suffix).
    frame["日期"] = frame["日期"].astype(str).str[:10]
    # Mean temperature = (high + low) / 2, kept as a float (no truncation to int).
    frame["平均氣溫"] = (frame["最高氣溫"].astype(float) + frame["最低氣溫"].astype(float)) / 2
    return frame


def _monthly_mean(frame):
    """Return the mean of "平均氣溫" for months 1..12 (NaN for a month without rows)."""
    months = pd.to_datetime(frame["日期"]).dt.month
    # Select the column by name; averaging the whole frame would hit the text columns.
    return frame["平均氣溫"].groupby(months).mean().reindex(range(1, 13))


def _comfortable_days(frame):
    """Count days whose mean temperature is within 18..25 and whose wind force is below 5."""
    # The first run of digits in the wind cell is taken as the force level.
    # NOTE(review): rows without any digit are not counted - confirm against the data.
    level = pd.to_numeric(
        frame["風向"].astype(str).str.extract(r"(\d+)", expand=False), errors="coerce"
    )
    mild = frame["平均氣溫"].between(18, 25)
    calm = level < 5  # NaN compares as False
    return int((mild & calm).sum())


new_data_luZhou = _load_city(SOURCE_LUZHOU)
new_data_chengdu = _load_city(SOURCE_CHENGDU)

# Save the enriched data to new CSV files.
new_data_luZhou.to_csv(OUTPUT_LUZHOU, index=False, sep=",")
new_data_chengdu.to_csv(OUTPUT_CHENGDU, index=False, sep=",")

# --- Daily mean-temperature line chart ---------------------------------------
# The dates of the first city label the x axis; both files are expected to
# contain the same number of days.
x = new_data_luZhou["日期"].tolist()
y1 = new_data_luZhou["平均氣溫"].tolist()
y2 = new_data_chengdu["平均氣溫"].tolist()
broken_line_chart(x, y1, y2)

# --- Monthly mean-temperature line chart -------------------------------------
month_average_l = _monthly_mean(new_data_luZhou).tolist()
month_average_c = _monthly_mean(new_data_chengdu).tolist()
month_list = [str(i) + "月" for i in range(1, 13)]
plt.figure(dpi=500, figsize=(10, 5))
plt.title("瀘州-成都每月平均折線氣溫圖")
plt.plot(month_list, month_average_l, color="cyan", label="瀘州", marker='o')
plt.plot(month_list, month_average_c, color="blue", label='成都', marker='v')
# Value labels: slightly above the first line, slightly below the second.
for a, b in zip(month_list, month_average_l):
    if pd.notna(b):
        plt.text(a, b + 0.5, '%.2f' % b, horizontalalignment='center', verticalalignment='bottom', fontsize=6)
for a, b in zip(month_list, month_average_c):
    if pd.notna(b):
        plt.text(a, b - 0.5, '%.2f' % b, horizontalalignment='center', verticalalignment='bottom', fontsize=6)
plt.legend()
plt.xlabel("月份")
plt.ylabel("溫度(℃)")
plt.savefig("月平均氣溫折線圖.png")
plt.show()

# --- Days suitable for living: mean temperature 18-25 and wind force < 5 -----
# Both cities use the same thresholds (one branch used to compare against 10).
day_l = _comfortable_days(new_data_luZhou)
day_c = _comfortable_days(new_data_chengdu)
plt.figure(dpi=500, figsize=(8, 4))
plt.title("瀘州-成都平均氣溫在18-25且風力<5級的天數")
list_name = ['瀘州', '成都']
list_days = [day_l, day_c]
plt.bar(list_name, list_days, width=0.5)
plt.text(0, day_l, '%.0f' % day_l, horizontalalignment='center', verticalalignment='bottom', fontsize=7)
plt.text(1, day_c, '%.0f' % day_c, horizontalalignment='center', verticalalignment='bottom', fontsize=7)
plt.xlabel("城市")
plt.ylabel("天數(d)")
plt.savefig("適宜居住柱形圖.png")
plt.show()

# --- Number of days per weather type, compared in a grouped bar chart --------
# value_counts() covers every row; the former range(1, 365) loop skipped the first day.
weather_dict_luZhou = new_data_luZhou["天氣"].value_counts().to_dict()
weather_dict_chengdu = new_data_chengdu["天氣"].value_counts().to_dict()
# Every weather type seen in either city; a type missing from one city counts as 0 there.
weather_list = sorted(set(weather_dict_luZhou) | set(weather_dict_chengdu))
value_l = [weather_dict_luZhou.get(w, 0) for w in weather_list]
value_c = [weather_dict_chengdu.get(w, 0) for w in weather_list]

plt.figure(dpi=500, figsize=(10, 5))
plt.title("瀘州-成都各種天氣情況對比")
x1 = list(range(len(weather_list)))
x = [i + 0.4 for i in x1]  # second bar of each pair sits one bar-width to the right
plt.bar(x1, value_l, width=0.4, color='red', label='瀘州')
plt.bar(x, value_c, width=0.4, color='orange', label='成都')
for a, b in zip(x1, value_l):
    plt.text(a, b + 0.4, '%.0f' % b, ha='center', va='bottom', fontsize=7)
for a, b in zip(x, value_c):
    plt.text(a, b + 0.4, '%.0f' % b, ha='center', va='bottom', fontsize=7)
plt.xticks(x1, weather_list)
plt.ylabel("天數")
plt.xlabel("天氣")
plt.xticks(rotation=270)
plt.legend()
plt.savefig("瀘州成都天氣情況對比.png")
plt.show()
plt.close()

好的這次就到這兒吧,我們下次見哦!!!

到此這篇關於Python實戰實現爬取天氣資料並完成視覺化分析詳解的文章就介紹到這了,更多相關Python爬取天氣資料內容請搜尋it145.com以前的文章或繼續瀏覽下面的相關文章希望大家以後多多支援it145.com!


IT145.com E-mail:sddin#qq.com