首頁 > 軟體

Python實現批次採集商品資料的範例詳解

2022-03-29 19:00:36

本次目的

python批次採集某商品資料

知識點

requests 傳送請求

re 解析網頁資料

json 型別資料提取

csv 表格資料儲存

開發環境

python 3.8

pycharm

requests

程式碼

匯入模組

import json
import random
import time
import csv
import requests
import re
import pymysql

核心程式碼

# 連線資料庫
def save_sql(title, pic_url, detail_url, view_price, item_loc, view_sales, nick):
    count = pymysql.connect(
        host='xxx.xxx.xxx.xxx',  # 資料庫地址
        port=3306,  # 資料庫埠
        user='xxxx',  # 資料庫賬號
        password='xxxx',  # 資料庫密碼
        db='xxxx'  # 資料庫表名
    )
    # 建立資料庫物件
    db = count.cursor()
    # 寫入sql
    sql = f"insert into goods(title, pic_url, detail_url, view_price, item_loc, view_sales, nick) values ('{title}', '{pic_url}', '{detail_url}', {view_price}, '{item_loc}', '{view_sales}', '{nick}')"
    # 執行sql
    db.execute(sql)
    # 儲存修改內容
    count.commit()
    db.close()

headers = {
    'cookie': 'miid=4137864361077413341; tracknick=%5Cu5218%5Cu6587%5Cu9F9978083283; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; cna=MNI4GicXYTQCAa8APqlAWWiS; enc=%2FWC5TlhZCGfEq7Zm4Y7wyNToESfZVxhucOmHkanuKyUkH1YNHBFXacrDRNdCFeeY9y5ztSufV535NI0AkjeX4g%3D%3D; t=ad15767ffa6febb4d2a8709edebf63d3; lgc=%5Cu5218%5Cu6587%5Cu9F9978083283; sgcookie=E100EcWpAN49d4Uc3MkldEc205AxRTa81RfV4IC8X8yOM08mjVtdhtulkYwYybKSRnCaLHGsk1mJ6lMa1TO3vTFmr7MTW3mHm92jAsN%2BOA528auARfjf2rnOV%2Bx25dm%2BYC6l; uc3=nk2=ogczBg70hCZ6AbZiWjM%3D&vt3=F8dCvCogB1%2F5Sh1kqHY%3D&lg2=Vq8l%2BKCLz3%2F65A%3D%3D&id2=UNGWOjVj4Vjzwg%3D%3D; uc4=nk4=0%40oAWoex2a2MA2%2F2I%2FjFnivZpTtTp%2F2YKSTg%3D%3D&id4=0%40UgbuMZOge7ar3lxd0xayM%2BsqyxOW; _cc_=W5iHLLyFfA%3D%3D; _m_h5_tk=ac589fc01c86be5353b640607e791528_1647451667088; _m_h5_tk_enc=7d452e4e140345814d5748c3e31fc355; xlly_s=1; x5sec=7b227365617263686170703b32223a223264393234316334363365353038663531353163633366363036346635356431434c61583635454745506163324f2f6b2b2b4b6166686f4d4d7a45774e7a4d794d6a59324e4473784d4b6546677037382f2f2f2f2f77453d227d; JSESSIONID=1F7E942AC30122D1C7DBA22C429521B9; tfstk=cKKGBRTY1F71aDbHPcs6LYjFVa0dZV2F6iSeY3hEAYkCuZxFizaUz1sbK1hS_r1..; l=eBEVp-O4gnqzSzLbBOfwnurza77OIIRAguPzaNbMiOCPO75p5zbNW60wl4L9CnGVhsTMR3lRBzU9BeYBqo44n5U62j-la1Hmn; isg=BDw8SnVxcvXZcEU4ugf-vTadDdruNeBfG0WXdBa9WicK4dxrPkd97hHTxQmZqRi3',
    'referer': 'https://s.taobao.com/search?q=%E4%B8%9D%E8%A2%9C&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20220323&ie=utf8&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s=',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="99", "Google Chrome";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36',
}
with open('淘寶.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(['title', 'pic_url', 'detail_url', 'view_price', 'item_loc', 'view_sales', 'nick'])

for page in range(1, 101):
    url = f'https://s.taobao.com/search?q=%E4%B8%9D%E8%A2%9C&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20220323&ie=utf8&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s={44*page}'
    response = requests.get(url=url, headers=headers)
    json_str = re.findall('g_page_config = (.*);', response.text)[0]
    json_data = json.loads(json_str)
    auctions = json_data['mods']['itemlist']['data']['auctions']
    for auction in auctions:
        try:
            title = auction['raw_title']
            pic_url = auction['pic_url']
            detail_url = auction['detail_url']
            view_price = auction['view_price']
            item_loc = auction['item_loc']
            view_sales = auction['view_sales']
            nick = auction['nick']
            print(title, pic_url, detail_url, view_price, item_loc, view_sales, nick)
            save_sql(title, pic_url, detail_url, view_price, item_loc, view_sales, nick)
            with open('淘寶.csv', mode='a', encoding='utf-8', newline='') as f:
                csv_writer = csv.writer(f)
                csv_writer.writerow([title, pic_url, detail_url, view_price, item_loc, view_sales, nick])
        except:
            pass
    time.sleep(random.randint(3, 5))

效果展示

到此這篇關於Python實現批次採集商品資料的範例詳解的文章就介紹到這了,更多相關Python採集商品資料內容請搜尋it145.com以前的文章或繼續瀏覽下面的相關文章希望大家以後多多支援it145.com!


IT145.com E-mail:sddin#qq.com