天天看點

微商相冊朋友圈爬圖文

因為易語言處理 UTF-8 不太方便,一些 emoji 表情轉換成 ANSI 之後會變成「??」,用 URL 編碼時也會帶來一些困難,所以這次改用 Python 爬取資料。廢話不多說,上代碼。

import re
import requests
import json
import time
from urllib import parse
def run(output_path="C:\\Users\\zzz\\Desktop\\建立文本文檔.txt"):
    """Fetch the album listing for every timestamp from sz() and log it.

    For each timestamp string returned by ``sz()`` one GET request is sent
    to the album-themes endpoint (the timestamp gets ``"000"`` appended
    before being placed in the ``time_stamp`` query parameter).  Every
    entry of ``result.goods_list`` in the JSON reply is appended to
    ``output_path`` as one line of the form::

        <url-quoted title>|<url-quoted str(imgsSrc)>|<digital_watermark>|<goods_id>

    Args:
        output_path: Text file the rows are appended to.  It is created
            when missing.  Defaults to the path the script always used.

    Raises:
        requests.RequestException: On network failure or timeout.
        KeyError: If the reply lacks ``result``/``goods_list`` or an entry
            lacks ``title``, ``imgsSrc`` or ``goods_id``.
    """
    for i, stamp in enumerate(sz()):
        url = ("https://www.wsxcme.com/service/album/get_album_themes_list.jsp?act=get_share_type&shareType=3&shop_id=A201804031358226960053045&search_value=&start_date=&end_date=&from_id=&tag_id=&tab_type=0&slip_type=1&time_stamp="
               + stamp + "000"
               + "&client_type=android&token=RUUyQThEQTU4NjA1NUI0NzUzRkE0MTQ4QjMxQzJEQjREN0E3RDY0MzEzNzRGNUY0ODgyQjJERjAzQ0QzMEJFQTZEQ0FBMUE3QjU3QzgzODIyRDBBNzY0N0VGRDA1QzAzNzI1Mzg1OTFFMzVEOTU1RDNGRkFFOUE4QzAwQjBERTc=&version=2843&_=1627363540999")
        # NOTE(review): verify=False disables TLS certificate checking; kept
        # from the original, but confirm it is really required.
        # The timeout keeps one stalled connection from hanging the whole run.
        res = requests.get(url, verify=False, timeout=30)
        res.encoding = "utf-8"
        r = res.text
        print(r)

        rows = []
        for goods in json.loads(r)['result']['goods_list']:
            rows.append("|".join([
                parse.quote(goods['title']),
                parse.quote(str(goods['imgsSrc'])),
                # A missing watermark is written as the text "None".
                str(goods.get('digital_watermark')),
                str(goods['goods_id']),
            ]) + "\n")

        # Append instead of read-everything/rewrite-everything: works when
        # the file does not exist yet and avoids re-copying the file on
        # every iteration.  UTF-8 keeps non-ASCII watermark text writable.
        with open(output_path, "a", encoding="utf-8") as f:
            f.writelines(rows)

        time.sleep(1)  # pause between consecutive requests
        print(i)
def sz():
    """Return the hard-coded timestamp strings, newest first.

    The values are 10-digit timestamp strings stored as one comma-separated
    string (written as adjacent literals, seven values per line).  The list
    is split on ``","`` and reversed, so the last value in the table is the
    first element of the result.

    Returns:
        list[str]: The timestamps in reverse table order.
    """
    # These large numbers are timestamps (per the original author's note;
    # run() appends "000" to each before sending it).
    # Original author's contact (QQ): 3221483426
    # NOTE(review): 1118054145 recurs out of sequence at the end of every
    # second row group below — presumably a placeholder for a date that
    # does not exist; kept as-is, confirm before removing.
    a = (
        "1451577610,1452009610,1452441610,1452873610,1453305610,1453737610,1454083210,"
        "1454256010,1454688010,1455120010,1455552010,1455984010,1456416010,1118054145,"
        "1456761610,1457193610,1457625610,1458057610,1458489610,1458921610,1459267210,"
        "1459440010,1459872010,1460304010,1460736010,1461168010,1461600010,1461945610,"
        "1462032010,1462464010,1462896010,1463328010,1463760010,1464192010,1464537610,"
        "1464710410,1465142410,1465574410,1466006410,1466438410,1466870410,1467216010,"
        "1467302410,1467734410,1468166410,1468598410,1469030410,1469462410,1469808010,"
        "1469980810,1470412810,1470844810,1471276810,1471708810,1472140810,1472486410,"
        "1472659210,1473091210,1473523210,1473955210,1474387210,1474819210,1475164810,"
        "1475251210,1475683210,1476115210,1476547210,1476979210,1477411210,1477756810,"
        "1477929610,1478361610,1478793610,1479225610,1479657610,1480089610,1480435210,"
        "1480521610,1480953610,1481385610,1481817610,1482249610,1482681610,1483027210,"
        "1483200010,1483632010,1484064010,1484496010,1484928010,1485360010,1485705610,"
        "1485878410,1486310410,1486742410,1487174410,1487606410,1488038410,1118054145,"
        "1488297610,1488729610,1489161610,1489593610,1490025610,1490457610,1490803210,"
        "1490976010,1491408010,1491840010,1492272010,1492704010,1493136010,1493481610,"
        "1493568010,1494000010,1494432010,1494864010,1495296010,1495728010,1496073610,"
        "1496246410,1496678410,1497110410,1497542410,1497974410,1498406410,1498752010,"
        "1498838410,1499270410,1499702410,1500134410,1500566410,1500998410,1501344010,"
        "1501516810,1501948810,1502380810,1502812810,1503244810,1503676810,1504022410,"
        "1504195210,1504627210,1505059210,1505491210,1505923210,1506355210,1506700810,"
        "1506787210,1507219210,1507651210,1508083210,1508515210,1508947210,1509292810,"
        "1509465610,1509897610,1510329610,1510761610,1511193610,1511625610,1511971210,"
        "1512057610,1512489610,1512921610,1513353610,1513785610,1514217610,1514563210,"
        "1514736010,1515168010,1515600010,1516032010,1516464010,1516896010,1517241610,"
        "1517414410,1517846410,1518278410,1518710410,1519142410,1519574410,1118054145,"
        "1519833610,1520265610,1520697610,1521129610,1521561610,1521993610,1522339210,"
        "1522512010,1522944010,1523376010,1523808010,1524240010,1524672010,1525017610,"
        "1525104010,1525536010,1525968010,1526400010,1526832010,1527264010,1527609610,"
        "1527782410,1528214410,1528646410,1529078410,1529510410,1529942410,1530288010,"
        "1530374410,1530806410,1531238410,1531670410,1532102410,1532534410,1532880010,"
        "1533052810,1533484810,1533916810,1534348810,1534780810,1535212810,1535558410,"
        "1535731210,1536163210,1536595210,1537027210,1537459210,1537891210,1538236810,"
        "1538323210,1538755210,1539187210,1539619210,1540051210,1540483210,1540828810,"
        "1541001610,1541433610,1541865610,1542297610,1542729610,1543161610,1543507210,"
        "1543593610,1544025610,1544457610,1544889610,1545321610,1545753610,1546099210,"
        "1546272010,1546704010,1547136010,1547568010,1548000010,1548432010,1548777610,"
        "1548950410,1549382410,1549814410,1550246410,1550678410,1551110410,1118054145,"
        "1551369610,1551801610,1552233610,1552665610,1553097610,1553529610,1553875210,"
        "1554048010,1554480010,1554912010,1555344010,1555776010,1556208010,1556553610,"
        "1556640010,1557072010,1557504010,1557936010,1558368010,1558800010,1559145610,"
        "1559318410,1559750410,1560182410,1560614410,1561046410,1561478410,1561824010,"
        "1561910410,1562342410,1562774410,1563206410,1563638410,1564070410,1564416010,"
        "1564588810,1565020810,1565452810,1565884810,1566316810,1566748810,1567094410,"
        "1567267210,1567699210,1568131210,1568563210,1568995210,1569427210,1569772810,"
        "1569859210,1570291210,1570723210,1571155210,1571587210,1572019210,1572364810,"
        "1572537610,1572969610,1573401610,1573833610,1574265610,1574697610,1575043210,"
        "1575129610,1575561610,1575993610,1576425610,1576857610,1577289610,1577635210,"
        "1577808010,1578240010,1578672010,1579104010,1579536010,1579968010,1580313610,"
        "1580486410,1580918410,1581350410,1581782410,1582214410,1582646410,1118054145,"
        "1582992010,1583424010,1583856010,1584288010,1584720010,1585152010,1585497610,"
        "1585670410,1586102410,1586534410,1586966410,1587398410,1587830410,1588176010,"
        "1588262410,1588694410,1589126410,1589558410,1589990410,1590422410,1590768010,"
        "1590940810,1591372810,1591804810,1592236810,1592668810,1593100810,1593446410,"
        "1593532810,1593964810,1594396810,1594828810,1595260810,1595692810,1596038410,"
        "1596211210,1596643210,1597075210,1597507210,1597939210,1598371210,1598716810,"
        "1598889610,1599321610,1599753610,1600185610,1600617610,1601049610,1601395210,"
        "1601481610,1601913610,1602345610,1602777610,1603209610,1603641610,1603987210,"
        "1604160010,1604592010,1605024010,1605456010,1605888010,1606320010,1606665610,"
        "1606752010,1607184010,1607616010,1608048010,1608480010,1608912010,1609257610,"
        "1609430410,1609862410,1610294410,1610726410,1611158410,1611590410,1611936010,"
        "1612108810,1612540810,1612972810,1613404810,1613836810,1614268810,1118054145,"
        "1614528010,1614960010,1615392010,1615824010,1616256010,1616688010,1617033610,"
        "1617206410,1617638410,1618070410,1618502410,1618934410,1619366410,1619712010,"
        "1619798410,1620230410,1620662410,1621094410,1621526410,1621958410,1622304010,"
        "1622476810,1622908810,1623340810,1623772810,1624204810,1624636810,1624982410,"
        "1625068810,1625500810,1625932810,1626364810,1626796810,1627228810,1627574410,"
        "1627747210,1628179210,1628611210,1629043210,1629475210,1629907210,1630252810,"
        "1630425610,1630857610,1631289610,1631721610,1632153610,1632585610,1632931210,"
        "1633017610,1633449610,1633881610,1634313610,1634745610,1635177610,1635523210,"
        "1635696010,1636128010,1636560010,1636992010,1637424010,1637856010,1638201610,"
        "1638288010,1638720010,1639152010,1639584010,1640016010,1640448010,1640793610"
    )
    b = a.split(',')
    return b[::-1]
           

如果對爬蟲有興趣,可以一起探討。代碼僅供學習參考,請勿用於任何商業用途或違法事務,否則後果自負。