# 天天看點
#
# 爬取攝圖網裡的音樂和視訊；攝圖網模擬登陸
# (Scrape music and video from 699pic.com, with a simulated login to the site.)

# First, install selenium (pip install selenium)
from selenium import webdriver
import time
from lxml import etree
import json
#添加顯示等待
from selenium.webdriver.support.ui import WebDriverWait
#根據條件尋找對應節點
from selenium.webdriver.support import expected_conditions as EC
import requests
import re
import urllib.parse
import urllib

# Request headers sent with every HTTP call in this script; the User-Agent
# identifies the client as a desktop Chrome browser on Linux.
header = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/68.0.3440.106 Safari/537.36"
    ),
}
#設定無頭浏覽器
# options=webdriver.ChromeOptions()
# options.set_headless()
#建立浏覽器驅動





# driver = webdriver.Chrome(
#     executable_path='/home/lbc/Documents/chromedriver',
#     )
#     # options=options 
# driver.get('http://699pic.com/soundtrack/?sem=1&sem_kid=206316&sem_type=3')
# #擷取cookie
# cookies = driver.get_cookies()

# cookie_dict = {}
# for cookie in cookies:
#     cookie_dict[cookie['name']] = cookie['value']
# # print(cookie_dict)

# #導入滑鼠移入
# from selenium.webdriver import ActionChains
# #用xpath解析并拖拽滑鼠進行點選
# # 點選登入
# element = driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div/a[2]')
# #将滑鼠移動到指定的節點
# ActionChains(driver).move_to_element(element).perform()
# #将滑鼠移動到指定的節點并且點選該節點(單擊)
# ActionChains(driver).move_to_element(element).click(element).perform()
# # 手機号登入
# element = driver.find_element_by_xpath('//*[@id="alert-action-login"]/div/div/div/div[1]/div[2]/p[2]/a[1]')
# #将滑鼠移動到指定的節點
# ActionChains(driver).move_to_element(element).perform()
# #将滑鼠移動到指定的節點并且點選該節點(單擊)
# ActionChains(driver).move_to_element(element).click(element).perform()

# driver.find_element_by_name('phone').send_keys('15326245558')
# #隐式等待
# driver.find_element_by_name('passwd').send_keys('q134679.')
# # 輸入賬号密碼點選登入
# element = driver.find_element_by_xpath('//*[@id="alert-action-login"]/div/div/div/div[2]/div[1]/div/label[3]/a')
# #将滑鼠移動到指定的節點
# ActionChains(driver).move_to_element(element).perform()
# #将滑鼠移動到指定的節點并且點選該節點(單擊)
# ActionChains(driver).move_to_element(element).click(element).perform()







def qingqiu(url):
    """Fetch one listing page and pass each track's detail page to ``music``.

    The HTTP status code of the listing response is printed. Every ``<li>``
    of the ``soundEffect-block`` list is inspected for its
    ``soundEffect-name`` link; the link is resolved against the final
    response URL and handed to ``music``.

    Args:
        url: Address of the listing page to crawl.
    """
    response = requests.get(url, headers=header)
    print(response.status_code)
    page = etree.HTML(response.text)
    entries = page.xpath(
        '//div[@class="audio-list"]/ul[@class="soundEffect-block clearfix"]/li'
    )
    for entry in entries:
        hrefs = entry.xpath('.//a[@class="soundEffect-name"]/@href')
        if not hrefs:
            # An entry without a name link has nothing to follow; skipping it
            # keeps one malformed item from aborting the rest of the page.
            continue
        # Links may be relative, so resolve them against the page URL.
        music(urllib.parse.urljoin(response.url, hrefs[0]))
def music(url):
    """Download every audio source referenced by a track detail page.

    The HTTP status code of the detail response is printed. The text nodes
    of the page's ``<h1>`` (inside the ``photo-content fl`` div) are passed,
    as the list returned by XPath, to ``song`` together with each
    ``<source src>`` found under the ``audio0`` player.

    Args:
        url: Address of the track detail page.
    """
    response = requests.get(url, headers=header)
    print(response.status_code)
    page = etree.HTML(response.text)
    title = page.xpath('//div[@class="photo-content fl"]/h1/text()')
    player_boxes = page.xpath(
        '//div[@class="audio-body"]/div[@class="audio-bodyBg"]'
        '/div[@class="audio-box clearfix"]'
    )
    # Gather the audio URLs of all player boxes, in document order.
    sources = [
        src
        for box in player_boxes
        for src in box.xpath('./audio[@id="audio0"]/source/@src')
    ]
    for src in sources:
        song(src, title)
def song(t, title):
    """Download the audio file at ``t`` and save it as ``<title>.mp3``.

    The file is written to the current working directory.

    Args:
        t: URL of the audio file.
        title: Track title used for the file name. May be a string or the
            list of text nodes that an XPath ``text()`` query returns; a
            list is joined into one string instead of being formatted as
            ``['...']``.
    """
    response = requests.get(t, headers=header)
    if isinstance(title, (list, tuple)):
        title = ' '.join(str(part).strip() for part in title)
    # Path separators and other characters that are invalid in file names
    # would make open() fail or write outside the intended directory.
    name = re.sub(r'[\\/:*?"<>|\r\n\t]+', '_', str(title)).strip()
    if not name:
        # No usable title on the page; fall back to a fixed name.
        name = 'untitled'
    # Write-only binary mode is enough: the file is never read back.
    with open('{}.mp3'.format(name), 'wb') as f:
        f.write(response.content)



    
    



if __name__ == '__main__':
    # range(3, 4) yields only 3, so a single listing page (page 3) is crawled.
    page_url = 'http://699pic.com/media/soundtrack-so-%s-0-0-0-0-0-0-0.html'
    for page in range(3, 4):
        qingqiu(page_url % page)