这只是源码，没什么意思；有意思的在这里：《震惊！我用Python分析了天蚕土豆的玄幻三部曲，竟然发现…》

斗破苍穹：

import re
import threading
from pyquery import PyQuery as pq

# Request headers dict handed to every pq() call in this script; it carries a
# desktop Chrome User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'
}


# Get chapter links and titles
def get_href_and_title(url):
    """Return ``(chapter_url, chapter_title)`` tuples scraped from the index page.

    The page at *url* is fetched and every ``#list > dl > dd`` entry after the
    first nine is collected, up to and including the chapter titled
    "第一千六百二十三章 结束，也是开始。". Entries whose anchor has no ``href``
    are skipped instead of raising ``TypeError``.

    :param url: URL of the novel's chapter index page.
    :return: list of ``(absolute_chapter_url, title)`` tuples in page order.
    """
    final_title = "第一千六百二十三章 结束，也是开始。"
    # The headers must be passed by keyword: pyquery treats a second
    # positional argument as request *data*, so the User-Agent would
    # otherwise never be sent as an HTTP header.
    html = pq(url, headers=headers, encoding="utf-8")
    lists = []
    for index, item in enumerate(html("#list > dl > dd").items(), start=1):
        # The first nine entries are not part of the collected chapter list
        # (presumably a "latest chapters" preview -- confirm against the site).
        if index <= 9:
            continue
        title = item.text()
        href = item('a').attr('href')
        if href is not None:
            lists.append(("http://www.tycqxs.com" + href, title))
        if title == final_title:
            break
    return lists


# Get the text of one chapter page
def get_one_page(url):
    """Return the chapter text found in ``#content`` of the page at *url*.

    Newlines are removed from the extracted text. The text in front of each
    opening parenthesis (ASCII ``(`` or full-width ``（``) is then kept and
    joined, which drops the parentheses themselves and everything after the
    last one. If that leaves nothing (for example, the text contains no
    parenthesis), the whole text is returned instead.

    :param url: URL of a single chapter page.
    :return: the chapter text as one line.
    """
    # Download the page once; the original fetched it twice. The headers are
    # passed by keyword because pyquery treats a second positional argument
    # as request data rather than HTTP headers.
    text = pq(url, headers=headers, encoding="utf-8")("#content").text().replace("\n", "")
    # Raw string avoids the invalid "\(" escape. The "|" was dropped because
    # inside a character class it is a literal pipe, not alternation.
    trimmed = "".join(re.findall(r"(.*?)[(（]", text))
    return trimmed if trimmed else text


def main():
    """Download every chapter of the novel and save each one as a UTF-8 text file.

    Chapters come from ``get_href_and_title``; entries whose title does not
    start with "第" are skipped. Each chapter is written to
    ``C:\\天蚕土豆\\DouPoCangQiong\\<title>.txt`` and its URL and title are printed.
    """
    import os  # local import: only this function needs it

    # Raw string: the original non-raw literal relied on invalid escape
    # sequences ("\天", "\D", "\{"), which newer Python versions warn about.
    out_dir = r'C:\天蚕土豆\DouPoCangQiong'
    # Create the target directory up front so open() cannot fail on a missing folder.
    os.makedirs(out_dir, exist_ok=True)
    for chapter_url, chapter_title in get_href_and_title("http://www.tycqxs.com/57_57672/"):
        # startswith() also copes with an empty title, where [0] would raise IndexError.
        if not chapter_title.startswith("第"):
            continue
        path = os.path.join(out_dir, '{}.txt'.format(chapter_title))
        with open(path, "w", encoding="utf-8") as f:
            f.write(get_one_page(chapter_url))
        print(chapter_url, chapter_title)


if __name__ == '__main__':
    # Pass the function itself as the thread target. Writing ``target=main()``
    # runs main() in the calling thread first and then starts a thread whose
    # target is None, so the thread does nothing.
    threading.Thread(target=main).start()

武动乾坤：

import re
import threading
from pyquery import PyQuery as pq

# Request headers dict handed to every pq() call in this script; it carries a
# desktop Edge User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763'
}


# Get chapter links and titles
def get_href_and_title(url):
    """Return ``(chapter_url, chapter_title)`` tuples scraped from the index page.

    Every ``#list > dl > dd`` entry of the page at *url* whose text starts with
    "第" is collected. Entries with empty text or without an ``href`` are
    skipped instead of raising.

    :param url: URL of the novel's chapter index page.
    :return: list of ``(absolute_chapter_url, title)`` tuples in page order.
    """
    lists = []
    # The headers must be passed by keyword: pyquery treats a second
    # positional argument as request *data*, not as HTTP headers.
    # NOTE(review): no explicit encoding is given here, although get_one_page
    # decodes pages of the same site as UTF-8 -- confirm the index decodes correctly.
    for item in pq(url, headers=headers)("#list > dl > dd").items():
        title = item.text()
        # startswith() is safe on an empty string, where [0] would raise IndexError.
        if not title.startswith("第"):
            continue
        href = item('a').attr('href')
        if href is None:
            continue
        lists.append(("http://www.xbiquge.la" + href, title))
    return lists


# Get the text of one chapter page
def get_one_page(url):
    """Return the chapter text found in ``#content`` of the page at *url*.

    Newlines are removed from the extracted text. The text in front of each
    occurrence of "微信" is then kept and joined, which drops the marker and
    everything after its last occurrence. If that leaves nothing (for example,
    the marker is absent), the whole text is returned so the chapter is not
    silently saved as an empty file.

    :param url: URL of a single chapter page.
    :return: the chapter text as one line.
    """
    # The headers are passed by keyword because pyquery treats a second
    # positional argument as request data rather than HTTP headers.
    text = pq(url, headers=headers, encoding="utf-8")("#content").text().replace("\n", "")
    # The original pattern ended in a lazy ".*?", which always matches the
    # empty string, so dropping it does not change the result.
    trimmed = "".join(re.findall(r"(.*?)微信", text))
    return trimmed if trimmed else text


def main():
    """Download every chapter of the novel and save each one as a UTF-8 text file.

    Chapters come from ``get_href_and_title``. Each chapter is written to
    ``C:\\天蚕土豆\\WuDongQianKun\\<title>.txt`` and its URL and title are printed.
    """
    import os  # local import: only this function needs it

    # Raw string: the original non-raw literal relied on invalid escape
    # sequences ("\天", "\W", "\{"), which newer Python versions warn about.
    out_dir = r'C:\天蚕土豆\WuDongQianKun'
    # Create the target directory up front so open() cannot fail on a missing folder.
    os.makedirs(out_dir, exist_ok=True)
    for chapter_url, chapter_title in get_href_and_title("http://www.xbiquge.la/15/15/"):
        path = os.path.join(out_dir, '{}.txt'.format(chapter_title))
        with open(path, "w", encoding="utf-8") as f:
            f.write(get_one_page(chapter_url))
        print(chapter_url, chapter_title)


if __name__ == '__main__':
    # Pass the function itself as the thread target. Writing ``target=main()``
    # runs main() in the calling thread first and then starts a thread whose
    # target is None, so the thread does nothing.
    threading.Thread(target=main).start()

大主宰：

import re
import threading
from pyquery import PyQuery as pq

# Request headers dict handed to every pq() call in this script; it carries a
# desktop Chrome User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'
}


# Get chapter links and titles
def get_href_and_title(url):
    """Return ``(chapter_url, chapter_title)`` tuples scraped from the index page.

    Every ``#main > div > dl > dd`` entry of the page at *url* that has an
    ``href`` is collected; the ``href`` value is used as-is.

    :param url: URL of the novel's chapter index page (decoded as GBK).
    :return: list of ``(chapter_url, title)`` tuples in page order.
    """
    # The headers must be passed by keyword: pyquery treats a second
    # positional argument as request *data*, not as HTTP headers.
    html = pq(url, headers=headers, encoding="gbk")
    lists = []
    for item in html("#main > div > dl > dd").items():
        href = item('a').attr('href')  # look the attribute up once
        if href is None:
            continue
        lists.append((href, item.text()))
    return lists


# Get the text of one chapter page
def get_one_page(url):
    """Return the chapter text found in ``#BookText`` of the page at *url*.

    Newlines are removed from the extracted text. The text in front of each
    opening parenthesis (ASCII ``(`` or full-width ``（``) is then kept and
    joined, which drops the parentheses themselves and everything after the
    last one. If that leaves nothing (for example, the text contains no
    parenthesis), the whole text is returned instead.

    :param url: URL of a single chapter page (decoded as GBK).
    :return: the chapter text as one line.
    """
    # Download the page once; the original fetched it twice. The headers are
    # passed by keyword because pyquery treats a second positional argument
    # as request data rather than HTTP headers.
    text = pq(url, headers=headers, encoding="gbk")("#BookText").text().replace("\n", "")
    # The "|" was dropped because inside a character class it is a literal
    # pipe, not alternation.
    trimmed = "".join(re.findall(r"(.*?)[(（]", text))
    return trimmed if trimmed else text


def main():
    """Download every chapter of the novel and save each one as a UTF-8 text file.

    Chapters come from ``get_href_and_title``; entries whose title does not
    start with "第" are skipped. Question marks (ASCII and full-width) are
    removed from the title before it is used as a file name. Each chapter is
    written to ``C:\\天蚕土豆\\DaZhuZai\\<title>.txt`` and its URL and title
    are printed.
    """
    import os  # local import: only this function needs it

    # Raw string: the original non-raw literal relied on invalid escape
    # sequences ("\天", "\D", "\{"), which newer Python versions warn about.
    out_dir = r'C:\天蚕土豆\DaZhuZai'
    # Create the target directory up front so open() cannot fail on a missing folder.
    os.makedirs(out_dir, exist_ok=True)
    for chapter_url, chapter_title in get_href_and_title("http://www.32xs.org/html/0/1/index.html"):
        # startswith() also copes with an empty title, where [0] would raise IndexError.
        if not chapter_title.startswith("第"):
            continue
        file_name = '{}.txt'.format(chapter_title.replace("?", "").replace("？", ""))
        with open(os.path.join(out_dir, file_name), "w", encoding="utf-8") as f:
            f.write(get_one_page(chapter_url))
        print(chapter_url, chapter_title)


if __name__ == '__main__':
    # Pass the function itself as the thread target. Writing ``target=main()``
    # runs main() in the calling thread first and then starts a thread whose
    # target is None, so the thread does nothing.
    threading.Thread(target=main).start()

都是一个套路，代码可以直接拿来用。

pyquery 爬取天蚕土豆经典玄幻三部曲：斗破苍穹、武动乾坤、大主宰

斗破苍穹：

武动乾坤：

大主宰：

继续阅读

TestLink导出用例转换工具(XML2Excel)

YAML 简介和 PyYAML 安全操作：YAML 支持的类型、YAML 的优点、YAML 的基本语法、Python 操作

Small tricks

libsvm for python 安装

学习软件测试基础测试第七天

Zeppelin 配置访问 REST API（Apache Zeppelin Configuration REST API）

【Torch】最简洁logging使用指南

27. Remove Element（列表）：题目、代码

sort()函数到底是怎样进行数字排序的

neo4j之cypher使用文档

Cloud Studio初体验

使用 ctypes 进行 Python 和 C 的混合编程

【python】【数据处理】画多维数据分布图

【python】netconf协议对接管理设备

「Python 网络自动化」NETCONF —— Python 使用 NETCONF 管理配置 H3C 网络设备

在python中创建excel并写入