下载一本电子书籍:每页是一张图片,图片的总数就是整本教材的页数。
流程:利用webbrowser来判断网页加载完成,对网页代码用正则提取对应的书页实际地址。逐个下载每个书页到一个文件夹。
问题:webbrowser在判断加载完成时出现困难。
1.完成状态无法使用。wb.ReadyState = WebBrowserReadyState.Complete根本不起作用。
2.网页是否繁忙(Not wb.IsBusy)起作用,但同类网页激活次数不一,有些2次,有些3次,还有些4次。无法判断哪一次最终加载完成。
3.合并状态也无法判断加载完成:If Not wb.IsBusy And wb.ReadyState = WebBrowserReadyState.Complete Then
初步解决:
为了了解到底是哪个因素对加载完成起标志作用,在原程序中加入一个列表框(用来看网页加载完成进入时的信息)。
利用Listbox1.Items.add(e.GetType.Tostring & "=" & e.Url.Tostring)来观察每次进入加载完成的信息情况。
终于发现e.Url.ToString有变化情况:主要是googlead和api.uyan两类地址在变化。前者出现次数不确定(1-3次);后者每个网页必定只加载一次。
于是,判断网页加载完成由api.uyan来决定(不同网页网址不同)。删除listbox完成本次小程序。
程序界面:

完整源代码:
Imports System.Text.RegularExpressions
''' <summary>
''' Main window of the e-book page downloader.
''' Starting from the URL typed into txtFirstWeb, it navigates the embedded
''' WebBrowser (wb) through pages "001.htm", "002.htm", ... of the book, extracts
''' the page image URL from each page's HTML with a regular expression and saves
''' the image as "NNN.jpg" in the download folder. Progress is shown in lblState.
''' </summary>
Public Class Form1
    ' Folder that receives the page images; created on demand before each download.
    Private Const DownloadFolder As String = "D:\School\"

    ' DocumentCompleted fires several times per page (once per frame/ad). Only the
    ' event whose URL contains this marker is treated as "page fully loaded".
    Private Const PageLoadedMarker As String = "api.uyan"

    ' Total page count, embedded in the page script as "maxPage=NNN;".
    Private Shared ReadOnly MaxPageRegex As New Regex("(?<=maxPage=)\d{1,3}(?=;)")

    ' URL of the page image: the src of the <img ... id="ebookimg"> element.
    Private Shared ReadOnly ImageUrlRegex As New Regex("(?<=img[ ]{1,3}src="").*?\.jpg(?=""[ ]{1,3}id=""ebookimg)")

    Dim mythread As Threading.Thread 'worker thread of the download in progress
    Dim strHead As String 'first-page URL up to and including its last "/"
    Dim intPage As Int32 'page currently being loaded/downloaded (1-based)
    Dim intMax As Int32 'total number of pages, read from the first page
    Dim intCount As Int32 'number of pages fully loaded in the current run
    Dim intRun As Int32 'incremented on every Start click; identifies the current run

    ''' <summary>
    ''' Validates the first-page URL, resets the per-run state and loads page 001.
    ''' </summary>
    Private Sub btnStart_Click(sender As Object, e As EventArgs) Handles btnStart.Click
        If txtFirstWeb.Text = "" Then
            MsgBox("网址错误")
            Exit Sub
        End If

        ' A usable URL has at least "scheme:", "", host and a file name when split on "/".
        Dim parts() As String = txtFirstWeb.Text.Split("/"c)
        If parts.GetUpperBound(0) < 3 Then
            MsgBox("网址错误")
            Exit Sub
        End If

        ' Reset state only after validation, so an invalid input does not disturb
        ' a run that is already in progress.
        intRun += 1
        intCount = 0
        intMax = 0
        intPage = 1

        ' Everything except the last segment (the page file name), with a trailing "/".
        strHead = String.Join("/", parts, 0, parts.Length - 1) & "/"

        wb.Navigate(PageUrl(intPage))
    End Sub

    ''' <summary>
    ''' Runs when the browser finishes loading a document or frame. Once the marker
    ''' frame has loaded, reads the page count (first page only), extracts the image
    ''' URL and starts a background download for the current page.
    ''' </summary>
    Private Sub wb_DocumentCompleted(sender As Object, e As WebBrowserDocumentCompletedEventArgs) Handles wb.DocumentCompleted
        If Not e.Url.ToString().Contains(PageLoadedMarker) Then
            Exit Sub
        End If

        Dim html As String = wb.DocumentText

        intCount += 1
        If intCount = 1 Then
            ' The page count is only read once per run, from the first page.
            Dim maxMatch As Match = MaxPageRegex.Match(html)
            If Not maxMatch.Success OrElse Not Integer.TryParse(maxMatch.Value, intMax) Then
                lblState.Text = "状态:未能获取总页数"
                Exit Sub
            End If
        End If

        Dim imageMatch As Match = ImageUrlRegex.Match(html)
        If Not imageMatch.Success Then
            ' Report the problem instead of stopping without any feedback.
            lblState.Text = "状态:第" & intPage.ToString("000") & "页未找到图片地址"
            Exit Sub
        End If

        ' Snapshot the values for the worker so it never reads fields that the UI
        ' thread may change (for example when Start is clicked again).
        Dim imageUrl As String = imageMatch.Value
        Dim page As Int32 = intPage
        Dim run As Int32 = intRun

        mythread = New Threading.Thread(Sub() DownFile(imageUrl, page, run))
        ' Background thread: closing the form must not keep the process alive.
        mythread.IsBackground = True
        mythread.Start()
    End Sub

    ''' <summary>
    ''' Worker-thread body: downloads one page image and then hands the resulting
    ''' status text back to the UI thread, which decides what to load next.
    ''' </summary>
    ''' <param name="strweb">URL of the page image.</param>
    ''' <param name="page">Page number used for the file name and the status text.</param>
    ''' <param name="run">Run identifier captured when the download was started.</param>
    Private Sub DownFile(ByVal strweb As String, ByVal page As Int32, ByVal run As Int32)
        Dim status As String
        Try
            System.IO.Directory.CreateDirectory(DownloadFolder)
            My.Computer.Network.DownloadFile(strweb, DownloadFolder & page.ToString("000") & ".jpg")
            status = "状态:下载第" & page.ToString("000") & "页"
        Catch ex As Exception
            ' A failed page is reported but does not stop the run.
            status = "状态:第" & page.ToString("000") & "页下载失败." & ex.Message
        End Try

        Try
            ' Posted asynchronously: the worker simply ends afterwards, so there is
            ' no need to abort the thread or to block the UI thread while waiting.
            Me.BeginInvoke(New MethodInvoker(Sub() OnDownloadFinished(run, status)))
        Catch ex As InvalidOperationException
            ' The form was closed (handle destroyed/disposed) while the download was
            ' running; there is no UI left to update.
        End Try
    End Sub

    ''' <summary>
    ''' UI-thread continuation of a finished download: shows its status and either
    ''' reports completion or navigates to the next page.
    ''' </summary>
    Private Sub OnDownloadFinished(ByVal run As Int32, ByVal status As String)
        ' Ignore results of a download that belongs to a run superseded by a new Start.
        If run <> intRun Then
            Exit Sub
        End If

        ShowMessage(status)

        If intPage >= intMax Then
            lblState.Text = "状态:已完成!"
        Else
            intPage += 1
            wb.Navigate(PageUrl(intPage))
        End If
    End Sub

    ''' <summary>Builds the URL of the given page: head + three-digit page number + ".htm".</summary>
    Private Function PageUrl(ByVal page As Int32) As String
        Return strHead & page.ToString("000") & ".htm"
    End Function

    ''' <summary>Shows a status message in the status label (UI thread only).</summary>
    Private Sub ShowMessage(ByVal m As String)
        lblState.Text = m
    End Sub
End Class