天天看点

VBA如何读取多个本地HTML:用VBA循环遍历多个URL并逐个发送HTTP请求

我有多个仪器编号及其对应的URL需要用这段代码处理。仪器编号从B列第8行开始,依次向下排列。这段VBA目前只处理仪器编号19930074944。怎样才能让它遍历所有这些仪器编号,并跳过空白单元格?

' Current request URL: the instrument number 19930074944 is hard-coded in the rec= query parameter
searchResultsURL = baseURL & "GetRecDataDetail.aspx?rec=19930074944&suf=&bdt=1/1/1947&edt=11/18/2016&nm=&doc1=&doc2=&doc3=&doc4=&doc5="

所以,我需要把它改成:

' InstNum is concatenated into the rec= query parameter; the string literal must be closed before "& InstNum &"
searchResultsURL = baseURL & "GetRecDataDetail.aspx?rec=" & InstNum & "&suf=&bdt=1/1/1947&edt=11/18/2016&nm=&doc1=&doc2=&doc3=&doc4=&doc5="

然后InstNum必须引用B8及其下方的单元格,并针对每个不同的URL运行全部这些代码。我不知道该怎么做。非常感谢!

Option Explicit

Public Sub Download_PDF()

    'Entry point (macro): downloads the unofficial PDF for one hard-coded
    'instrument number and saves it in the folder of this workbook.
    '
    'All of the work is done by DownloadInstrumentPDF, which takes the instrument
    'number as an argument so that it can be called repeatedly (for example once
    'per non-blank cell of a worksheet range).

    Const INSTRUMENT_NUMBER As String = "19930074944"

    DownloadInstrumentPDF INSTRUMENT_NUMBER

End Sub

Private Function DownloadInstrumentPDF(ByVal instNum As String) As String

    'Requests the record detail page for instNum, follows the "pages" link to
    'the PDF and writes the PDF to <workbook folder>\<instNum>.pdf.
    '
    'Parameters:
    '   instNum - instrument number placed in the rec= query parameter.
    'Returns:
    '   Full path of the saved PDF file.
    'Raises:
    '   vbObjectError-based errors when a request does not return HTTP 200 or
    '   when the PDF link is not present in the detail page.

    Const WinHttpRequestOption_EnableRedirects = 6
    Const USER_AGENT As String = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0"
    Const ERR_BASE As Long = vbObjectError + 513

    Dim baseURL As String, searchResultsURL As String, pdfURL As String, PDFdownloadURL As String
    Dim httpReq As Object
    Dim HTMLdoc As Object
    Dim PDFlink As Object
    Dim cookie As String
    Dim downloadFolder As String, localFile As String
    Dim pdfBytes() As Byte
    Dim fileNum As Integer

    'Folder in which the downloaded file will be saved.
    'ThisWorkbook.Path is empty for a workbook that has never been saved, so
    'fall back to the current directory instead of writing to "\".
    downloadFolder = ThisWorkbook.Path
    If Len(downloadFolder) = 0 Then downloadFolder = CurDir
    If Right(downloadFolder, 1) <> "\" Then downloadFolder = downloadFolder & "\"

    instNum = Trim$(instNum)
    localFile = downloadFolder & instNum & ".pdf"

    baseURL = "http://recorder.maricopa.gov/recdocdata/"
    'No space is allowed between "?" and "rec=": it would corrupt the query string
    searchResultsURL = baseURL & "GetRecDataDetail.aspx?rec=" & instNum & "&suf=&bdt=1/1/1947&edt=11/18/2016&nm=&doc1=&doc2=&doc3=&doc4=&doc5="

    Set httpReq = CreateObject("WinHttp.WinHttpRequest.5.1")

    With httpReq

        'Send GET to request search results page
        .Open "GET", searchResultsURL, False
        .setRequestHeader "User-Agent", USER_AGENT
        .Send

        If .Status <> 200 Then
            Err.Raise ERR_BASE, "DownloadInstrumentPDF", _
                "Detail page request for " & instNum & " returned HTTP " & .Status
        End If

        'getResponseHeader raises an error when the header is absent, so the
        'cookie is treated as optional
        cookie = ""
        On Error Resume Next
        cookie = .getResponseHeader("Set-Cookie")
        On Error GoTo 0

        'Put response in HTMLDocument for parsing
        Set HTMLdoc = CreateObject("HTMLfile")
        HTMLdoc.body.innerHTML = .responseText

        'Get PDF URL from pages link
        '<a id="ctl00_ContentPlaceHolder1_lnkPages" title="Click to view unofficial document"
        '   href="unofficialpdfdocs.aspx?rec=19930074944&pg=1&cls=RecorderDocuments&suf=" target="_blank">11</a>
        Set PDFlink = HTMLdoc.getElementById("ctl00_ContentPlaceHolder1_lnkPages")

        If PDFlink Is Nothing Then
            Err.Raise ERR_BASE + 1, "DownloadInstrumentPDF", _
                "No PDF pages link found in the detail page for " & instNum
        End If

        'The relative href is reported with an "about:" prefix by the HTMLfile
        'document; swap that prefix for the site base URL
        pdfURL = Replace(PDFlink.href, "about:", baseURL)

        'Send GET request to the PDF URL with automatic http redirects disabled.
        'This returns a http 302 status (Found) with the Location header
        'containing the URL of the PDF file
        .Open "GET", pdfURL, False
        .setRequestHeader "User-Agent", USER_AGENT
        .setRequestHeader "Referer", searchResultsURL

        'Cookies are sent back in the "Cookie" request header ("Set-Cookie" is
        'the response header name); only the name=value pair before the first
        '";" is returned, not the cookie attributes
        If Len(cookie) > 0 Then .setRequestHeader "Cookie", Trim$(Split(cookie, ";")(0))

        .Option(WinHttpRequestOption_EnableRedirects) = False
        .Send

        Select Case .Status
            Case 301, 302, 303, 307, 308

                'NOTE(review): assumes the Location header holds an absolute URL - confirm
                PDFdownloadURL = .getResponseHeader("Location")

                'Send GET to request the PDF file download
                'NOTE(review): this User-Agent (rv:47.0 with Firefox/46.0) differs from
                'the one used for the first two requests; kept as in the original
                .Open "GET", PDFdownloadURL, False
                .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:47.0) Gecko/20100101 Firefox/46.0"
                .setRequestHeader "Referer", pdfURL
                .Send

        End Select

        If .Status <> 200 Then
            Err.Raise ERR_BASE + 2, "DownloadInstrumentPDF", _
                "PDF request for " & instNum & " returned HTTP " & .Status
        End If

        pdfBytes = .responseBody

    End With

    'Binary mode does not truncate an existing file, so remove any previous
    'download first
    If Len(Dir(localFile)) > 0 Then Kill localFile

    fileNum = FreeFile
    Open localFile For Binary Access Write As #fileNum
    Put #fileNum, , pdfBytes
    Close #fileNum

    DownloadInstrumentPDF = localFile

End Function