#coding:utf-8
import numpy as np
import pandas as pd
from lxml import etree
import csv
import requests
col = "ABCDEFGHIJKLM"
row = 1

# Module-level accumulators, one per output CSV column.  Each starts out as
# its own empty array; getData() grows them with np.append and run() swaps in
# fresh empty arrays when it is done with a batch.
(
    nameArray1,
    nameArray3,
    nameArray4,
    nameArray5,
    nameArray6,
    nameArray7,
    nameArray8,
    nameArray10,
    nameArray11,
    nameArray12,
    nameArray13,
    nameArray14,
    nameArray15,
    nameArray16,
    nameArray17,
    nameArrayEnd1,
    nameArrayEnd3,
) = (np.array([]) for _ in range(17))
def _strip_spaces(text):
    """Return *text* with every ASCII space character removed."""
    return text.replace(' ', '')


def getData(idUrl):
    """Scrape one enterprise detail page and rewrite the current page's CSV.

    The page at *idUrl* is requested until the server answers with HTTP 200.
    Its ``<td align="center">`` cells are then read: a handful of header
    cells describe the enterprise, followed by 10-cell rows.  Every row is
    appended to the module-level ``nameArray*`` accumulators (the enterprise
    header values are stored on the first row only, later rows get empty
    strings in those columns).  Finally all accumulated rows are written to
    ``shuju<yema>.csv``, overwriting any previous version of that file.

    Args:
        idUrl: Full URL of the enterprise detail page.

    Returns:
        None.  If the page has fewer than 15 ``<td>`` cells, "error return"
        is printed and nothing is appended or written.
    """
    global nameArray1, nameArray3, nameArray4, nameArray5, nameArray6
    global nameArray7, nameArray8, nameArray10, nameArray11, nameArray12
    global nameArray13, nameArray14, nameArray15, nameArray16, nameArray17
    global nameArrayEnd1, nameArrayEnd3
    global yema

    import time  # only needed here, for the pause between retries

    # Retry until the server returns 200.  Only request-level failures are
    # caught so that Ctrl-C / SystemExit can still stop the script, and a
    # short pause keeps the loop from hammering the server.
    while True:
        try:
            ht = requests.get(url=idUrl, timeout=(5, 15))
        except requests.RequestException:
            print("wait " + idUrl)
            time.sleep(1)
            continue
        if ht.status_code == 200:
            break
        print(ht)
        print("wait " + idUrl)
        time.sleep(1)

    html = etree.HTML(ht.text)
    print(ht.text)

    all_cells = html.xpath('//td')
    for cell in all_cells:
        print(cell)

    # The 15th <td> of the page feeds the '法人' column; a page without it is
    # skipped entirely.
    try:
        faren = all_cells[14].text
    except IndexError:
        print("error return")
        return

    res = html.xpath('//td[@align = "center"]')

    # Find the 1-based position of the marker cell.  Scanning continues for
    # three more cells after the latest match and then stops.
    # NOTE(review): when the marker is absent, `end` stays at 2000 and the
    # lookups below will most likely raise IndexError - confirm whether such
    # pages should be skipped instead.
    end = 2000
    for position, cell in enumerate(res, start=1):
        if cell.text is not None and _strip_spaces(cell.text) == '安全許可資訊':
            end = position
        if position > end + 3:
            break
    print("eeeeeeeeeeeeeeeeeeee" + str(end))

    for m in range(end // 10):
        if m == 0:
            # Enterprise-level values are recorded once, on the first row.
            print(_strip_spaces(res[1].text))
            nameArray1 = np.append(nameArray1, _strip_spaces(res[1].text))
            nameArray3 = np.append(nameArray3, _strip_spaces(res[3].text))
            nameArray4 = np.append(nameArray4, _strip_spaces(res[4].text))
            nameArray5 = np.append(nameArray5, _strip_spaces(res[5].text))
            nameArray6 = np.append(nameArray6, _strip_spaces(res[6].text))
            nameArray7 = np.append(nameArray7, _strip_spaces(faren))
            nameArrayEnd1 = np.append(nameArrayEnd1, _strip_spaces(res[end + 1].text))
            nameArrayEnd3 = np.append(nameArrayEnd3, _strip_spaces(res[end + 3].text))
        else:
            # Later rows leave the enterprise-level columns blank so that
            # every accumulator keeps the same length.
            nameArray1 = np.append(nameArray1, '')
            nameArray3 = np.append(nameArray3, '')
            nameArray4 = np.append(nameArray4, '')
            nameArray5 = np.append(nameArray5, '')
            nameArray6 = np.append(nameArray6, '')
            nameArray7 = np.append(nameArray7, '')
            nameArrayEnd1 = np.append(nameArrayEnd1, '')
            nameArrayEnd3 = np.append(nameArrayEnd3, '')

        # Each row occupies 10 consecutive cells; the first row starts after
        # cell 7.  The cell at base + 2 is not exported.
        base = 7 + 10 * m
        nameArray8 = np.append(nameArray8, _strip_spaces(res[base + 1].text))
        nameArray10 = np.append(nameArray10, _strip_spaces(res[base + 3].text))
        nameArray11 = np.append(nameArray11, _strip_spaces(res[base + 4].text))
        nameArray12 = np.append(nameArray12, _strip_spaces(res[base + 5].text))
        nameArray13 = np.append(nameArray13, _strip_spaces(res[base + 6].text))
        nameArray14 = np.append(nameArray14, _strip_spaces(res[base + 7].text))
        nameArray15 = np.append(nameArray15, _strip_spaces(res[base + 8].text))

        # The last two cells of a row may have no text at all; in that case
        # the None is stored as-is and reported on stdout.
        text16 = res[base + 9].text
        if text16 is None:
            nameArray16 = np.append(nameArray16, text16)
            print("res" + str(text16))
        else:
            nameArray16 = np.append(nameArray16, _strip_spaces(text16))

        text17 = res[base + 10].text
        if text17 is None:
            nameArray17 = np.append(nameArray17, text17)
            print("res" + str(text17))
        else:
            nameArray17 = np.append(nameArray17, _strip_spaces(text17))

    # Dump everything accumulated so far into the CSV named after the page
    # number held in `yema`.
    writefile = "shuju" + str(yema) + ".csv"
    data = [
        nameArray1, nameArray3, nameArray4, nameArray5, nameArray6, nameArray7,
        nameArray8, nameArray10, nameArray11, nameArray12, nameArray13, nameArray14,
        nameArray15, nameArray16, nameArray17, nameArrayEnd1, nameArrayEnd3,
    ]
    data = np.transpose(data)
    ser2 = pd.DataFrame(
        data,
        columns=['企業名稱', '營業證', '位址', '注冊資本', '組織機構代碼号', '法人',
                 '序号', '編号', '日期', '狀态', '資質序列', '類别', '等級', '發證機關', '核準日期', '證書', '有效期'],
    )
    print("write file")
    ser2.to_csv(writefile, encoding="utf_8_sig")
import sys
# Number of the listing page currently being processed.  run() updates it for
# every page it requests, and getData() uses it to build the output file name
# "shuju<yema>.csv".
yema = 0
def _fetch_listing_page(url):
    """GET *url*, retrying after a short pause until a response is received."""
    import time  # only needed here, for the pause between retries

    while True:
        try:
            return requests.get(url=url, timeout=(5, 15))
        except requests.RequestException:
            # Only request-level failures are retried, so Ctrl-C / SystemExit
            # can still stop the script.
            print("waiting..." + url)
            time.sleep(1)


def run():
    """Scrape the listing pages given on the command line.

    ``sys.argv[1]`` and ``sys.argv[2]`` are the first and last listing page
    numbers (both inclusive).  For every page the enterprise links are
    extracted from the ``onclick`` attributes, each enterprise is passed to
    getData(), and the module-level ``nameArray*`` accumulators are emptied
    before the next page starts.

    A listing page whose request fails is requested again until it succeeds.
    (Decrementing the loop variable of a ``for ... in range(...)`` loop has
    no effect on the iteration, so doing that would skip the failed page.)
    """
    global yema
    global nameArray1, nameArray3, nameArray4, nameArray5, nameArray6
    global nameArray7, nameArray8, nameArray10, nameArray11, nameArray12
    global nameArray13, nameArray14, nameArray15, nameArray16, nameArray17
    global nameArrayEnd1, nameArrayEnd3

    first_page = int(sys.argv[1])
    last_page = int(sys.argv[2])

    for page in range(first_page, last_page + 1):
        yema = page
        # "&regType=" is spelled out explicitly: the literal previously read
        # "®Type=", which looks like an HTML-entity mangling of "&reg" and
        # fused two query parameters into one.
        url = (
            "http://124.115.170.171:7001/PDR/network/informationSearch/informationSearchList"
            "?name=&type=&certDeadline=&certReviewUnit=&regType=&pid1=&pid2=&pid3=&pageNumber="
            + str(yema)
            + "&libraryName=enterpriseLibrary"
        )
        res = _fetch_listing_page(url)
        print(res.status_code)

        html = etree.HTML(res.text)
        for onclick in html.xpath('//td/p/a/@onclick'):
            # The onclick value is a call with comma-separated arguments; the
            # slices below cut the name, id and organisation code out of the
            # first three of them.
            parts = onclick.split(',')
            for part in parts:
                print(part)
            name = parts[0][6:-1]
            aid = parts[1][1:-2]
            code = parts[2][1:-1]
            print(name)
            print(aid)
            print(code)
            idUrl = (
                'http://124.115.170.171:7001/PDR/network/Enterprise/Informations/qyszit?enid='
                + aid + "&name=" + name + "&org_code=" + code + "&type="
            )
            print(idUrl)
            getData(idUrl)

        # Start the next page with empty accumulators.
        # NOTE(review): the reset is done once per listing page because the
        # output file is named per page - confirm against the intended layout.
        (
            nameArray1, nameArray3, nameArray4, nameArray5, nameArray6,
            nameArray7, nameArray8, nameArray10, nameArray11, nameArray12,
            nameArray13, nameArray14, nameArray15, nameArray16, nameArray17,
            nameArrayEnd1, nameArrayEnd3,
        ) = (np.array([]) for _ in range(17))
# Only start scraping when the file is executed as a script, not on import.
if __name__ == "__main__":
    run()