urlencode & quote & unquote (url 中帶中文參數)

python httplib urllib urllib2差別（一撇）

python post請求執行個體 & json -- str互相轉化（application/x-www-form-urlencoded \ multipart/form-data）

1, 前言：

Python 提供了很多種友好的通路網頁内容的方法：python2.x 有 httplib、urllib 和 urllib2；python3.x 則提供了 urllib.request（另有常用的第三方函式庫 requests）。同時，每種方法下面又分為 get、post、put、delete 等 method。

一時間江湖上充斥着“五門八派”的各種，令初學者眼花缭亂，不知如何下手，如何學起。

但是，有一點需要提醒的是：無論哪一種方案或方法，存在即有其合理性；哪一種方法用着順手就用哪一種，得心應手才是王道！

2, 下面我們比較一下python2.x 中的三種方法，先上執行個體，之後分析

（1）執行個體

import json
import sys
import hashlib
import urllib
import httplib 


### none using now 
def generate_json_list():
    reload(sys)
    sys.setdefaultencoding('gbk')
    print "[",
    flag=False
    for line in sys.stdin:
        if flag:
            print ",",
        else:
            flag=True
        line=line.strip()
        items=line.split("\t")
        out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}
        out["createdAt"]=items[0]
        out["scale"]=items[1]
        out["channel"]=items[2]
        out["word"]=items[3]
        print json.dumps(out,encoding="gbk").decode("unicode-escape"),
    print "]"

import urllib2
def import_out_hotwords(key, json_str, out):
    HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle"
    #HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin"
    #print "2--", json_str
    value={"configKey":key,"configValue":json_str}
    data=urllib.urlencode(value)
    print >> sys.stderr, "### 3params", value, data
    req = urllib2.Request(HOST, data)
    req.add_header("content-type", "application/x-www-form-urlencoded")
    req.get_method = lambda : 'PUT'
    response = None 
    try:
        response = urllib2.urlopen(req, timeout=5)
        if response.code == 200:
            print "insertSingle Succ: ", out["word"], out["channel"], out["key"]
            response.close()
    except urllib2.URLError as e:
        if hasattr(e, 'code'):
            print 'Error code:',e.code
        elif hasattr(e, 'reason'):
            print 'Reason:',e.reason

    finally:
        if response:
            response.close()

def import_out_hotwords_2(key, json_str, out):
    HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle"
    #HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin"
    #print "2--", json_str
    value={"configKey":key,"configValue":json_str}
    data=urllib.urlencode(value)
    print >> sys.stderr, "## 2params", value, data
    req = urllib2.Request(HOST, data)
    req.add_header("content-type", "application/x-www-form-urlencoded")
    req.get_method = lambda : 'PUT'
    response = None 
    try:
        response = urllib2.urlopen(req, timeout=5)
        if response.code == 200:
            print "insertSingle Succ: ", out["word"], out["channel"], out["key"]
            response.close()
    except urllib2.URLError as e:
        if hasattr(e, 'code'):
            print 'Error code:',e.code
        elif hasattr(e, 'reason'):
            print 'Reason:',e.reason

    finally:
        if response:
            response.close()

    
def import_out_hotwords_old(key, json_str, out):
    HOST = "10.129.232.109:5005"
    conn = httplib.HTTPConnection(HOST)
    #print "2--", json_str
    value={"configKey":key,"configValue":json_str}
    data=urllib.urlencode(value)
    #print data
    headers = {
            'content-type': 'application/x-www-form-urlencoded',
            'cache-control': 'no-cache'
            }
    conn.request("PUT", "/api/externalHotWords/insertSingle", body=data, headers=headers)
    handler = conn.getresponse()
    if handler.status == 200:
        print "insertSingle Succ: ", out["word"], out["channel"], out["key"]
    #if handler.read().decode('utf8').encode('gbk')[0] == "OK":
    #    print "insertSingle Succ: ", json_str
    conn.close()

def generate_json():
    """Send every tab-separated stdin record to import_out_hotwords_2.

    Lines with fewer than four tab-separated fields are skipped.  Field 0
    becomes "createdAt", field 2 "channel" and field 3 "word"; the record
    key is "externalHotWords_" followed by the MD5 hex digest of
    word + channel.  The JSON text is passed on un-quoted.
    """
    # Python 2 only: make GBK the interpreter's implicit codec.
    reload(sys)
    sys.setdefaultencoding('gbk')
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if len(fields) >= 4:
            created_at, channel, word = fields[0], fields[2], fields[3]
            key = "externalHotWords_" + hashlib.md5(word + channel).hexdigest()
            # Key insertion order matches the original literal so the
            # serialized key order is unchanged.
            out = {"key": key, "createdAt": created_at, "word": word,
                   "channel": channel, "type": "", "scale": ""}
            json_str = json.dumps(out, encoding="gbk")
            import_out_hotwords_2(key, json_str, out)



def generate_json_old():
    """Older variant of generate_json, sending via import_out_hotwords.

    Reads tab-separated lines from stdin, skips lines with fewer than four
    fields, and builds one record dict per line.  Note that the ``key``
    passed to import_out_hotwords is the bare MD5 hex digest, while
    out["key"] carries the "externalHotWords_" prefix.

    NOTE: the JSON text is percent-encoded here with urllib.quote and then
    encoded again by urllib.urlencode inside import_out_hotwords, so the
    payload is double-encoded.
    """
    # Python 2 only: make GBK the interpreter's implicit codec.
    reload(sys)
    sys.setdefaultencoding('gbk')
    for line in sys.stdin:
        line=line.strip()
        items=line.split("\t")
        if len(items) < 4:
            continue
        out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}
        out["createdAt"]=items[0]
        #out["scale"]=items[1]
        out["channel"]=items[2]
        out["word"]=items[3]
        # Key material is word + channel.
        key = hashlib.md5((items[3] + items[2])).hexdigest()
        out["key"] = "externalHotWords_" + key
        # Turn json's \uXXXX escapes back into characters (yields unicode).
        json_str = json.dumps(out,encoding="gbk").decode("unicode-escape")
        #json_str = out
        #print "1--", json_str
        ## return 'req=' +  urllib.quote(reqinfo.decode('gbk', 'ignore').encode('utf8'))
        # First encoding pass: UTF-8 bytes, then percent-quoting.
        import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 'ignore').encode('utf8')), out)
        #import_out_hotwords(key, json_str)



if __name__ == "__main__":
    # Script entry point: stream the records read from stdin to the service.
    generate_json()

下面的執行個體存在一個小問題：二次編碼問題，首先對out進行json.dumps() 的json_str轉化（正确），之後對json_str進行urllib.quote() （第一次編碼）；最後在

value={"configKey":key,"configValue":json_str}  之後有urllib.urlencode() （第二次編碼）

格式一：configValue=%7B%27scale%27%3A+%27%27%2C+%27word%27%3A+%27%5Cxb2%5Cxe2%5Cxca%
    5Cxd4soso%27%2C+%27channel%27%3A+%27360_%5Cxca%5Cxb5%5Cxca%5Cxb1%5Cxc8%5Cxc8%5Cxb5%5Cxe3%27%2C+%27key%27%3A+%27externalHotWords_ed9f4ea3b7ff116c67366f7a576bcb08%27%2C+%27type%
    27%3A+%27%27%2C+%27createdAt%27%3A+%272017-06-07+11%3A22%3A32%27%7D&configKey=ed9f4ea3b7ff116c67366f7a576bcb08

格式二：configValue=%257B%2522scale%2522%253A%2520%2522%2522%252C%2520%2522word%2522%253A%2520%2522%25E6%25B5%258B%25E8%25AF%2595soso%2522%2
    52C%2520%2522channel%2522%253A%2520%2522360_%25E5%25AE%259E%25E6%2597%25B6%25E7%2583%25AD%25E7%2582%25B9%2522%252C%2520%2522key%2522%253A%2520%2522externalHotWords_ed9f4ea3b7f
    f116c67366f7a576bcb08%2522%252C%2520%2522type%2522%253A%2520%2522%2522%252C%2520%2522createdAt%2522%253A%2520%25222017-06-07%252011%253A22%253A32%2522%257D&configKey=ed9f4ea3b
    7ff116c67366f7a576bcb08

顯然格式二是對格式一再次進行了編碼（因為{ --> %7B;  % --> %25; ）

import json
import sys
import hashlib
import urllib
import httplib 


### none using now 
def generate_json_list():
    reload(sys)
    sys.setdefaultencoding('gbk')
    print "[",
    flag=False
    for line in sys.stdin:
        if flag:
            print ",",
        else:
            flag=True
        line=line.strip()
        items=line.split("\t")
        out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}
        out["createdAt"]=items[0]
        out["scale"]=items[1]
        out["channel"]=items[2]
        out["word"]=items[3]
        print json.dumps(out,encoding="gbk").decode("unicode-escape"),
    print "]"

import urllib2
def import_out_hotwords(key, json_str, out):
    HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle"
    #HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin"
    #print "2--", json_str
    value={"configKey":key,"configValue":json_str}
    data=urllib.urlencode(value)
    req = urllib2.Request(HOST, data)
    req.add_header("content-type", "application/x-www-form-urlencoded")
    req.get_method = lambda : 'PUT'
    response = None 
    try:
        response = urllib2.urlopen(req, timeout=5)
        if response.code == 200:
            print "insertSingle Succ: ", out["word"], out["channel"], out["key"]
            response.close()
    except urllib2.URLError as e:
        if hasattr(e, 'code'):
            print 'Error code:',e.code
        elif hasattr(e, 'reason'):
            print 'Reason:',e.reason

    finally:
        if response:
            response.close()

    
def import_out_hotwords_old(key, json_str, out):
    HOST = "10.129.232.109:5005"
    conn = httplib.HTTPConnection(HOST)
    #print "2--", json_str
    value={"configKey":key,"configValue":json_str}
    data=urllib.urlencode(value)
    #print data
    headers = {
            'content-type': 'application/x-www-form-urlencoded',
            'cache-control': 'no-cache'
            }
    conn.request("PUT", "/api/externalHotWords/insertSingle", body=data, headers=headers)
    handler = conn.getresponse()
    if handler.status == 200:
        print "insertSingle Succ: ", out["word"], out["channel"], out["key"]
    #if handler.read().decode('utf8').encode('gbk')[0] == "OK":
    #    print "insertSingle Succ: ", json_str
    conn.close()

def generate_json():
    """Send every tab-separated stdin record to import_out_hotwords.

    Reads tab-separated lines from stdin, skips lines with fewer than four
    fields, and builds one record dict per line.  Note that the ``key``
    passed to import_out_hotwords is the bare MD5 hex digest, while
    out["key"] carries the "externalHotWords_" prefix.

    NOTE: the JSON text is percent-encoded here with urllib.quote and then
    encoded again by urllib.urlencode inside import_out_hotwords, so the
    payload is double-encoded.
    """
    # Python 2 only: make GBK the interpreter's implicit codec.
    reload(sys)
    sys.setdefaultencoding('gbk')
    for line in sys.stdin:
        line=line.strip()
        items=line.split("\t")
        if len(items) < 4:
            continue
        out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}
        out["createdAt"]=items[0]
        #out["scale"]=items[1]
        out["channel"]=items[2]
        out["word"]=items[3]
        # Key material is word + channel.
        key = hashlib.md5((items[3] + items[2])).hexdigest()
        out["key"] = "externalHotWords_" + key
        # Turn json's \uXXXX escapes back into characters (yields unicode).
        json_str = json.dumps(out,encoding="gbk").decode("unicode-escape")
        #json_str = out
        #print "1--", json_str
        ## return 'req=' +  urllib.quote(reqinfo.decode('gbk', 'ignore').encode('utf8'))
        # First encoding pass: UTF-8 bytes, then percent-quoting.
        import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 'ignore').encode('utf8')), out)
        #import_out_hotwords(key, json_str)



if __name__ == "__main__":
    # Script entry point: stream the records read from stdin to the service.
    generate_json()

CMD: cat tmp | python generate_json2.py

[@10.134.105.160 HotRankingLoggers]# vi tmp

2017-06-07 11:22:32 6964 360_實時熱點測試APP

2017-06-07 11:22:32 6498 360_實時熱點測試soso

（2）分析（參考 python的httplib、urllib和urllib2的差別及用）

urllib和urllib2

urllib 和urllib2都是接受URL請求的相關子產品，但是urllib2可以接受一個Request類的執行個體來設定URL請求的headers，urllib僅可以接受URL。

這意味着，你不可以僞裝你的User Agent字元串等。

urllib提供urlencode方法，用來産生GET查詢字元串，而urllib2沒有。這就是urllib常和urllib2一起使用的原因。

目前的大部分http請求都是通過urllib2來通路的

httplib

httplib實作了HTTP和HTTPS的用戶端協定，一般不直接使用，在python更高層的封裝子產品中（urllib,urllib2）使用了它的http實作。

（3）詳解

urllib簡單用法

1. google = urllib.urlopen('http://www.google.com')

2. print 'http header:\n', google.info()

3. print 'http status:', google.getcode()

4. print 'url:', google.geturl()

5. for line in google: # 就像在操作本地檔案

6. print line,

7. google.close()

urllib2簡單用法

1. import urllib2

2. response=urllib2.urlopen('http://www.douban.com')

3. html=response.read()

實際步驟：

1、urllib2.Request()的功能是構造一個請求資訊，傳回的req就是一個構造好的請求

2、urllib2.urlopen()的功能是發送剛剛構造好的請求req，并傳回一個檔案類的對象response，包括了所有的傳回資訊。

3、通過response.read()可以讀取到response裡面的html，通過response.info()可以讀到一些額外的資訊。如下：

1. #!/usr/bin/env python

2. import urllib2

3. req = urllib2.Request("http://www.douban.com")

4. response = urllib2.urlopen(req)

5. html = response.read()

6. print html

有時你會碰到，程式也對，但是伺服器拒絕你的通路。這是為什麼呢?問題出在請求中的頭資訊(header)。有的服務端有潔癖，不喜歡程式來觸摸它。這個時候你需要将你的程式僞裝成浏覽器來送出請求。請求的方式就包含在header中。常見的情形：

1. import urllib

2. import urllib2

3. url = 'http://www.someserver.com/cgi-bin/register.cgi'

4. user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'# 将user_agent寫入頭資訊

5. values = {'name' : 'who','password':'123456'}

6. headers = { 'User-Agent' : user_agent }

7. data = urllib.urlencode(values)

8. req = urllib2.Request(url, data, headers)

9. response = urllib2.urlopen(req)

10. the_page = response.read()

values是post資料

GET方法

例如百度：

百度是通過 http://www.baidu.com/s?wd=XXX 來進行查詢的，這樣我們需要将 {'wd':'xxx'} 這個字典進行urlencode

1. #coding:utf-8

2. import urllib

3. import urllib2

4. url = 'http://www.baidu.com/s'

5. values = {'wd':'D_in'}

6. data = urllib.urlencode(values)

7. print data

8. url2 = url+'?'+data

9. response = urllib2.urlopen(url2)

10. the_page = response.read()

11. print the_page

POST方法

1. import urllib

2. import urllib2

3. url = 'http://www.someserver.com/cgi-bin/register.cgi'

4. user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' # 将user_agent寫入頭資訊

5. values = {'name' : 'who','password':'123456'} # post資料

6. headers = { 'User-Agent' : user_agent }

7. data = urllib.urlencode(values) # 對post資料進行url編碼

8. req = urllib2.Request(url, data, headers)

9. response = urllib2.urlopen(req)

10. the_page = response.read()

urllib2帶cookie的使用

1. #coding:utf-8

2. import urllib2,urllib

3. import cookielib

5. url = r'http://www.renren.com/ajaxLogin'

7. #建立一個cj的cookie的容器

8. cj = cookielib.CookieJar()

9. opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

10. #将要POST出去的資料進行編碼

11. data = urllib.urlencode({"email":email,"password":password})  # pass 是保留字，不能用作變量名

12. r = opener.open(url,data)

13. print cj

httplib簡單用法

1. #!/usr/bin/env python

2. # -*- coding: utf-8 -*-

3. import httplib

4. import urllib

6. def sendhttp():

7. data = urllib.urlencode({'@number': 12524, '@type': 'issue', '@action': 'show'})

8. headers = {"Content-type": "application/x-www-form-urlencoded",

9. "Accept": "text/plain"}

10. conn = httplib.HTTPConnection('bugs.python.org')

11. conn.request('POST', '/', data, headers)

12. httpres = conn.getresponse()

13. print httpres.status

14. print httpres.reason

15. print httpres.read()

16.

17. if __name__ == '__main__':

18. sendhttp()

3，get put post delete 方法（參考自 python urllib2對http的get，put，post，delete）

#GET：

#!/usr/bin/env python

# -*- coding:utf-8 -*-

import urllib2

def get():
    """Fetch the page at URL with an HTTP GET and return its body."""
    # Page address.  urllib2 needs an explicit scheme; a bare host name is
    # rejected as an unknown URL type.
    URL = 'http://www.baidu.com'

    # urlopen without a data argument sends a GET request.
    response = urllib2.urlopen(URL)
    try:
        # Body of the page returned by the server.
        return response.read()
    finally:
        response.close()

#POST：

#!/usr/bin/env python

# -*- coding:utf-8 -*-

import urllib

import urllib2

def post():

URL ='http://umbra.nascom.nasa.gov/cgi-bin/eit-catalog.cgi' #頁面的位址

values ={'obs_year':'2011','obs_month':'March', #post的值

'obs_day':'8','start_year':'2011'

,'start_month':'March','start_day':'8'

,'start_hour':'All Hours','stop_year':'2011'

,'stop_month':'March','stop_day':'8'

,'stop_hour':'All Hours','xsize':'All'

,'ysize':'All','wave':'all'

,'filter':'all','object':'all'

,'xbin':'all','ybin':'all'

,'highc':'all'}

data =urllib.urlencode(values) #适用urllib對資料進行格式化編碼

printdata #輸出檢視編碼後的資料格式

req =urllib2.Request(URL, data) #生成頁面請求的完整資料

response =urllib2.urlopen(req) #發送頁面請求

returnresponse.read() #擷取伺服器傳回的頁面資訊

#PUT

import urllib2

# Build a request that carries a body (placeholder payload).
request = urllib2.Request('http://example.org',data='your_put_data')

# Placeholder content type; replace with the real media type of the payload.
request.add_header('Content-Type', 'your/contenttype')

# Override get_method so the request is sent with the PUT verb.
request.get_method = lambda: 'PUT'

response = urllib2.urlopen(request)

#DELETE

import urllib2

# NOTE(review): `uri` is not defined in this snippet; it must be set to the
# target URL before these lines run.
request = urllib2.Request(uri)

# Override get_method so the request is sent with the DELETE verb.
request.get_method = lambda: 'DELETE'

response = urllib2.urlopen(request)

python httplib urllib urllib2差別（一撇） 3，get put post delete 方法，參考自 python urllib2對http的get，put，post，delete）

GET方法

POST方法

urllib2帶cookie的使用

3，get put post delete 方法，參考自 python urllib2對http的get，put，post，delete）

繼續閱讀

來自python的【條件控制/語句循環/break/continue/else/pass】一、條件控制二、語句循環

無法解析的外部符号 wmain，該符号在函數 "void cdecl mainCRTStartupHelper(struct HINSTANCE *,unsigned short con......

TestLink導出用例轉換工具(XML2Excel)

YAML簡介和PyYAML安全操作YAML支援的類型YAML的優點：yaml的基本文法python操作

Small tricks

libsvm for python 安裝

學習軟體測試基礎測試第七天

Zeppelin 配置通路 REST APIApache Zeppelin Configuration REST API

【Torch】最簡潔logging使用指南

27. Remove Element(清單)題目代碼

Cloud Studio初體驗

使用 ctypes 進行 Python 和 C 的混合程式設計

【python】【資料處理】畫多元資料分布圖

【python】netconf協定對接管理裝置

「Python 網絡自動化」NETCONF —— Python 使用 NETCONF 管理配置 H3C 網絡裝置

在python中建立excel并寫入