使用HttpClient模擬新浪微博登入,貼在這裡做個備忘,也希望能幫助到有這方面需求的同學們,代碼如下:

package com.yida.spider4j.crawler.test.sina;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.net.URLEncoder;
import java.security.InvalidKeyException;
import java.security.KeyFactory;
import java.security.NoSuchAlgorithmException;
import java.security.interfaces.RSAPublicKey;
import java.security.spec.InvalidKeySpecException;
import java.security.spec.RSAPublicKeySpec;
import java.util.HashMap;
import java.util.Map;
import javax.crypto.BadPaddingException;
import javax.crypto.Cipher;
import javax.crypto.IllegalBlockSizeException;
import javax.crypto.NoSuchPaddingException;
import com.yida.spider4j.crawler.utils.common.fastjsonutils;
import com.yida.spider4j.crawler.utils.common.stringutils;
import com.yida.spider4j.crawler.utils.httpclient.httpclientutils;
import com.yida.spider4j.crawler.utils.httpclient.result;
import com.yida.spider4j.crawler.utils.io.fileutils;
/**
* @classname: logintest
* @description: 新浪微網誌登入測試
* @author lanxiaowei([email protected])
* @date 2015年10月29日 下午5:48:58
*
*/
public class logintest {
private static final char[] digits_lower = { '0', '1', '2', '3', '4', '5',
'6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
private static final char[] digits_upper = { '0', '1', '2', '3', '4', '5',
/**
 * Demo entry point: logs in, reduces the returned cookie to its {@code sub} entry,
 * fetches one Weibo page with that cookie and writes the HTML to a local file so the
 * response can be inspected by hand.
 *
 * NOTE(review): project identifiers (result, getcookie, fileutils.writefile) are kept
 * exactly as they appear in this copy of the file; their real spelling may differ.
 *
 * @param args command-line arguments (not used)
 * @throws Exception propagated from any of the calls below
 */
public static void main(String[] args) throws Exception {
    // Login account and password (placeholders).
    String account = "xxxxxxxxx";
    String pwd = "xxxxxxx";
    // Log in to Sina Weibo.
    result loginresult = login(account, pwd);
    String cookie = loginresult.getcookie();
    cookie = getsub(cookie);
    // NOTE(review): this prints a session cookie to stdout.
    System.out.println("cookie:" + cookie);
    // Visit the Sina Weibo home page of the user "wenzhang626".
    String html = visitwenzhang(cookie);
    // Write the returned HTML to a file so it is easy to open and verify the response.
    fileutils.writefile(html, "c:/wenzhang.html", "utf-8", false);
}
public static string getsub(string cookie) {
string sub = cookie.replaceall(".*;sub=(.*);subp=.*", "$1");
sub = "sub=" + sub + ";";
return sub;
/**
* @author: lanxiaowei([email protected])
* @title: visitwenzhang
* @description: 通路【文章同學】的新浪微網誌
* @param @return
* @param @throws exception
* @return string
* @throws
*/
public static string visitwenzhang(string cookie) throws exception {
string wenzhang = "http://weibo.com/wenzhang626";
map<string,string> headers = new hashmap<string,string>();
//sub cookie項是關鍵
//sub=_2a257navgdetxgedh7lsz8yvpwziihxvyqpgordv8punbunamlronkw8p9rh2bsuc2yusku1pzjykmllc7q..;
headers.put("cookie", cookie);
string html = httpclientutils.gethtml(wenzhang,headers);
//system.out.println(html);
return html;
* @title: login
* @description: 模拟新浪微網誌登入
* @param @param account
* @param @param pwd
public static result login(string account,string pwd) throws exception {
string url = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)&_=" +
system.currenttimemillis();
string content = prelogin();
map<string,object> parammap = fastjsonutils.tomap(content);
string pubkey = parammap.get("pubkey").tostring();
string servertime = parammap.get("servertime").tostring();
string nonce = parammap.get("nonce").tostring();
string rsakv = parammap.get("rsakv").tostring();
headers.put("host", "login.sina.com.cn");
headers.put("origin", "http://weibo.com");
headers.put("content-type", "application/x-www-form-urlencoded");
map<string,string> params = new hashmap<string,string>();
params.put("entry", "weibo");
params.put("gateway", "1");
params.put("from", "");
params.put("savestate", "7");
params.put("useticket", "1");
params.put("pagerefer", "http://s.weibo.com/weibo/%25e6%2596%2587%25e7%25ab%25a0%25e5%2590%258c%25e5%25ad%25a6?topnav=1&wvr=6&b=1");
params.put("vsnf", "1");
params.put("su", encodeaccount(account));
params.put("service", "miniblog");
params.put("servertime", servertime);
params.put("nonce", nonce);
params.put("pwencode", "rsa2");
params.put("rsakv", rsakv);
params.put("sp", getsp(pwd, pubkey, servertime, nonce));
params.put("encoding", "utf-8");
params.put("cdult", "2");
params.put("domain", "weibo.com");
params.put("prelt", "154");
params.put("returntype", "text");
result result = httpclientutils.post(url, headers, params);
//system.out.println(json);
return result;
* @title: prelogin
* @description: 登入必需參數擷取
public static string prelogin() throws exception {
string url = "http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinassocontroller.prelogincallback&su=&rsakt=mod&client=ssologin.js(v1.4.18)&_=1446099453139";
string content = httpclientutils.gethtml(url);
if(null != content && !content.equals("")) {
content = content.replaceall("sinassocontroller.prelogincallback\\((.*)\\)", "$1");
}
//system.out.println(content);
return content;
* @title: getsp
* @description: 登入密碼加密
* @param @param pubkey
* @param @param servertime
* @param @param nonce
public static string getsp(string pwd,string pubkey,string servertime,string nonce) {
string t = "10001";
string message = servertime + "\t" + nonce + "\n" + pwd;
string result = null;
try {
result = rsa(pubkey, t , message);
} catch (invalidkeyexception e) {
e.printstacktrace();
} catch (illegalblocksizeexception e) {
} catch (badpaddingexception e) {
} catch (nosuchalgorithmexception e) {
} catch (invalidkeyspecexception e) {
} catch (nosuchpaddingexception e) {
} catch (unsupportedencodingexception e) {
}
system.out.println("rsa加密後的密碼:" + result);
* @title: encodeaccount
* @description: 登入賬号編碼
private static string encodeaccount(string account) {
string username = "";
username = stringutils.base64encode(urlencoder.encode(account,
"utf-8"));
return username;
* @title: rsa
* @description: rsa加密
* @param @param exponenthex
* @param @throws illegalblocksizeexception
* @param @throws badpaddingexception
* @param @throws nosuchalgorithmexception
* @param @throws invalidkeyspecexception
* @param @throws nosuchpaddingexception
* @param @throws invalidkeyexception
* @param @throws unsupportedencodingexception
public static string rsa(string pubkey, string exponenthex, string pwd)
throws illegalblocksizeexception, badpaddingexception,
nosuchalgorithmexception, invalidkeyspecexception,
nosuchpaddingexception, invalidkeyexception,
unsupportedencodingexception {
keyfactory factory = keyfactory.getinstance("rsa");
biginteger m = new biginteger(pubkey, 16);
biginteger e = new biginteger(exponenthex, 16);
rsapublickeyspec spec = new rsapublickeyspec(m, e);
//建立公鑰
rsapublickey pub = (rsapublickey) factory.generatepublic(spec);
cipher enc = cipher.getinstance("rsa");
enc.init(cipher.encrypt_mode, pub);
byte[] encryptedcontentkey = enc.dofinal(pwd.getbytes("utf-8"));
return new string(encodehex(encryptedcontentkey));
protected static char[] encodehex(final byte[] data, final char[] todigits) {
final int l = data.length;
final char[] out = new char[l << 1];
for (int i = 0, j = 0; i < l; i++) {
out[j++] = todigits[(0xf0 & data[i]) >>> 4];
out[j++] = todigits[0x0f & data[i]];
return out;
public static char[] encodehex(final byte[] data, final boolean tolowercase) {
return encodehex(data, tolowercase ? digits_lower : digits_upper);
public static char[] encodehex(final byte[] data) {
return encodehex(data, true);
}
很不幸,代碼才公布僅僅一天,新浪微博方面就已經做出相應的防禦措施:如果你訪問過於頻繁的話,登入接口即http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinassocontroller.prelogincallback&su=&rsakt=mod&client=ssologin.js(v1.4.18)&_=1446099453139會直接傳回:

<html><head><script language='javascript'>parent.sinassocontroller.feedbackurlcallback({"result":false,"errno":"4049","reason":"\u4e3a\u4e86\u60a8\u7684\u5e10\u53f7\u5b89\u5168\uff0c\u8bf7\u8f93\u5165\u9a8c\u8bc1\u7801"});</script></head><body></body></html>
經過Unicode解碼後,reason的内容就是:「为了您的帐号安全，请输入验证码」
這就比較蛋疼了,我暫時不知道該怎麼解決!
除此之外,新浪微博對送出的參數也做了小調整,如下:

// Adjusted login form parameters. Compared with the map built in login() this adds
// "sr" and "url", changes "prelt" to "1011" and "returntype" to "meta", and no longer
// sends "cdult". The variables used here (account, pwd, pubkey, servertime, nonce,
// rsakv) are the ones prepared in login().
Map<String, String> params = new HashMap<String, String>();
params.put("entry", "weibo");
params.put("gateway", "1");
params.put("from", "");
params.put("savestate", "7");
params.put("useticket", "1");
params.put("pagerefer", "http://s.weibo.com/weibo/%25e6%2596%2587%25e7%25ab%25a0%25e5%2590%258c%25e5%25ad%25a6?topnav=1&wvr=6&b=1");
params.put("vsnf", "1");
params.put("su", encodeaccount(account));
params.put("service", "miniblog");
params.put("servertime", servertime);
params.put("nonce", nonce);
params.put("pwencode", "rsa2");
params.put("rsakv", rsakv);
params.put("sp", getsp(pwd, pubkey, servertime, nonce));
params.put("encoding", "utf-8");
params.put("sr", "1366*768");
params.put("prelt", "1011");
params.put("url", "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinassocontroller.feedbackurlcallback");
params.put("domain", "weibo.com");
params.put("returntype", "meta");
其實我也就是在研究爬蟲,想為爬蟲內建自動登入驗證功能,拿新浪微博來試試手、測試一下罷了,新浪微博不用緊張。如果你第一次執行此代碼,可能會模擬登入成功;試了好多次之後,可能也會出現跟我一樣的情況,要求輸入驗證碼。或許當你看到此篇部落格時,新浪微博的登入驗證邏輯已經發生變化,上面的代碼已經完全失效,但我想還是能夠給你們些許提示。驗證碼破解這個比較有難度,我就不趟這渾水了,你們有興趣就繼續研究去吧。OK, that's all, thanks for your attention!
轉載:http://iamyida.iteye.com/blog/2253117