使用 HttpClient 模拟新浪微博登录,贴在这里做个备忘,也希望能帮助到有这方面需求的同学们,代码如下:

package com.yida.spider4j.crawler.test.sina;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.net.URLEncoder;
import java.security.GeneralSecurityException;
import java.security.InvalidKeyException;
import java.security.KeyFactory;
import java.security.NoSuchAlgorithmException;
import java.security.interfaces.RSAPublicKey;
import java.security.spec.InvalidKeySpecException;
import java.security.spec.RSAPublicKeySpec;
import java.util.HashMap;
import java.util.Map;
import javax.crypto.BadPaddingException;
import javax.crypto.Cipher;
import javax.crypto.IllegalBlockSizeException;
import javax.crypto.NoSuchPaddingException;
import com.yida.spider4j.crawler.utils.common.fastjsonutils;
import com.yida.spider4j.crawler.utils.common.stringutils;
import com.yida.spider4j.crawler.utils.httpclient.httpclientutils;
import com.yida.spider4j.crawler.utils.httpclient.result;
import com.yida.spider4j.crawler.utils.io.fileutils;
/**
* @classname: logintest
* @description: 新浪微博登录测试
* @author lanxiaowei([email protected])
* @date 2015年10月29日 下午5:48:58
*
*/
public class logintest {
private static final char[] digits_lower = { '0', '1', '2', '3', '4', '5',
'6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
private static final char[] digits_upper = { '0', '1', '2', '3', '4', '5',
public static void main(string[] args) throws exception {
//登录账号和密码
string account = "xxxxxxxxx";
string pwd = "xxxxxxx";
//新浪微博登录
result result = login(account,pwd);
string cookie = result.getcookie();
cookie = getsub(cookie);
system.out.println("cookie:" + cookie);
//访问【文章同学】的新浪微博首页
string html = visitwenzhang(cookie);
//把返回的html内容写入文件,方便打开进行验证是否正确返回
fileutils.writefile(html, "c:/wenzhang.html", "utf-8", false);
}
public static string getsub(string cookie) {
string sub = cookie.replaceall(".*;sub=(.*);subp=.*", "$1");
sub = "sub=" + sub + ";";
return sub;
/**
* @author: lanxiaowei([email protected])
* @title: visitwenzhang
* @description: 访问【文章同学】的新浪微博
* @param @return
* @param @throws exception
* @return string
* @throws
*/
public static string visitwenzhang(string cookie) throws exception {
string wenzhang = "http://weibo.com/wenzhang626";
map<string,string> headers = new hashmap<string,string>();
//sub cookie项是关键
//sub=_2a257navgdetxgedh7lsz8yvpwziihxvyqpgordv8punbunamlronkw8p9rh2bsuc2yusku1pzjykmllc7q..;
headers.put("cookie", cookie);
string html = httpclientutils.gethtml(wenzhang,headers);
//system.out.println(html);
return html;
* @title: login
* @description: 模拟新浪微博登录
* @param @param account
* @param @param pwd
public static result login(string account,string pwd) throws exception {
string url = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)&_=" +
system.currenttimemillis();
string content = prelogin();
map<string,object> parammap = fastjsonutils.tomap(content);
string pubkey = parammap.get("pubkey").tostring();
string servertime = parammap.get("servertime").tostring();
string nonce = parammap.get("nonce").tostring();
string rsakv = parammap.get("rsakv").tostring();
headers.put("host", "login.sina.com.cn");
headers.put("origin", "http://weibo.com");
headers.put("content-type", "application/x-www-form-urlencoded");
map<string,string> params = new hashmap<string,string>();
params.put("entry", "weibo");
params.put("gateway", "1");
params.put("from", "");
params.put("savestate", "7");
params.put("useticket", "1");
params.put("pagerefer", "http://s.weibo.com/weibo/%25e6%2596%2587%25e7%25ab%25a0%25e5%2590%258c%25e5%25ad%25a6?topnav=1&wvr=6&b=1");
params.put("vsnf", "1");
params.put("su", encodeaccount(account));
params.put("service", "miniblog");
params.put("servertime", servertime);
params.put("nonce", nonce);
params.put("pwencode", "rsa2");
params.put("rsakv", rsakv);
params.put("sp", getsp(pwd, pubkey, servertime, nonce));
params.put("encoding", "utf-8");
params.put("cdult", "2");
params.put("domain", "weibo.com");
params.put("prelt", "154");
params.put("returntype", "text");
result result = httpclientutils.post(url, headers, params);
//system.out.println(json);
return result;
* @title: prelogin
* @description: 登录必需参数获取
public static string prelogin() throws exception {
string url = "http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinassocontroller.prelogincallback&su=&rsakt=mod&client=ssologin.js(v1.4.18)&_=1446099453139";
string content = httpclientutils.gethtml(url);
if(null != content && !content.equals("")) {
content = content.replaceall("sinassocontroller.prelogincallback\\((.*)\\)", "$1");
}
//system.out.println(content);
return content;
* @title: getsp
* @description: 登录密码加密
* @param @param pubkey
* @param @param servertime
* @param @param nonce
public static string getsp(string pwd,string pubkey,string servertime,string nonce) {
string t = "10001";
string message = servertime + "\t" + nonce + "\n" + pwd;
string result = null;
try {
result = rsa(pubkey, t , message);
} catch (invalidkeyexception e) {
e.printstacktrace();
} catch (illegalblocksizeexception e) {
} catch (badpaddingexception e) {
} catch (nosuchalgorithmexception e) {
} catch (invalidkeyspecexception e) {
} catch (nosuchpaddingexception e) {
} catch (unsupportedencodingexception e) {
}
system.out.println("rsa加密后的密码:" + result);
* @title: encodeaccount
* @description: 登录账号编码
private static string encodeaccount(string account) {
string username = "";
username = stringutils.base64encode(urlencoder.encode(account,
"utf-8"));
return username;
* @title: rsa
* @description: rsa加密
* @param @param exponenthex
* @param @throws illegalblocksizeexception
* @param @throws badpaddingexception
* @param @throws nosuchalgorithmexception
* @param @throws invalidkeyspecexception
* @param @throws nosuchpaddingexception
* @param @throws invalidkeyexception
* @param @throws unsupportedencodingexception
public static string rsa(string pubkey, string exponenthex, string pwd)
throws illegalblocksizeexception, badpaddingexception,
nosuchalgorithmexception, invalidkeyspecexception,
nosuchpaddingexception, invalidkeyexception,
unsupportedencodingexception {
keyfactory factory = keyfactory.getinstance("rsa");
biginteger m = new biginteger(pubkey, 16);
biginteger e = new biginteger(exponenthex, 16);
rsapublickeyspec spec = new rsapublickeyspec(m, e);
//创建公钥
rsapublickey pub = (rsapublickey) factory.generatepublic(spec);
cipher enc = cipher.getinstance("rsa");
enc.init(cipher.encrypt_mode, pub);
byte[] encryptedcontentkey = enc.dofinal(pwd.getbytes("utf-8"));
return new string(encodehex(encryptedcontentkey));
protected static char[] encodehex(final byte[] data, final char[] todigits) {
final int l = data.length;
final char[] out = new char[l << 1];
for (int i = 0, j = 0; i < l; i++) {
out[j++] = todigits[(0xf0 & data[i]) >>> 4];
out[j++] = todigits[0x0f & data[i]];
return out;
public static char[] encodehex(final byte[] data, final boolean tolowercase) {
return encodehex(data, tolowercase ? digits_lower : digits_upper);
public static char[] encodehex(final byte[] data) {
return encodehex(data, true);
}
很不幸,代码才仅仅公布一天,新浪微博方面就已经做出了相应的防御措施:如果你访问过于频繁,登录接口(即 http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinassocontroller.prelogincallback&su=&rsakt=mod&client=ssologin.js(v1.4.18)&_=1446099453139 )会直接返回:

<html><head><script language='javascript'>parent.sinassocontroller.feedbackurlcallback({"result":false,"errno":"4049","reason":"\u4e3a\u4e86\u60a8\u7684\u5e10\u53f7\u5b89\u5168\uff0c\u8bf7\u8f93\u5165\u9a8c\u8bc1\u7801"});</script></head><body></body></html>
经过 Unicode 解码后,reason 字段的内容就是:“为了您的帐号安全,请输入验证码”。
这个问题比较棘手,我暂时还不知道该如何解决!
除此之外,新浪微博对提交参数也做了小调整,调整后的参数如下:

// Adjusted login form parameters. Compared with the parameter set built in
// login() above: "sr" and "url" are new, "cdult" is gone, "prelt" changed
// from "154" to "1011" and "returntype" changed from "text" to "meta".
// This fragment relies on account, pwd, pubkey, servertime, nonce and rsakv
// being in scope, as they are inside login().
Map<String, String> params = new HashMap<String, String>();
params.put("entry", "weibo");
params.put("gateway", "1");
params.put("from", "");
params.put("savestate", "7");
params.put("useticket", "1");
params.put("pagerefer", "http://s.weibo.com/weibo/%25e6%2596%2587%25e7%25ab%25a0%25e5%2590%258c%25e5%25ad%25a6?topnav=1&wvr=6&b=1");
params.put("vsnf", "1");
params.put("su", encodeaccount(account));
params.put("service", "miniblog");
params.put("servertime", servertime);
params.put("nonce", nonce);
params.put("pwencode", "rsa2");
params.put("rsakv", rsakv);
params.put("sp", getsp(pwd, pubkey, servertime, nonce));
params.put("encoding", "utf-8");
// Screen resolution reported to the server
params.put("sr", "1366*768");
params.put("prelt", "1011");
// Callback URL the login response is directed to
params.put("url", "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinassocontroller.feedbackurlcallback");
params.put("domain", "weibo.com");
params.put("returntype", "meta");
其实我也就是在研究爬虫,想集成爬虫自动登录验证功能,拿新浪微博来练练手、测试一下罢了,新浪微博不用紧张。如果你第一次运行此代码,可能会模拟登录成功;试了很多次之后,可能也会出现跟我一样的情况,要求输入验证码。或许当你看到此篇博客时,新浪微博的登录验证逻辑已经发生变化,上面的代码已经完全失效,但我想还是能够给你们些许提示。验证码破解比较有难度,我就不趟这浑水了,你们有兴趣就继续研究去吧。OK, that's all, thanks for your attention!
转载:http://iamyida.iteye.com/blog/2253117