天天看点

在PHP语言中使用爬虫IP的代码示例

经常做数据抓取的技术人员应该知道,优化好的爬虫程序能事半功倍,在工作量增加或业务类型调整时也更加方便快捷。下面提供几种在PHP语言下使用爬虫IP的代码示例,供大家参考:

PHP curl

/**
 * Fetch a URL with cURL through an HTTP proxy that requires basic authentication.
 *
 * @param string     $targetUrl     URL of the target site to request.
 * @param string     $proxyIp       IP address (or host name) of the proxy server.
 * @param int|string $proxyPort     Port of the proxy server.
 * @param string     $proxyUser     Proxy auth key (user name).
 * @param string     $proxyPassword Proxy auth password.
 *
 * @return string|false Response body on success, false when the handle could
 *                      not be created or the transfer failed.
 */
function sendRequest($targetUrl, $proxyIp, $proxyPort, $proxyUser, $proxyPassword)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL            => $targetUrl,
        CURLOPT_HEADER         => 0,
        // Return the body from curl_exec() instead of printing it.
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_PROXY          => $proxyIp,
        CURLOPT_PROXYPORT      => $proxyPort,
        // Must be one of the CURLPROXY_* constants; the string 'HTTP' only
        // worked because it is cast to 0, which happens to equal CURLPROXY_HTTP.
        CURLOPT_PROXYTYPE      => CURLPROXY_HTTP,
        CURLOPT_PROXYUSERPWD   => $proxyUser . ':' . $proxyPassword,
        // NOTE(review): TLS certificate and host-name verification are switched
        // off, as in the original sample. This allows man-in-the-middle attacks
        // on https targets; enable both (true / 2) for anything beyond a demo.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => 0,
    ]);

    $data = curl_exec($ch);

    curl_close($ch);

    return $data;
}

// Demo call: request the target URL through the sample proxy and dump the result.
// The proxy address and credentials below are the article's sample values.
$data = sendRequest(
    'http://jshk.com.cn/ip',
    '219.151.125.106',
    31615,
    '895314XY',
    '24D6YB309ZCB'
);

var_dump($data);

PHP stream

/**
 * Fetch a URL with PHP's http stream wrapper through a proxy that requires
 * basic authentication.
 *
 * @param string     $targetUrl     URL of the target site to request.
 * @param string     $proxyIp       IP address (or host name) of the proxy server;
 *                                  a value that already carries a scheme
 *                                  (e.g. "tcp://1.2.3.4") is used unchanged.
 * @param int|string $proxyPort     Port of the proxy server.
 * @param string     $proxyUser     Proxy auth key (user name).
 * @param string     $proxyPassword Proxy auth password.
 *
 * @return string|false Response body on success, false on failure
 *                      (file_get_contents() also emits a warning in that case).
 */
function sendRequest($targetUrl, $proxyIp, $proxyPort, $proxyUser, $proxyPassword)
{
    $proxyAuth = base64_encode($proxyUser . ':' . $proxyPassword);

    // The http context documents "proxy" as a transport URI such as
    // tcp://host:port, so add the scheme unless the caller supplied one.
    $proxyUri = $proxyIp . ':' . $proxyPort;
    if (strpos($proxyIp, '://') === false) {
        $proxyUri = 'tcp://' . $proxyUri;
    }

    // For https targets the certificate must be checked against the target
    // host, not the proxy. If that needs to be set explicitly, add
    //   'ssl' => ['peer_name' => parse_url($targetUrl, PHP_URL_HOST)]
    // to the options; the older 'SNI_server_name' option is deprecated
    // since PHP 5.6 in favour of 'peer_name'.
    $options = [
        'http' => [
            'proxy'           => $proxyUri,
            'header'          => "Proxy-Authorization: Basic {$proxyAuth}",
            'method'          => 'GET',
            // Send the absolute URL in the request line, as proxies expect.
            'request_fulluri' => true,
        ],
    ];

    $context = stream_context_create($options);

    return file_get_contents($targetUrl, false, $context);
}

// Demo call: request the target URL through the sample proxy and dump the result.
// The proxy address and credentials below are the article's sample values.
$data = sendRequest(
    'http://jshk.com.cn/ip',
    '219.151.125.106',
    31615,
    '895314XY',
    '24D6YB309ZCB'
);

var_dump($data);
/**
 * Fetch a URL with GuzzleHttp through a proxy that requires basic authentication.
 *
 * The credentials are embedded in the proxy URL so the HTTP handler presents
 * them to the proxy itself. Passing them as an ordinary "Proxy-Authorization"
 * request header would forward them to the target server on https requests,
 * because those headers travel inside the CONNECT tunnel.
 *
 * @param string     $targetUrl     URL of the target site to request.
 * @param string     $proxyIp       IP address (or host name) of the proxy server,
 *                                  optionally prefixed with a scheme
 *                                  (defaults to "http").
 * @param int|string $proxyPort     Port of the proxy server.
 * @param string     $proxyUser     Proxy auth key (user name).
 * @param string     $proxyPassword Proxy auth password.
 *
 * @return string Response body.
 *
 * @throws \GuzzleHttp\Exception\GuzzleException When the request fails.
 */
function sendRequest($targetUrl, $proxyIp, $proxyPort, $proxyUser, $proxyPassword)
{
    // Split an optional "scheme://" prefix off the proxy host so the
    // credentials can be inserted between the scheme and the host.
    $scheme = 'http';
    $host = $proxyIp;
    $schemeEnd = strpos($proxyIp, '://');
    if ($schemeEnd !== false) {
        $scheme = substr($proxyIp, 0, $schemeEnd);
        $host = substr($proxyIp, $schemeEnd + 3);
    }

    // URL-encode the credentials so characters such as ":" or "@" cannot
    // corrupt the proxy URL.
    $proxyUrl = sprintf(
        '%s://%s:%s@%s:%s',
        $scheme,
        rawurlencode($proxyUser),
        rawurlencode($proxyPassword),
        $host,
        $proxyPort
    );

    $client = new \GuzzleHttp\Client();

    $result = $client->request('GET', $targetUrl, [
        'proxy' => $proxyUrl,
    ]);

    return $result->getBody()->getContents();
}

// Demo call: request the target URL through the sample proxy and dump the result.
// The proxy address and credentials below are the article's sample values.
$data = sendRequest(
    'http://jshk.com.cn/ip',
    '219.151.125.106',
    31615,
    '895314XY',
    '24D6YB309ZCB'
);

var_dump($data);