天天看點

PHP爬取網站内容

最近公司需要存放在阿里雲對象存儲(OSS)裡的視訊檔案,而且需要 18 套課程的視訊源檔案,這週就要。我想了一下,要是一個一個找那可就麻煩了:一套課程有 n 個章節,每個章節又有 n 個視訊檔案。所以我下定決心寫一個自動下載的程式。
廢話不多說,先看效果
PHP爬取網站内容
下面是代碼
<?php 

/**
 * Small collection of HTTP POST helpers, each built on a different transport:
 * cURL (post), the stream wrapper behind file_get_contents (post2), and a raw
 * socket (post3).
 *
 * All three helpers report a transport failure by returning false.
 */
class Request{

    /**
     * Send a POST request using cURL.
     *
     * @param string       $url       Target URL.
     * @param string|array $post_data Value handed to CURLOPT_POSTFIELDS; not set when it compares equal to ''.
     * @param int          $timeout   Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT only;
     *                                the total transfer time is not limited).
     * @return string|false Response body (headers excluded), or false on failure.
     */
    public static function post($url, $post_data = '', $timeout = 5){//curl
        $ch = curl_init();
        if ($ch === false) {
            // cURL could not create a handle; nothing can be sent.
            return false;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_POST, 1);
        if ($post_data != '') {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
        }
        // Return the response from curl_exec() instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_HEADER, false);

        $file_contents = curl_exec($ch);
        curl_close($ch);

        return $file_contents;
    }

    /**
     * Send a form-encoded POST request through file_get_contents() with an
     * HTTP stream context.
     *
     * @param string $url  Target URL.
     * @param array  $data Form fields; encoded with http_build_query().
     * @return string|false Response body, or false on failure.
     */
    public static function post2($url, $data=array()){//file_get_content
        $postdata = http_build_query($data);

        $opts = array(
            'http' => array(
                'method'  => 'POST',
                'header'  => 'Content-type: application/x-www-form-urlencoded',
                'content' => $postdata,
            ),
        );
        $context = stream_context_create($opts);

        return file_get_contents($url, false, $context);
    }

    /**
     * Send a form-encoded POST request over a plain socket to port 80.
     *
     * @param string $host   Host to connect to; also sent as the Host header.
     * @param string $path   Request path placed on the request line.
     * @param string $query  Already-encoded request body.
     * @param string $others Extra raw header lines; each must end with "\r\n".
     * @return string|false Everything read from the socket until it closes
     *                      (status line and headers included), or false when
     *                      the connection cannot be opened.
     */
    public static function post3($host,$path,$query,$others=''){//fsocket
        $post  = "POST $path HTTP/1.1\r\nHost: $host\r\n";
        // "{$others}" replaces the "${others}" form, which is deprecated since PHP 8.2.
        $post .= "Content-type: application/x-www-form-urlencoded\r\n{$others}";
        $post .= "User-Agent: Mozilla 4.0\r\nContent-length: ";
        $post .= strlen($query)."\r\nConnection: close\r\n\r\n$query";

        $h = fsockopen($host, 80, $errno, $errstr);
        if ($h === false) {
            // Without this guard fwrite()/fread()/fclose() would be called on false.
            return false;
        }

        fwrite($h, $post);

        // "Connection: close" was requested, so read until the peer closes the socket.
        $r = '';
        while (!feof($h)) {
            $b = fread($h, 8192);
            if ($b === false || $b === '') {
                break;
            }
            $r .= $b;
        }

        fclose($h);

        return $r;
    }

}

// Remove the execution time limit: fetching a whole course can run for a long time.
ini_set('max_execution_time', '0');
// Raise the PHP memory limit.
@ini_set('memory_limit', '4048M');
// Course ids to fetch: 21,30,132,9,77,128,129,133,130,134,7,16,135,29,31,92,146,147
// Id 77 failed on a previous run.
$data = Request::post2('https://www.xxxx.cn/index/details_data', array('id' => 77));
echo '<pre>';
$data = is_string($data) ? json_decode($data, true) : null;
if (!is_array($data)) {
    // Request failed or the response was not a JSON object/array: nothing to download.
    echo "Failed to fetch or decode the course data\n";
    $data = array();
}

foreach ($data as $k => $v) {
    // Only array entries that carry a name and a chapter list describe a course.
    if (!is_array($v) || !isset($v['name'], $v['Catalogdata']) || !is_array($v['Catalogdata'])) {
        continue;
    }

    // One folder per course.
    // NOTE(review): directory names are converted to GBK while the file names
    // below are left as-is — presumably aimed at a GBK Windows filesystem; confirm.
    // NOTE(review): names come straight from the remote response and are used
    // as path components; consider sanitising them.
    $courseDir = iconv("UTF-8", "GBK", "Public/".$v['name']);
    if (!is_dir($courseDir) && !mkdir($courseDir, 0777, true)) {
        echo "Failed to create directory {$courseDir}\n";
        continue;
    }

    foreach ($v['Catalogdata'] as $kk => $vv) {
        // Counter of videos saved for this chapter, echoed as progress output.
        $i = 0;

        // One sub-folder per chapter, named "<id><name>".
        $dir = iconv("UTF-8", "GBK", "Public/".$v['name'].'/'.$vv['id'].$vv['name']);
        if (!is_dir($dir) && !mkdir($dir, 0777, true)) {
            echo "Failed to create directory {$dir}\n";
            continue;
        }

        // Each video is fetched exactly once. copy() streams the remote file
        // to disk, so a whole video is never held in memory, and nothing is
        // written when the source cannot be opened.
        if (!empty($vv['video_url'])) {
            $source = 'http:'.$vv['video_url'];
            if (!copy($source, $dir.'/'.$vv['name'].'.mov')) {
                echo "Failed to download {$source}\n";
            }
        }

        // Videos listed under the chapter, when present.
        $chapter = isset($vv['chapter']) ? $vv['chapter'] : null;
        if (is_array($chapter)) {
            foreach ($chapter as $key => $value) {
                if (empty($value['vedio'])) {
                    continue;
                }
                $source = 'http:'.$value['vedio'];
                if (copy($source, $dir.'/'.$value['title'].'.mov')) {
                    echo ++$i;
                } else {
                    echo "Failed to download {$source}\n";
                }
            }
        }
    }
}
?>