天天看點

音悅台800多萬MV視訊抓取

    閑得蛋疼,抓下音悅台的mv玩玩,昨晚研究到凌晨1點,現把代碼分享於此,以作備忘,如果有涉及侵權,請音悅台通知我,我馬上刪除代碼,哈哈!!!

音悅台800多萬MV視訊抓取
音悅台800多萬MV視訊抓取

//因為我發現音悅台的mv id都是7位數字  

        int max = 9999999;  

        for(int i=0; i < max; i++) {  

            string videoid = i + "";  

            string html = httpclientutils.gethtml("http://www.yinyuetai.com/insite/get-video-info?flex=true&videoid=" + videoid);  

            //system.out.println(html);  

            if(html == null || "".equals(html) || html.indexof("找不到編号為") >= 0) {  

                system.out.println("沒有id={" + videoid + "}這個mv");  

                continue;  

            }  

          //音悅台vip專享mv  

            pattern pattern = pattern.compile(".+(http://sh.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.mp4\\?(?!http).*?&vst=0y).+");  

            matcher matcher = pattern.matcher(html);  

            if(matcher.find()) {  

                string url = matcher.group(1);  

                system.out.println("id={" + videoid + "}vip專享mv url:" + url);  

            } else {  

                //超清mv  

                pattern = pattern.compile(".+(http://he.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.flv\\?(?!http).*?&vst=0y).+");  

                matcher = pattern.matcher(html);  

                if(matcher.find()) {  

                    string url = matcher.group(1);  

                    system.out.println("id={" + videoid + "}超清mv url:" + url);  

                } else {  

                    //高清mv  

                    //.+(http://hd.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.flv\\?[^?]+&vst=0y).+  

                    pattern = pattern.compile(".+(http://hd.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.flv\\?(?!http).*?&vst=0y).+");  

                    matcher = pattern.matcher(html);  

                    if(matcher.find()) {  

                        string url = matcher.group(1);  

                        system.out.println("id={" + videoid + "}高清mv url:" + url);  

                    } else {  

                        //流暢mv  

                        //.+(http://hc.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.flv\\?[^?]+&vst=0y).+  

                        pattern = pattern.compile(".+(http://hc.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.flv\\?(?!http).*?&vst=0y).+");  

                        matcher = pattern.matcher(html);  

                        if(matcher.find()) {  

                            string url = matcher.group(1);  

                            system.out.println("id={" + videoid + "}流暢mv url:" + url);  

                        } else {  

                            continue;  

                        }  

                    }  

                }  

        }  

   得到了mv視訊的url後,你可以先把url全部寫到一個txt檔案裡,然後就寫個程式一行一行地讀取url去下載flv檔案即可啦,這個大家我想都會吧,就跟下載jpg圖檔類似的,我就不多囉嗦了!!!效果圖如下:

音悅台800多萬MV視訊抓取

轉載:http://iamyida.iteye.com/blog/2250181