天天看点

音悦台800多万MV视频抓取

    闲得蛋疼,抓下音悦台的mv玩玩,昨晚研究到凌晨1点,现把代码分享于此,以作备忘。如果涉及侵权,请音悦台通知我,我马上删除代码,哈哈!!!

音悦台800多万MV视频抓取
音悦台800多万MV视频抓取

//因为我发现音悦台的mv id都是7位数字  

        int max = 9999999;  

        for(int i=0; i < max; i++) {  

            string videoid = i + "";  

            string html = httpclientutils.gethtml("http://www.yinyuetai.com/insite/get-video-info?flex=true&videoid=" + videoid);  

            //system.out.println(html);  

            if(html == null || "".equals(html) || html.indexof("找不到编号为") >= 0) {  

                system.out.println("没有id={" + videoid + "}这个mv");  

                continue;  

            }  

          //音悦台vip专享mv  

            pattern pattern = pattern.compile(".+(http://sh.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.mp4\\?(?!http).*?&vst=0y).+");  

            matcher matcher = pattern.matcher(html);  

            if(matcher.find()) {  

                string url = matcher.group(1);  

                system.out.println("id={" + videoid + "}vip专享mv url:" + url);  

            } else {  

                //超清mv  

                pattern = pattern.compile(".+(http://he.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.flv\\?(?!http).*?&vst=0y).+");  

                matcher = pattern.matcher(html);  

                if(matcher.find()) {  

                    string url = matcher.group(1);  

                    system.out.println("id={" + videoid + "}超清mv url:" + url);  

                } else {  

                    //高清mv  

                    //.+(http://hd.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.flv\\?[^?]+&vst=0y).+  

                    pattern = pattern.compile(".+(http://hd.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.flv\\?(?!http).*?&vst=0y).+");  

                    matcher = pattern.matcher(html);  

                    if(matcher.find()) {  

                        string url = matcher.group(1);  

                        system.out.println("id={" + videoid + "}高清mv url:" + url);  

                    } else {  

                        //流畅mv  

                        //.+(http://hc.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.flv\\?[^?]+&vst=0y).+  

                        pattern = pattern.compile(".+(http://hc.yinyuetai.com/uploads/videos/common/[a-za-z0-9]+\\.flv\\?(?!http).*?&vst=0y).+");  

                        matcher = pattern.matcher(html);  

                        if(matcher.find()) {  

                            string url = matcher.group(1);  

                            system.out.println("id={" + videoid + "}流畅mv url:" + url);  

                        } else {  

                            continue;  

                        }  

                    }  

                }  

        }  

   得到了mv视频的url后,你可以先把url全部写到一个txt文件里,然后再写个程序一行一行地读取url去下载flv文件即可啦,这个我想大家都会吧,就跟下载jpg图片类似,我就不多啰嗦了!!!效果图如下:

音悦台800多万MV视频抓取

转载:http://iamyida.iteye.com/blog/2250181