使用Jsoup下載圖檔、PDF和壓縮檔等檔案時,需要將響應主體轉換為輸入流。轉換為輸入流的目的是能夠以位元組為單位讀取響應資料,再將其寫入指定檔案。
以圖檔下載為例,程式3-12使用bodyStream()方法將響應轉換為輸入流(BufferedInputStream),並以緩衝輸出流的方式寫入指定檔案。另外,針對圖檔和PDF等檔案,在執行URL請求擷取Response時,必須透過ignoreContentType(boolean ignoreContentType)方法設定忽略響應內容的類型,否則Jsoup會因為不支援該內容類型而拋出UnsupportedMimeTypeException。
//程式3-12
/**
 * Downloads a binary resource (an image in this example) with Jsoup and
 * writes it to a local file.
 *
 * <p>The response body is consumed as a byte stream via
 * {@code Connection.Response.bodyStream()} and copied to disk through a
 * buffered output stream.
 */
public class JsoupConnectInputstream {

    /** Size, in bytes, of the intermediate buffer used while copying. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Fetches the example image and stores it at a fixed local path.
     *
     * @param args unused
     * @throws IOException if the request fails or the file cannot be written
     */
    public static void main(String[] args) throws IOException {
        String imageUrl = "https://www.leichuangkj.com/img/WechatIMG2.png";
        Connection connect = Jsoup.connect(imageUrl);
        Connection.Response response = connect
                .method(Connection.Method.GET)
                // Non-HTML content types (images, PDF, ...) are rejected unless this is set.
                .ignoreContentType(true)
                // 0 = no size cap; otherwise Jsoup truncates bodies larger than its default limit.
                .maxBodySize(0)
                .execute();
        System.out.println("檔案類型為:" + response.contentType());
        // Only proceed when the request succeeded.
        if (response.statusCode() == 200) {
            // Obtain the response body as an INPUT stream; try-with-resources
            // guarantees the stream is closed even if saving fails.
            try (BufferedInputStream bufferedInputStream = response.bodyStream()) {
                // NOTE(review): the URL points at a .png but the target file is named .jpg — confirm intended.
                saveImage(bufferedInputStream, "/Users/steven/Documents/代碼/project/spider/src/main/java/com/file/1.jpg");
            }
        }
    }

    /**
     * Copies every byte from {@code inputStream} into the file at {@code savePath}.
     *
     * <p>The output file is always closed, including when an I/O error occurs
     * mid-copy. The input stream is NOT closed here; it remains owned by the caller.
     *
     * @param inputStream source of the bytes to save
     * @param savePath    path of the file to create or overwrite
     * @throws IOException if reading the input or writing the file fails
     */
    static void saveImage(BufferedInputStream inputStream, String savePath) throws IOException {
        byte[] buffer = new byte[BUFFER_SIZE];
        int len;
        // Both streams are declared as resources so the file handle is released on every path.
        try (FileOutputStream fileOutStream = new FileOutputStream(new File(savePath));
             BufferedOutputStream bufferedOut = new BufferedOutputStream(fileOutStream)) {
            // read() returns -1 at end of stream; otherwise the number of bytes placed in buffer.
            while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
                bufferedOut.write(buffer, 0, len);
            }
            // Flush explicitly so a flush failure surfaces from the try body itself.
            bufferedOut.flush();
        }
    }
}