天天看點

Java 讀取網頁、擷取內容並在本地產生 Word 檔（網頁轉 Word）

啟動

/**
 * Downloads one web page and stores it as {@code d:/ab.doc}.
 *
 * <p>The page address is taken from the first command-line argument. When it is
 * missing or blank, a usage line is printed to stderr and nothing is downloaded.
 *
 * @param args command-line arguments; {@code args[0]} is the URL of the page to fetch
 * @throws ClientProtocolException on an HTTP protocol error while fetching the page
 * @throws URISyntaxException if the supplied URL is not a valid URI
 * @throws IOException if the download fails
 */
public static void main(String[] args) throws ClientProtocolException, URISyntaxException, IOException {
  if (args == null || args.length == 0 || args[0].trim().isEmpty()) {
    System.err.println("usage: <page-url>");
    return;
  }
  String s = args[0].trim();

  writeWordFile("d:/", "ab.doc", getHttpData(s));
  System.out.println("ok");
}

網絡請求

/**
 * Fetches a page with an HTTP GET and returns the response body as text.
 * (The original note says it was written to request data from a self-study exam website.)
 *
 * <p>A fixed query parameter {@code q=x} is appended to {@code url} before the request
 * is sent. The body is decoded with the charset announced by the response entity;
 * UTF-8 is the fallback when none is announced.
 *
 * @param url address of the page to download
 * @return the response body, or an empty string when the response carries no entity
 * @throws URISyntaxException if {@code url} cannot be parsed as a URI
 * @throws ClientProtocolException on an HTTP protocol error
 * @throws IOException if the connection fails or reading the body fails
 */
public static String getHttpData(String url) throws URISyntaxException, ClientProtocolException, IOException {
  URI uri = new URIBuilder(url).addParameter("q", "x").build();
  List<Header> defaultHeaders = Arrays.<Header>asList(
      new BasicHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate"),
      new BasicHeader(HttpHeaders.CONNECTION, "keep-alive"));
  HttpUriRequest request = RequestBuilder.get().setUri(uri).build();

  // Both the client and the response own connection resources; try-with-resources
  // releases them even when the request or the body decoding throws.
  try (CloseableHttpClient httpClient = HttpClients.custom().setDefaultHeaders(defaultHeaders).build();
      CloseableHttpResponse httpResponse = httpClient.execute(request)) {
    HttpEntity entity = httpResponse.getEntity();
    if (entity == null) {
      // Some responses have no body at all; report that as empty text.
      return "";
    }
    // toString reads the entity to the end, so no separate consume() call is needed.
    return EntityUtils.toString(entity, StandardCharsets.UTF_8);
  }
}

導入本地word

/**
 * Writes {@code content} into a POIFS (OLE2) file named {@code fileName} inside the
 * directory {@code path}.
 *
 * <p>The text is encoded as UTF-8 and stored as a single stream called
 * {@code "WordDocument"} in the root of the file system.
 * NOTE(review): the stream holds the raw text (e.g. HTML), not the binary Word format;
 * presumably Word opens it by falling back to its HTML import - confirm. If the page
 * declares a charset other than UTF-8 in its markup, the bytes written here should
 * match that declaration - verify against the pages being fetched.
 *
 * <p>This method is best effort and never throws for I/O problems: when an argument is
 * missing or the directory does not exist, nothing is written and a message is printed
 * to stderr; an {@link IOException} during writing has its stack trace printed.
 *
 * @param path directory that receives the file; must already exist
 * @param fileName name of the file to create inside {@code path}
 * @param content text to store in the file
 */
public static void writeWordFile(String path, String fileName, String content) {
  if (path == null || "".equals(path) || fileName == null || content == null) {
    System.err.println("writeWordFile: nothing written, path, fileName or content is missing");
    return;
  }
  // Only write into a directory that already exists; it is not created here.
  File fileDir = new File(path);
  if (!fileDir.isDirectory()) {
    System.err.println("writeWordFile: nothing written, directory does not exist: " + path);
    return;
  }

  byte[] bytes = content.getBytes(StandardCharsets.UTF_8);
  // Resolving against the directory keeps the result correct whether or not
  // path ends with a separator.
  File target = new File(fileDir, fileName);
  try (POIFSFileSystem poifs = new POIFSFileSystem();
      InputStream in = new ByteArrayInputStream(bytes);
      OutputStream out = new FileOutputStream(target)) {
    poifs.getRoot().createDocument("WordDocument", in);
    poifs.writeFilesystem(out);
  } catch (IOException e) {
    e.printStackTrace();
  }
}

引入

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.assertj.core.util.Lists;

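導入依賴（除下列 POI 外，網路請求部分還需要 Apache HttpClient 4.3 以上版本；import 中的 `org.assertj.core.util.Lists` 則來自 AssertJ）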

<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<!-- Apache POI core artifact; it supplies the org.apache.poi.poifs.filesystem classes listed in the imports. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>      

執行後，會在本地產生 Word 檔案，內容為 HTML 頁面。

ok

持續更新