天天看點

java簡單實作xml網站地圖

本來用某魚抓取整理網站url的,結果超過一萬條要付費充會員才能導出,有點鬱悶,怎麼辦?因為java爬蟲還不怎麼會,就只有拼接了。想想其實就是查庫然後轉換成xml,勤快點自己動手。

擷取網站url的字尾位址,一般都是id主鍵。先擷取id,然後進行字元串拼接,最後輸出成xml,這裡採用springboot+mybatis+xStream。

引入依賴:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.booy</groupId>
    <artifactId>url</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>war</packaging>

    <!-- Inherit from the Spring Boot parent POM -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.1.2.RELEASE</version>
        <relativePath/>
    </parent>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.8</java.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- MyBatis Spring Boot starter -->
        <dependency>
            <groupId>org.mybatis.spring.boot</groupId>
            <artifactId>mybatis-spring-boot-starter</artifactId>
            <version>1.3.1</version>
        </dependency>
        <!-- MySQL JDBC driver -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.29</version>
        </dependency>
        <!-- JUnit, test scope only; no version is given here (presumably managed by the parent POM) -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- XStream, used to serialize the URL objects to XML -->
        <dependency>
            <groupId>com.thoughtworks.xstream</groupId>
            <artifactId>xstream</artifactId>
            <version>1.4.11.1</version>
        </dependency>
    </dependencies>

    <!-- Resource configuration: also package the *.xml files that live under src/main/java (the MyBatis mapper files) -->
    <build>
        <!-- Plugin that repackages the Spring Boot application as an executable archive -->
        <!-- NOTE(review): the original comment says "fat jar", but packaging above is war - confirm the intended artifact type -->
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
        <resources>
            <resource>
                <directory>src/main/java</directory>
                <includes>
                    <include>**/*.xml</include>
                </includes>
                <filtering>true</filtering>
            </resource>
            <resource>
                <directory>src/main/resources</directory>
                <includes>
                    <include>**/*.*</include>
                </includes>
            </resource>
        </resources>
    </build>
</project>
           

要查庫,需要先配置下資料源

# Basic data source settings
spring.datasource.url=jdbc:mysql://localhost:3306/test?characterEncoding=utf8
spring.datasource.username=test
spring.datasource.password=123456
spring.datasource.driverClassName = com.mysql.jdbc.Driver
# Location of the MyBatis mapper XML files
mybatis.mapper-locations=classpath*:com/booy/url/dao/mapper/*.xml
# Type-alias package, so resultType in the mapper XML can omit the fully qualified class name
mybatis.type-aliases-package=com.booy.url.pojo
# View resolver: suffix of the resources that views resolve to
spring.mvc.view.suffix=.html
           

接口

package com.booy.url.dao;

import java.util.List;

/**
 * Data-access interface for the ids that page URLs are built from.
 * The query itself lives in the MyBatis mapper XML whose namespace is this interface.
 */
public interface UrlDao {
    /**
     * Reads every id from the database; the mapper statement with the same id
     * selects the {@code web_id} column of {@code test_website}.
     *
     * @return the selected ids, one per row
     */
    List<Integer> getAllId();
}
           

mapper查詢

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper
        PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<!-- Mapper bound to the com.booy.url.dao.UrlDao interface through its namespace. -->
<mapper namespace="com.booy.url.dao.UrlDao">
    <!-- Backs UrlDao.getAllId(): selects the web_id column of every row in test_website. -->
    <select id="getAllId"  resultType="int">
        select web_id
        from test_website
    </select>
</mapper>
           

service接口

package com.booy.url.service;

import java.util.List;

/**
 * Service that turns the stored ids into full page URLs.
 */
public interface UrlService {
    /**
     * Builds one URL per id.
     * Despite the method name, the returned elements are complete URLs, not ids.
     *
     * @return the generated URLs, each held in its own {@link StringBuilder}
     */
    List<StringBuilder> getAllId();
}
           

業務邏輯實作。如果一個表的資料超過5萬條,可以做下判斷,寫入到第二個xml;如果需要把多個表寫入到同一個xml中,就別覆寫了,直接追加資料即可。

package com.booy.url.service.Impl;

import com.booy.url.dao.UrlDao;
import com.booy.url.pojo.Url;
import com.booy.url.service.UrlService;
import com.thoughtworks.xstream.XStream;
import com.thoughtworks.xstream.io.xml.Xpp3Driver;
import org.springframework.stereotype.Service;

import javax.annotation.Resource;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * Builds a page URL for every id returned by {@link UrlDao#getAllId()} and writes the
 * resulting entries to an XML sitemap file.
 */
@Service
public class UrlServiceImpl implements UrlService {
    /** Text placed in front of each id to form a page URL. */
    private static final String URL_PREFIX = "http://www.zhuangyi.net/w/";
    /** Text appended after each id. */
    private static final String URL_SUFFIX = ".html";
    /** Destination file; an existing file is overwritten on every run. */
    private static final String SITEMAP_PATH = "D:/xml/sitemap.xml";
    /** XML declaration prepended to the serialized document. */
    private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";

    @Resource
    private UrlDao urlDao;

    /**
     * Builds one URL per id, writes all of them to the sitemap file and returns them.
     *
     * @return the generated URLs, in the order the ids were returned by the DAO
     */
    @Override
    public List<StringBuilder> getAllId() {
        List<Integer> allId = urlDao.getAllId();
        // Plain URLs handed back to the caller.
        List<StringBuilder> urls = new ArrayList<>(allId.size());
        // URL entries that are serialized to XML.
        List<Url> urlsXml = new ArrayList<>(allId.size());
        // The date is the same for every entry of one run, so compute it once.
        String today = today();
        for (Integer id : allId) {
            StringBuilder sb = new StringBuilder(40);
            sb.append(URL_PREFIX).append(id).append(URL_SUFFIX);
            urls.add(sb);
            urlsXml.add(buildUrl(sb.toString(), today));
        }
        outXml(urlsXml);
        return urls;
    }

    /**
     * Builds a single sitemap entry for the given URL, dated today.
     *
     * @param sb the complete URL
     * @return an entry with priority {@code 0.6}, change frequency {@code always}
     *         and today's date as last-modified value
     */
    public Url simpleObject(StringBuilder sb) {
        return buildUrl(sb.toString(), today());
    }

    /**
     * Serializes the entries with XStream and writes the document to the sitemap file.
     * An I/O failure is reported on standard error and does not propagate; the generated
     * XML is printed to standard output in either case.
     *
     * @param urlsXml the entries to write
     */
    public void outXml(List<Url> urlsXml) {
        XStream xStream = new XStream(new Xpp3Driver());
        // Element names: <url> for each entry, <urlset> for the enclosing list.
        xStream.alias("url", Url.class);
        xStream.alias("urlset", List.class);

        // xStream.toXML(urlsXml, out) would write no XML declaration, so the document is
        // built as a String and the header is prepended.
        String s = XML_HEADER + xStream.toXML(urlsXml);

        // try-with-resources closes the stream on every path; a failure to open the file is
        // an IOException too, so it is handled here instead of leaving a null stream behind.
        try (FileOutputStream out = new FileOutputStream(SITEMAP_PATH)) {
            // Encode explicitly as UTF-8 to match the encoding named in the XML declaration.
            out.write(s.getBytes(StandardCharsets.UTF_8));
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println(s);
    }

    /** Returns today's date as {@code yyyy-MM-dd}, independent of the default locale. */
    private static String today() {
        return LocalDate.now().toString();
    }

    /** Creates an entry for {@code loc} with the fixed priority and change frequency. */
    private static Url buildUrl(String loc, String lastmod) {
        Url url = new Url();
        url.setLoc(loc);
        url.setPriority("0.6");
        url.setChangefreq("always");
        url.setLastmod(lastmod);
        return url;
    }
}
           

xml元素實體

package com.booy.url.pojo;

/**
 * One entry of the sitemap: a page address plus its metadata.
 * All values are kept as plain strings.
 */
public class Url {
    // NOTE(review): field order is left unchanged; reflection-based serializers such as
    // XStream presumably emit fields in declaration order - confirm before reordering.
    /** Page address. */
    private String loc;
    /** Priority value of the page, e.g. {@code "0.6"}. */
    private String priority;
    /** Last-modified date of the page. */
    private String lastmod;
    /** Change frequency of the page, e.g. {@code "always"}. */
    private String changefreq;

    /** @return the page address */
    public String getLoc() {
        return loc;
    }

    /** @param loc the page address */
    public void setLoc(String loc) {
        this.loc = loc;
    }

    /** @return the priority value */
    public String getPriority() {
        return priority;
    }

    /** @param priority the priority value */
    public void setPriority(String priority) {
        this.priority = priority;
    }

    /** @return the last-modified date */
    public String getLastmod() {
        return lastmod;
    }

    /** @param lastmod the last-modified date */
    public void setLastmod(String lastmod) {
        this.lastmod = lastmod;
    }

    /** @return the change frequency */
    public String getChangefreq() {
        return changefreq;
    }

    /** @param changefreq the change frequency */
    public void setChangefreq(String changefreq) {
        this.changefreq = changefreq;
    }
}
           

在頁面上看看url

/**
 * REST endpoint that exposes the generated URLs so they can be inspected in a browser.
 */
@RestController
public class resultcontroller {

    /** Service that produces the URL list. */
    @Resource
    private UrlService urlService;

    /**
     * Asks the service for all URLs and returns them as the response body.
     *
     * @return the URLs produced by {@link UrlService#getAllId()}
     */
    @RequestMapping
    public List<StringBuilder> test() {
        final List<StringBuilder> siteUrls = urlService.getAllId();
        return siteUrls;
    }
}
           

啟動類

/**
 * Spring Boot entry point of the application.
 * {@code @MapperScan} points MyBatis at the {@code com.booy.url.dao} package, where the
 * mapper interfaces are declared.
 */
@SpringBootApplication
@MapperScan(basePackages = "com.booy.url.dao")
public class UrlApplication {
    /**
     * Starts the Spring application.
     *
     * @param args command-line arguments, passed through to Spring unchanged
     */
    public static void main(String[] args) {
        SpringApplication.run(UrlApplication.class, args);
    }
}
           

控制台輸出,實際使用時就別在控制台和頁面上輸出了。

java簡單實作xml網站地圖
java簡單實作xml網站地圖