天天看點

一個單詞詞根統計系統

開始寫一些小東西來方便自己的生活

前幾日和 leader 討論了一下記單詞的方法（別問我程式員為啥要記單詞），結論是詞根法比較好。以前也見過一些既用詞根又用別的方法記單詞的，但是比較難。leader 說他發現只用詞根的方法也是可以的，只要量足夠大，就可以發現其中的規律。遂前往下載單詞詞庫和詞根表。使用簡單的字元串比對算法，統計了每種詞根在單詞中出現的次數。然後調用百度翻譯接口（本來打算調用谷歌翻譯，但由於谷歌的限制措施，所以宣告失敗），對詞庫中的單詞進行翻譯。以上已經寫完，不過功能尚未成型，後續功能我還要仔細想想。

一個單詞詞根統計系統

上面是項目結構

模組一：從比較凌亂的詞庫中，將單詞取出來，做成特定格式放到新的檔案中

package com.chaojilaji.Util;

import java.io.*;
import java.util.Arrays;

/**
 * ClassName WrodsFromFiles
 * Description Helpers that normalise a raw word list and a raw word-root table into
 *             single-line, space-separated files, and that list the words containing
 *             each root.
 *
 * <p>All files are read and written with the platform default charset, exactly as the
 * original implementation did, so files produced by earlier runs stay readable.
 *
 * Auther chaoj
 * Date 2018/12/8 22:19
 * Version 1.0
 **/
public class WrodsFromFiles {

    /**
     * Extracts the words of {@code <filenmame>.txt} into {@code <filenmame>new.txt}.
     *
     * <p>The lines of the input are joined (no separator is inserted at a line break, so
     * a line should end with a space if its last word must stay separate from the next
     * line), double quotes and tabs are removed, the text is split on single spaces, the
     * tokens are sorted with the natural {@link String} ordering and the non-empty ones
     * are written back separated by one space. The number of words written is printed to
     * standard output.
     *
     * @param filenmame path of the input file without the {@code .txt} extension
     * @return the path of the file that was written, or {@code null} when the input file
     *         does not exist
     * @throws IOException if reading or writing fails
     */
    public static String changeFile(String filenmame) throws IOException {
        String newfilename = filenmame + "new.txt";
        File source = new File(filenmame + ".txt");
        if (!source.exists()) {
            System.out.println("檔案不存在");
            return null;
        }

        String cleaned = readJoinedLines(source).replace("\"", "").replace("\t", "");
        String[] tokens = cleaned.split(" ");
        Arrays.sort(tokens);

        StringBuilder out = new StringBuilder();
        int count = 0;
        for (String token : tokens) {
            // Consecutive spaces produce empty tokens; a lone backspace is noise as well.
            if (token.isEmpty() || token.equals("\b")) {
                continue;
            }
            if (count > 0) {
                out.append(' ');
            }
            out.append(token);
            count++;
        }
        System.out.println(count);

        writeText(new File(newfilename), out.toString());
        return newfilename;
    }

    /**
     * Converts the root table {@code <filename>.txt} into {@code <filename>_new.txt}.
     *
     * <p>Every input line is expected to contain a {@code '|'} followed by one or more
     * comma-separated roots and then the meaning. The meaning starts after the first
     * {@code '='} that follows the {@code '|'}; when there is no such {@code '='} it starts
     * at the first character that is neither an ASCII letter nor a comma. Each root yields
     * one {@code root:meaning} entry (all spaces removed); entries are separated by a single
     * space. Lines without {@code '|'} and empty roots are skipped.
     *
     * @param filename path of the input file without the {@code .txt} extension
     * @return the path of the file that was written, or {@code null} when the input file
     *         does not exist
     * @throws IOException if reading or writing fails
     */
    public  static String changeRootFiles(String filename) throws IOException {
        String newfilename = filename + "_new.txt";
        File source = new File(filename + ".txt");
        if (!source.exists()) {
            System.out.println("檔案不存在");
            return null;
        }

        StringBuilder out = new StringBuilder();
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(new FileInputStream(source)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                appendRootEntries(out, line);
            }
        }

        writeText(new File(newfilename), out.toString());
        return newfilename;
    }

    /**
     * Writes, for every root, the words that contain it to {@code <words>_result.txt}.
     *
     * <p>{@code <words>.txt} must hold space-separated words and {@code <roots>.txt}
     * space-separated {@code root:meaning} entries (the formats produced by
     * {@link #changeFile(String)} and {@link #changeRootFiles(String)}). For each entry the
     * report contains the meaning, a line break, {@code "root: "}, every word containing the
     * root followed by {@code ", "}, and a blank line. Entries without a {@code ':'} or with
     * an empty root are skipped, because an empty root would match every word.
     *
     * @param words path of the word file without the {@code .txt} extension
     * @param roots path of the root file without the {@code .txt} extension
     * @throws IOException if reading or writing fails
     */
    public static void tongJi(String words,String roots) throws IOException {
        String res = words + "_result.txt";
        File wordfile = new File(words + ".txt");
        File rootfile = new File(roots + ".txt");
        if (!wordfile.exists() || !rootfile.exists()) {
            System.out.println("檔案不存在");
            return;
        }

        String[] wordList = readJoinedLines(wordfile).split(" ");
        String[] rootEntries = readJoinedLines(rootfile).split(" ");

        StringBuilder report = new StringBuilder();
        for (String entry : rootEntries) {
            int colon = entry.indexOf(':');
            if (colon <= 0) {
                // No separator or empty root: nothing meaningful to match against.
                continue;
            }
            String root = entry.substring(0, colon);
            String meaning = entry.substring(colon + 1);

            report.append(meaning).append('\n').append(root).append(": ");
            for (String word : wordList) {
                if (word.contains(root)) {
                    report.append(word).append(", ");
                }
            }
            report.append("\n\n");
        }

        writeText(new File(res), report.toString());
    }

    /** Parses one line of the raw root table and appends its entries to {@code out}. */
    private static void appendRootEntries(StringBuilder out, String line) {
        int bar = line.indexOf('|');
        if (bar == -1) {
            return;
        }
        int rootStart = bar + 1;
        int equalsSign = line.indexOf('=', rootStart);

        String rootList;
        String meaning;
        if (equalsSign != -1) {
            rootList = line.substring(rootStart, equalsSign);
            meaning = line.substring(equalsSign + 1);
        } else {
            // No '=': the roots are the leading run of letters and commas.
            int end = rootStart;
            while (end < line.length()
                    && (line.charAt(end) == ',' || isAsciiLetter(line.charAt(end)))) {
                end++;
            }
            rootList = line.substring(rootStart, end);
            meaning = line.substring(end);
        }

        // Entries are space-separated in the output, so they must not contain spaces.
        meaning = meaning.replace(" ", "");
        for (String root : rootList.split(",")) {
            String cleanedRoot = root.replace(" ", "");
            if (cleanedRoot.isEmpty()) {
                continue;
            }
            if (out.length() > 0) {
                out.append(' ');
            }
            out.append(cleanedRoot).append(':').append(meaning);
        }
    }

    /** Returns whether {@code c} is one of {@code a-z} or {@code A-Z}. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /** Reads a whole file and returns its lines concatenated without any separator. */
    private static String readJoinedLines(File file) throws IOException {
        StringBuilder joined = new StringBuilder();
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                joined.append(line);
            }
        }
        return joined.toString();
    }

    /** Replaces the content of {@code target} with {@code text}, creating the file if needed. */
    private static void writeText(File target, String text) throws IOException {
        try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
            out.write(text.getBytes());
        }
    }
}

其中有三個類別是百度翻譯相關的代碼（類別名稱在原文此處遺失；下方代碼用到的 TransApi 應為其中之一）

模組二：翻譯並處理

package com.chaojilaji.Util;

import net.sf.ezmorph.*;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;

import java.io.*;

/**
 * ClassName FanYi
 * Description Translates the words of a space-separated word file in batches through
 *             {@code TransApi} and keeps track of how far the translation has progressed.
 *
 * <p>For a base path {@code X} the class works with three files: {@code X.txt} (the words),
 * {@code X_fanyi.txt} (one {@code "word: translation"} line appended per translated word)
 * and {@code X_jilu.txt} (the index at which the next batch should start). Files are read
 * and written with the platform default charset.
 *
 * Auther chaoj
 * Date 2018/12/9 3:19
 * Version 1.0
 **/
public class FanYi {
    public static final String APP_ID = "";
    private static final String SECURITY_KEY = "";

    /**
     * Determines the index at which translation should resume.
     *
     * <p>If {@code <name>_jilu.txt} exists and has a first line, that line is parsed as the
     * index. Otherwise the number of lines in {@code <name>_fanyi.txt} is returned.
     *
     * @param name base path of the word file, without suffix or extension
     * @return the resume index, or {@code -1} when it cannot be determined because
     *         {@code <name>_fanyi.txt} does not exist either
     * @throws IOException if reading fails
     * @throws NumberFormatException if the first line of the progress file is not an integer
     */
    public static int getNumberfromfile(String name) throws IOException {
        File progressFile = new File(name + "_jilu.txt");
        if (progressFile.exists()) {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(progressFile)))) {
                String line = reader.readLine();
                if (line != null) {
                    return Integer.parseInt(line.trim());
                }
            }
        }

        // Build the fallback name from the base path, not from the "_jilu.txt" name.
        File translatedFile = new File(name + "_fanyi.txt");
        if (!translatedFile.exists()) {
            System.out.println("檔案不存在");
            return -1;
        }
        int cnt = 0;
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(translatedFile)))) {
            while (reader.readLine() != null) {
                cnt++;
            }
        }
        // NOTE(review): fanYiFromword treats `begin` as a 1-based position, so resuming at
        // `cnt` re-translates the last word already in the file — confirm whether cnt + 1
        // is wanted here.
        return cnt;
    }


    /**
     * Translates one word from English ({@code "en"}) to Chinese ({@code "zh"}).
     *
     * <p>The {@code dst} field of the last element of the response's {@code trans_result}
     * array is returned.
     *
     * @param word the text to translate
     * @return the translation, or {@code null} when the response has no usable
     *         {@code trans_result} entry
     */
    public static String fanyi(String word){
        TransApi api = new TransApi(APP_ID, SECURITY_KEY);
        String json = api.getTransResult(word, "en", "zh");
        JSONObject response = JSONObject.fromObject(json);
        Object results = response.get("trans_result");
        if (!(results instanceof JSONArray)) {
            return null;
        }
        JSONArray resultArray = (JSONArray) results;
        int n = resultArray.size();
        if (n == 0) {
            return null;
        }
        Object last = resultArray.get(n - 1);
        if (!(last instanceof JSONObject)) {
            return null;
        }
        Object dst = ((JSONObject) last).get("dst");
        return dst instanceof String ? (String) dst : null;
    }

    /**
     * Translates up to {@code le} words of {@code <name>.txt}, starting with the word whose
     * 1-based position is {@code begin} (values below 1 start at the first word).
     *
     * <p>The results are appended to {@code <name>_fanyi.txt} as {@code "word: translation"}
     * lines, and {@code begin + le} is stored in {@code <name>_jilu.txt}.
     *
     * @param name  base path of the word file, without suffix or extension
     * @param begin 1-based position of the first word to translate
     * @param le    maximum number of words to translate in this call
     * @return {@code begin + le} after the batch has been written, or {@code begin} when
     *         {@code <name>.txt} does not exist
     * @throws IOException if reading or writing fails
     */
    public static int fanYiFromword(String name,int begin,int le) throws IOException {
        File source = new File(name + ".txt");
        if (!source.exists()) {
            System.out.println("檔案不存在");
            return begin;
        }

        // The word file is joined line by line without separators, then split on spaces.
        StringBuilder joined = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(source)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                joined.append(line);
            }
        }
        String[] allWords = joined.toString().split(" ");

        StringBuilder translated = new StringBuilder();
        int position = 0;
        int done = 0;
        for (String word : allWords) {
            position++;
            if (position < begin) {
                continue;
            }
            // A failed lookup yields null, which is written as the text "null".
            translated.append(word).append(": ").append(fanyi(word)).append("\n");
            done++;
            if (done == le) {
                break;
            }
        }

        // Append so that earlier batches are preserved.
        try (BufferedOutputStream out = new BufferedOutputStream(
                new FileOutputStream(new File(name + "_fanyi.txt"), true))) {
            out.write(translated.toString().getBytes());
        }

        int next = begin + le;
        try (BufferedOutputStream out = new BufferedOutputStream(
                new FileOutputStream(new File(name + "_jilu.txt")))) {
            out.write(String.valueOf(next).getBytes());
        }
        return next;
    }

}
package com.chaojilaji;

import com.chaojilaji.Util.FanYi;
import com.chaojilaji.Util.WrodsFromFiles;
import org.apache.commons.lang.WordUtils;

import java.io.IOException;

/**
 * ClassName FenWord
 * Description Command-line entry point. {@link #main(String[])} currently runs only the
 *             translation step; the other private steps are kept so they can be wired
 *             into {@code main} by hand when needed.
 * Auther chaoj
 * Date 2018/12/8 22:14
 * Version 1.0
 **/
public class FenWord {
    /** Directory that holds the word list and the root table. */
    private static final String DATA_DIR =
            "C:\\Users\\chaoj\\Desktop\\讀書筆記\\effictive java\\word\\src\\main\\java\\com\\chaojilaji\\";

    /** Normalises the raw word list into the "new" word file. */
    private static void getwordFromfile() throws IOException {
        WrodsFromFiles.changeFile(DATA_DIR + "英文詞庫");
    }

    /** Normalises the raw root table into the "_new" root file. */
    private static void getrootFromfile() throws IOException {
        WrodsFromFiles.changeRootFiles(DATA_DIR + "english-root");
    }

    /** Builds the per-root word report from the two normalised files. */
    private static void tongJi() throws IOException {
        String wordPath = DATA_DIR + "英文詞庫new";
        String rootPath = DATA_DIR + "english-root_new";
        WrodsFromFiles.tongJi(wordPath, rootPath);
    }

    /**
     * Translates the normalised word list in batches of 100, covering 10000 positions
     * from the index reported by {@code FanYi.getNumberfromfile}, and prints one status
     * line per batch.
     */
    private static void fanyi() throws IOException {
        final String wordPath = DATA_DIR + "英文詞庫new";
        final int start = FanYi.getNumberfromfile(wordPath);
        if (start == -1) {
            System.out.println("無法擷取目前行數,請手動打開");
            return;
        }

        final int batchSize = 100;
        final int limit = start + 10000;
        for (int begin = start; begin < limit; begin += batchSize) {
            int next = FanYi.fanYiFromword(wordPath, begin, batchSize);
            // An unchanged index means the batch could not be processed.
            System.out.println(next == begin ? "失敗" : "成功");
        }
    }

    /** Runs the translation step and prints the stack trace of any failure. */
    public static void main(String[] args) {
        try {
            fanyi();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}