天天看點

統計文本英文單詞總個數,并列出每個單詞的個數

package test;
/*
 * Task :統計文本英文單詞總個數,并列出每個單詞的個數
 *
 * Date:2014.02.26
 *
 *Author:璀若星辰
 * */
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts the English words in a text file and prints the total number of
 * words followed by every distinct word with its number of occurrences,
 * most frequent first.
 *
 * <p>A "word" is a maximal run of ASCII letters ({@code [a-zA-Z]+}); words are
 * compared case-insensitively by lower-casing the text before matching.
 */
public class IO_Word {
	/** Charset used to decode the input file. */
	private static final String FILE_CHARSET = "gb2312";

	/** Matches one word: a maximal run of ASCII letters. Compiled once and reused. */
	private static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-Z]+");

	/** File analysed by {@link #main(String[])} when no path is given on the command line. */
	private static final String DEFAULT_PATH = "D:/abc.txt";

	/**
	 * Reads the file at {@code str}, prints the word statistics to
	 * {@code System.out} and returns the lines that were read.
	 *
	 * <p>Output consists of a header line, the total word count, the number of
	 * distinct words ({@code list=N}) and one {@code word-count} line per
	 * distinct word, ordered by descending count. Words with equal counts are
	 * listed alphabetically.
	 *
	 * @param str path of the text file to analyse
	 * @return every line of the file, in file order, without line terminators
	 * @throws Exception if the file cannot be opened, decoded or read
	 */
	public static List<String> Io_word(String str) throws Exception {
		List<String> all = readLines(new File(str));

		// TreeMap keeps the words alphabetically ordered; the stable sort below
		// relies on that to break ties between equally frequent words.
		TreeMap<String, Integer> counts = new TreeMap<String, Integer>();
		int n = countWords(all, counts);

		System.out.println("統計分析如下:");
		System.out.println("txt文章中單詞總數"+ n +"個");

		List<Map.Entry<String, Integer>> list =
				new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
		System.out.println("list="+list.size());

		Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
			@Override
			public int compare(Map.Entry<String, Integer> zj, Map.Entry<String, Integer> zz) {
				// Descending by count; compareTo avoids the overflow-prone
				// "subtract to compare" idiom.
				return zz.getValue().compareTo(zj.getValue());
			}
		});

		for (Entry<String, Integer> entry : list) {
			System.out.println(entry.getKey() + "-" + entry.getValue());
		}
		return all;
	}

	/** Reads every line of {@code file}, decoded with {@link #FILE_CHARSET}. */
	private static List<String> readLines(File file) throws IOException {
		FileInputStream fis = new FileInputStream(file);
		try {
			BufferedReader br = new BufferedReader(new InputStreamReader(fis, FILE_CHARSET));
			List<String> lines = new ArrayList<String>();
			for (String line = br.readLine(); line != null; line = br.readLine()) {
				lines.add(line);
			}
			return lines;
		} finally {
			// The readers only wrap this stream, so closing it releases the file
			// even when constructing a reader or reading a line fails.
			fis.close();
		}
	}

	/** Adds the occurrences of every word in {@code lines} to {@code counts}; returns the total number of words found. */
	private static int countWords(List<String> lines, Map<String, Integer> counts) {
		int total = 0;
		for (String line : lines) {
			// Locale.ROOT keeps lower-casing independent of the default locale:
			// under a Turkic locale "I" would become dotless "ı", which the
			// ASCII-only pattern does not match, splitting or truncating words.
			Matcher matcher = WORD_PATTERN.matcher(line.toLowerCase(Locale.ROOT));
			while (matcher.find()) {
				String word = matcher.group();
				Integer seen = counts.get(word);
				counts.put(word, seen == null ? 1 : seen + 1);
				total++;
			}
		}
		return total;
	}

	/**
	 * Command-line entry point.
	 *
	 * @param args optional; {@code args[0]} is the file to analyse. When absent,
	 *             {@link #DEFAULT_PATH} is used.
	 */
	public static void main(String[] args) {
		String path = args.length > 0 ? args[0] : DEFAULT_PATH;
		try {
			IO_Word.Io_word(path);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}
 
           

 執行結果如下:

統計文本英文單詞總個數,并列出每個單詞的個數