package test;
/*
* Task :統計文本英文單詞總個數,并列出每個單詞的個數
*
* Date:2014.02.26
*
*Author:璀若星辰
* */
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Counts the English words in a text file and prints a frequency report
 * to standard output.
 *
 * <p>A "word" is a maximal run of ASCII letters ({@code [a-zA-Z]+}); words are
 * compared case-insensitively by lower-casing the text before matching.
 */
public class IO_Word {

    /** Matches one run of ASCII letters; compiled once and reused for every line. */
    private static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-Z]+");

    /** Name of the charset used to decode the input file. */
    private static final String FILE_ENCODING = "gb2312";

    /** Input file used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_PATH = "D:/abc.txt";

    /**
     * Reads the file at {@code str}, counts its words and prints a report:
     * the total number of words, the number of distinct words, and one
     * {@code word-count} line per distinct word ordered by descending count
     * (words with equal counts stay in alphabetical order).
     *
     * @param str path of the text file to analyse
     * @return every line of the file, in file order, without line terminators
     * @throws Exception if the file cannot be opened, read or decoded
     */
    public static List<String> Io_word(String str) throws Exception {
        List<String> all = readLines(str);

        // A TreeMap keeps the words sorted, which yields the alphabetical
        // tie-break once the entries are stably sorted by count below.
        TreeMap<String, Integer> counts = new TreeMap<String, Integer>();
        int n = countWords(all, counts);

        System.out.println("統計分析如下:");
        System.out.println("txt文章中單詞總數" + n + "個");

        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        System.out.println("list=" + list.size());

        // Most frequent first. compareTo is used instead of subtraction so the
        // ordering never depends on integer arithmetic.
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                return right.getValue().compareTo(left.getValue());
            }
        });

        for (Entry<String, Integer> entry : list) {
            System.out.println(entry.getKey() + "-" + entry.getValue());
        }
        return all;
    }

    /** Reads all lines of the file at {@code path}, decoding it with {@link #FILE_ENCODING}. */
    private static List<String> readLines(String path) throws Exception {
        List<String> lines = new ArrayList<String>();
        FileInputStream fis = new FileInputStream(new File(path));
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(fis, FILE_ENCODING));
            try {
                for (String line = br.readLine(); line != null; line = br.readLine()) {
                    lines.add(line);
                }
            } finally {
                br.close();
            }
        } finally {
            // Also covers the case where creating the reader itself failed.
            fis.close();
        }
        return lines;
    }

    /**
     * Adds the occurrences of every word found in {@code lines} to {@code counts}
     * and returns the total number of word occurrences seen.
     */
    private static int countWords(List<String> lines, Map<String, Integer> counts) {
        int total = 0;
        for (String line : lines) {
            // Locale.ROOT keeps the case mapping independent of the default
            // locale; with a Turkish default, "I" would otherwise become a
            // dotless i that the ASCII pattern cannot match.
            Matcher matcher = WORD_PATTERN.matcher(line.toLowerCase(Locale.ROOT));
            while (matcher.find()) {
                String word = matcher.group();
                Integer seen = counts.get(word);
                counts.put(word, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
                total++;
            }
        }
        return total;
    }

    /**
     * Runs the word count on the file named by the first argument, or on
     * {@code D:/abc.txt} when no argument is supplied.
     *
     * @param args optional: path of the file to analyse
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        try {
            IO_Word.Io_word(path);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
執行結果如下:
![](https://img.laitimes.com/img/9ZDMuAjOiMmIsIjOiQnIsICdzFWRoRXdvN1LclHdpZXYyd2LcBzNvwVZ2x2bzNXak9CX90TQNNkRrFlQKBTSvwFbslmZvwFMwQzLcVmepNHdu9mZvwFVywUNMZTY18CX052bm9CX90zdOJTSU1keZpWT4FEVkZXUYpVd1kmYr50MZV3YyI2cKJDT29GRjBjUIF2LcRHelR3LcJzLctmch1mclRXY39jN0UzM0UDNzEjNyIDM0EDMy8CX0Vmbu4GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.jpg)