天天看点

出现次数的top k问题

出现次数的top k问题
import java.util.*;
import java.util.Map.*;
//出现次数的top k问题
public class GetTopKProblem{

    //堆节点定义
    public static class Node{
         public String str;
         public  int times;
         public Node(String s,int t){
             str=s;
             times=t;
         }
    }
    //获得出现次数的top k 问题
    public static void printTopKAndRank(String[] arr, int topK) {
        if (arr == null || topK < ) {
            return;
        }
        HashMap<String, Integer> map = new HashMap<String, Integer>();
        // 生成哈希表(字符串--词频)
        for (int i = ; i != arr.length; i++) {
            String cur = arr[i];
            if (!map.containsKey(cur)) {
                map.put(cur, );
            } else {
                map.put(cur, map.get(cur) + );
            }
        }
        Node[] heap = new Node[topK];
        int index = ;
        // 遍历哈希表,决定每条信息是否进堆
        for (Entry<String, Integer> entry : map.entrySet()) {
            String str = entry.getKey();
            int times = entry.getValue();
            Node node = new Node(str, times);
            if (index != topK) {
                heap[index] = node;
                heapInsert(heap, index++);
            } else {
                if (heap[].times < node.times) {
                    heap[] = node;
                    heapify(heap, , topK);
                }
            }
        }
        // 把小根堆的所有元素按词频从大到小排序
        for (int i = index - ; i != ; i--) {
            swap(heap, , i);
            heapify(heap, , i);
        }
        // 严格按照排名打印k条记录
        for (int i = ; i != heap.length; i++) {
            if (heap[i] == null) {
                break;
            } else {
                System.out.print("No." + (i + ) + ": ");
                System.out.print(heap[i].str + ", times: ");
                System.out.println(heap[i].times);
            }
        }
    }
    //向堆中添加数据
    public static void heapInsert(Node[] heap, int index) {
        while (index != ) {
            int parent = (index - ) / ;
            if (heap[index].times < heap[parent].times) {
                swap(heap, parent, index);
                index = parent;
            } else {
                break;
            }
        }
    }
     //交换堆的两个数
    public static void heapify(Node[] heap, int index, int heapSize) {
        int left = index *  + ;
        int right = index *  + ;
        int smallest = index;
        while (left < heapSize) {
            if (heap[left].times < heap[index].times) {
                smallest = left;
            }
            if (right < heapSize && heap[right].times < heap[smallest].times) {
                smallest = right;
            }
            if (smallest != index) {
                swap(heap, smallest, index);
            } else {
                break;
            }
            index = smallest;
            left = index *  + ;
            right = index *  + ;
        }
    }
    //交换两个数
    public static void swap(Node[] heap, int index1, int index2) {
        Node tmp = heap[index1];
        heap[index1] = heap[index2];
        heap[index2] = tmp;
    }
    //生成随机大小的数组
    public static String[] generateRandomArray(int len, int max) {
        String[] res = new String[len];
        for (int i = ; i != len; i++) {
            res[i] = String.valueOf((int) (Math.random() * (max + )));
        }
        return res;
    }

    //打印数组
    public static void printArray(String[] arr) {
        for (int i = ; i != arr.length; i++) {
            System.out.print(arr[i] + " ");
        }
        System.out.println();
    }

    public static void main(String[]args){
          //System.out.println("Hello");
          String[] arr = generateRandomArray(, );
          int topK = ;
          printArray(arr);
          printTopKAndRank(arr, topK);
    }
}
           
出现次数的top k问题