
Implementing a Top N sort in Java

Using the Spark API to implement a Top N sort in Java:

import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

/**
 * @author E-mail:
 * @version Created: 2017-08-31 15:36:03
 * Class description: sorts integers read from a text file and prints the top N using the Spark Java API.
 */
public class Top3Sort {

	public static void main(String[] args) {
		SparkConf conf = new SparkConf().setAppName("top3").setMaster("local");
		JavaSparkContext sc = new JavaSparkContext(conf);
		// Read one integer per line from the local input file.
		JavaRDD<String> tf = sc.textFile("G://top3.txt");
		// Map each line to a (number, line) pair so the RDD can be sorted by key.
		JavaPairRDD<Integer, String> mapToPair = tf.mapToPair(new PairFunction<String, Integer, String>() {

			private static final long serialVersionUID = 1L;

			@Override
			public Tuple2<Integer, String> call(String t) throws Exception {
				
				return new Tuple2<Integer, String>(Integer.valueOf(t), t);
			}
		});
		// Sort by key (the integer value) in descending order.
		JavaPairRDD<Integer, String> sortByKey = mapToPair.sortByKey(false);
		// Drop the String payload and keep only the integer keys.
		JavaRDD<Integer> map = sortByKey.map(new Function<Tuple2<Integer, String>, Integer>() {
			private static final long serialVersionUID = 1L;

			@Override
			public Integer call(Tuple2<Integer, String> v1) throws Exception {
				return v1._1;
			}
		});
		// Take the first N elements (here N = 5) from the sorted RDD.
		List<Integer> take = map.take(5);
		for (Integer nums : take) {
			System.out.println("...." + nums);
		}
		sc.close();
	}

}
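As an aside, the Spark Java API also provides a built-in top(n) action, which returns the n largest elements by natural ordering and avoids the explicit sortByKey/map steps above. The following is a minimal sketch under the same assumptions (local master, input file G://top3.txt); the class name TopNWithTopAction is only illustrative:

import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class TopNWithTopAction {

	public static void main(String[] args) {
		SparkConf conf = new SparkConf().setAppName("topN").setMaster("local");
		JavaSparkContext sc = new JavaSparkContext(conf);

		// Parse each input line into an Integer.
		JavaRDD<Integer> nums = sc.textFile("G://top3.txt")
				.map(s -> Integer.valueOf(s.trim()));

		// top(5) returns the 5 largest elements by natural ordering.
		List<Integer> top5 = nums.top(5);
		for (Integer n : top5) {
			System.out.println("...." + n);
		}
		sc.close();
	}
}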

Test data:

1
2
3
4
6
77
88
99
987
121
12121
4556
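
Running the program against this file should print the five largest values in descending order:

....12121
....4556
....987
....121
....99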

Reprinted from: https://www.cnblogs.com/ptbx/p/7459282.html