開發者學堂課程【大資料實時計算架構 Spark 快速入門:Spark 算子操作及總結_2】學習筆記,與課程緊密聯系,讓使用者快速學習知識。
課程位址:
https://developer.aliyun.com/learning/course/100/detail/1692
Spark 算子操作及總結_2
内容簡介:
一、CartesianOperator 相關代碼
二、CountByKey Operator 相關代碼
17 //中文名笛卡爾乘積
18 //比如說兩個RDD, 分别有10條資料,用了cartesian算子以後
19 //兩個RDD的每一個資料都會和另外一個RDD的每條資料執行一次JOIN
20 //最終組成一個笛卡爾乘積
21
22 // 小案例
23 //比如說,現在有5件衣服,5條褲子,分别屬于兩個RDD
24 //就是說,需要對每件衣服都和每條褲子做一次JOIN操作, 嘗試進行服裝搭配!
25 SparkConf conf = new SparkConf()
26 .setAppName("CartesianOperator");
27 JavaSparkContext sc = new JavaSparkContext(conf);
28
29 List<String> clothes = Arrays.asList("T恤衫", "夾克", "皮大衣", "襯衫", "毛衣");
30 List<String> trousers = Arrays.asList("西褲", "内褲", "鉛筆褲", "皮褲", "牛仔褲");
31 JavaRDD<String> clothesRDD = sc.parallelize(clothes);
32 JavaRDD<String> trousersRDD = sc.parallelize(trousers);
33
34 JavaPairRDD<String, String> pairs = clothesRDD.cartesian(trousersRDD);
35 for (Tuple2<String, String> pair : pairs.collect()) {
36 System.out.println(pair);
37 }
39 sc.close();
40 }
41}
30 //對RDD應用CountByKey算子,統計每個70s或者80s,人數分别是多少
31 //統計每種Key對應的元素個數
32 Map counts = students.countByKey();
33 for (Map.Entry studentCount : counts.entrySet()) {
34 System.out.println(studentCount.getKey() + ": " + studentCount.getValue());
35}
36
37 sc.close();
38
39 }
40
三、CogroupOperator 相關代碼
12
13 public class CogroupOperator {
14
15 public static void main(String[] args) {
16 SparkConf conf = new SparkConf().setAppName("ReduceOperator")
17 .setMaster("local");
18 JavaSparkContext sc = new JavaSparkContext(conf);
19
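20 List<Tuple2<String, String>> studentsList = Arrays.asList(
21 new Tuple2<String, String>("1", "xuruyun"),
22 new Tuple2<String, String>("2", "wangfei"),
23 new Tuple2<String, String>("3", "lixin"));
24 List<Tuple2<String, String>> scoreList = Arrays.asList(
25 new Tuple2<String, String>("1", "100"),
26 new Tuple2<String, String>("2", "90"),
27 new Tuple2<String, String>("3", "80"),
28 new Tuple2<String, String>("1", "70"),
29 new Tuple2<String, String>("2", "60"),
30 new Tuple2<String, String>("3", "50"));
31 JavaPairRDD<String, String> students = sc.parallelizePairs(studentsList);
32 JavaPairRDD<String, String> scores = sc.parallelizePairs(scoreList);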
33