
[Spark Basics] -- join and cogroup operations in Spark

package com.scala

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
/**
 * Scala demo of join vs. cogroup: join emits one record per matching key pair,
 * while cogroup groups all of a key's values together
 * (see the expected-output comments below each block).
 */
object JoinAndCogroup {
  
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("joinAndcogroup").setMaster("local[1]")
    // create the SparkContext
    val sc = new SparkContext(conf)
    // build the input collections: (id, name) and (id, score) pairs
    val stuList = List((1, "tom"), (2, "jim"), (3, "cassie"))
    val scoreList = List((1, 20), (1, 90), (1, 30), (2, 23), (2, 23), (2, 80), (3, 90), (3, 100), (3, 100))
    // convert to pair RDDs
    val stuRDD = sc.parallelize(stuList)
    val scoreRDD = sc.parallelize(scoreList)
    /* // join: pairs each student with every one of that student's scores
    val joinRDD = stuRDD.join(scoreRDD)
    for (join2 <- joinRDD) {
      println("===========")
      println("id is " + join2._1)
      println("name is " + join2._2._1)
      println("score is " + join2._2._2)
    } */
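    // Uncommented, the join block above should print 9 records here, one per
    // matching (student, score) pair; record order is not guaranteed.
    // A sketch of the output:
    //   id is 1 / name is tom / score is 20   (and likewise for 90 and 30)
    //   id is 2 / name is jim / score is 23, 23, 80
    //   id is 3 / name is cassie / score is 90, 100, 100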
    
    // cogroup: one output record per key, with all values grouped into Iterables
    val groupRDD = stuRDD.cogroup(scoreRDD)
    for (group2 <- groupRDD) {
      println("===========")
      println("id is " + group2._1)
      println("name is " + group2._2._1)
      println("score is " + group2._2._2)
    }
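    // A sketch of the expected output (CompactBuffer is the Iterable Spark
    // typically returns from cogroup; ordering may vary between runs):
    //   id is 1 / name is CompactBuffer(tom) / score is CompactBuffer(20, 90, 30)
    //   id is 2 / name is CompactBuffer(jim) / score is CompactBuffer(23, 23, 80)
    //   id is 3 / name is CompactBuffer(cassie) / score is CompactBuffer(90, 100, 100)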
    sc.stop()
  }
}
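A natural follow-up is to aggregate the grouped values that cogroup returns. The sketch below is not part of the original listing; it reuses the stuRDD and scoreRDD defined above and computes each student's average score:

// hypothetical extension: average score per student via cogroup
val avgRDD = stuRDD.cogroup(scoreRDD).mapValues { case (names, scores) =>
  val avg = if (scores.isEmpty) 0.0 else scores.sum.toDouble / scores.size
  (names.mkString(","), avg)
}
avgRDD.collect().foreach(println)
// prints roughly: (1,(tom,46.67)), (2,(jim,42.0)), (3,(cassie,96.67))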
