天天看點

Spark的WordCount程式圖文詳解!

先附上scala實作的代碼!

package cn.spark.study.core

import org.apache.spark.SparkConf

import org.apache.spark.SparkContext

object WordCount {

  /**
    * Entry point: counts how many times each word appears in a text file
    * and prints every (word, count) pair.
    *
    * @param args optional; args(0) overrides the default HDFS input path,
    *             so the job is no longer tied to one hard-coded file.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("WordCount")
    val sc = new SparkContext(conf)
    try {
      // Default to the original hard-coded path for backward compatibility.
      val inputPath = if (args.nonEmpty) args(0) else "hdfs://spark1:9000/spark.txt"
      val lines = sc.textFile(inputPath, 1)
      // Split each line on single spaces into individual words.
      val words = lines.flatMap(_.split(" "))
      // Pair each word with 1, then sum the counts per word.
      val wordCounts = words.map(word => (word, 1)).reduceByKey(_ + _)
      wordCounts.foreach { case (word, count) =>
        println(s"$word appeared $count times.")
      }
    } finally {
      // Always release cluster resources, even if the job throws.
      sc.stop()
    }
  }

}

下面的圖就是根據上面的代碼一步一步剖析底層實作的原理!

從下面的箭頭看整個邏輯流程!

圖檔來自網絡!
Spark的WordCount程式圖文詳解!

繼續閱讀