
Spark Streaming -- Input Source (Local Files)

// Input source: monitor a local directory with Spark Streaming
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object WordCountHDFSSource {
  def main(args: Array[String]): Unit = {
    System.setProperty("hadoop.home.dir", "E:\\software\\bigdate\\hadoop-2.6.0-cdh5.15.0\\hadoop-2.6.0-cdh5.15.0")
    val conf = new SparkConf()
    conf.setMaster("local[2]") // one thread receives data, the other processes it
    conf.setAppName("WordCountStreaming")
    val sc = new SparkContext(conf)
    val batch = 5 // batch interval in seconds
    val streamingContext = new StreamingContext(sc, Seconds(batch))
    // Monitor a local directory for newly created text files
    val sourceDS = streamingContext.textFileStream("E:\\sparkdata")
    sourceDS.flatMap(line => line.split(" ")) // split each line into words
      .map((_, 1))                            // pair each word with a count of 1
      .reduceByKey(_ + _)                     // sum the counts per word within the batch
      .print()                                // print the first results of each batch
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
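
Note that textFileStream only picks up files that appear in the monitored directory after the streaming context has started, so to test the job you can drop a fresh file into E:\sparkdata while the application is running. Below is a minimal sketch of such a generator; the object name GenerateTestFile, the file name pattern, and the sample lines are illustrative assumptions, not part of the original example.

// Hypothetical helper: write a new file into the monitored directory so the
// running textFileStream job picks it up on the next batch.
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths, StandardOpenOption}

object GenerateTestFile {
  def main(args: Array[String]): Unit = {
    // Use a timestamped name so each run creates a brand-new file
    val target = Paths.get("E:\\sparkdata", s"words-${System.currentTimeMillis()}.txt")
    val lines = Seq("hello spark", "hello streaming", "spark streaming word count")
    Files.write(target, lines.mkString("\n").getBytes(StandardCharsets.UTF_8), StandardOpenOption.CREATE_NEW)
    println(s"wrote test input to $target")
  }
}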
           
