天天看点

02-flink-1.10.1-离线开发WordCount

package com.study.liucf.bounded

import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.api.scala._
/**
 * Batch (bounded-data) WordCount example for Flink 1.10.1.
 *
 * The job consists of four steps:
 *   1. create the Flink batch execution environment,
 *   2. read the input text file,
 *   3. transform the lines into per-word counts,
 *   4. output the result (printed to the console here).
 *
 * Created 2021/8/15.
 *
 * @author liucf
 */
object WordCount {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath: String =
    "D:\\IntellijWorkSpace\\flink-liucf-study\\src\\main\\resources\\wc.txt"

  /**
   * Entry point of the job.
   *
   * @param args optional command-line arguments; when present, `args(0)` is the
   *             path of the text file to count. Without arguments the job falls
   *             back to `DefaultInputPath`.
   */
  def main(args: Array[String]): Unit = {
    // Step 1: create the batch execution environment.
    val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment

    // Step 2: read the input text file, one element per line.
    val inputPath: String = args.headOption.getOrElse(DefaultInputPath)
    val lines: DataSet[String] = env.readTextFile(inputPath)

    // Step 3: tokenize, pair each word with 1, group by the word and sum the ones.
    val counts = lines
      // Split on any run of whitespace and drop empty tokens: a plain split(" ")
      // yields "" for blank lines and for leading/repeated spaces, and those
      // empty strings would then be counted as a word.
      .flatMap(line => line.split("\\s+").filter(_.nonEmpty)) // DataSet[String]
      .map(word => (word, 1)) // DataSet[(String, Int)]
      .groupBy(0) // key = tuple field 0 (the word)
      .sum(1) // aggregate tuple field 1 (the count)

    // Step 4: print the (word, count) pairs to the console.
    counts.print()
  }

}
           

继续阅读