1 flink sink
2 file sink
package com.study.liucf.unbounded.sink
import com.study.liucf.bean.LiucfSensorReding
import org.apache.flink.api.common.serialization.SimpleStringEncoder
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala._
/**
 * Demo job: reads sensor lines from a text file, converts each line into a
 * `LiucfSensorReding`, prints the readings and writes them out through a
 * row-format `StreamingFileSink`.
 *
 * @Author liucf
 * @Date 2021/9/13
 */
object FileSink {

  /**
   * Converts one comma-separated line into a reading.
   *
   * The first field is passed through as a string, the second is parsed as a
   * `Long` and the third as a `Double`.
   */
  private def toReading(line: String): LiucfSensorReding = {
    val fields = line.split(",")
    LiucfSensorReding(fields(0), fields(1).toLong, fields(2).toDouble)
  }

  def main(args: Array[String]): Unit = {
    // Create the Flink execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Read the raw text lines.
    val lines: DataStream[String] = env.readTextFile("src\\main\\resources\\sensor.txt")

    // Convert every String line into a LiucfSensorReding.
    val readings = lines.map(line => toReading(line))

    // Emit to the console.
    readings.print()

    // Emit to a file. The older approach was
    //   readings.writeAsCsv("src\\main\\resources\\sensor.csv").setParallelism(1)
    // (parallelism 1 so that a single file is produced instead of one per
    // parallel subtask), but writeAsCsv is deprecated, so a StreamingFileSink
    // is used instead.
    val fileSink = StreamingFileSink
      .forRowFormat(
        new Path("src\\main\\resources\\sensor2.csv"),
        new SimpleStringEncoder[LiucfSensorReding]()
      )
      .build()
    readings.addSink(fileSink)

    // Start the job.
    env.execute("liucf sink api")
  }
}
2 kafka sink
本示例演示:数据从kafka的一个topic:sensor_input_csv读入,
然后写出到kafka的另一个topic:sensor_out
2.1 生产者生产到topic:sensor_input_csv
package com.study.liucf.kafka
import java.util.Properties
import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.StringSerializer
/**
 * Minimal Kafka producer that publishes a single hard-coded sensor record to
 * the `sensor_input_csv` topic.
 */
object SensorProduce2 {

  /** Builds the producer, sends one sample record and always closes the producer. */
  def main(args: Array[String]): Unit = {
    val kafkaProp = new Properties()
    kafkaProp.put("bootstrap.servers", "192.168.109.151:9092")
    kafkaProp.put("acks", "1")
    kafkaProp.put("retries", "3")
    //kafkaProp.put("batch.size", 16384)//16k
    kafkaProp.put("key.serializer", classOf[StringSerializer].getName)
    kafkaProp.put("value.serializer", classOf[StringSerializer].getName)
    // No "topic" entry here: it is not a producer configuration key. The
    // destination topic is carried by each ProducerRecord (see `send`).

    val producer = new KafkaProducer[String, String](kafkaProp)
    try {
      val sensor = "sensor_1,1617505482,36.6"
      send(sensor, producer)
    } finally {
      // close() waits for in-flight sends, so the callback still gets a chance
      // to run; doing it in `finally` releases the producer even if send throws.
      producer.close()
    }
  }

  /**
   * Sends `str` as the value of a record on topic `sensor_input_csv` and
   * reports the outcome on stdout from the completion callback.
   *
   * @param str      record value to publish
   * @param producer producer used to publish the record
   */
  def send(str: String, producer: KafkaProducer[String, String]): Unit = {
    val record = new ProducerRecord[String, String]("sensor_input_csv", str)
    producer.send(record, new Callback {
      override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
        // Decide on `exception`, not on `metadata`: on failure Kafka may still
        // hand over a non-null placeholder RecordMetadata, which would
        // otherwise be reported as a success.
        Option(exception) match {
          case Some(error) => println(s"消息发送失败: ${error.getMessage}")
          case None        => println("发送成功")
        }
      }
    })
  }
}
2.2 flink代码
数据从kafka的一个topic:sensor_input_csv读入,然后写出到kafka的另一个topic:sensor_out
package com.study.liucf.unbounded.sink
import java.util.Properties
import com.study.liucf.bean.LiucfSensorReding
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka._
/**
 * Demo job: reads records from the Kafka topic `sensor_input_csv`, converts
 * each one to the string form of a `LiucfSensorReding`, prints it and writes
 * it to the Kafka topic `sensor_out`.
 *
 * @Author liucf
 * @Date 2021/9/18
 */
object KafkaSink {
  def main(args: Array[String]): Unit = {
    // Create the execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Kafka settings for the source.
    val sourceProps = new Properties()
    sourceProps.setProperty("bootstrap.servers", "192.168.109.151:9092")
    sourceProps.setProperty("topic", "sensor_input_csv")

    // Attach the Kafka source.
    val kafkaSource =
      new FlinkKafkaConsumer[String]("sensor_input_csv", new SimpleStringSchema(), sourceProps)
    val rawLines: DataStream[String] = env.addSource(kafkaSource)

    // Parse each comma-separated record and render the reading as a String.
    val formatted: DataStream[String] = rawLines.map { line =>
      val fields = line.split(",")
      val reading = LiucfSensorReding(fields(0), fields(1).toLong, fields(2).toDouble)
      reading.toString
    }

    // Emit the result to the console.
    formatted.print()

    // Emit the result to another Kafka topic.
    val sinkProps = new Properties()
    sinkProps.setProperty("bootstrap.servers", "192.168.109.151:9092")
    val producerSink =
      new FlinkKafkaProducer[String]("sensor_out", new SimpleStringSchema(), sinkProps)
    formatted.addSink(producerSink)

    // Start the job.
    env.execute("liucf kafka sink api test")
  }
}
2.3 消费者从topic:sensor_out消费
消费数据并打印出来
package com.study.liucf.kafka
import java.util
import java.util.Properties
import org.apache.kafka.clients.consumer.{ConsumerRecords, KafkaConsumer}
import scala.collection.JavaConversions._
/**
 * Console consumer: subscribes to the topic named by the first command-line
 * argument and prints the offset, key and value of every record it receives.
 *
 * @author [email protected]
 * @date 2021/1/14 20:44
 * @version 1.0
 */
object SimpleConsumer {

  /**
   * Entry point.
   *
   * @param args `args(0)` is the topic to consume; when it is missing a usage
   *             hint is printed and nothing is consumed
   */
  def main(args: Array[String]): Unit = {
    args.headOption match {
      case None            => println("Enter topic name")
      case Some(topicName) => consume(topicName)
    }
  }

  /** Builds the consumer configuration. */
  private def consumerProps(): Properties = {
    import java.util.UUID

    val props = new Properties()
    props.put("bootstrap.servers", "192.168.109.151:9092")
    // "acks" and "retries" are producer-only settings and are therefore not
    // set on the consumer.

    // A fresh random group id combined with auto.offset.reset=earliest gives
    // the same effect as the console consumer's --from-beginning: the new
    // group has no committed offsets, so consumption starts at the earliest
    // offset. (A fixed id such as "test" would resume from that group's
    // committed offsets instead.)
    props.put("group.id", UUID.randomUUID.toString)
    // With committed offsets: resume from them. Without: start from the
    // beginning of each partition. Use "latest" to only see records produced
    // after this consumer starts.
    props.put("auto.offset.reset", "earliest")

    // Commit consumed offsets automatically ...
    props.put("enable.auto.commit", "true")
    // ... at this interval, in milliseconds.
    props.put("auto.commit.interval.ms", "1000")
    // Timeout, in milliseconds, used to detect that this consumer has failed.
    props.put("session.timeout.ms", "30000")

    // Deserializers for record keys and values.
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props
  }

  /** Subscribes to `topicName` and prints records until the process is stopped or polling fails. */
  private def consume(topicName: String): Unit = {
    // Explicit converters instead of the deprecated implicit JavaConversions.
    import scala.collection.JavaConverters._

    val consumer: KafkaConsumer[String, String] = new KafkaConsumer(consumerProps())
    try {
      consumer.subscribe(util.Arrays.asList(topicName))
      println("Subscribed to topic " + topicName)
      while (true) {
        // NOTE(review): poll(long) is deprecated in newer kafka-clients in
        // favour of poll(java.time.Duration); kept as-is because the client
        // version used by this project is not visible here.
        val records: ConsumerRecords[String, String] = consumer.poll(100)
        for (record <- records.asScala) {
          // Print the offset, key and value of each consumed record.
          printf("offset = %d, key = %s, value = %s\n", record.offset, record.key, record.value)
        }
      }
    } finally {
      // Release sockets and leave the group cleanly when the loop is aborted
      // by an exception.
      consumer.close()
    }
  }
}