
Flink SQL Custom UDF

While recently trying out Flink's Table/SQL API, I found that it has no from_unixtime function, so the only option was to implement one as a custom UDF.

Raw Kafka message log

{"action":"exposure","itemId":"[email protected]@1576","rankIndex":14,"time":"1563641998","unionId":"ohmdTtymqiQw5aSxIt3ejxeAqpgs"}
{"action":"exposure","itemId":"[email protected]@16","rankIndex":10,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"[email protected]@287","rankIndex":21,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"[email protected]@12","rankIndex":22,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"[email protected]@1523","rankIndex":10,"time":"1563641998","unionId":"ohmdTtzivXuT9u3oWFO5daAxziI0"}
{"action":"exposure","itemId":"[email protected]@2759","rankIndex":25,"time":"1563641998","unionId":"ohmdTtzivXuT9u3oWFO5daAxziI0"}
{"action":"exposure","itemId":"[email protected]@68","rankIndex":2,"time":"1563641998","unionId":"ohmdTt_gZk2UkbbWsXBARMsTl1mI"}
{"action":"exposure","itemId":"[email protected]@2045","rankIndex":13,"time":"1563641998","unionId":"ohmdTtymqiQw5aSxIt3ejxeAqpgs"}
{"action":"exposure","itemId":"[email protected]@982","rankIndex":17,"time":"1563641998","unionId":"ohmdTtymqiQw5aSxIt3ejxeAqpgs"}
{"action":"exposure","itemId":"[email protected]@1498","rankIndex":28,"time":"1563641998","unionId":"ohmdTtzivXuT9u3oWFO5daAxziI0"}

What we need to format is the time field: a Unix timestamp in seconds (for example, 1563641998 is 2019-07-20 16:59:58 UTC).

Custom UDF function

import org.apache.flink.table.functions.ScalarFunction;

import java.text.SimpleDateFormat;
import java.util.Date;

public class FromUnixTimeUDF extends ScalarFunction {

    // Target date pattern; defaults to "yyyy-MM-dd HH:mm:ss".
    private String dateFormat;

    public FromUnixTimeUDF() {
        this.dateFormat = "yyyy-MM-dd HH:mm:ss";
    }

    public FromUnixTimeUDF(String dateFormat) {
        this.dateFormat = dateFormat;
    }

    // Flink calls eval() once per row; the argument is the time field
    // from the JSON, i.e. the epoch time in seconds as a string.
    public String eval(String longTime) {
        try {
            // SimpleDateFormat is not thread-safe, so build one per call.
            SimpleDateFormat sdf = new SimpleDateFormat(dateFormat);
            // java.util.Date expects milliseconds, hence the * 1000.
            Date date = new Date(Long.parseLong(longTime) * 1000);
            return sdf.format(date);
        } catch (Exception e) {
            // A malformed timestamp yields NULL instead of failing the job.
            return null;
        }
    }
}
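Because eval() is an ordinary method, the conversion can be sanity-checked locally before wiring the UDF into a job. A minimal sketch (FromUnixTimeUDFTest is just an illustrative name, and the printed value depends on the JVM's default time zone; the comment assumes UTC):

public class FromUnixTimeUDFTest {
    public static void main(String[] args) {
        FromUnixTimeUDF udf = new FromUnixTimeUDF();
        // Prints "2019-07-20 16:59:58" when the default time zone is UTC.
        System.out.println(udf.eval("1563641998"));
        // Unparseable input is mapped to null rather than throwing.
        System.out.println(udf.eval("not-a-number"));
    }
}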

Main program

final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamTableEnvironment tEnv = TableEnvironment.getTableEnvironment(env);

// Register the UDF under the name used in the SQL below.
tEnv.registerFunction("from_unixtime", new FromUnixTimeUDF());

// Register the Kafka topic as a table, deriving the JSON format from the schema.
tEnv.connect(initKafkaDescriptor())
        .withFormat(new Json().failOnMissingField(true).deriveSchema())
        .withSchema(initSchema())
        .inAppendMode()
        .registerTableSource("transfer_plan_show");

Table result = tEnv.sqlQuery(
        "select unionId, itemId, action, from_unixtime(`time`) as create_time, rankIndex as rank_index "
        + "from transfer_plan_show");
result.printSchema();

tEnv.toAppendStream(result, Row.class).print();
env.execute();

Note: time is a reserved keyword in SQL, so in the query it has to be escaped with backticks, i.e. `time`.
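The overloaded constructor also makes it easy to register the same UDF under several SQL names with different patterns; for example (from_unixtime_date is an arbitrary name chosen for illustration):

// Full datetime and a date-only variant of the same UDF.
tEnv.registerFunction("from_unixtime", new FromUnixTimeUDF());
tEnv.registerFunction("from_unixtime_date", new FromUnixTimeUDF("yyyy-MM-dd"));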

Helper functions

// Kafka connection configuration.
private Kafka initKafkaDescriptor() {
    Kafka kafkaDescriptor = new Kafka()
            .version("0.11")
            .topic("transfer_plan_show")
            .startFromLatest()
            .property("bootstrap.servers", KafkaConfig.KAFKA_BROKER_LIST)
            .property("group.id", "trafficwisdom-streaming");
    return kafkaDescriptor;
}

// Schema matching the fields of the JSON messages.
private Schema initSchema() {
    Schema schema = new Schema()
            .field("action", Types.STRING())
            .field("itemId", Types.STRING())
            .field("time", Types.STRING())
            .field("unionId", Types.STRING())
            .field("rankIndex", Types.INT());
    return schema;
}
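KafkaConfig is not shown in the post; a minimal stand-in, assuming it only needs to hold the broker list (the address below is a placeholder):

public class KafkaConfig {
    // Replace with the real broker addresses, e.g. "host1:9092,host2:9092".
    public static final String KAFKA_BROKER_LIST = "localhost:9092";
}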
