1. Required Dependencies
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>1.1.0</version>
</dependency>
<dependency>
    <groupId>com.janeluo</groupId>
    <artifactId>ikanalyzer</artifactId>
    <version>2012_u6</version>
</dependency>
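
Since hive-exec is already present on the cluster at run time, it is common to mark it as provided so it is not bundled into the UDF jar; a minimal sketch of the adjusted dependency:

<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>1.1.0</version>
    <!-- provided: supplied by the Hive cluster, excluded from the packaged jar -->
    <scope>provided</scope>
</dependency>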
2. Implementation Code
package com.link.datawarehouse.hive;

import java.io.IOException;
import java.io.StringReader;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

/**
 * Hive UDF that segments Chinese text with IK Analyzer and returns
 * the lexemes as a lowercase, space-separated string.
 *
 * @author 包菜
 * @date 2020/12/8 15:08
 */
public class IkParticiple extends UDF {

    public String evaluate(String input) {
        // Return null directly when the input is empty
        if (input == null || input.trim().length() == 0) {
            return null;
        }

        // StringReader avoids the platform-default-charset pitfall of
        // converting the input through String.getBytes()
        StringBuilder output = new StringBuilder();
        // The second argument enables IK's smart (coarse-grained) segmentation mode
        IKSegmenter iks = new IKSegmenter(new StringReader(input), true);
        Lexeme t;
        try {
            while ((t = iks.next()) != null) {
                output.append(t.getLexemeText().toLowerCase()).append(" ");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return output.toString();
    }

    /* main method for local testing */
    public static void main(String[] args) {
        System.out.println(new IkParticiple().evaluate("超級喜歡寫代碼"));
    }
}
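
Running the main method locally is a quick sanity check before deploying: it prints the test sentence as space-separated lowercase lexemes produced by IK's smart mode.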
3. Results

The UDF returns the input sentence as space-separated lowercase lexemes.
4. Package, Upload, and Create the Function
Note: a custom UDF created this way can only be used in the database in which the function is registered.
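Before running the query below, the packaged jar must be registered and the function created; a minimal sketch, assuming the jar has been uploaded to a hypothetical HDFS path (adjust the path and jar name to your environment):

-- Create a permanent function in the target database; the class name
-- matches the implementation above, the jar path is an assumption
create function linkdata_warehouse.fenciqi
  as 'com.link.datawarehouse.hive.IkParticiple'
  using jar 'hdfs:///user/hive/jars/ik-udf.jar';

The function can then be called from the corresponding database: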
select linkdata_warehouse.fenciqi('超級喜歡寫代碼');