# jieba庫 (jieba Chinese word-segmentation library)
# ex_1
import jieba
def cut_word(text: str) -> str:
    """Segment *text* with jieba and return the tokens joined by spaces.

    Args:
        text: The string to segment (the demo in this file passes Chinese
            sentences).

    Returns:
        A single string in which the tokens produced by ``jieba.cut`` are
        separated by one space each.
    """
    # str.join consumes any iterable, so the result of jieba.cut can be
    # passed directly without first materializing it into a list.
    return ' '.join(jieba.cut(text))
def cut_chinese_demo2() -> None:
    """Demo: segment Chinese sentences and print their token-count features.

    Each sample sentence is segmented with ``cut_word``; the space-separated
    results are fitted and transformed with ``CountVectorizer``. The dense
    count matrix and the learned feature names are printed to stdout.

    NOTE(review): ``CountVectorizer`` is not imported in the visible part of
    this file; presumably it is
    ``sklearn.feature_extraction.text.CountVectorizer`` imported elsewhere.
    Confirm before running this function on its own.
    """
    data = [
        "每一個公民的合法權利都值得守護",
        "每一個維權訴求都值得珍視。",
        "當且僅當舉報管道暢通無阻、",
        "解決問題馬上就辦,",
        "才能少一些驚詫眼球的“誇張舉報”",
    ]

    # Segment every sentence into a space-separated token string.
    data_new = [cut_word(sen) for sen in data]

    transfer = CountVectorizer()
    data_final = transfer.fit_transform(data_new)
    print("data_new:\n", data_final.toarray())

    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2 in favour of get_feature_names_out(). Prefer the new method and
    # fall back to the old one so the demo runs on either side of the change.
    get_names = getattr(transfer, "get_feature_names_out", None)
    if get_names is None:
        get_names = transfer.get_feature_names
    # list(...) keeps the printed output a plain list, as with the legacy API.
    print("特征名字:\n", list(get_names()))