jieba库
ex_1
import jieba
def cut_word(text):
    """Segment *text* with jieba and join the tokens with single spaces.

    Args:
        text: The string to segment; it is passed directly to ``jieba.cut``.

    Returns:
        A single string made of the tokens produced by ``jieba.cut(text)``,
        separated by one space each.
    """
    # str.join consumes the iterable returned by jieba.cut directly, so no
    # intermediate list has to be built first.
    return " ".join(jieba.cut(text))
def cut_chinese_demo2():
    """Demo: print bag-of-words count features for a few Chinese sentences.

    Each sample sentence is segmented with ``cut_word`` (tokens separated by
    spaces), the segmented sentences are passed to
    ``CountVectorizer.fit_transform``, and the dense count matrix and the
    learned feature names are printed.

    Returns:
        None. The results are only printed.
    """
    # NOTE(review): CountVectorizer is not imported in the visible part of
    # this file; presumably it comes from sklearn.feature_extraction.text --
    # confirm that the import exists.
    data = [
        "每一个公民的合法权利都值得守护",
        "每一个维权诉求都值得珍视。",
        "当且仅当举报渠道畅通无阻、",
        "解决问题马上就办,",
        "才能少一些惊诧眼球的“夸张举报”",
    ]

    # Segment every sentence up front so the vectorizer receives
    # space-separated tokens.
    data_new = [cut_word(sen) for sen in data]

    transfer = CountVectorizer()
    data_final = transfer.fit_transform(data_new)
    print("data_new:\n", data_final.toarray())

    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2. Prefer get_feature_names_out() and fall back only on older
    # releases. list() keeps the printed value a plain list, as before.
    if hasattr(transfer, "get_feature_names_out"):
        feature_names = list(transfer.get_feature_names_out())
    else:
        feature_names = transfer.get_feature_names()
    print("特征名字:\n", feature_names)