Jieba
Required Modules
import jieba
import jieba.posseg as pseg
import jieba.analyse as analy
String Cutting (Word Segmentation)
[word for word in jieba.cut(rawString, cut_all=False)]
rawStrCutList = jieba.lcut(rawString, cut_all=False)
[word for word in jieba.cut_for_search(rawString, HMM=False)]
rawStrCutSearchList = jieba.lcut_for_search(rawString, HMM=False)
Part-of-Speech Tagging (ICTPOS Tag Set)
rawStrCut = pseg.cut(rawString)
[[w.word, w.flag] for w in rawStrCut]
Parallel Computing
jieba.enable_parallel(2)
User-defined Dictionary
jieba.load_userdict("self-dict.txt")
jieba.del_word("iOS11")
jieba.add_word("iOS11", freq=None, tag=None)
jieba.suggest_freq("iOS11", tune=True)
Tokenize: Word Positions in the Text
[[tk[0],tk[1],tk[2]] for tk in jieba.tokenize(rawString)]
[[kw[0],kw[1],kw[2]] for kw in jieba.tokenize(rawString, mode='search')]
analy.extract_tags(rawString, topK=10, \
withWeight=False, allowPOS=('ns', 'n', 'vn', 'v'))
analy.textrank(rawString, topK=10, \
withWeight=False, allowPOS=('ns', 'n', 'vn', 'v'))