"""Load a directory of plain-text files as an NLTK corpus and print its words.

The script builds a ``PlaintextCorpusReader`` over a local directory, lists
the file ids the reader discovered, and prints the word view of
``jieba3.txt``.
"""
import nltk
from nltk.corpus import PlaintextCorpusReader

# Directory that holds the corpus files (machine-specific absolute path).
corpus_root = r"C:\Users\sun\AppData\Roaming\nltk_data\corpora\jieba"

# Regular expression selecting which files belong to the corpus.
# The previous pattern, r".*/.*\.txt", required a "/" in the file id and so
# could not match "jieba3.txt", which is read below from the corpus root.
# This pattern still matches every id the old one did, plus root-level files.
file_pattern = r".*\.txt"

ptb = PlaintextCorpusReader(corpus_root, file_pattern)

# Show which files the reader picked up; the bare call discarded its result.
print(ptb.fileids())

# Word view of a single corpus file.
text = ptb.words("jieba3.txt")
print(text)
# 原文 (original article): http://www.cnblogs.com/realmonkeykingsun/p/7992873.html