作业12

时间：2018-12-05 23:50:47 阅读：271 评论：0 收藏：0 [点我收藏+]

# Sample lyrics used to exercise preprocessing(). The quote characters are
# plain ASCII; typographic quotes here make the literal a syntax error.
text = """The world is changing
and time is spinning fast
it's so amazing how you came into my life
I know it seems all hope is gone
I know you feel you can't be strong
and once again the story ends with you and I
And anytime you feel like you just can't go on
just hold on to my love
and you'll never be alone
Hold on
we can make it through the fire
and my love   """

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Preprocessing
def preprocessing(text):
    """Normalize raw English text into a space-joined string of lemmas.

    The text is split into sentences and then into word tokens. Tokens are
    lowercased, English stop words and tokens shorter than three characters
    are dropped, and the remainder is lemmatized with WordNet.

    Args:
        text: Raw text to clean.

    Returns:
        The surviving lemmatized tokens joined by single spaces.
    """
    # Lowercase BEFORE the stop-word test: the stop-word list is lowercase,
    # so capitalised words such as "The" or "And" would otherwise slip past
    # the filter and end up in the output as "the" / "and".
    tokens = [
        word.lower()
        for sent in nltk.sent_tokenize(text)
        for word in nltk.word_tokenize(sent)
    ]
    # A set gives O(1) membership tests instead of scanning a list per token.
    stops = set(stopwords.words('english'))
    tokens = [
        token for token in tokens
        if len(token) >= 3 and token not in stops
    ]
    lmtzr = WordNetLemmatizer()
    return ' '.join(lmtzr.lemmatize(token) for token in tokens)
preprocessing(text)

# Read the dataset: one tab-separated record per line, with the label in
# column 0 and the message body in column 1.
import csv
file_path = r'D:\SMSSpamCollectionjs.txt'
sms_data = []
sms_label = []
# The context manager closes the file even if a row fails to parse.
with open(file_path, 'r', encoding='utf-8') as sms:
    # The reader must be bound to the same name the loop iterates over;
    # a mismatched name here raises NameError before any row is read.
    csv_reader = csv.reader(sms, delimiter='\t')
    for line in csv_reader:
        sms_label.append(line[0])
        sms_data.append(preprocessing(line[1]))

# Split into training and test sets at a 0.7 / 0.3 ratio
import numpy as np
sms_data = np.array(sms_data)
sms_label = np.array(sms_label)

# scikit-learn's helper is train_test_split (there is no train_text_split).
# stratify keeps the label proportions the same in both partitions.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    sms_data,
    sms_label,
    test_size=0.3,
    random_state=0,
    stratify=sms_label,
)

# Vectorize the messages as TF-IDF features
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(
    min_df=2,
    ngram_range=(1, 2),
    stop_words='english',
    strip_accents='unicode',
    norm='l2',
)
# Fit the vocabulary on the training split only, then reuse it for the test
# split so both matrices share the same columns.
X_train = vectorizer.fit_transform(x_train)
X_test = vectorizer.transform(x_test)

a = X_train.toarray()
print(a)

# Print every non-zero weight in the first 1000 rows as "row column value".
# Slicing clamps to the real matrix shape, so this cannot raise IndexError
# when there are fewer than 1000 rows or the vocabulary size differs from a
# previously hard-coded column count. nonzero() yields indices in row-major
# order, matching a nested row/column loop.
head = a[:1000]
for i, j in zip(*head.nonzero()):
    print(i, j, a[i, j])

# Naive Bayes classifier
# The module is sklearn.naive_bayes and the class is MultinomialNB.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(X_train, y_train)
y_nb_pred = clf.predict(X_test)

# Display the classification results
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

print(y_nb_pred.shape, y_nb_pred)  # predictions for x_test

# classification_report expects (y_true, y_pred) in that order; passing the
# predictions first swaps precision and recall in the printed table.
cr = classification_report(y_test, y_nb_pred)
print(cr)

作业12

原文：https://www.cnblogs.com/tangpaiq-/p/10074122.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)