# -*- coding: utf-8 -*-
"""
Created on Thu Oct 12 15:43:55 2017
@author: Trista

Scan Microblogs.csv for tweets that mention flu-like symptom keywords.

Produces two CSV files (header: ID, created at, location, text, symptom):
  - sypmtom_text_1.csv       : one row per (tweet, matched symptom stem)
  - sypmtom_text_total_1.csv : every tweet; the symptom column holds each
                               matched stem, or 'NA' when none matched
"""
import csv
import re

from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from nltk.tokenize import word_tokenize

# Symptom vocabulary; stemmed below so it compares against stemmed tokens.
key_word = ['flu', 'fever', 'chills', 'sweats', 'aches', 'pains',
            'fatigue', 'coughing', 'breathing', 'nausea', 'vomiting',
            'diarrhea', 'lymph', 'death']

stemmer = SnowballStemmer('english')
# Set, not list: O(1) membership test per token in the hot loop.
key_stems = {stemmer.stem(w) for w in key_word}
stoplist = set(stopwords.words('english'))

words = []        # stemmed-token list for every tweet, in input order
text = []         # raw input rows of tweets that matched >= 1 keyword
text_total = []   # raw input rows of all tweets

header = ['ID', 'created at', 'location', 'text', 'symptom']

# `with` guarantees the handles are closed even on error (the original never
# closed the input file at all).  newline='' lets the csv module control line
# endings, per the csv docs.  csv.writer also fixes a corruption bug: tweet
# text containing commas/newlines used to break the hand-formatted rows.
with open('sypmtom_text_1.csv', 'w', encoding='utf-8', newline='') as f_text, \
     open('sypmtom_text_total_1.csv', 'w', encoding='utf-8', newline='') as f_total, \
     open('Microblogs.csv', 'r', encoding='utf-8', newline='') as f_in:
    matched_writer = csv.writer(f_text)
    total_writer = csv.writer(f_total)
    matched_writer.writerow(header)
    total_writer.writerow(header)

    for row in csv.reader(f_in):
        tokens = word_tokenize(row[3])
        # BUGFIX: lowercase BEFORE the stopword test.  The NLTK stoplist is
        # all-lowercase, so capitalized stopwords ("The", "And") previously
        # slipped past the filter and were stemmed and counted.
        lowered = [t.lower() for t in tokens]
        filtered = [t for t in lowered
                    if t not in stoplist and re.search('^[a-z]+$', t)]
        stems = [stemmer.stem(t) for t in filtered]
        words.append(stems)

        matched = [s for s in stems if s in key_stems]
        for s in matched:
            out = [row[0], row[1], row[2], row[3], s]
            matched_writer.writerow(out)
            total_writer.writerow(out)
        if matched:
            text.append(row)
        else:
            # BUGFIX: one 'NA' row per unmatched tweet.  The original wrote
            # an NA row for EVERY non-keyword token, flooding the total file
            # with duplicates even for tweets that did match a keyword.
            total_writer.writerow([row[0], row[1], row[2], row[3], 'NA'])
        text_total.append(row)