Skip to content

Commit

Permalink
Remove unused code
Browse files Browse the repository at this point in the history
  • Loading branch information
Marshall James committed Apr 30, 2018
1 parent 07a7562 commit f38187e
Show file tree
Hide file tree
Showing 5 changed files with 6,027 additions and 6,033 deletions.
22 changes: 5 additions & 17 deletions classifier.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,13 @@
import json
import pickle
from textblob import TextBlob
from textblob.classifiers import NaiveBayesClassifier
from textblob.classifiers import MaxEntClassifier

class Classifier:

# Constructor taking a `train` flag. When `train` is truthy it builds a
# NaiveBayesClassifier from ./data/train.csv (passed with format="csv");
# otherwise it restores a classifier object from ./data/classifier.pickle.
# Either way the result is stored on self.cl.
# NOTE(review): this page renders the diff without +/- markers or indentation;
# this variant looks like the one removed by the commit, since a shorter
# `__init__(self)` follows it directly -- confirm against the repository.
def __init__(self, train=True):
if train:
fp = open("./data/train.csv")
self.cl = NaiveBayesClassifier(fp, format="csv")
# Disabled alternative: same training file, MaxEntClassifier instead.
# self.cl = MaxEntClassifier(fp, format="csv")
fp.close()

# Disabled: would dump the trained classifier to the same pickle path that
# the else-branch below reads.
# fp = open("./data/classifier.pickle", "wb")
# pickle.dump(self.cl, fp, -1)
# fp.close()
else:
# pickle.load runs whatever the pickle stream encodes, so this is only safe
# when classifier.pickle is a trusted, locally produced file.
fp = open("./data/classifier.pickle", "rb")
self.cl = pickle.load(fp)
fp.close()
# Parameterless constructor: always builds a NaiveBayesClassifier from
# ./data/train.csv (passed with format="csv") and stores it on self.cl.
def __init__(self):
fp = open("./data/train.csv")
self.cl = NaiveBayesClassifier(fp, format="csv")
# Closed only after the classifier has been constructed; there is no
# try/finally, so an exception raised above leaves the handle open.
fp.close()

# Smoke check: runs self.cl.classify() on one fixed sentence and returns
# whatever classify() returns.
def test(self):
return self.cl.classify("This is a test sentence")
Expand All @@ -44,7 +32,7 @@ def accuracy(self):
return test_accuracy

# Script entry point: builds a Classifier and prints the result of its
# test() method.
def main():
# NOTE(review): two constructor calls appear back to back because the diff
# is shown without +/- markers; presumably `Classifier(train=True)` is the
# removed line and `Classifier()` its replacement -- confirm.
cl = Classifier(train=True)
cl = Classifier()
print(cl.test())

if __name__ == "__main__":
Expand Down
36 changes: 21 additions & 15 deletions clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,39 @@
# NOTE(review): presumably the number of rows written to the train and test
# splits; the code that reads these constants is not visible in this hunk --
# confirm against the rest of clean.py.
TRAIN_N = 5000
TEST_N = 1000

# NOTE(review): two function definitions are interleaved here because the diff
# is rendered without +/- markers or indentation. `removeEntities` (with its
# nested `clean` helper and the `' '.join(map(clean, ...))` return) looks like
# the removed code; `cleanContent` (the `cleaned = ...` chain and
# `return cleaned`) looks like the added code -- confirm.
#
# removeEntities(text): splits text on whitespace, maps each word through
# clean(), and re-joins the results with single spaces.
def removeEntities(text):
# cleanContent(content): returns content with double quotes and square
# brackets removed, commas turned into spaces, newlines removed, and
# whitespace normalised.
def cleanContent(content):

# clean(word): replaces a whole word with a placeholder based on a substring
# test -- "@" -> "NAME", "http:" -> "LINK", "#" -> "HASHTAG" -- checked in that
# order; any other word is returned unchanged. Words containing "https:" do
# not match the "http:" test.
def clean(word):
if "@" in word: return "NAME"
elif "http:" in word: return "LINK"
elif "#" in word: return "HASHTAG"
else: return word
cleaned = content.replace('"', "")
cleaned = cleaned.replace('[', "")
cleaned = cleaned.replace(']', "")
# Commas become spaces rather than being deleted, so the words on either
# side stay separated.
cleaned = cleaned.replace(',', " ")
cleaned = cleaned.replace('\n', "")
cleaned = cleaned.lstrip()
# split() with no argument drops leading/trailing whitespace and treats any
# whitespace run as one separator, so the join leaves single spaces only.
cleaned = ' '.join(cleaned.split())

return ' '.join(map(clean, text.split()))
return cleaned

# One parsed input row, holding at least `sentiment` and `content`, with a
# writer that emits the pair as a "content,sentiment" line.
# NOTE(review): __init__ below shows two parsing strategies interleaved because
# the diff is rendered without +/- markers. The comma-split lines (tweet_id,
# author, removeEntities) look like the removed code and the dash-counting
# lines (rstrip, dashes loop, cleanContent) like the added code -- confirm.
class Data:

def __init__(self, row):

# Comma-based parsing: field 0 is tweet_id, field 1 sentiment, field 2 author,
# and everything from field 3 on is re-joined with spaces as the content.
split = row.split(",")
# Dash-based parsing starts here: drop trailing whitespace from the row.
row = row.rstrip()

self.tweet_id = split[0]
self.sentiment = split[1].replace('"', '')
self.author = split[2].replace('"', '')
# Content has double quotes and newlines stripped and is lower-cased.
self.content = ' '.join(split[3:]).replace('"', '').replace("\n", "").lower()
self.content = removeEntities(self.content)
self.sentiment = ""
self.content = ""

# Walk the row one character at a time. Every "-" only bumps the counter and
# is never copied. Any other character is routed by how many dashes have been
# seen so far: 3 to 5 -> sentiment, 6 or more -> content; characters seen
# before the third dash are discarded. Because the "-" test comes first,
# hyphens inside the content text are counted and dropped as well.
# NOTE(review): presumably the input rows use "---" as a field separator --
# confirm against ./data/isear.txt.
dashes = 0
for c in row:
if c == "-": dashes += 1
elif dashes >= 3 and dashes < 6: self.sentiment += c
elif dashes >= 6: self.content += c

self.content = cleanContent(self.content)

# Writes this record to the open file object fp as one line:
# content, a comma, sentiment, then a newline. No quoting or escaping is
# applied to either field.
def write(self, fp):
fp.write(self.content + "," + self.sentiment + "\n")

# Module-level script: opens the raw input and the two output files.
# NOTE(review): `dirty` is assigned twice because the diff is rendered without
# +/- markers; presumably the text_emotion.csv open and the `next(dirty)` that
# skips its first line are the removed lines, and the isear.txt open is their
# replacement -- confirm. The rest of the script lies outside this hunk, so
# where these handles are closed is not visible here.
dirty = open("./data/text_emotion.csv")
next(dirty)
dirty = open("./data/isear.txt")
train = open("./data/train.csv", "w")
test = open("./data/test.csv", "w")

Expand Down
Loading

0 comments on commit f38187e

Please sign in to comment.