-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyzer.py
124 lines (102 loc) · 5.04 KB
/
analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import pandas as pd
import xlrd
import dummy_thread
import openpyxl
import re
import os
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import logging
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import datetime
import time
# Root-logger configuration shared by the whole script: install the default
# stream handler with a timestamped format, then cap verbosity at WARNING so
# the debug/info traces emitted while parsing stay silent unless the level is
# lowered here.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()
logger.setLevel(logging.WARNING)
class quarterlyincome(object):
    """Income figures extracted from one quarterly/annual filing.

    Attributes are stored exactly as passed in; no conversion or validation
    is performed here.
    """

    def __init__(self, innetrevenue, innetincome):
        """Store the two headline figures.

        Args:
            innetrevenue: Net revenue (net sales) value for the period.
            innetincome: Net income value for the period.
        """
        self.netincome = innetincome
        self.netrevenue = innetrevenue

    def test(self):
        """Print a marker line followed by the stored net income."""
        # Single-argument, parenthesised print produces the same output under
        # Python 2 (print statement) and Python 3 (print function).
        print('testing')
        print(self.netincome)
def xlparse(filepath, classname):
    """Extract net revenue and net income from a financial-report workbook.

    The second worksheet of the workbook (index 1) is read; column A is
    treated as the row labels and column B as the values. Labels are
    fuzzy-matched against known spellings of "net revenue" and "net income".
    When several rows match, the last matching row with a non-blank value
    wins.

    Args:
        filepath: Path of the workbook to open with openpyxl.
        classname: Unused; kept so existing callers keep working.

    Returns:
        A ``quarterlyincome`` instance. A figure that could not be located
        is reported as ``None`` (and a warning is logged).

    Raises:
        IndexError: If the workbook has fewer than two worksheets.
    """
    wb = openpyxl.load_workbook(filepath)
    # ``sheetnames`` / ``wb[name]`` replace the deprecated
    # ``get_sheet_names()`` / ``get_sheet_by_name()`` accessors.
    # Index 1 is the second sheet, which this script treats as the
    # consolidated statement of income.
    # NOTE(review): presumably sheet 0 is a cover/entity-information sheet --
    # confirm against the downloaded reports.
    sheet = wb[wb.sheetnames[1]]

    # ``sheet.columns`` is a generator in recent openpyxl releases (it was an
    # indexable tuple in old ones); materialise it so indexing works on both.
    columns = list(sheet.columns)
    cola = [cell.value for cell in columns[0]] if len(columns) > 0 else []
    colb = [cell.value for cell in columns[1]] if len(columns) > 1 else []

    # Extract the "millions vs thousands" denomination from cell A1, e.g.
    # 'Consolidated Condensed Statements of Income - USD ($) shares in Millions, $ in Millions'
    # The values are not applied to the figures yet; they are only logged.
    header = cola[0] if cola and cola[0] else ''
    sharedeno = re.findall(r'shares in ([a-zA-Z]+)', header)
    dollardeno = re.findall(r'\$ in ([a-zA-Z]+)', header)  # '\$' matches a literal dollar sign
    logger.debug('%s,%s', sharedeno, dollardeno)

    # Relationships between the rows of an income statement, for reference
    # when this parser is extended to further rows:
    #   net revenue - cost of sales = gross margin
    #   gross margin - operating expenses = operating income
    #   operating expenses = R&D + marketing + restructuring + amortization
    #   operating income - gains (losses) on equity investments
    #       + interest and other = income before taxes
    #   income before taxes - provision for taxes = net income
    #   net income / basic shares = basic earnings per share
    #   net income / diluted shares = diluted earnings per share of common stock

    # Known label variants; expand as more companies' reports are supported.
    netrevenuelist = ['Net sales', 'Netsales', 'netsales', 'Net revenue', 'netrevenue']
    netincomelist = ['Net income', 'netincome']
    logger.debug('%s,%s', cola, colb)

    # Start from None so a report without a matching row yields a result
    # instead of an UnboundLocalError.
    netrevenue = None
    netincome = None
    for name, value in zip(cola, colb):
        # Blank label cells cannot match anything, and a blank value cell
        # (e.g. a section heading row) must not overwrite a real figure.
        if not name or value is None:
            continue
        for entry in netrevenuelist:
            score = fuzz.ratio(entry, name)
            if score > 70:
                logger.info('%s,%s', name, score)
                netrevenue = value
                break
        for entry in netincomelist:
            score = fuzz.ratio(entry, name)
            if score > 70:
                logger.info('%s,%s', name, score)
                netincome = value
                break

    if netrevenue is None or netincome is None:
        logger.warning('Could not locate net revenue and/or net income in %s',
                       filepath)

    return quarterlyincome(netrevenue, netincome)
# Main script: ask for a stock ticker, parse every report found under
# ./Tickers/<TICKER>/ and plot net income per report date as a bar chart.
try:
    _read_input = raw_input  # Python 2
except NameError:
    _read_input = input  # Python 3
ticker = _read_input("Enter stock ticker")
classlist = []  # one quarterlyincome per successfully parsed file
datelist = []   # report date of each entry in classlist (same order)

# Reports live in a folder named after the upper-cased ticker, below the
# current working directory. os.path.join keeps this portable instead of
# hard-coding the Windows separator.
currentpath = os.getcwd()
newpath = os.path.join(currentpath, 'Tickers', ticker.upper())

for root, dirs, files in os.walk(newpath):  # walk yields root path, directories, file names
    for name in files:
        filepath = os.path.join(root, name)
        # splitext removes exactly the extension; str.rstrip('.xls') strips a
        # *set of characters* and would also eat trailing letters of the stem
        # (e.g. 'sales.xls' -> 'sale').
        filename = os.path.splitext(name)[0]
        date = re.findall(r'\d{4}-\d{2}-\d{2}', filename)
        if not date:
            # Without a date the file cannot be placed on the time axis.
            logger.warning('Skipping %s: no YYYY-MM-DD date in file name', filepath)
            continue
        datet = datetime.datetime.strptime(date[0], '%Y-%m-%d')
        logger.debug('%s %s', filepath, filename)
        print(filename)
        classfile = xlparse(filepath, filename)
        # Append to both lists only after parsing so they stay aligned.
        datelist.append(datet)
        classlist.append(classfile)
        print('Net Revenue and Net income is %s %s'
              % (classfile.netrevenue, classfile.netincome))
        # TODO: understand the file error with older .xls files
        # NOTE(review): likely because openpyxl only reads the .xlsx family -- confirm.
        # TODO: expand to other rows in the income sheet

# Plotting
print(date2num(datelist))
netincomes = [entry.netincome for entry in classlist]
for x, y in zip(datelist, netincomes):
    print('%s %s' % (x, y))
plt.bar(datelist, netincomes, color="red", linestyle='-', linewidth=5)
plt.show()