-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathmultidefine.py
205 lines (178 loc) · 7.39 KB
/
multidefine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import pyfiglet
# INFO
print("---------------------------------------------------------------------------------------------------------------------------------")
print(pyfiglet.figlet_format("Multi\nDefine"))
print("---------------------------------------------------------------------------------------------------------------------------------")
print("- Enter a list of words and this program will return their definitions from Google, Oxford or Wikipedia - Whichever \nis available(in that order)")
print("- As this program is still in alpha, the software might not be stable. Please contact the developer if you face any bugs :)")
print()
print("Alpha version 0.2.5")
print("This version now comes with an update feature that automatically updates chromedriver to the latest version.")
print()
print("Developled by museHD @ https://github.com/musehd/MultiDefine")
print("---------------------------------------------------------------------------------------------------------------------------------")
print("Loading Browser...Please wait")
print()
# Don't need these libraries since we are using selenium
# import grequests
# from requests import get
# from requests.exceptions import RequestException
# from contextlib import closing
# from bs4 import BeautifulSoup
# import pickle
import update
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import SessionNotCreatedException, WebDriverException
import sys
import os
base_dir = os.path.dirname(os.path.abspath(__file__))
#Driver options
options = Options()
chromedriver_path = os.path.join(base_dir, 'chromedriver.exe')
options.headless = True
# driver = webdriver.Firefox(executable_path="D:/PROGRAMMING/PYTHON SCIRPTS/FILES/geckodriver.exe")
# Chromedriver seems to be faster than firefox
#To stop the "Now listening on xxxx" logs
options.add_experimental_option('excludeSwitches', ['enable-logging'])
try:
driver = webdriver.Chrome(executable_path=chromedriver_path,options=options)
except (SessionNotCreatedException, WebDriverException):
update.update_chromedriver()
driver = webdriver.Chrome(executable_path=chromedriver_path,options=options)
else:
print("Ready!")
# print when no exception
#Base URLS + intialising lists
oxford = "https://www.lexico.com/en/definition/"
google = "https://google.com/search?q="
pages = []
defs = []
queries = []
error_list = []
def get_ans():
global pages, defs, queries, error_list
print("Getting Answers")
print()
# if single_def == True:
for query in queries:
index = queries.index(query)
print(queries[index] + ': ', end='')
query.strip()
#driver.get(page)
driver.get((google+str("define " + query)))
#for ans in driver.find_elements_by_xpath('/html/body/div[6]/div[2]/div[9]/div[1]/div[2]/div/div[2]/div[2]/div/div/div[1]/div/div/div/div[1]/div/span/div/div/div[3]/div/div[4]/div/div/ol/li/div/div'):
try:
# Google displays text with style=display:inline so this searches for that specific style and picks ONLY the first ones as there are often multiple definitions
span = driver.find_element_by_class_name('vmod')
division = span.find_element_by_xpath("//*[@style='display:inline']").text
xdivision = division.split("\n")
print(xdivision[0])
if xdivision[0] is not None and len(xdivision[0]) > 0:
defs.append(xdivision[0])
# Bunch of Failsafes: First it checks the definition tab; if not, it checks the wikipedia page text on the right hand side; if not, it just gets the text description from the first website
except:
try:
try:
# Pretty sure this one's broken because google keeps changing the xpath of the def.
span = driver.find_element_by_xpath("/html/body/div[6]/div[2]/div[9]/div[1]/div[2]/div/div[2]/div[2]/div/div/div[1]/div/div/div/div[1]/div/span/div/div/div[3]/div/div[4]/div/div/ol/li/div/div/div[1]/div/div/div[1]/span")
except:
# Text description from first website on Google
span = driver.find_element_by_xpath("/html/body/div[6]/div[2]/div[9]/div[1]/div[2]/div/div[2]/div[2]/div/div/div[1]/div/div[2]/div/span")
if span.text is not None and len(span.text) > 0:
defs.append(span.text)
print(span.text)
except:
#If google fails, it goes to oxford dictionary to find the definition
try:
driver.get((oxford+str(query)))
span = driver.find_element_by_xpath("/html/body/div[1]/div/div[2]/div[1]/div[2]/div/div/div/div/section[1]/ul/li/div/p/span[2]")
if span.text is not None and len(span.text) > 0:
#print('plana')
defs.append(span.text)
print(span.text)
except:
try:
driver.get((google+str("define " + query)))
#--- Fixed in "optimise-wp" branch - changed it from volatile xpath to stable (??) class and tag names. Safeguarded it to prevent outputting blank answers
# This is the one that checks Wikipedia text on the right hand side
wiki_sector = driver.find_element_by_class_name("kp-wholepage")
descs = wiki_sector.find_elements_by_tag_name("span")
for desc in descs:
if len(desc.text)>10 and (desc.text.lower()) != query.lower():
print(desc.text)
defs.append(desc)
except:
# uSeFuL eRrOr mSg LmAo
error_list.append(queries[index])
print()
print("I can't find your word... \n Please make sure that the computer has an active internet connection and retry.\nIf all else fails, call the developer for help if this issue is recurring")
try:
# This part still broken - need to figure out how to fix About Featured Snippets
if ("..." or '' or ' ' or "About Featured Snippets") in defs[-1]:
error_list.append(queries[index])
elif defs[-1] == (None or False):
error_list.append(queries[index])
except:
pass
try:
#Removing 'None' or Null entries
defs.remove('')
except:
pass
defs = list(filter(None, defs))
defs = set(defs)
defs = list(defs)
print()
if len(error_list) > 0:
#print Words that need to be manually searched
print("The following words may need to be manually searched: ")
for word in error_list:
print(word)
error_list.clear()
# if single_def == False:
# for page in pages:
# index = pages.index(page)
# print(queries[index] + ': ', end='')
# driver.get(page)
# for ans in driver.find_elements_by_xpath('/html/body/div[6]/div[2]/div[9]/div[1]/div[2]/div/div[2]/div[2]/div/div/div[1]/div/div/div/div[1]/div/span/div/div/div[3]/div/div[4]/div/div/ol/li/div/div'):
#
# abc = ''
# for span in ans.find_elements_by_xpath(".//span"):
# defs.append(span.text)
# print(span.text,end='')
# defs.remove('')
# defs = list(filter(None, defs))
# print()
def main():
loopon = True
while loopon is True:
global queries
words = input("Enter words separated by commas: ")
#Trying to fix user input and removing newliens so that it doesn't break
words = words.replace('\n',',')
words = words.replace('\t',',')
words = words.rstrip("\n")
queries = words.split(',')
#Redundant - Multiple definitions feature:
#n = input("Would you like a single definition? y/n: ")
#if n != 'y' or 'n':
# print("Please retry with either 'y' or 'n' ")
# main()
#gen_pages()
get_ans()
print()
print("Finished!")
print()
exitcon = input("Press any key to go again or type 'exit' or 'quit' to close the program: ")
if exitcon in ('quit', 'exit'):
loopon = False
driver.close()
sys.exit(0)
else:
print("Running Again!")
print()
# Main thing where we call everything
main()
driver.close()
sys.exit(0)