diff --git a/MiniProject3WriteUp.pdf b/MiniProject3WriteUp.pdf
new file mode 100644
index 0000000..aca8543
Binary files /dev/null and b/MiniProject3WriteUp.pdf differ
diff --git a/text_mining.py b/text_mining.py
new file mode 100644
index 0000000..18a7970
--- /dev/null
+++ b/text_mining.py
@@ -0,0 +1,94 @@
+###text_mining.py
+import requests
+bad_links = ["/wiki/Help", "/wiki/File", "/wiki/Wiki"]  # internal link prefixes to skip
+links = {}  # pages the crawler has already visited
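+
+# Assumed helper: crawl() below calls analyze_page, but no definition appears
+# in this file; this is a minimal sketch that just fetches the raw page
+# source with the requests library imported above.
+def analyze_page(url):
+    '''
+    Fetches 'url' and returns the page's HTML source as a string.
+    '''
+    return requests.get(url).text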
+def find_start(text, start):
+    '''
+    Finds the index of the start of the article body at or after index
+    'start' in the Wikipedia page source 'text'. The body is taken to begin
+    at the first paragraph whose contents open with a bold tag, since the
+    lead paragraph of an article starts with its bolded title.
+    >>> find_start('<p>more</p><p><b>success</b></p>', 10)
+    14
+    >>> find_start('cat<p>more</p><p><b>success</b></p>', 0)
+    17
+    '''
+    check = text.find("<p>", start)
+    if check == -1 or check + 5 >= len(text):
+        raise ValueError("No appropriate start in string.")
+    elif text[check+3:check+5] == "<b":  # paragraph opens with bold text
+        return check + 3
+    else:
+        return find_start(text, check + 3)
+
+def find_link(text, start):
+    '''
+    Finds the first Wikipedia article link in 'text' at or after index
+    'start'. Returns the link and an index from which to resume searching.
+    >>> find_link('href="/wiki/exploding_kittens"', 0)
+    ('/wiki/exploding_kittens', 16)
+    >>> find_link('href="/wiki/nope" href="/wiki/exploding_kittens"', 6)
+    ('/wiki/exploding_kittens', 34)
+    >>> find_link('href="googlenope" href="/wiki/exploding_kittens"', 6)
+    ('/wiki/exploding_kittens', 34)
+    '''
+    link_start = text.find('href=', start) + 6
+    if link_start == 5:  # find() returned -1: no links left in 'text'
+        raise ValueError("No link found in string.")
+    link_end = text.find('"', link_start)
+    first_link = text[link_start:link_end]
+    a = first_link[:5]   # prefix that marks an internal link
+    b = first_link[:10]  # longer prefix that marks a Help/File/Wiki page
+    if a != "/wiki" or b in bad_links:  # ensures it is a non-file, non-help internal link
+        return find_link(text, link_end)
+    else:
+        return first_link, link_start + 10
+
+def crawl(page, depth, width):
+    '''
+    Accepts a starting 'page' and builds a tree of depth 'depth' by
+    following the first 'width' Wikipedia article links on each page.
+    Returns a list whose first element is the origin page and whose second
+    element is the list of subtrees, one per link followed; the same
+    pattern repeats recursively inside each nested list. Already-visited
+    pages and pages at the maximum depth appear as bare strings.
+    >>> print(crawl('/wiki/Turkish_language', 1, 1))
+    /wiki/Turkish_language
+    ### The second test needs network access, and the 'links' cache already
+    ### holds this page after the first test, so it stays commented out;
+    ### debug prints are commented out for the same reason.
+    #>>> crawl('/wiki/Turkish_language', 3, 2)
+    #['/wiki/Turkish_language', [['/wiki/Turkic_languages', ['/wiki/Language_family', '/wiki/Native_language']], ['/wiki/Ottoman_Turkish_language', ['/wiki/Register_(sociolinguistics)', '/wiki/Vulgar_Latin']]]]
+    '''
+    next_start = 0
+    if page in links:
+        #print('"' + page + '" was in links')
+        return page
+    else:
+        links[page] = 1
+    if depth <= 1:
+        #print('maximum depth reached')
+        return page
+    text = analyze_page('https://en.wikipedia.org' + page)
+    next_link, next_start = find_link(text, find_start(text, next_start))
+    res = []
+    for i in range(width):  # follow the first 'width' article links
+        res.append(crawl(next_link, depth - 1, width))
+        next_link, next_start = find_link(text, find_start(text, next_start))
+    return [page, res]
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod(verbose=True)
+    links.clear()  # reset the visited-page cache after the doctest run
+    out = crawl('/wiki/Turkish_language', 3, 1)
+    print(out)