|
#!/usr/bin/python

#Secure Planet LLC

import urllib2
import re
import sys

from collections import OrderedDict

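#Overview: brutescrape reads a comma-separated list of URLs from sites.scrape,
#scrapes the visible text of each page, and writes the unique words it finds
#to passwordList.txt for use as a password wordlist.
#Example sites.scrape contents (URLs are illustrative): http://example.com,http://example.org
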
banner_file = "brutescrape.banner"
def banner():
    global banner_file
    open_banner = open(banner_file, "r")
    for line in open_banner:
        print line.rstrip()
    open_banner.close()

def stripHTMLTags(html):
    #Strip HTML tags from any string and transform special entities.
    text = html

    #Apply rules in the given order.
    rules = [
        { r'>\s+' : u'>'},                              # Remove whitespace after a tag opens or closes.
        { r'\s+' : u' '},                               # Collapse consecutive whitespace.
        { r'\s*<br\s*/?>\s*' : u'\n'},                  # Newline after a <br>.
        { r'</(div)\s*>\s*' : u'\n'},                   # Newline after </div>.
        { r'</(p|h\d)\s*>\s*' : u'\n\n'},               # Blank line after </p> and headings.
        { r'<head>.*<\s*(/head|body)[^>]*>' : u'' },    # Remove everything from <head> to </head>.
        { r'<a\s+href="([^"]+)"[^>]*>.*</a>' : r'\1' }, # Keep link targets instead of link text.
        { r'[ \t]*<[^<]*?/?>' : u'' },                  # Remove remaining tags.
        { r'^\s+' : u'' }                               # Remove leading whitespace.
    ]

    for rule in rules:
        for (k, v) in rule.items():
            try:
                regex = re.compile(k)
                text = regex.sub(v, text)
            except:
                pass #Skip any rule that fails to compile or apply.

    #Replace HTML entities with their literal characters.
    special = {
        '&nbsp;' : ' ', '&amp;' : '&', '&quot;' : '"',
        '&lt;' : '<', '&gt;' : '>'
    }

    for (k, v) in special.items():
        text = text.replace(k, v)

    return text

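#Example: stripHTMLTags('<p>Hello&nbsp;<b>world</b></p>') yields 'Hello world\n\n',
#since the tags are stripped, '</p>' becomes a blank line and '&nbsp;' becomes a space.
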
banner()
#Create an empty list to collect candidate words from every site.
y_arr = []

try:
    file_list = open('sites.scrape', 'r')
    sites = file_list.read().split(',')
    file_list.close()

except:
    #Without a readable sites.scrape there is nothing to scrape, so bail out.
    banner()
    sys.exit()

for site in sites:
    try:
        site = site.strip()
        print "[*] Downloading Content For : " + site
        response = urllib2.urlopen(site)
        x = stripHTMLTags(response.read())
        #Replace junk found in our response.
        x = x.replace('\n', ' ')
        x = x.replace(',', ' ')
        x = x.replace('.', ' ')
        x = x.replace('/', ' ')
        x = re.sub('[^A-Za-z0-9]+', ' ', x)
        x_arr = x.split(' ')
        for y in x_arr:
            y = y.strip()
            if y and (len(y) > 4):
                #Tokens starting with 2F, 23, 3F or 3D are most likely leftovers of
                #URL-encoded characters (%2F, %23, %3F, %3D) whose '%' was stripped above.
                if y[:2] in ('2F', '23', '3F', '3D'):
                    y = y[2:]
                y_arr.append(y)
    except:
        pass #Skip any site that fails to download or parse.

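#At this point y_arr holds every token longer than four characters from every
#site, in the order it was discovered.
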
#Deduplicate while preserving discovery order.
y_arr_unique = OrderedDict.fromkeys(y_arr).keys()
print "[*] Processing List"
f_write = open("passwordList.txt", "w")
for yy in y_arr_unique:
    #Skip purely numeric tokens; keep everything else as a candidate password.
    if not yy.strip().isdigit():
        f_write.write(yy.strip() + "\n")
f_write.close()
print "[*] Wordlist Generation Complete."
print "[*] Output Located: passwordList.txt"
print "[*] Total Count of Passwords >> " + str(len(y_arr_unique))

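#Illustrative run (script filename assumed; sites.scrape and brutescrape.banner
#must exist in the working directory):
#  $ echo "http://example.com,http://example.org" > sites.scrape
#  $ python brutescrape.py
#  <banner text from brutescrape.banner>
#  [*] Downloading Content For : http://example.com
#  [*] Downloading Content For : http://example.org
#  [*] Processing List
#  [*] Wordlist Generation Complete.
#  [*] Output Located: passwordList.txt
#  [*] Total Count of Passwords >> <count>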