-
Notifications
You must be signed in to change notification settings - Fork 85
/
Copy pathaws_to_db.py
149 lines (132 loc) · 4.97 KB
/
aws_to_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import sqlite3
import requests
import json
import os
import random
import sys
import pandas as pd
from glob import glob
import sys
import datetime
import ast
import tldextract
import re
from urllib.parse import urlsplit
currentDateTime = datetime.datetime.now()
connection = sqlite3.connect('aws.db')
cursor = connection.cursor()
fname = sys.argv[1]
print(fname)
cursor.execute('CREATE TABLE IF NOT EXISTS "AWS" \
( "Id" INTEGER PRIMARY KEY AUTOINCREMENT,\
"host" TEXT ,\
"subject" TEXT ,\
"issuer" TEXT,\
"subject_CN" TEXT,\
"subject_alt_name" TEXT,\
"domain_name" TEXT)')
index_check_query = "SELECT * FROM sqlite_master WHERE name = 'host_index' and type = 'index'"
index_exists = cursor.execute(index_check_query).fetchone()
if not index_exists:
cursor.execute("CREATE INDEX host_index ON AWS(host)")
else:
print("Index host_index already exists, proceeding with further workflow")
index_check_query = "SELECT * FROM sqlite_master WHERE name = 'domain_name_index' and type = 'index'"
index_exists = cursor.execute(index_check_query).fetchone()
if not index_exists:
cursor.execute("CREATE INDEX domain_name_index ON AWS(domain_name)")
else:
print("Index domain_name_index already exists, proceeding with further workflow")
index_check_query = "SELECT * FROM sqlite_master WHERE name = 'issuer_index' and type = 'index'"
index_exists = cursor.execute(index_check_query).fetchone()
if not index_exists:
cursor.execute("CREATE INDEX issuer_index ON AWS(issuer)")
else:
print("Index issuer_index already exists, proceeding with further workflow")
# Create the "SubjectAltName" table
cursor.execute('''
CREATE TABLE IF NOT EXISTS "SubjectAltName" (
"Id" INTEGER PRIMARY KEY AUTOINCREMENT,
"subdomain" TEXT ,
"aws_id" INTEGER,
FOREIGN KEY (aws_id) REFERENCES AWS(Id)
)''')
index_check_query = "SELECT * FROM sqlite_master WHERE name = 'idx_subdomain' and type = 'index'"
index_exists = cursor.execute(index_check_query).fetchone()
if not index_exists:
cursor.execute('CREATE UNIQUE INDEX idx_subdomain ON SubjectAltName (subdomain);')
else:
print("Index idx_subdomain already exists, proceeding with further workflow")
cnt = 0
err = 0
# Extract domain name from subdomain
def extract_domain(string):
pattern = r"(?:CN=|DNS:)?([\w|\*|\.]+)"
match = re.search(pattern, string)
if match:
domain = match.group(1)
else:
domain = None
return domain
def extract_root_domain(subdomain):
url = 'http://' + subdomain
url_parts = urlsplit(url)
domain = url_parts.hostname
return domain
def extract_domain_name(subdomain):
pattern = r"(?:[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}"
match = re.search(pattern, subdomain)
if match:
extracted = tldextract.extract(subdomain)
domain_name = extracted.domain + '.' + extracted.suffix
else:
domain_name = ""
return domain_name
def extract_subdomains(string):
if string is None:
return []
subdomains = string.replace("DNS:", "")
subdomains = subdomains.split(', ')
subdomains = [sub.replace('*.','') for sub in subdomains]
subdomains = list(set(subdomains))
return subdomains
def extract_organization(string):
pattern = r"O=([\w|\s|,|\.]+);"
match = re.search(pattern, string)
if match:
organization = match.group(1)
else:
organization = None
return organization
with open(fname, errors='ignore') as user_file:
for jsonObj in user_file:
try:
something = json.loads(jsonObj)
for k,v in something.items():
if k=="host":
h = v
if k=="certificateChain":
cc = v[0]
sub = cc.get("subject")
issu = extract_organization(cc.get("issuer"))
subcn = cc.get("subjectCN")
subaltnm = cc.get("subjectAltName")
domain_name = extract_domain_name(extract_root_domain(str(subcn)))
cnt+=1
cursor.execute("INSERT INTO AWS (host,subject,issuer,subject_CN, domain_name) values (?,?,?,?,?)", (h,sub,issu,subcn, domain_name))
subdomains = extract_subdomains(subaltnm)
for subdomain in subdomains:
try:
cursor.execute("INSERT INTO SubjectAltName (aws_id,subdomain) values (?,?)", (cnt,subdomain))
except sqlite3.IntegrityError:
pass
except Exception as e:
print('error',e)
print('domain name: ', domain_name)
print('Host:', h)
print('##############################################################')
err+=1
print('Error count: ', err)
connection.commit()
cursor.close()
connection.close()