-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
80 lines (61 loc) · 1.75 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from data_diff import connect_to_table, diff_tables
from dotenv import load_dotenv
from google.oauth2.service_account import Credentials
from pathlib import Path
import logging
import os
import time
# Fail fast when there is no .env file in the current working directory;
# load_dotenv() is only called once the file is known to exist.
if not Path(".env").is_file():
    # FileNotFoundError is a subclass of Exception, so any caller that
    # already catches Exception keeps working.
    raise FileNotFoundError(".env file does not exist")
load_dotenv()
# Service-account key file handed to the Google credentials loader.
GOOGLE_KEY_PATH = "gcloud_key_tzanakis-bigquery.json"

# Table / dataset names used on the two sides of the comparison.
MYSQL_TABLE = "reservations_bookings"
BQ_DATASET = "user_gtzanakis"

# Key range passed as min_key / max_key to both table connections.
ID_MIN, ID_MAX = 2_000_000, 2_103_353

logging.basicConfig(level=logging.INFO)
# MySQL side of the comparison.
# Connection settings are read from environment variables; a missing
# variable raises KeyError as soon as this module is executed.
# The settings are passed as a dict. Alternatively use a URI:
#   db_info = f"mysql://{user}:{password}@{hostname}/{database}"
db_info_mysql = {
    "driver": "mysql",
    "user": os.environ["MYSQL_USER"],
    "password": os.environ["MYSQL_PASSWORD"],
    "database": os.environ["MYSQL_DATABASE"],
    "host": os.environ["MYSQL_HOST"],
}

table_mysql_bookings = connect_to_table(
    db_info_mysql,
    # Use the module constant rather than repeating the table name literal,
    # so the name is defined in exactly one place.
    table_name=MYSQL_TABLE,
    key_column="id",
    min_key=ID_MIN,
    max_key=ID_MAX,
    # Restrict the MySQL side to rows matching this condition.
    where="bookingState='BOOKED'",
)
# BigQuery side of the comparison.
# Credentials are loaded from the service-account key file and requested
# with the BigQuery scope only.
credentials_gcloud = Credentials.from_service_account_file(
    filename=GOOGLE_KEY_PATH,
    scopes=["https://www.googleapis.com/auth/bigquery"],
)

# The project id is read from the environment; the dataset is the module
# constant BQ_DATASET.
db_info_bigquery = dict(
    driver="bigquery",
    location="eu",
    credentials=credentials_gcloud,
    project=os.environ["BQ_PROJECT_ID"],
    dataset=BQ_DATASET,
)

# Same key column and key range as the MySQL table connection.
table_bigquery = connect_to_table(
    db_info_bigquery,
    table_name="booking_ids",
    key_column="id",
    min_key=ID_MIN,
    max_key=ID_MAX,
)
def main():
    """Print every differing row between the two tables, then the run time.

    Each item produced by ``diff_tables`` is unpacked into two values
    (``plus_or_minus`` and ``columns``), which are printed on one line.
    After the loop finishes, the elapsed time in seconds is printed.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start = time.perf_counter()
    for plus_or_minus, columns in diff_tables(table_mysql_bookings, table_bigquery):
        print(plus_or_minus, columns)

    print(f"Elapsed seconds: {time.perf_counter() - start}")


# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()