-
Notifications
You must be signed in to change notification settings - Fork 0
/
sql_queries.py
145 lines (124 loc) · 3.7 KB
/
sql_queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# DROP TABLES
dim_airport_drop = "DROP TABLE IF EXISTS dim_airport"
dim_country_drop = "DROP TABLE IF EXISTS dim_us_country"
dim_city_drop = "DROP TABLE IF EXISTS dim_us_city"
dim_weather_drop = "DROP TABLE IF EXISTS dim_us_weather"
fact_imm_drop = "DROP TABLE IF EXISTS immigration_us"
# CREATE TABLES
dim_airport_create = """
CREATE TABLE IF NOT EXISTS public.dim_airport
(
id_airport text COLLATE pg_catalog."default" PRIMARY KEY,
type text COLLATE pg_catalog."default",
name text COLLATE pg_catalog."default",
country text COLLATE pg_catalog."default",
state text COLLATE pg_catalog."default",
city text COLLATE pg_catalog."default"
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
ALTER TABLE public.dim_airport
OWNER to student;
"""
dim_country_create = """
CREATE TABLE IF NOT EXISTS public.dim_country
(
id_country text COLLATE pg_catalog."default" PRIMARY KEY,
country text COLLATE pg_catalog."default"
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
ALTER TABLE public.dim_country
OWNER to student;
"""
dim_us_city_create = """
CREATE TABLE public.dim_us_city
(
id_city SERIAL PRIMARY KEY,
city text COLLATE pg_catalog."default",
state text COLLATE pg_catalog."default",
male_population text COLLATE pg_catalog."default" NOT NULL,
female_population text COLLATE pg_catalog."default" NOT NULL,
total_population text COLLATE pg_catalog."default",
state_prefix text COLLATE pg_catalog."default"
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
ALTER TABLE public.dim_us_city
OWNER to student;
"""
dim_weather_create = """
CREATE TABLE public.dim_us_weather
(
id_weather SERIAL PRIMARY KEY,
datetime date,
city text COLLATE pg_catalog."default",
temp text COLLATE pg_catalog."default",
state text COLLATE pg_catalog."default"
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
ALTER TABLE public.dim_us_weather
OWNER to student;
"""
fact_imm_create = """
CREATE TABLE public.immigration_us
(
id integer PRIMARY KEY,
year integer,
month integer,
citizen integer,
resident integer,
port_entry text COLLATE pg_catalog."default",
mode_entry integer,
arrival_date date,
dep_date date,
dateadd_to date,
state_addr text COLLATE pg_catalog."default",
birth_year integer,
age integer,
gender text COLLATE pg_catalog."default",
visa_code integer,
visa_type text COLLATE pg_catalog."default",
airline text COLLATE pg_catalog."default"
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
ALTER TABLE public.immigration_us
OWNER to student;
"""
# DATA QUALITY QUERIES
select_imm_count = """
SELECT COUNT(1) FROM IMMIGRATION_US;
"""
select_arrival_date = """
SELECT
SUM(CASE WHEN ARRIVAL_DATE IS NULL THEN 1 ELSE 0 END) NULL_VALUES,
SUM(CASE WHEN ARRIVAL_DATE IS NOT NULL THEN 1 ELSE 0 END) NOT_NULL_VALUES
FROM IMMIGRATION_US;
"""
select_year_imm = """
SELECT DISTINCT YEAR
FROM IMMIGRATION_US
ORDER BY YEAR;
"""
select_year_weather = """
SELECT DISTINCT CAST(EXTRACT(YEAR FROM datetime) AS INTEGER) AS YEAR
FROM DIM_US_WEATHER
ORDER BY YEAR;
"""
# query list
drop_table_queries = [dim_airport_drop, dim_country_drop,
dim_city_drop, dim_weather_drop, fact_imm_drop]
create_table_queries = [dim_airport_create, dim_country_create,
dim_us_city_create, dim_weather_create, fact_imm_create]