no4_order_data.py
# Import the BigQuery client library
from google.cloud import bigquery
# Create a client object
client = bigquery.Client()
# Construct a reference to the "nhtsa_traffic_fatalities" dataset
dataset_ref = client.dataset("nhtsa_traffic_fatalities", project="bigquery-public-data")
# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)
# Construct a reference to the "accident_2015" table
table_ref = dataset_ref.table("accident_2015")
# API request - fetch the table
table = client.get_table(table_ref)
# Print information on all the columns in the "accident_2015" table in the "nhtsa_traffic_fatalities" dataset
table.schema
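# Optional sketch (not part of the original script): the schema can also be
# printed field by field for easier reading; each SchemaField exposes
# .name, .field_type, and .description.
for field in table.schema:
    print(field.name, field.field_type, field.description)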
# Preview the first five rows of the "accident_2015" table
client.list_rows(table, max_results=5).to_dataframe()
# Query to count the number of accidents for each day of the week
query = """
SELECT COUNT(consecutive_number) AS num_accidents,
EXTRACT(DAYOFWEEK FROM timestamp_of_crash) AS day_of_week
FROM `bigquery-public-data.nhtsa_traffic_fatalities.accident_2015`
GROUP BY day_of_week
ORDER BY num_accidents DESC
"""
# Set up the query (the query will fail, instead of billing you, if it would
# scan more than the limit set below: 10**9 bytes = 1 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**9)
query_job = client.query(query, job_config=safe_config)
# API request - run the query, and convert the results to a pandas DataFrame
accidents_by_day = query_job.to_dataframe()
# Print the DataFrame
accidents_by_day
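# Optional sketch (not part of the original script): BigQuery's
# EXTRACT(DAYOFWEEK ...) returns 1 for Sunday through 7 for Saturday, so
# mapping the numeric codes to names makes the result easier to read.
day_names = {1: "Sunday", 2: "Monday", 3: "Tuesday", 4: "Wednesday",
             5: "Thursday", 6: "Friday", 7: "Saturday"}
accidents_by_day["day_of_week_name"] = accidents_by_day["day_of_week"].map(day_names)
accidents_by_day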