-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBA4318-Homework5.py
85 lines (66 loc) · 2.77 KB
/
BA4318-Homework5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import pandas as pd
from pandas.plotting import autocorrelation_plot, lag_plot
import matplotlib.pylab as plt
from statsmodels.tsa.stattools import adfuller
# Brazil: load only the observation-date and temperature columns.
df_brazil = pd.read_csv(
    "sudeste.csv",
    usecols=["date", "temp"],
)
# Madrid: load only the date ("CET") and daily mean temperature columns.
df_madrid = pd.read_csv(
    "weather_madrid_LEMD_1997_2015.csv",
    usecols=["CET", "Mean TemperatureC"],
)
def prepare_brazil(df, fill_value=0):
    """Return Brazil's daily mean temperature on a gap-free calendar-day index.

    Rows of *df* are averaged per "date" value, the dates are parsed into a
    DatetimeIndex, and the series is reindexed onto every calendar day between
    the earliest and the latest parsed date.

    Args:
        df: DataFrame with a "date" column (parseable date strings) and a
            numeric "temp" column. It is not modified.
        fill_value: Value written for calendar days that have no row in *df*.
            Defaults to 0 to keep the historic behaviour; pass float("nan")
            to mark gaps as missing so that a later dropna() removes them
            instead of counting them as 0-degree readings.

    Returns:
        Series named "temp" indexed by a daily DatetimeIndex.
    """
    # Average only the temperature column so unrelated columns cannot
    # interfere with (or break) the mean.
    daily = df.groupby("date")["temp"].mean()
    daily.index = pd.DatetimeIndex(daily.index)
    # Order by the parsed dates: the group keys are strings, and their
    # lexicographic order is only chronological for zero-padded ISO dates.
    daily = daily.sort_index()
    full_range = pd.date_range(daily.index.min(), daily.index.max())
    return daily.reindex(full_range, fill_value=fill_value)
def prepare_madrid(df, fill_value=0):
    """Return Madrid's daily mean temperature on a gap-free calendar-day index.

    The "Mean TemperatureC" values are indexed by the parsed "CET" dates,
    ordered chronologically, and reindexed onto every calendar day between the
    earliest and the latest date.

    Args:
        df: DataFrame with a "CET" column (parseable date strings) and a
            "Mean TemperatureC" column. It is not modified.
        fill_value: Value written for calendar days that have no row in *df*.
            Defaults to 0 to keep the historic behaviour; pass float("nan")
            to mark gaps as missing so that a later dropna() removes them
            instead of counting them as 0-degree readings.

    Returns:
        Series named "Mean TemperatureC" indexed by a daily DatetimeIndex.
    """
    # Build a fresh Series instead of reassigning the index of a column taken
    # from the caller's frame, which would alter the caller's data.
    daily = pd.Series(
        df["Mean TemperatureC"].to_numpy(),
        index=pd.DatetimeIndex(df["CET"]),
        name="Mean TemperatureC",
    ).sort_index()
    # Use the parsed minimum/maximum rather than the first/last row, so the
    # range is correct even when the rows are not in chronological order.
    full_range = pd.date_range(daily.index.min(), daily.index.max())
    return daily.reindex(full_range, fill_value=fill_value)
# Build the two daily temperature series.
# NOTE(review): prepare_brazil / prepare_madrid fill calendar days that are
# missing from the source data with 0 by default, so those placeholder zeros
# take part in every plot and statistic below.
brazil, madrid = prepare_brazil(df_brazil), prepare_madrid(df_madrid)

# Raw time-series plots.
plt.plot(brazil)
plt.title("brazil")
plt.show()
plt.plot(madrid)
plt.title("madrid")
plt.show()

# Augmented Dickey-Fuller test (null hypothesis: the series has a unit root).
print("adfuller test of brazil")
print(adfuller(brazil.dropna()))
print("adfuller test of madrid")
print(adfuller(madrid.dropna()))

# Distribution of the observed temperatures.
plt.hist(brazil.dropna())
plt.title("histogram of brazil")
plt.show()
plt.hist(madrid.dropna())
plt.title("histogram of madrid")
plt.show()

# Compare mean and variance of the first and second half of each series.
# A stationary series should give similar values for both halves.
X = madrid.dropna().values
low, high = X[:len(X) // 2], X[len(X) // 2:]
print("Madrid's low mean is ", low.mean(), "and high mean is", high.mean())
print("Madrid's low variation is ", low.var(), "and high variation is", high.var())
# NaN readings are dropped here as well, matching the Madrid branch; a single
# NaN would otherwise turn the mean and variance of its half into NaN.
X = brazil.dropna().values
low, high = X[:len(X) // 2], X[len(X) // 2:]
print("Brazil's low mean is", low.mean(), "and high mean is", high.mean())
print("Brazil's low variation is", (low.var()), "and high variation is", (high.var()))

# Lag plots: value at day t against value at day t+1.
lag_plot(brazil)
plt.title("Lag plot of Brazil")
plt.show()
lag_plot(madrid)
plt.title("Lag plot of Madrid")
plt.show()

# Autocorrelation plots. NaN readings are dropped first because the
# autocorrelation is computed from the series mean, which a NaN would make NaN.
autocorrelation_plot(brazil.dropna())
plt.title("Autocorrelation of Brazil")
plt.show()
autocorrelation_plot(madrid.dropna())
plt.title("Autocorrelation of Madrid")
plt.show()
# We can conclude that there is no increasing trend in either temperature dataset.
# Judging by the time-series plots, histograms, lag plots and autocorrelation plots, both the Brazil and the Madrid temperature series appear to be stationary.
# The Augmented Dickey-Fuller test agrees: we reject the null hypothesis (a unit root, i.e. non-stationarity) for both Brazil and Madrid, since the p-values are smaller than 0.05.
# As a result, we cannot say that these datasets show evidence of global warming in either Madrid or Brazil.
#MELİH SARI-2220895