-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathLogistic Regression for Market Trend Forecast of the DJI.R
211 lines (124 loc) · 5.26 KB
/
Logistic Regression for Market Trend Forecast of the DJI.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# ===================================================================
# -------------------------------------------------------------------
# Dow Jones Index(DJI) Trend
# From WQU Econometrics: Module 2
# -------------------------------------------------------------------
# ===================================================================
## Is predicted using logistic regression
"
-----------------------------------------------
Algorithm:
-----------------------------------------------
1. DJI data is extracted from Yahoo Finance
2. Different indicators are calculated:
- Moving Average
- Standard Deviation
- Relative Strength Index (RSI)
- MACD
- Bollinger Band
3. Create variable direction:
- Up(1) or down(0) [modelled as a logistic regression, so the
dependent variable Y is 1 if the markets are up and 0
if the markets are down]
- Current price > 20 days previous price --> Up Direction
- Current price < 20 days previous price --> Down Direction
4. Data is divided in two parts:
- In-sample data -> Model Building Process
- Out-sample data -> Evaluation
- In-sample and out-sample start and end dates are indicated
5. Data is standardized in order to prevent variables on a larger
scale from having a disproportionate impact on the results
- Standardized data = (X - mean) / Std(X)
- Mean and SD is calculated for each column
6. Logistic Regression is implemented"
# Install quantmod only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("quantmod", quietly = TRUE)) {
  install.packages("quantmod")
}
library(quantmod)
# ---------------------------------------------
# 1. DJI data is extracted from Yahoo Finance
# ---------------------------------------------
# Download the DJI price series from Yahoo! and bind it to `DJI` explicitly.
# auto.assign = FALSE makes getSymbols() return the data instead of relying
# on the session-level auto-assign option to create the variable.
DJI <- getSymbols("^DJI", src = "yahoo", auto.assign = FALSE)
# Keep only the closing-price column for the rest of the analysis
dow_jones <- DJI[, "DJI.Close"]
# ---------------------------------------------
# 2. Different indicators are calculated
# ---------------------------------------------
# Every indicator below is derived from the closing-price series `dow_jones`.
# Moving average calculations
## Moving average 10
average_10 <- rollapply(dow_jones, width = 10, FUN = mean)
## Moving average 20
average_20 <- rollapply(dow_jones, width = 20, FUN = mean)
# Standard Deviation calculations (same 10 and 20 observation windows)
std_10 <- rollapply(dow_jones, width = 10, FUN = sd)
std_20 <- rollapply(dow_jones, width = 20, FUN = sd)
# Relative Strength Index calculations (5 and 14 periods, simple MA)
rsi5 <- RSI(dow_jones, n = 5, maType = "SMA")
rsi14 <- RSI(dow_jones, n = 14, maType = "SMA")
# MACD calculations; the variable names encode fast/slow/signal periods
macd12269 <- MACD(dow_jones, nFast = 12, nSlow = 26, nSig = 9, maType = "SMA")
macd7205 <- MACD(dow_jones, nFast = 7, nSlow = 20, nSig = 5, maType = "SMA")
# Bollinger Bands calculations
# The fourth argument of BBands() is `sd`, the number of standard deviations
# between the moving average and each band. It was previously passed as 20
# (positionally), which is not a meaningful band width; the conventional
# value of 2 is used here and every argument is named to prevent a repeat.
bollinger_bands <- BBands(dow_jones, n = 20, maType = "SMA", sd = 2)
# ---------------------------------------------
# 3. Create variable direction
# ---------------------------------------------
## Compare each close with the close 20 observations earlier:
##   higher -> 1 (up), lower -> 0 (down), otherwise NA
##   (NA covers rows with no 20-observation history and exact ties)
direction <- local({
  current <- as.numeric(dow_jones)
  previous <- as.numeric(lag(dow_jones, 20))
  ifelse(current > previous, 1, ifelse(current < previous, 0, NA_real_))
})
## Attach every indicator and the direction flag to the price series
dow_jones <- cbind(
  dow_jones, average_10, average_20, std_10, std_20, rsi5, rsi14,
  macd12269, macd7205, bollinger_bands, direction
)
## Rows x columns of the combined data set
dimension <- dim(dow_jones)
# ---------------------------------------------
# 4. Divide data into two parts
# ---------------------------------------------
# In-sample dates (used for model building)
is_start_date <- "2010-01-01" # Initial date
is_end_date <- "2014-12-31" # End date
# Out-sample dates (used for evaluation)
# The end date used to be the malformed string "2015-12-1-31"; it is now a
# valid ISO date so the window covers the whole of 2015.
os_start_date <- "2015-01-01"
os_end_date <- "2015-12-31"
# Row positions whose index (date) lies inside each window, bounds inclusive.
# The bounds are converted with as.Date() so the comparison is date-to-date.
insample_row <- which(
  index(dow_jones) >= as.Date(is_start_date) &
    index(dow_jones) <= as.Date(is_end_date)
)
outsample_row <- which(
  index(dow_jones) >= as.Date(os_start_date) &
    index(dow_jones) <= as.Date(os_end_date)
)
# Filter the dji by insample and outsample indices
insample_dji <- dow_jones[insample_row, ]
outsample_dji <- dow_jones[outsample_row, ]
# ---------------------------------------------
# 5. Standardize the data
# ---------------------------------------------
# Column-wise mean of the insample data
insample_mean <- apply(insample_dji, 2, mean)
# Column-wise SD of the insample data
insample_std <- apply(insample_dji, 2, sd)
## Create a matrix of 1s with the same dimensions as the insample data
insample_identity <- matrix(1, nrow(insample_dji), ncol(insample_dji))
# Normalise: z_i = (x_i - mean_i) / (sd_i)
"since:
x = [ ] mean(col vector) = [ ]
[ ] [ ]
[ ] [ ]
1258 x 16 1258 x 1
isme = [ ]
1 x 16
isidn = [ ] t(isidn) = 16 x 1258
[ ] isme * t(isidn) = 1 x 1258
[ ] t(isme*t(isidn)) = 1258 x 1
1258 x 16
"
# In the note above, isme is insample_mean and isidn is insample_identity.
# Multiplying the per-column vector by the transposed matrix of 1s and
# transposing back yields a matrix whose column j is filled with the
# statistic of column j, so it lines up element-wise with insample_dji.
# The subtraction is parenthesised so it happens before the division;
# previously the division bound tighter and the result was x - (mean / sd).
normalised_insample_dji <-
  (insample_dji - t(insample_mean * t(insample_identity))) /
  t(insample_std * t(insample_identity))
dm <- dim(insample_dji)
# replace the normalised directions with the old directions
## direction is the last column and must stay 0/1 for the logistic model;
## directions[in the range of the insample dates index]
normalised_insample_dji[, dm[2]] <- direction[insample_row]
# ---------------------------------------------
# 6. Apply Logistic Regression
# ---------------------------------------------
# Textual form of the model: direction explained by all other columns
formula <- "direction ~ ."
# Fit a binomial GLM on the standardised in-sample data
model <- glm(
  direction ~ .,
  family = "binomial",
  data = normalised_insample_dji
)
# Draw the diagnostic plots for the fitted model
plot(model)