PLS1_TestScript2.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 12 18:39:07 2020
@author: thugwithyoyo
"""
import numpy as np
import PartialLeastSquares as pls
import hwfunclib as HFL
import matplotlib.pyplot as plt
# Set parameters for PLS
nLatents = 3
############################## Set parameters #################################
# Set amplitude of noise to be added to seed predictors
epsilon = 0.5
# Set the total number of predictor-outcome (choice) pairs to be generated
nChoices = 300
# Specify the number of unique predictor types on which to test the classifier
nSeeds = 5
# Specify the type of underlying signal of seed vectors to which noise will be
# added to construct predictors
#SignalType = 'sine'
#SignalType = 'gaussian'
SignalType = 'multishape'
# Set the domain boundaries and sampling rate of the time window corresponding
# to predictor vectors.
t_window = [-1., 1.]
nTimeSamples = 100
# Specify the fraction of total predictors to be used to train the model. The
# remaining predictors will be used to test performance of the model.
TrainingFrac = 0.8
############################ End set parameters ###############################
# Generate the vector of time values to correspond with the predictor vectors
t_vec = np.linspace(t_window[0], t_window[1], num=nTimeSamples, endpoint=False)
############ Parameters for sine wave predictor seed functions ################
# Build array of amplitudes
AmpVec = np.ones((nSeeds,), dtype=float)
#AmpVec = np.arange(1, nSeeds + 1, 1, dtype=float)
# Build array of frequencies
FreqVec = np.ones((nSeeds,), dtype=float)
#FreqVec = np.arange(1, nSeeds + 1, 1)
# Build array of phases
#PhaseVec = np.zeros((nSeeds,), dtype=float)
PhaseVec = (np.pi/4.)*np.arange(0, nSeeds, 1)
############## Parameters for Gaussian predictor seed vectors. ################
# Build a shifting mean vector
CombSpacing = np.floor(nTimeSamples/nSeeds)
Comb = np.arange(np.round(CombSpacing/2.), nTimeSamples, CombSpacing, dtype=int)
MeanVec = t_vec[Comb]
# Generate uniform-valued vector of standard deviations
StdDevVec = (1./3.)*np.ones((nSeeds,), dtype=float)
#################### End parameters for seed functions ########################
# Build array of model target values that are symmetric about zero.
if np.mod(nSeeds, 2.) == 1.:
    TargetValues = np.arange(-np.floor(nSeeds/2.), np.ceil(nSeeds/2.), 1)

if np.mod(nSeeds, 2.) == 0.:
    TargetValues = np.arange(-np.floor(nSeeds/2.), np.ceil(nSeeds/2.), 1) + 0.5
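# For example, nSeeds = 5 gives TargetValues = [-2, -1, 0, 1, 2]; an even count
# such as nSeeds = 4 gives [-1.5, -0.5, 0.5, 1.5], keeping the values centered
# on zero.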
# Randomly sample, with replacement, from the list of target types n=nChoices
# times to generate the list of desired outcomes to be used to train the model.
# Each outcome will correspond to an individual seed vector.
ChoiceVec = np.random.choice(TargetValues, size=(nChoices,))
# Initialize array to contain the seed vectors
SeedVecs = np.empty((nSeeds, t_vec.size))
# Assemble array of seed vectors
for i in np.arange(0, nSeeds):

    if SignalType == 'sine':
        SeedVecs[i, :] = AmpVec[i]*np.sin(2.*np.pi*FreqVec[i]*t_vec + PhaseVec[i])

    if SignalType == 'gaussian':
        SeedVecs[i, :] = AmpVec[i]*np.exp(-(1./2.)*((t_vec - MeanVec[i])/(StdDevVec[i]))**2)

    if SignalType == 'multishape':
        FourierSeries = np.zeros_like(t_vec)

        for n in np.arange(1, 2*i + 2, 2):
            FourierSeries = FourierSeries + (4./np.pi)*(1./n)*np.sin(2*n*np.pi*FreqVec[i]*t_vec)

        SeedVecs[i, :] = AmpVec[i]*FourierSeries
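# Note: the 'multishape' seeds are partial Fourier sums of a square wave (odd
# harmonics up to 2*i + 1), so seed 0 is a pure sine and higher-index seeds look
# progressively more square, giving each class a distinct shape.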
# Initialize array to contain predictor vectors
X = np.empty((nChoices, t_vec.size))
# Generate list of indexes to locate target values
NumTargetValues = TargetValues.size
TargValIndices = np.arange(0, NumTargetValues)
# Populate the array of predictor vectors; each is the seed vector, with added
# noise, that corresponds to the particular choice for each iteration.
for i in np.arange(0, ChoiceVec.size):

    j = TargValIndices[TargetValues == ChoiceVec[i]]
    X[i, :] = SeedVecs[j, :] + epsilon*(np.random.rand(t_vec.size) - 0.5)
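# The added noise is drawn uniformly from [-epsilon/2, epsilon/2], i.e. zero
# mean with total range epsilon.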
# Construct slice definitions to extract training and testing sets of predictors
TrainingLimitIndex = int(np.round(TrainingFrac*nChoices))
TrainingSubset = slice(0, TrainingLimitIndex, 1)
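# With nChoices = 300 and TrainingFrac = 0.8, the first 240 trials are used for
# training and the remaining trials are held out for testing.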
# Run partial least squares classification using the training subset
B, B_0, _ = pls.PLS1(X[TrainingSubset, :], ChoiceVec[TrainingSubset], nLatents)
# Extract the testing subset of predictors and run it through the trained model.
TestingSubset = slice(TrainingLimitIndex, nChoices)
Predicted = X[TestingSubset, :] @ B + B_0
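# Note: PLS1 fits a regression, so Predicted holds continuous estimates of the
# target values; B is the coefficient vector applied to each predictor trace and
# B_0 the intercept. The classification step below snaps each estimate to the
# nearest target value.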
# Initialize array to hold the Euclidean distance from the model output to each
# target value
EuclideanDiffVals = np.empty_like(TargetValues)
# Initialize the confusion matrix
ConfusionMatrix = np.zeros((NumTargetValues, NumTargetValues))
# Initialize the array to contain model predictions
PredictedTargets = np.empty_like(ChoiceVec[TestingSubset])
# Initialize the array to contain actual corresponding targets
TrueTargets = np.empty_like(ChoiceVec[TestingSubset])
# For each trial of the test set, predict the target type by selecting the
# target value closest to the model output. Compare with the correct target
# list and construct the confusion matrix.
for i in np.arange(0, ChoiceVec[TestingSubset].size):

    for k in TargValIndices:
        EuclideanDiffVals[k] = HFL.EuclideanDistanceCalculator(Predicted[i],
                                                               TargetValues[k])

    # Acquire index of the closest event. This is the model's prediction.
    PredictedEventIndex = np.argmin(EuclideanDiffVals)

    # Obtain the index of the actual target.
    TrueEventIndex = TargValIndices[TargetValues == ChoiceVec[TestingSubset][i]][0]

    # Increment the corresponding element of the confusion matrix (correct
    # predictions fall on the diagonal, incorrect predictions off the diagonal).
    ConfusionMatrix[TrueEventIndex, PredictedEventIndex] += 1

    # Record the prediction.
    PredictedTargets[i] = TargetValues[PredictedEventIndex]

    # Record the actual corresponding target for the current predictor.
    TrueTargets[i] = TargetValues[TrueEventIndex]
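# Rows of ConfusionMatrix index the true target and columns the predicted
# target, so correct classifications accumulate on the diagonal.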
# Plot the test-set predictor vectors in one row of subplots per target type.
# Gray traces show predictors that were classified correctly. Red traces show
# predictors that belong to that target type but were misclassified by the
# model.
fig, axs = plt.subplots(nrows=nSeeds, ncols=2)
# Iterate through list of target types. For each type, plot correctly-classified
# traces in gray. Plot incorrectly classified traces in red.
for k in TargValIndices:

    PredictedTargValFilt = (PredictedTargets == TargetValues[k])
    TrueTargetsFilt = (TrueTargets == TargetValues[k])

    CorrectFilt = (TrueTargetsFilt & PredictedTargValFilt)
    IncorrectFilt = (TrueTargetsFilt & ~PredictedTargValFilt)

    if np.sum(CorrectFilt) > 0:
        axs[k, 0].plot(t_vec, X[TestingSubset, :][CorrectFilt, :].transpose(),
                       color='gray')

    if np.sum(IncorrectFilt) > 0:
        axs[k, 1].plot(t_vec, X[TestingSubset, :][IncorrectFilt, :].transpose(),
                       color='red')

    # Hide x tick labels on all but the bottom row of subplots.
    if k != TargValIndices[-1]:
        axs[k, 0].set_xticklabels([])
        axs[k, 1].set_xticklabels([])
#nIncorrects = np.sum(np.abs(np.squeeze(np.round(Predicted)) - ChoiceVec))
#nIncorrects = np.sum(np.squeeze(np.round(Predicted)) != ChoiceVec[TestingSubset])
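# Display the figure when the script is run outside an interactive backend
# (e.g. from the command line); harmless under IPython/Spyder.
plt.show()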