-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
Copy pathdata.js
126 lines (113 loc) · 3.62 KB
/
data.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
const Papa = require('papaparse');
// Boston Housing data constants.
// A download URL is BASE_URL immediately followed by one of the file names.
const BASE_URL = 'https://storage.googleapis.com/tfjs-examples/multivariate-linear-regression/data/';

// Training split: feature file and target file.
const TRAIN_FEATURES_FN = 'train-data.csv';
const TRAIN_TARGET_FN = 'train-target.csv';

// Test split: feature file and target file.
const TEST_FEATURES_FN = 'test-data.csv';
const TEST_TARGET_FN = 'test-target.csv';
/**
 * Converts an array of row objects into an array of arrays of numbers.
 *
 * Every property value of a row is passed through `parseFloat`, in the row's
 * own key order; values that are not numeric text become `NaN`. The array
 * passed in is not modified.
 *
 * @param {Array<Object>} data Row objects whose property values hold the
 *     cell text.
 *
 * @returns {Promise<Array<number[]>>} Resolves to one array of floats per row.
 */
const parseCsv = async (data) => {
  // An async function already wraps its return value in a Promise, so the
  // synchronous conversion needs no explicit `new Promise` around it.
  return data.map((row) => Object.values(row).map((cell) => parseFloat(cell)));
};
/**
 * Downloads a csv file and returns its contents as numbers.
 *
 * The file is fetched from BASE_URL + filename with Papa.parse (first line
 * treated as a header row) and the resulting rows are handed to parseCsv.
 *
 * @param {string} filename Name of file to be loaded, appended to BASE_URL.
 *
 * @returns {Promise<Array<number[]>>} Resolves to parsed csv data. Rejects
 *     with the error reported by Papa.parse if the download fails.
 */
export const loadCsv = async (filename) => {
  const url = `${BASE_URL}${filename}`;
  console.log(` * Downloading data from: ${url}`);
  // Papa.parse is callback based, so it is adapted to a Promise here. The
  // `error` callback must be wired to `reject`: without it a failed download
  // never settles the Promise and every caller awaiting it hangs forever.
  const results = await new Promise((resolve, reject) => {
    Papa.parse(url, {
      download: true,
      header: true,
      complete: (parsed) => resolve(parsed),
      error: (err) => reject(err),
    });
  });
  return parseCsv(results['data']);
};
/**
 * Holds the training and test splits of the Boston Housing data.
 *
 * All four fields start out as null and are populated by loadData().
 */
export class BostonHousingDataset {
  constructor() {
    // Placeholders until loadData() has resolved.
    this.trainFeatures = null;
    this.trainTarget = null;
    this.testFeatures = null;
    this.testTarget = null;
  }

  /**
   * Length of the first row of the training features.
   *
   * @throws {Error} When the training features have not been loaded yet.
   */
  get numFeatures() {
    const features = this.trainFeatures;
    if (features != null) {
      return features[0].length;
    }
    throw new Error("'loadData()' must be called before numFeatures");
  }

  /**
   * Requests all four csv files concurrently, stores the results on this
   * instance, then shuffles each split with features and targets kept paired.
   */
  async loadData() {
    const filenames = [
      TRAIN_FEATURES_FN,
      TRAIN_TARGET_FN,
      TEST_FEATURES_FN,
      TEST_TARGET_FN,
    ];
    const [trainFeatures, trainTarget, testFeatures, testTarget] =
        await Promise.all(filenames.map((name) => loadCsv(name)));

    this.trainFeatures = trainFeatures;
    this.trainTarget = trainTarget;
    this.testFeatures = testFeatures;
    this.testTarget = testTarget;

    shuffle(this.trainFeatures, this.trainTarget);
    shuffle(this.testFeatures, this.testTarget);
  }
}
/**
 * Human-readable labels for the dataset's features.
 *
 * NOTE(review): presumably listed in the same order as the columns of the
 * feature csv files; confirm against the data.
 */
export const featureDescriptions = [
  'Crime rate',
  'Land zone size',
  'Industrial proportion',
  'Next to river',
  'Nitric oxide concentration',
  'Number of rooms per house',
  'Age of housing',
  'Distance to commute',
  'Distance to highway',
  'Tax rate',
  'School class size',
  'School drop-out rate',
];
/**
 * Shuffles data and target in place using the Fisher-Yates algorithm. The
 * same swaps are applied to both arrays, so data[i] and target[i] stay paired.
 *
 * @param {Array} data Array to shuffle; its length determines the number of
 *     swaps.
 * @param {Array} target Array reordered with exactly the same swaps as data.
 */
function shuffle(data, target) {
  for (let last = data.length - 1; last >= 0; last--) {
    // Random position within [0, last]; `| 0` truncates to an integer.
    const pick = (Math.random() * (last + 1)) | 0;
    [data[last], data[pick]] = [data[pick], data[last]];
    [target[last], target[pick]] = [target[pick], target[last]];
  }
}