-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathindex.js
438 lines (402 loc) · 19.9 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
/**
* TIGER Geocoder
*/
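/**
 * Module Dependencies
 * pg (PostgreSQL client) and async (control flow) are loaded here;
 * url and redis are required further down when the cache client is created.
 */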
var pg = require('pg')
, async = require("async")
;
// PostgreSQL connection string: first matching environment variable wins, otherwise a local placeholder.
var conString = process.env.HEROKU_POSTGRESQL_BLUE_URL || process.env.POSTGRESQL_URL || "tcp://username:password@localhost/geocoder";

// Redis client used as the geocoding cache. When one of the provider URLs is set we
// connect to that host/port, otherwise we fall back to the client's own defaults.
var redis;
var redisEnvUrl = process.env.REDISCLOUD_URL || process.env.REDISTOGO_URL || process.env.REDIS_URL;
if (redisEnvUrl) {
    var redisUrl = require('url').parse(redisEnvUrl);
    redis = require('redis').createClient(redisUrl.port, redisUrl.hostname);
    // `auth` is "user:password" and is null when the URL carries no credentials.
    // Take everything after the FIRST colon so passwords containing ":" survive intact,
    // and skip AUTH entirely when there is no password instead of throwing on null.
    if (redisUrl.auth) {
        var authSeparator = redisUrl.auth.indexOf(":");
        var redisPassword = authSeparator >= 0 ? redisUrl.auth.slice(authSeparator + 1) : '';
        if (redisPassword) {
            redis.auth(redisPassword);
        }
    }
} else {
    redis = require('redis').createClient();
}
/**
* PG pool defaults
*/
// Environment variables are strings; parse them so the pool receives numbers.
// how many connections to keep in the pool (default 10)
pg.defaults.poolSize = parseInt(process.env.PG_POOL_SIZE, 10) || 10;
// how long to keep an idle connection in the pool. node-postgres expects MILLISECONDS,
// so the 30 second default is 30000 (the previous value of 30 meant 30 ms).
// NOTE(review): PG_POOL_TIMEOUT is passed through as milliseconds — confirm deployed values.
pg.defaults.poolIdleTimeout = parseInt(process.env.PG_POOL_TIMEOUT, 10) || 30000;
/**
* Geocoder
*/
function Geocoder () {}

/**
 * Geocoder prototype
 */
Geocoder.prototype = {

    /**
     * Request geocoordinates of given `location` from PostGIS.
     *
     * Flow: look in redis first; on a miss, optionally try an intersection geocode
     * ("Main st at Central Ave, ..."), then the PostGIS normalizer, then (when
     * process.env.PAGC is set) the PAGC normalizer, and finally cache the formatted response.
     *
     * @param {String} location, required. any US address, city, state, zipcode
     * @param {Object} options, optional (may be omitted: geocode(location, callback))
     *      -> cacheTTL, a time to live in seconds for the redis entry, defaults to 30 days
     *      -> responseFormat, format the response to match popular providers: google, bing, etc. Defaults to internal JSON format
     *      -> includegeoid, to include the TIGER unique geoids for cross-referencing with Demographic tables or other external ACS data
     *      -> limitResults, number to limit the matches returned. defaults to 1
     * @param {Function} callback, required. Called as callback(err, response); response is null when nothing matched
     * @api public
     *
     * NOTE(review): the cache key is only the location, so a cached entry is returned
     * as-is even when a later call asks for a different responseFormat/includegeoid.
     */
    geocode: function ( location, options, callback ) {
        // allow geocode(location, callback)
        if (typeof options === 'function') {
            callback = options;
            options = {};
        }
        if ( ! location ) {
            return callback( new Error( "Geocoder.geocode requires a location."), null );
        }
        if (!options) options = {};
        options.limitResults = options.limitResults || 1;
        options.cacheTTL = options.cacheTTL || 2592000;

        // The cache key always uses the location exactly as requested, so the entry we
        // write is the one the next identical request will read. The address actually sent
        // to the geocoder may be rewritten below and is tracked separately.
        var cacheKey = 'geo:' + location;
        var queryLocation = location;

        // true when there is no usable match yet (nothing found, or rating of 20 and above)
        function needsBetterMatch(res) {
            return !res || res.rows.length === 0 || res.rows[0].rating >= 20;
        }

        // Rewrites the first cross-street separator to " @ ". Separators are tried in
        // priority order; the word separators are matched case-insensitively.
        function toIntersectionQuery(text) {
            var separators = [/ at /i, / & /, / and /i];
            for (var i = 0; i < separators.length; i++) {
                if (separators[i].test(text)) {
                    return text.replace(separators[i], " @ ");
                }
            }
            return text;
        }

        function capitalizeFirst(text) {
            return text.charAt(0).toUpperCase() + text.slice(1);
        }

        // Runs geocode(<normalizer>($1), $2) for the current queryLocation.
        function geocodeWith(queryName, normalizer, cb) {
            pg.connect(conString, function (err, client, done) {
                if (err) {
                    return cb(err, null);
                }
                client.query({
                    name: queryName,
                    text: "SELECT g.rating, ST_X(g.geomout) As lon, ST_Y(g.geomout) As lat," +
                        "(addy).address As streetnumber, (addy).streetname As street, " +
                        "(addy).streettypeabbrev As streettype, (addy).location As city, (addy).stateabbrev As state, (addy).zip As zip, (pprint_addy(addy)) As normalized_address " +
                        "FROM geocode(" + normalizer + "($1), $2) As g",
                    values: [queryLocation, options.limitResults]
                }, function (err, results) {
                    done(); //disconnect from pg and return the client to the pool
                    return cb(err, results);
                });
            });
        }

        // Parses an "a @ b" request and either geocodes the intersection or, when the first
        // street turns out to be a regular street address, rewrites queryLocation and yields
        // null so the normal address geocoding takes over.
        function geocodeIntersection(parsedLoc, cb) {
            pg.connect(conString, function (err, client, done) {
                if (err) {
                    return cb(err, null);
                }
                async.waterfall([
                    function (cbb) {
                        client.query({
                            name: 'tiger_parse_address',
                            text: "SELECT addy.street As street1, addy.street2 As street2, addy.city As city, addy.state As state, addy.zip As zip, addy.country as country " +
                                "FROM parse_Address($1) As addy",
                            values: [parsedLoc]
                        }, function (err, parsedAddress) {
                            return cbb(err, parsedAddress);
                        });
                    },
                    function (parsedAddress, cbb) {
                        if (!parsedAddress || parsedAddress.rows.length === 0) return cbb(null, null); //no results
                        var loc = parsedAddress.rows[0];
                        //state is mandatory, won't return anything w/o state
                        if (!(loc.street1 && loc.street2 && loc.state)) {
                            //malformed intersection w/o state or missing one street. return err to prevent further geocoding
                            return cbb(new Error("Malformed Address"));
                        }
                        //street2 may still contain an " & " (badly formatted input): keep only the part before it
                        var ampersandAt = loc.street2.indexOf(" & ");
                        if (ampersandAt >= 0) {
                            loc.street2 = loc.street2.substring(0, ampersandAt);
                        }
                        //a leading street number means this is a badly merged regular address, not an intersection
                        var streetParts = loc.street1.split(" ");
                        if (streetParts.length > 0 && isNumber(streetParts[0])) {
                            //skip missing parts so a null city does not end up as the text "null"
                            queryLocation = [loc.street1, loc.city, loc.state].filter(Boolean).join(", ") + (loc.zip ? " " + loc.zip : '');
                            return cbb(null, null); //allow normal address geocoding
                        }
                        //go for intersection geocode
                        client.query({
                            name: 'tiger_geocode_intersection',
                            text: "SELECT g.rating, ST_X(g.geomout) As lon, ST_Y(g.geomout) As lat," +
                                "(addy).streetname As street, " +
                                "(addy).streettypeabbrev As streettype, (addy).location As city, (addy).stateabbrev As state, (addy).zip As zip, " +
                                "(pprint_addy(addy)) As normalized_address " +
                                "FROM geocode_intersection($1, $2, $3, $4, $5, $6) As g ORDER BY (addy).zip ASC",
                            //must pass empty string params or else we get no results
                            values: [loc.street1, loc.street2, loc.state || '', loc.city || '', loc.zip || '', (options.limitResults > 2 ? options.limitResults : 2)]
                        }, function (err, geocoderResult) {
                            //massage the normalized display address to reflect the fact its an intersection
                            if (geocoderResult && geocoderResult.rows.length > 0) {
                                var top = geocoderResult.rows[0];
                                top.normalized_address = top.street + " " + top.streettype + " @ " + capitalizeFirst(loc.street2) + ', ' + top.city + ", " + top.state + (top.zip ? " " + top.zip : '');
                            }
                            return cbb(err, geocoderResult);
                        });
                    }
                ], function (err, geocoderResult) {
                    done(); //return the client to the pool to avoid leaking it
                    return cb(err, geocoderResult);
                });
            });
        }

        redis.get(cacheKey, function (err, cached) {
            if (cached) {
                var hit = null;
                try {
                    hit = JSON.parse(cached);
                } catch (parseErr) {
                    hit = null; //corrupt entry: ignore it, geocode again and overwrite
                }
                if (hit) {
                    if (process.env.development) console.log("Cache hit on: " + location);
                    return callback(null, hit);
                }
            }
            async.waterfall([
                //decide between intersection geocoding and full address geocoding
                function (cb) {
                    var parsedLoc = toIntersectionQuery(location);
                    if (parsedLoc.indexOf(" @ ") >= 0) {
                        return geocodeIntersection(parsedLoc, cb);
                    }
                    return cb(null, null); //nada, allow normal address geocoding to give it a shot
                },
                //address normalizers are not perfect: the PostGIS normalize_address succeeds more often
                //(e.g. "122 S. Main St"), so it goes first; PAGC is only tried when the rating is 20 or above
                function (geocoderResult, cb) {
                    if (needsBetterMatch(geocoderResult)) {
                        return geocodeWith('tiger_geocode_postgis', 'normalize_address', cb);
                    }
                    return cb(null, geocoderResult);
                },
                //PAGC call if needed
                function (geocoderResult, cb) {
                    if (!(process.env.PAGC && needsBetterMatch(geocoderResult))) {
                        return cb(null, geocoderResult);
                    }
                    if (process.env.development) console.log("Trying PAGC for address: " + queryLocation);
                    geocodeWith('tiger_geocode_pagc', 'pagc_normalize_address', function (err, pagcResult) {
                        if (err) return cb(err);
                        //keep the previous result unless PAGC found a strictly better (lower) rating
                        var previousRows = geocoderResult ? geocoderResult.rows : [];
                        var pagcRows = pagcResult ? pagcResult.rows : [];
                        var keepPrevious = previousRows.length > 0 &&
                            (pagcRows.length === 0 || previousRows[0].rating <= pagcRows[0].rating);
                        return cb(null, keepPrevious ? geocoderResult : pagcResult);
                    });
                }
            ],
            //handle final processing here
            function (err, results) {
                if (err) return callback(err);
                var row = results && results.rows ? results.rows[0] : null;
                if (!row) return callback(null, null); //nada
                Geocoder.prototype.parseResult(options, row, function (err, GeocodeResponse) {
                    if (err) return callback(err);
                    redis.set(cacheKey, JSON.stringify(GeocodeResponse), function (err, msg) {
                        redis.expire(cacheKey, options.cacheTTL); //if ttl is not provided we expire it in 30 days
                        callback(null, GeocodeResponse); //a redis write error does not fail the request
                    });
                });
            });
        });
    },

    /**
     * Look up the address closest to a coordinate using PostGIS reverse_geocode.
     * The raw row is cached in redis and re-formatted on every hit, so cached entries
     * honour the responseFormat of the current call.
     *
     * NOTE(review): the original carried a TODO to implement this on top of reverse_geocode;
     * the query below already calls it — confirm and close the TODO.
     *
     * @param {Number|String} lat, required (0 is accepted)
     * @param {Number|String} lng, required (0 is accepted)
     * @param {Object} options, optional (may be omitted). Same keys as geocode
     * @param {Function} callback, required. callback(err, response); err is set when no row is found
     * @api public
     */
    reverseGeocode: function ( lat, lng, options, callback ) {
        if (typeof options === 'function') {
            callback = options;
            options = {};
        }
        //falsy values are "missing", except the number 0 which is a valid coordinate
        function isMissing(value) {
            return !value && value !== 0;
        }
        if ( isMissing(lat) || isMissing(lng) ) {
            return callback( new Error( "Geocoder.reverseGeocode requires a latitude and longitude." ), null );
        }
        if (!options) options = {};
        options.limitResults = options.limitResults || 1;
        options.cacheTTL = options.cacheTTL || 2592000;

        var cacheKey = 'geo:' + lat + '-' + lng;

        redis.get(cacheKey, function (err, cached) {
            var cachedRow = null;
            if (cached) {
                try {
                    cachedRow = JSON.parse(cached);
                } catch (parseErr) {
                    cachedRow = null; //corrupt entry: fall through to the database
                }
            }
            if (cachedRow) {
                return Geocoder.prototype.parseResult(options, cachedRow, function (err, GeocodeResponse) {
                    return callback(err, GeocodeResponse);
                });
            }
            pg.connect(conString, function (err, client, done) {
                if (err) { return callback( err, null ); }
                client.query({name:"tiger_reverse_geocode", text: "SELECT (pprint_addy(rg.addy[1])) as normalized_address, $1 as lat, $2 as lon, "+
                    "rg.addy[1].address As streetnumber, rg.addy[1].streetname As street, "+
                    "rg.addy[1].streettypeabbrev As styp, rg.addy[1].location As city, rg.addy[1].stateabbrev As state, rg.addy[1].zip "+
                    "FROM reverse_geocode(ST_SetSRID(ST_Point($2, $1),4326)) rg LIMIT $3",
                    values:[lat, lng, options.limitResults]}, function (err, results) {
                    done(); //return the client to the pool
                    if (err) {
                        return callback(err, results);
                    }
                    if (!results || !results.rows || results.rows.length === 0) {
                        return callback(new Error('no rows found'), results);
                    }
                    var row = results.rows[0];
                    Geocoder.prototype.parseResult(options, row, function (err, GeocodeResponse) {
                        if (err) return callback(err);
                        //push the raw row to redis, if available
                        redis.set(cacheKey, JSON.stringify(row), function (err, res) {
                            redis.expire(cacheKey, options.cacheTTL); //if ttl is not provided we expire it in 30 days
                            return callback(null, GeocodeResponse);
                        });
                    });
                });
            });
        });
    },

    /**
     * Turn one database row into the response object `{ result: {...} }`.
     *
     * options.responseFormat 'google' (case-insensitive) produces formatted_address /
     * geometry.location / address_component entries; anything else produces the internal
     * flat format. When options.includegeoid is set the response is additionally passed
     * through attachGeoIds; if that lookup fails or yields nothing the plain response is
     * returned instead (best effort).
     *
     * @param {Object} options, may be null/undefined
     * @param {Object} result, row with rating, lat, lon, normalized_address and optional address parts
     * @param {Function} cb, cb(null, response)
     */
    parseResult: function (options, result, cb) {
        options = options || {};
        var response = {};
        var format = String(options.responseFormat || '');

        //adds one google-style component; the first component present also decides result.types
        var addComponent = function (value, componentType, resultType, withLongName) {
            if (!value) return;
            if (!response.result.types) response.result.types = [resultType];
            var component = { 'type': [componentType] };
            if (withLongName) component.long_name = value;
            component.short_name = value;
            response.result.address_component.push(component);
        };

        switch (format.toLowerCase()) {
            case 'google':
                response.result = {
                    'accuracy': result.rating, //accuracy as provided by PostGIS rating result. lower more accurate. from 1 to 100.
                    'formatted_address': result.normalized_address,
                    'geometry': {
                        'location': {
                            'lat': result.lat,
                            'lon': result.lon
                        }
                    },
                    'address_component': []
                };
                //every address part that is present gets its own component
                //(previously a stray comma operator tied each push to the "types not set yet" check,
                //so only the first part was ever added)
                addComponent(result.streetnumber, 'street_number', 'street_address', true);
                addComponent(result.street, 'route', 'route', true);
                addComponent(result.city, 'locality', 'locality', true);
                addComponent(result.zip, 'postal_code', 'postal_code', true);
                //no long_name for the state: only the abbreviation is available
                addComponent(result.state, 'administrative_area_level_1', 'administrative_area_level_1', false);
                break;
            default:
                response.result = {
                    'accuracy': result.rating, //accuracy as provided by PostGIS rating result. lower more accurate. from 1 to 100.
                    'formatted_address': result.normalized_address,
                    'location': {
                        'lat': result.lat,
                        'lon': result.lon
                    }
                };
                //the reverse geocode query aliases the street type column as "styp"
                var streetType = result.streettype || result.styp;
                if (result.streetnumber) response.result.streetNumber = result.streetnumber;
                if (result.street) response.result.street = result.street;
                if (streetType) response.result.streetType = streetType;
                if (result.city) response.result.city = result.city;
                if (result.state) response.result.state = result.state;
                if (result.zip) response.result.zipcode = result.zip;
        }

        //attach GeoIds if the user requested it via options.includegeoid
        if (!options.includegeoid) {
            return cb(null, response);
        }
        Geocoder.prototype.attachGeoIds(response, function (err, enriched) {
            //geoids are an optional extra: never lose the geocoded response because of them
            cb(null, enriched || response);
        });
    },

    /**
     * Attaches TIGER specific unique IDs to help cross-referencing external data in
     * Demographic / Economic tables. Also includes Zillow neighborhoods (if loaded).
     * Works on both the internal format (result.location, result.city, ...) and the
     * google format (result.geometry.location, result.address_component).
     *
     * @param {Object} GeocodeResponse, response produced by parseResult; mutated in place
     * @param {Function} callback, callback(err, GeocodeResponse); yields (null, null) when the query returns no row set
     */
    attachGeoIds: function (GeocodeResponse, callback) {
        var target = GeocodeResponse && GeocodeResponse.result;
        var point = target && (target.location || (target.geometry && target.geometry.location));
        if (!point) {
            return callback(new Error("Geocoder.attachGeoIds requires a geocoded location."), null);
        }

        //short_name of the first google-style component of the given type, if any
        function componentValue(componentType) {
            var components = target.address_component || [];
            for (var i = 0; i < components.length; i++) {
                if (components[i].type && components[i].type[0] === componentType) {
                    return components[i].short_name;
                }
            }
            return undefined;
        }

        var city = target.city || componentValue('locality');
        var state = target.state || componentValue('administrative_area_level_1');
        var zip = target.zipcode || componentValue('postal_code');

        pg.connect(conString, function (err, client, done) {
            if (err) { return callback( err, null ); }
            //select get_geoids(ST_GeomFromText('POINT(-121.93830710000000295 37.272289700000001744 )', 4269), normalize_address('2731 montavo pl, Campbell, ca, 95008'))
            client.query({name:"tiger_get_geoids", text: "SELECT * FROM get_geoids(ST_SetSRID(ST_Point($2, $1),4326), $3, $4, $5 ) addy_ex",
                values:[point.lat, point.lon, city, state, zip]}, function (err, results) {
                done(); //disconnect from pg and return the client to the pool
                if (err) {
                    return callback(err);
                }
                if (!results || !results.rows) {
                    return callback(null, null);
                }
                if (results.rows.length > 0) {
                    var ids = results.rows[0];
                    if (ids.locationid) target.cityId = ids.locationid;
                    if (ids.stateid) target.stateId = ids.stateid;
                    if (ids.neighborhoodid) {
                        target.neighborhoodId = ids.neighborhoodid;
                        target.neighborhood = ids.neighborhood;
                    }
                    if (ids.tractid) {
                        target.tractId = ids.tractid;
                        target.tract = ids.tract;
                    }
                    if (ids.countyid) {
                        target.countyId = ids.countyid;
                        target.county = ids.county;
                    }
                    if (ids.metroid) {
                        target.metroId = ids.metroid;
                        target.metro = ids.metro;
                    }
                }
                callback(null, GeocodeResponse);
            });
        });
    }
};
/**
 * Export
 *
 * The module exports one Geocoder instance (not the constructor); its methods
 * geocode, reverseGeocode, parseResult and attachGeoIds come from Geocoder.prototype.
 */
module.exports = new Geocoder();
/**
 * Returns the string with its first character upper-cased; the rest is left untouched.
 * Installed on String.prototype, so it is callable on any string value.
 */
String.prototype.capitalize = function () {
    var head = this.substring(0, 1);
    var tail = this.substring(1);
    return head.toUpperCase() + tail;
};
/**
 * Tells whether `n` is numeric: it must start with something parseFloat can read
 * AND convert as a whole to a finite number (so "12a" and Infinity are rejected).
 *
 * @param {*} n value to test
 * @return {Boolean}
 */
function isNumber(n) {
    var leadingNumber = parseFloat(n);
    if (isNaN(leadingNumber)) {
        return false;
    }
    return isFinite(n);
}