-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapeDeep.js
88 lines (76 loc) · 2.43 KB
/
scrapeDeep.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
const apiUrl = require('./apiUrl');
const cheerio = require('cheerio');
const {
get,
} = require('https');
const iconvLite = require('iconv-lite');
module.exports = (elem, verbose) => new Promise((resolve, reject) => get(
`${apiUrl}/${elem.find('a').attr('href')}`,
(res) => {
if (verbose) {
console.log(`Deep-scraping ${apiUrl}/${elem.find('a').attr('href')}...`);
}
let doc = '';
res.on('error', reject);
res.on('data', (data) => doc += iconvLite.decode(data, 'ISO-8859-1'));
res.on('close', () => {
const $ = cheerio.load(doc);
const downloadElems = $('.downloaditem');
const downloads = [];
downloadElems.each((index, _dli) => {
const dli = $(_dli);
downloads.push({
href: $(dli.find('a').get(0)).attr('href'),
note: dli.find('.dlnotes').text(),
type: $(dli.find('a').get(0)).text(),
});
});
const detailsElems = $('.indented .notes');
if (!detailsElems.length) {
return reject(`No details pane. Cannot deep-parse entry at ${apiUrl}/${elem.find('a').attr('href')}`);
}
const starRatingContainer = $('td > p + p img');
const ratingAttr = starRatingContainer.attr('title');
const approximateRating = ratingAttr ? Number(
/\.|½/.test(ratingAttr[1]) ? `${ratingAttr[0]}.5` : ratingAttr[0]
) : null;
const totalRatings = Number(
$('a[title^="View all ratings and reviews"]')
.text()
.split(' ')[0]
);
const totalReviews = Number($('a[href="#memberReviews"]').text().split(' ')[0]);
let ifid;
let firstPublicationDate;
let language;
let system;
const details = detailsElems
.text()
.split(/\s\s/)
.filter(Boolean)
.map((aa) => aa.trim());
details.forEach((line) => {
if (/^Development system: /i.test(line)) {
system = line.slice(20);
} else if (/^First Publication Date: /i.test(line)) {
firstPublicationDate = line.slice(24);
} else if (/^Language: /i.test(line)) {
language = line.slice(10);
} else if (/^IFID: /i.test(line)) {
ifid = line.slice(6);
}
});
const ret = {
approximateRating,
downloads,
firstPublicationDate,
ifid,
language,
system,
totalRatings,
totalReviews,
};
return resolve(ret);
});
},
));