"use strict";
import _ from 'lodash'
import consola from 'consola'
import 'date-utils'
import fetchTwitter from './scripts/fetch-twitter'
import fileExtension from 'file-extension'
import fs from 'fs'
import moment from 'moment'
import nodeCleanup from 'node-cleanup'
import PFUtil from './scripts/pf-util'
import rss from './data/rss.json'
import serializeError from 'serialize-error'
import shell from 'shelljs'
import { sleep } from 'sleep'
import wget from 'node-wget-promise'
import xml2js from 'xml2js'
import { promisify } from 'util'
import {
RFC822,
DOWNLOADS_DIR,
RSS_DIR,
COVER_DIR,
BUILD_INFO
} from './scripts/constants'
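// data/rss.json maps a podcast key to its feed metadata. Illustrative shape
// (field names taken from the usages below; the key and URL are made up):
// {
//   "example-show": {
//     "feed": "https://example.com/feed.rss",
//     "twitter": "@example_show",
//     "hashtag": "#exampleshow"
//   }
// }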
// ----------------
// Detect CLI arguments
const args = process.argv.slice(2) // drop the node binary path and the script path
const NO_TWITTER = args.includes('--no-twitter') // skip fetching Twitter data
// ----------------
const util = new PFUtil()
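// Promisified helpers. explicitArray:false makes xml2js return a bare object
// (not a one-element array) for single child nodes; fetchFeed below re-wraps a
// lone <item> into an array for this reason.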
const readFile = promisify(fs.readFile)
const xmlToJSON = promisify((new xml2js.Parser({explicitArray: false})).parseString)
const writeFile = promisify(fs.writeFile)
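// State accumulated across all feeds; bundled into BUILD_INFO at the end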
let episodes_in_2weeks = []
let latest_pubdates = []
let channels = {}
let covers = {}
let episodeCount = 0
let errors = []
let downloads_backup = null
// Log a failure and record it so it ends up in BUILD_INFO's errors list
const error = function(label, rss, err){
if(err) {
consola.error(`${label} | ${rss} | ${err}`)
errors.push({label, rss, error: serializeError(err)})
}
else {
consola.error(`${label} | ${rss}`)
errors.push({label, rss})
}
}
process.on('unhandledRejection', console.dir)
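// Download and parse one feed, then record its summary into the shared state.
// Any failure is logged via error() and the feed is skipped.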
const fetchFeed = async key => {
const src = rss[key].feed
const dist_rss = `${RSS_DIR}/${key}.rss`
// Download RSS (retry up to 3 times)
let err = ''
let download = false
let triesCounter = 0
while (triesCounter < 3) {
try {
download = await wget(src, { output: dist_rss })
break // success: stop retrying
} catch (e) {
err = e
consola.error(e)
}
triesCounter++
consola.log(`wget failed: attempt #${triesCounter}`)
await sleep(2) // the 'sleep' package blocks for 2 seconds between attempts
}
if (!download) {
error('wget', dist_rss, err)
return // record the failure and skip this feed
}
// Read RSS
const xml = await readFile(`${__dirname}/${dist_rss}`).catch(() => { return })
if(!xml){
error('readFile', dist_rss)
return // a return inside the .catch() callback cannot exit fetchFeed, so return here instead
}
const json = await xmlToJSON(xml).catch(() => { return })
if(!json){
error('xmlToJSON', dist_rss)
return // likewise, exit fetchFeed here rather than inside .catch()
}
// json.rss.channel.item must be an Array (with explicitArray:false, xml2js returns a bare object when the feed has a single <item>)
if(!(json.rss.channel.item instanceof Array)) {
json.rss.channel.item = [json.rss.channel.item]
}
// Get the cover image URL: itunes:image (attribute or property form), falling back to channel.image.url
const cover_url = util.removeQuery(
_.get(json, 'rss.channel[itunes:image].$.href') ||
_.get(json, 'rss.channel[itunes:image].href') ||
_.get(json, 'rss.channel.image.url')
)
if(cover_url){
covers[key] = {
src: cover_url,
dist: `${COVER_DIR}/${key}.${fileExtension(cover_url)}`
}
}
const channel = json.rss.channel
const episodes = channel.item
const title = channel.title
// Count episodes
episodeCount += episodes.length // TODO: count where it is actually needed instead of here, to cut this dependency
// Record the latest episode's publish date (feed items are ordered newest first)
latest_pubdates.push({
id: key,
pubDate: _.first(episodes).pubDate
})
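// Collect this feed's episodes from the last two weeks into the global list (helper from scripts/pf-util)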
episodes_in_2weeks = episodes_in_2weeks.concat(util.getEpisodesIn2Weeks(episodes, key, title))
// Save data
channels[key] = {
key,
title,
twitter: rss[key].twitter,
feed: rss[key].feed,
link: channel.link ? channel.link : null,
hashtag: rss[key].hashtag,
cover: covers[key] ? covers[key].dist.replace(/^static/,'') : null,
total: episodes.length,
firstEpisodeDate: moment(_.last(episodes).pubDate, RFC822).format(moment.HTML5_FMT.DATETIME_LOCAL_SECONDS),
lastEpisodeDate: moment(_.first(episodes).pubDate, RFC822).format(moment.HTML5_FMT.DATETIME_LOCAL_SECONDS),
firstEpisodeLink: _.last(episodes).link,
lastEpisodeLink: _.first(episodes).link,
recentEpisodes: _.take(episodes, 5),
fileServer: util.getFileServer(episodes),
durationAverage: util.getDurationAverage(episodes, dist_rss),
durationMedian: util.getDurationMedian(episodes, dist_rss),
description: channel.description ? channel.description : null
}
};
(async () => {
// Ensure the download directories exist and start clean
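// If BUILD_INFO exists from a previous run, move the old downloads aside as a
// backup instead of deleting them; the nodeCleanup handler at the bottom
// restores the backup on SIGINT and removes it after a clean exit. With no
// BUILD_INFO, simply wipe and recreate the directories.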
try {
await readFile(BUILD_INFO)
downloads_backup = `${DOWNLOADS_DIR}(backup ${new Date().toFormat('YYYYMMDD-HH24MISS')})/`
shell.mv(`${DOWNLOADS_DIR}/`, downloads_backup)
shell.mkdir('-p', RSS_DIR)
shell.mkdir('-p', COVER_DIR)
consola.log(`-> Create backup to ${downloads_backup}`)
} catch (err) {
shell.rm('-rf', DOWNLOADS_DIR)
shell.mkdir('-p', RSS_DIR)
shell.mkdir('-p', COVER_DIR)
}
// Fetch all feeds in parallel: https://qiita.com/jkr_2255/items/62b3ee3361315d55078a
await Promise.all(Object.keys(rss).map(key => fetchFeed(key))).catch((err) => { error('fetchFeed', '', err) })
if(!NO_TWITTER){
consola.log('Start fetching twitter data...')
const accounts = {}
for(let key in rss) {
if(rss[key]){
if(rss[key].twitter) {
accounts[key] = {
twitter: rss[key].twitter.replace('@','')
}
}
if(rss[key].hashtag) {
if(!accounts[key]) {
accounts[key] = {}
}
accounts[key]['hashtag'] = rss[key].hashtag
}
}
}
const twitterData = await fetchTwitter(accounts)
for(let key in twitterData) {
// Skip keys missing from channels (that feed probably failed to fetch)
if(channels[key]){
for(let prop in twitterData[key]){
channels[key][prop] = twitterData[key][prop]
}
}
}
}
consola.log('Sort channels and episodes by pubDate (newest first)')
latest_pubdates.sort((a, b) => new Date(b.pubDate) - new Date(a.pubDate))
episodes_in_2weeks.sort((a, b) => new Date(b.pubDate) - new Date(a.pubDate))
// load_order: channel keys ordered by each channel's most recent episode
const load_order = latest_pubdates.map(element => element.id)
consola.log('Download cover images serially to avoid 404')
for(let key of Object.keys(covers)) await util.downloadAndResize(key, covers[key].src, covers[key].dist)
const data = {
load_order,
episodes_in_2weeks,
channels,
updated: new Date(),
episodeCount,
errors
}
// Save to file
await writeFile(BUILD_INFO, JSON.stringify(data), 'utf8')
})();
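// On process exit, deal with the downloads backup created at startup: restore
// it if the run was interrupted (SIGINT), remove it after a clean exit.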
nodeCleanup(function (exitCode, signal) {
if (signal === 'SIGINT' && downloads_backup) {
consola.log('-> Restore from backup')
shell.rm('-rf', DOWNLOADS_DIR)
shell.mv(downloads_backup, `${DOWNLOADS_DIR}/`)
}
// node-cleanup passes signal=null on a normal exit, so check exitCode, not signal
else if (exitCode === 0 && downloads_backup) {
consola.log('-> Remove backup')
shell.rm('-rf', downloads_backup)
}
});