Skip to content

Commit

Permalink
fix: use more reliable json endpoint (#605)
Browse files Browse the repository at this point in the history
fixes #523
fixes #495

* refactor: new endpoint

* refactor: clean up remnants from watch page endpoint

* fix: `urllib` reference

* style: lint

* test: update info-extra tests

* test: update `getInfo` tests

* test: remove unneeded test files

* test: update download tests

* test: coverage

* fix: backup for `author.id`, add `author.external_channel_url`

* test: remove unused test files
  • Loading branch information
fent authored May 19, 2020
1 parent 0d6e8e5 commit beb8d7a
Show file tree
Hide file tree
Showing 92 changed files with 72,738 additions and 69,903 deletions.
16 changes: 13 additions & 3 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,23 @@ const createStream = options => {
*/
const downloadFromInfoCallback = (stream, info, options) => {
options = options || {};

let playability = info.playerResponse.playabilityStatus;
if (playability && playability.status === 'UNPLAYABLE') {
stream.emit('error', Error(util.stripHTML(playability.reason)));
return;
}

if (!info.formats.length) {
stream.emit('error', Error('This video is unavailable'));
return;
}

let format;
try {
format = util.chooseFormat(info.formats, options);
} catch (e) {
setImmediate(() => {
stream.emit('error', e);
});
stream.emit('error', e);
return;
}
stream.emit('info', info, format);
Expand Down
208 changes: 99 additions & 109 deletions lib/info-extras.js
Original file line number Diff line number Diff line change
@@ -1,138 +1,128 @@
const qs = require('querystring');
const url = require('url');
const Entities = require('html-entities').AllHtmlEntities;
const util = require('./util');
const qs = require('querystring');
const urllib = require('url');
const parseTime = require('m3u8stream/dist/parse-time');


const VIDEO_URL = 'https://www.youtube.com/watch?v=';
// Extract the `content` attribute of the `<meta itemprop="...">` tag called `name`.
const getMetaItem = (body, name) => {
  const openingTag = `<meta itemprop="${name}" content="`;
  return util.between(body, openingTag, '">');
};


/**
* Get video description from html
*
* @param {string} html
* @returns {string}
*/
exports.getVideoDescription = html => {
const regex = /<p.*?id="eow-description".*?>(.+?)<\/p>[\n\r\s]*?<\/div>/im;
const description = html.match(regex);
return description ? Entities.decode(util.stripHTML(description[1])) : '';
};

/**
* Get video media (extra information) from html
* Get video media.
*
* @param {string} body
* @param {Object} info
* @returns {Object}
*/
exports.getVideoMedia = body => {
let mediainfo = util.between(body,
'<div id="watch-description-extras">',
'<div id="watch-discussion" class="branded-page-box yt-card">');
if (mediainfo === '') {
return {};
exports.getMedia = info => {
let media = {};
let results = [];
try {
results = info.response.contents.twoColumnWatchNextResults.results.results.contents;
} catch (err) {
// Do nothing
}

const regexp = /<h4 class="title">([\s\S]*?)<\/h4>[\s\S]*?<ul .*?class=".*?watch-info-tag-list">[\s\S]*?<li>([\s\S]*?)<\/li>(?:\s*?<li>([\s\S]*?)<\/li>)?/g; // eslint-disable-line max-len
const contentRegexp = /(?: - (\d{4}) \()?<a .*?(?:href="([^"]+)")?.*?>(.*?)<\/a>/;
const imgRegexp = /<img src="([^"]+)".*?>/;
const media = {};

const image = imgRegexp.exec(mediainfo);
if (image) {
media.image = url.resolve(VIDEO_URL, image[1]);
}
let result = results.find(v => v.videoSecondaryInfoRenderer);
if (!result) { return {}; }

let match;
while ((match = regexp.exec(mediainfo)) !== null) {
let [, key, value, detail] = match;
key = Entities.decode(key).trim().replace(/\s/g, '_')
.toLowerCase();
const content = contentRegexp.exec(value);
if (content) {
let [, year, mediaUrl, value2] = content;
if (year) {
media.year = parseInt(year);
} else if (detail) {
media.year = parseInt(detail);
}
value = value.slice(0, content.index);
if (key !== 'game' || value2 !== 'YouTube Gaming') {
value += value2;
try {
let metadataRows =
(result.metadataRowContainer || result.videoSecondaryInfoRenderer.metadataRowContainer)
.metadataRowContainerRenderer.rows;
for (let row of metadataRows) {
if (row.metadataRowRenderer) {
let title = row.metadataRowRenderer.title.simpleText.toLowerCase();
let contents = row.metadataRowRenderer.contents[0];
let runs = contents.runs;
media[title] = runs ? runs[0].text : contents.simpleText;
if (runs && runs[0].navigationEndpoint) {
media[`${title}_url`] = urllib.resolve(VIDEO_URL,
runs[0].navigationEndpoint.commandMetadata.webCommandMetadata.url);
}
} else if (row.richMetadataRowRenderer) {
let contents = row.richMetadataRowRenderer.contents;
for (let content of contents) {
let meta = content.richMetadataRenderer;
media.thumbnails = meta.thumbnail.thumbnails;
// TODO: Added for backwards compatibility. Remove later.
media.image = urllib.resolve(VIDEO_URL, media.thumbnails[0].url);
}
let richMeta = contents
.filter(meta => meta.richMetadataRenderer.style === 'RICH_METADATA_RENDERER_STYLE_BOX_ART');
for (let { richMetadataRenderer } of richMeta) {
let meta = richMetadataRenderer;
media.year = meta.subtitle.simpleText;
let type = meta.callToAction.simpleText.split(' ')[1];
media[type] = meta.title.simpleText;
media[`${type}_url`] = urllib.resolve(VIDEO_URL,
meta.endpoint.commandMetadata.webCommandMetadata.url);
}
}
media[`${key}_url`] = url.resolve(VIDEO_URL, mediaUrl);
}
media[key] = Entities.decode(value);
} catch (err) {
// Do nothing.
}

return media;
};

/**
* Get video Owner from html.
* Get video author.
*
* @param {string} body
* @return {Object}
* @param {Object} info
* @returns {Object}
*/
const userRegexp = /<a href="\/user\/([^"]+)/;
const verifiedRegexp = /<span .*?(aria-label="Verified")(.*?(?=<\/span>))/;
exports.getAuthor = body => {
let ownerinfo = util.between(body,
'<div id="watch7-user-header" class=" spf-link ">',
'<div id="watch8-action-buttons" class="watch-action-buttons clearfix">');
if (ownerinfo === '') {
exports.getAuthor = info => {
let channelId, avatar, subscriberCount, verified = false;
try {
let results = info.response.contents.twoColumnWatchNextResults.results.results.contents;
let v = results.find(v2 =>
v2.videoSecondaryInfoRenderer &&
v2.videoSecondaryInfoRenderer.owner &&
v2.videoSecondaryInfoRenderer.owner.videoOwnerRenderer);
let videoOwnerRenderer = v.videoSecondaryInfoRenderer.owner.videoOwnerRenderer;
channelId = videoOwnerRenderer.navigationEndpoint.browseEndpoint.browseId;
avatar = urllib.resolve(VIDEO_URL, videoOwnerRenderer.thumbnail.thumbnails[0].url);
subscriberCount = util.parseAbbreviatedNumber(
videoOwnerRenderer.subscriberCountText.runs[0].text);
verified = !!videoOwnerRenderer.badges.find(b => b.metadataBadgeRenderer.tooltip === 'Verified');
} catch (err) {
// Do nothing.
}
try {
let videoDetails = info.player_response.microformat.playerMicroformatRenderer;
let id = videoDetails.channelId || channelId;
return {
id: id,
name: videoDetails.ownerChannelName,
user: videoDetails.ownerProfileUrl.split('/').slice(-1)[0],
channel_url: `https://www.youtube.com/channel/${id}`,
external_channel_url: `https://www.youtube.com/channel/${videoDetails.externalChannelId}`,
user_url: urllib.resolve(VIDEO_URL, videoDetails.ownerProfileUrl),
avatar: avatar,
verified: verified,
subscriber_count: subscriberCount,
};
} catch (err) {
return {};
}
const channelName = Entities.decode(util.between(util.between(
ownerinfo, '<div class="yt-user-info">', '</div>'), '>', '</a>'));
const userMatch = ownerinfo.match(userRegexp);
const verifiedMatch = ownerinfo.match(verifiedRegexp);
const channelID = getMetaItem(body, 'channelId');
const username = userMatch ? userMatch[1] : util.between(
util.between(body, '<span itemprop="author"', '</span>'), '/user/', '">');
const subscriberCountText = util.between(util.between(ownerinfo,
'<span class="yt-subscription-button-subscriber-count-branded-horizontal yt-subscriber-count"',
'/span>'), '>', '<');
const subscriberCount = util.parseAbbreviatedNumber(subscriberCountText);
return {
id: channelID,
name: channelName,
avatar: url.resolve(VIDEO_URL, util.between(ownerinfo,
'data-thumb="', '"')),
verified: !!verifiedMatch,
user: username,
channel_url: `https://www.youtube.com/channel/${channelID}`,
user_url: `https://www.youtube.com/user/${username}`,
subscriber_count: subscriberCount,
};
};


/**
* Get video published at from html.
* Get related videos.
*
* @param {string} body
* @returns {string}
*/
exports.getPublished = body => Date.parse(getMetaItem(body, 'datePublished'));


/**
* Get related videos.
* Credits to https://github.com/paixaop.
*
* @param {string} body
* @param {Object} info
* @returns {Array.<Object>}
*/
exports.getRelatedVideos = body => {
let jsonStr = util.between(body, '\'RELATED_PLAYER_ARGS\': ', /,[\n\r]/);
let watchNextJson, rvsParams, secondaryResults;
exports.getRelatedVideos = info => {
let rvsParams = [], secondaryResults;
try {
rvsParams = info.response.webWatchNextResponseExtensionData.relatedVideoArgs.split(',').map(e => qs.parse(e));
} catch (err) {
// Do nothing.
}
try {
jsonStr = JSON.parse(jsonStr);
watchNextJson = JSON.parse(jsonStr.watch_next_response);
rvsParams = jsonStr.rvs.split(',').map(e => qs.parse(e));
secondaryResults = watchNextJson.contents.twoColumnWatchNextResults.secondaryResults.secondaryResults.results;
secondaryResults = info.response.contents.twoColumnWatchNextResults.secondaryResults.secondaryResults.results;
} catch (err) {
return [];
}
Expand Down Expand Up @@ -170,25 +160,25 @@ exports.getRelatedVideos = body => {
};

/**
* Get like count from html.
* Get like count.
*
* @param {string} body
* @return {number}
*/
const getLikesRegex = /\\"likeCount\\":(\d+?),\\"likeCountText\\"/;
const getLikesRegex = /"label":"([\d,]+?) likes"/;
exports.getLikes = body => {
const likes = body.match(getLikesRegex);
return likes ? parseInt(likes[1]) : null;
return likes ? parseInt(likes[1].replace(/,/g, '')) : null;
};

/**
* Get dislike count from html.
* Get dislike count.
*
* @param {string} body
* @return {number}
*/
const getDislikesRegex = /\\"dislikeCount\\":(\d+?),\\"dislikeCountText\\"/;
const getDislikesRegex = /"label":"([\d,]+?) dislikes"/;
exports.getDislikes = body => {
const dislikes = body.match(getDislikesRegex);
return dislikes ? parseInt(dislikes[1]) : null;
return dislikes ? parseInt(dislikes[1].replace(/,/g, '')) : null;
};
Loading

0 comments on commit beb8d7a

Please sign in to comment.