Skip to content

Commit

Permalink
Feature bernardro#17: Minor updates regarding INPUT_SCHEMA.json
Browse files Browse the repository at this point in the history
  • Loading branch information
X0R0X committed Jun 17, 2021
1 parent 88d1207 commit 0f06335
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 10 deletions.
8 changes: 4 additions & 4 deletions INPUT_SCHEMA.json
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,11 @@
"description": "If set to `true` and language is provided, we download automatically generated subtitles rather than user ones. If no language is provided, this setting has no effect",
"type": "boolean"
},
"scrapeCommentCount": {
"title" : "Attempt to scrape N comments from video detail page.",
"maxComments": {
"title" : "Max comments",
"type" : "integer",
"description" : "Try to scrape N comments from video detail page. -1 ~ Don't scrape any, 0 ~ Scrape all available comments, 1-x ~ scrape (1-x) comments.",
"default": -1
"description" : "Limit the number of comments that will get scraped. Scraping comments requires scrolling and takes time. 0 means we will not scrape any comments at all. ",
"default": 0
}
},
"required": [
Expand Down
2 changes: 1 addition & 1 deletion src/crawler_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ exports.handleDetail = async (page, request, extendOutputFunction, subtitlesSett
}

let comments = null;
if (scrapeCommentCount > -1) {
if (scrapeCommentCount > 0) {
comments = await utils.getVideoComments(page);
}

Expand Down
4 changes: 2 additions & 2 deletions src/subtitles.js
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class SrtConvert {
* @param preferAutoGenerated If set to true, automatically generated subtitles are preferred over user-provided ones.
* If set to false and only automatically generated subtitles are available, those are fetched as a fallback.
* @returns {Promise<*[]>} Promise representing the whole fetching and srt generating process. Promise result is
* list of `SrtConvert` classes containing already converted .srt data. See `SrtConvert` class.
* list of `SrtConvert` instances containing already converted .srt data. See `SrtConvert` class.
*/
async function fetchSubtitles(page, language = null, preferAutoGenerated = false) {
log.debug(`Fetching subtitles for ${page.url()},lang:${language}...`);
Expand Down Expand Up @@ -102,7 +102,7 @@ async function fetchSubtitles(page, language = null, preferAutoGenerated = false
subtitlesToDl.push({
lang: track['languageCode'],
url: `${track['baseUrl']}&fmt=json3`,
type: track['kind'] ? 'auto_generated' : 'user_generated'
type: track['kind'] ? SrtConvert.TYPE_AUTO_GENERATED : SrtConvert.TYPE_USER_GENERATED,
});
}
} else {
Expand Down
6 changes: 3 additions & 3 deletions src/utility.js
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ module.exports.getVideoComments = async (page, maxCommentCount=0) => {
return document.body.querySelectorAll(commentSelector).length;
});
log.debug(`Got ${commentCount}/${maxCommentCount} comments for ${page.url()}`)
return commentCount >= maxCommentCount && maxCommentCount > 0;
return commentCount >= maxCommentCount;
}}
);
const comments = await page.evaluate((max) => {
Expand All @@ -612,11 +612,11 @@ module.exports.getVideoComments = async (page, maxCommentCount=0) => {
const author = e.querySelector('#author-text > span').innerHTML.trim()
.replace(/\\n/g, '');
if (author) {
const comment = e.querySelector('#content-text').innerHTML.trim()
const text = e.querySelector('#content-text').innerHTML.trim()
.replace(/\\n/g, '');
a.push({
author: author,
comment: comment,
comment: text,
});
}
if (a.length >= max) {
Expand Down

0 comments on commit 0f06335

Please sign in to comment.