Skip to content

Commit

Permalink
Merge pull request #236 from jiaojiaodubai/CNBKSY
Browse files Browse the repository at this point in the history
add (CNBKSY.js) 全国报刊索引 (#235)
  • Loading branch information
l0o0 authored Nov 29, 2023
2 parents e8dd53f + 36be3e7 commit 6c431af
Show file tree
Hide file tree
Showing 3 changed files with 345 additions and 0 deletions.
340 changes: 340 additions & 0 deletions CNBKSY.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,340 @@
{
"translatorID": "bb0d0e84-66b4-46e3-9089-ebc975a86111",
"label": "CNBKSY",
"creator": "jiaojiaodubai23",
"target": "^https?://.*(www\\.)?cnbksy\\.(cn|com)",
"minVersion": "5.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2023-11-28 03:52:36"
}

/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2023 [email protected]
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/


/**
 * Decides which Zotero item type (if any) the current page represents.
 *
 * @param {Document} doc - the page being inspected
 * @param {string} url - the address of that page
 * @returns {string|false} 'newspaperArticle', 'journalArticle', 'artwork',
 *   'multiple', or false when nothing on the page can be imported
 */
function detectWeb(doc, url) {
	// When a `.body_box` element exists, ask Zotero to re-run detection
	// whenever its subtree changes.
	const watchedBox = doc.querySelector('.body_box');
	if (watchedBox) {
		Z.monitorDOMChanges(watchedBox, { childList: true, subtree: true });
	}

	if (url.includes('/detail/')) {
		// A detail page whose metadata table links to a newspaper is a
		// newspaper article; every other detail page is a journal article.
		const newspaperLink = doc.querySelector('.srTable a[href*="/literature/newspaper/"]');
		return newspaperLink ? 'newspaperArticle' : 'journalArticle';
	}

	if (url.includes('/picDetail/')) {
		return 'artwork';
	}

	return getSearchResults(doc, true) ? 'multiple' : false;
}

/**
 * Collects the links on a page that point at article or picture detail pages.
 *
 * @param {Document} doc - the page to scan
 * @param {boolean} checkOnly - when truthy, stop at the first usable link
 * @returns {Object<string, string>|boolean} true as soon as one usable link
 *   is seen in check-only mode; otherwise a map of href to trimmed link text,
 *   or false when no usable link exists
 */
function getSearchResults(doc, checkOnly) {
	const collected = {};
	let usable = 0;
	const links = doc.querySelectorAll('a[href*="/search/detail/"],a[href*="/search/picDetail/"]');
	for (const link of links) {
		const target = link.href;
		const label = ZU.trimInternal(link.textContent);
		// Only links with both an address and visible text are usable.
		if (target && label) {
			if (checkOnly) {
				return true;
			}
			usable += 1;
			collected[target] = label;
		}
	}
	if (usable === 0) {
		return false;
	}
	return collected;
}

/**
 * Translator entry point: saves the current page, or lets the user pick
 * entries from a listing and saves each chosen one.
 *
 * @param {Document} doc - the page the translator was invoked on
 * @param {string} url - the address of that page
 * @returns {Promise<void>}
 */
async function doWeb(doc, url) {
	const isListing = detectWeb(doc, url) == 'multiple';
	if (!isListing) {
		await scrape(doc, url);
		return;
	}

	const chosen = await Zotero.selectItems(getSearchResults(doc, false));
	if (!chosen) return;

	// Pages are fetched and scraped one after another, in selection order.
	for (const detailUrl of Object.keys(chosen)) {
		// Note: this request relies on the browser's cookies.
		const detailDoc = await requestDocument(detailUrl);
		await scrape(detailDoc);
	}
}

/**
 * Wraps a name in a Zotero creator object.
 *
 * @param {string} creator - the name, stored unchanged in `lastName`
 * @returns {{lastName: string, creatorType: string, fieldMode: number}}
 *   an 'author' creator with `fieldMode` set to 1
 */
function matchCreator(creator) {
	return {
		lastName: creator,
		creatorType: 'author',
		fieldMode: 1
	};
}

/**
 * Runs `pattern` against `string` and picks one entry of the match result.
 *
 * @param {string} string - the text to search
 * @param {RegExp} pattern - the expression to apply
 * @param {number} [index=1] - which entry of the match array to return
 *   (0 is the whole match, 1 the first capture group, ...)
 * @returns {string} the selected entry, or '' when the pattern does not match
 */
function tryMatch(string, pattern, index = 1) {
	const hit = string.match(pattern);
	if (!hit) {
		return '';
	}
	return hit[index];
}

/**
 * Label/value lookup built from the `.srTable` element of a detail page.
 *
 * Every row except the last contributes one entry: the text of its first
 * cell (minus a trailing colon) is the label, the whitespace-normalized text
 * of its second cell is the value.
 */
class Table {
	/**
	 * @param {Document} doc - a page that contains a `.srTable` element;
	 *   a page without one makes the constructor throw
	 */
	constructor(doc) {
		this.innerData = {};
		const rows = doc.querySelector('.srTable').rows;
		// The last row holds the download/purchase buttons, so it is skipped.
		for (let i = 0; i < rows.length - 1; i++) {
			const cells = rows[i].cells;
			// A row without a label cell and a value cell carries no metadata.
			if (!cells || cells.length < 2) continue;
			const label = cells[0].innerText.replace(/[:]$/gm, '');
			this.innerData[label] = ZU.trimInternal(cells[1].innerText);
		}
	}

	/**
	 * Exact lookup.
	 *
	 * @param {string} key - the full label
	 * @returns {string} the stored value, or '' when the label is unknown
	 */
	get(key) {
		// Called through Object.prototype so that a label can never shadow
		// the check and inherited names such as 'toString' never match.
		return Object.prototype.hasOwnProperty.call(this.innerData, key)
			? this.innerData[key]
			: '';
	}

	/**
	 * Substring lookup.
	 *
	 * @param {string} key - text that the wanted label must contain
	 * @returns {string} the value of the first label (in insertion order)
	 *   containing `key`, or '' when no label does
	 */
	fuzzyGet(key) {
		// innerData is a plain object, so its labels come from Object.keys().
		const realKey = Object.keys(this.innerData).find(label => label.includes(key));
		return realKey === undefined ? '' : this.innerData[realKey];
	}
}

/**
 * Builds one Zotero item from a detail page and saves it.
 *
 * The item type comes from detectWeb(); the field values come from the
 * page's metadata table (see Table). The item is completed exactly once.
 *
 * @param {Document} doc - the detail page
 * @param {string} [url=doc.location.href] - address stored in the item's
 *   `url` field and used for type detection
 * @returns {Promise<void>}
 */
async function scrape(doc, url = doc.location.href) {
	const data = new Table(doc);
	const type = detectWeb(doc, url);
	const newItem = new Z.Item(type);
	// NOTE(review): several patterns below (`/[]$/`, `/^|$/g`, `/$/`, and the
	// identical volume/issue expressions) look as if literal non-ASCII
	// characters were lost from them; as written they match nothing or the
	// empty string. They are kept unchanged here - confirm against the
	// upstream translator before relying on them.
	switch (type) {
		case 'journalArticle':
			newItem.title = data.get('题名');
			newItem.creators = data.get('作者').split(/\s/)
				// Splitting an empty cell yields '', which is not a name.
				.filter(element => element && !element.match(/[]$/))
				.map(element => matchCreator(element));
			newItem.publicationTitle = data.get('文献来源').replace(/^|$/g, '');
			newItem.date = data.get('出版时间').replace(/$/, '');
			newItem.volume = tryMatch(data.get('卷期(页)'), /(\d*)/);
			newItem.issue = tryMatch(data.get('卷期(页)'), /(\d*)/);
			newItem.pages = tryMatch(data.get('卷期(页)'), /([\d-.+]*)/);
			newItem.abstractNote = data.get('摘要');
			newItem.tags = data.get('主题词').slice(1, -1).split(/[,;]/)
				// An empty cell would otherwise produce a single blank tag.
				.filter(element => element)
				.map(element => ({ tag: element }));
			break;
		case 'newspaperArticle': {
			newItem.title = data.fuzzyGet('标题');
			newItem.shortTitle = data.get('标题2');
			newItem.publicationTitle = data.get('文献来源').replace(/^|$/g, '');
			newItem.place = data.get('新闻发布地');
			newItem.date = data.get('出版时间')
				.replace(/(^\D*)|(\D*$)/g, '')
				.replace(/\D+/g, '-')
				// Zero-pad single-digit parts. The lookahead leaves the next
				// separator in place so that consecutive parts are all padded.
				.replace(/-(\d)(?=\D|$)/g, '-0$1');
			newItem.pages = data.get('版次').replace(/^0*/, '');
			// `creators` must stay an array, even for a single source.
			const source = data.get('新闻来源');
			newItem.creators = source ? [matchCreator(source)] : [];
			// `extra` starts out unset, so assign rather than append.
			const category = data.get('类别');
			if (category) {
				newItem.extra = `类别: ${category}`;
			}
			break;
		}
		case 'artwork':
			newItem.title = data.get('图片标题');
			newItem.abstractNote = data.get('图片描述');
			newItem.creators = data.get('图片责任者').split(/\s/)
				.filter(element => element)
				.map(element => matchCreator(element));
			newItem.date = data.get('出版年份');
			newItem.artworkMedium = data.get('图片类型');
			newItem.artworkSize = data.get('图片尺寸');
			newItem.archive = data.get('图片来源');
			newItem.archiveLocation = `${data.get('收录卷期')} ${data.get('所属正文篇名')}`;
			newItem.attachments.push({
				title: 'Snapshot',
				document: doc
			});
			break;
		default:
			break;
	}
	newItem.url = url;
	// Single completion point: completing inside a case as well would save
	// the item twice.
	newItem.complete();
}


/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "https://www.cnbksy.cn/search/detail/25b6c69a90d8c868d26d34432adf0a14/7/65632869f74f7f47007f6262",
"items": [
{
"itemType": "journalArticle",
"title": "我们为什么不了解苏联?",
"creators": [
{
"lastName": "斯诺",
"creatorType": "author",
"fieldMode": 1
},
{
"lastName": "钱华",
"creatorType": "author",
"fieldMode": 1
}
],
"date": "1947",
"issue": "22",
"libraryCatalog": "CNBKSY",
"pages": "13-18",
"publicationTitle": "文萃",
"volume": "2",
"attachments": [],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.cnbksy.cn/search/detail/31f4a4381418a4f24a341dea6e355fa0/8/656326927fa00c4f558f5dfe",
"items": [
{
"itemType": "journalArticle",
"title": "记念刘和珍君",
"creators": [
{
"lastName": "鲁迅",
"creatorType": "author",
"fieldMode": 1
}
],
"date": "1996",
"issue": "2",
"libraryCatalog": "CNBKSY",
"pages": "40-41.60",
"publicationTitle": "名作欣赏(太原)",
"attachments": [],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.cnbksy.cn/search/detail/6352bccafc25f907edbed671da67097a/7/65632869f74f7f47007f6262",
"items": [
{
"itemType": "journalArticle",
"title": "走向自由祖国:歌曲",
"creators": [
{
"lastName": "许幸之",
"creatorType": "author",
"fieldMode": 1
},
{
"lastName": "余森强",
"creatorType": "author",
"fieldMode": 1
}
],
"date": "1946",
"issue": "36",
"libraryCatalog": "CNBKSY",
"pages": "23",
"publicationTitle": "文萃",
"shortTitle": "走向自由祖国",
"attachments": [],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.cnbksy.cn/search/detail/79d1aa3b33a678c39fc0e5b135771ebf/8/65632cd923b0997d8ad78b38",
"items": [
{
"itemType": "journalArticle",
"title": "青年“无所谓”心态探微",
"creators": [
{
"lastName": "唐美云",
"creatorType": "author",
"fieldMode": 1
},
{
"lastName": "华静",
"creatorType": "author",
"fieldMode": 1
}
],
"date": "2004",
"abstractNote": "青年无所谓心态是青年试图解决问题时产生的两难境地时选择的第三条道路,有着其深厚的社会原因.然而青年自身在接受社会纪过程中产生的“自我意识”误读、价值失衡、责任薄弱、信仰却是其更为本质原因.针对这些原因和根据的青年的特点,对无所谓心态应加以引导.",
"issue": "1",
"libraryCatalog": "CNBKSY",
"pages": "52-56",
"publicationTitle": "当代青年研究(上海)",
"attachments": [],
"tags": [
{
"tag": " 青年特点"
},
{
"tag": "青年"
}
],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.cnbksy.cn/search/picDetail/bcce04512712dd6ee0d457bfdd2ed8de/15/null",
"items": [
{
"itemType": "artwork",
"title": "孙总理肖像浮雕",
"creators": [],
"date": "1930 年",
"archive": "良友",
"archiveLocation": "孫總理肖像浮雕-第52 期",
"artworkMedium": "雕塑",
"artworkSize": "1230*1611像素",
"libraryCatalog": "CNBKSY",
"url": "https://www.cnbksy.cn/search/picDetail/bcce04512712dd6ee0d457bfdd2ed8de/15/null",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
}
]
/** END TEST CASES **/
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@
- [x] 支持单条目
- [x] 支持多条目
- [x] PDF附件下载
+ [全国报刊索引](https://www.cnbksy.cn/home) -> [CNBKSY](./CNBKSY.js)
- [x] 抓取引文信息
- [x] 支持单条目
- [x] 支持多条目
+ 来自[道格学社仓库](https://github.com/gezhongran/DougSociety)的翻译器
- [Bilibili 视频网站](./BiliBili.js)
- [当当网图书](./Dangdang.js)
Expand Down
1 change: 1 addition & 0 deletions data/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,6 @@
"CCPINFO": "国家出版发行信息公共服务平台",
"Publications Data Center - China": "国家版本馆版权数据中心",
"Pishu Data": "皮书数据库",
"CNBKSY": "全国报刊索引",
"People's Daily": "人民日报"
}

0 comments on commit 6c431af

Please sign in to comment.