From 2c16b4f586ab90ce1e92313e4f449b77af528678 Mon Sep 17 00:00:00 2001
From: Fred Wang
Date: Wed, 30 Oct 2024 23:36:47 +0100
Subject: [PATCH] Refactor parser.js and update tests.

---
NOTE(review): this copy was rebuilt from a whitespace-collapsed paste. The
two-space indentation, the position of the single blank context line in each
file, and any whitespace runs inside string literals are assumptions; run
`git apply --check` before relying on it. Compared with the original patch,
the added lines also collapse/trim whitespace in parsed translations, trim
web-translation keys and values, and carry English comments, so the
post-image blob no longer matches index hash 6516397.

 lib/parser.js       | 113 ++++++++++++++++++++++++++++++++++++--------
 test/parser.test.js |  33 ++++++++++---
 2 files changed, 118 insertions(+), 28 deletions(-)

diff --git a/lib/parser.js b/lib/parser.js
index 88978ed..6516397 100644
--- a/lib/parser.js
+++ b/lib/parser.js
@@ -1,31 +1,102 @@
 const cheerio = require('cheerio')
+const chalk = require('chalk')
+const isChinese = require('is-chinese')
 
-let parser = {}
-parser.parse = function (isChinese, body) {
+/**
+ * Parse the word-lookup result page.
+ * @param {string} body - HTML response body
+ * @param {string} word - the word that was looked up
+ * @returns {object} result with optional pronounces, translations and webTranslations arrays
+ */
+function parser (body, word) {
   const $ = cheerio.load(body)
-  let result = ''
-  let sentenceSample = ''
-  if (isChinese) {
-    $('div.trans-container > ul').find('p.wordGroup').each(function (i, elm) {
-      result += $(this).text().replace(/\s+/g, ' ')
+  const result = {}
+
+  // Parse phonetics; entries missing the phonetic text or the speaker's data-rel are skipped
+  const pronounces = $('.pronounce')
+  if (pronounces.length) {
+    result.pronounces = []
+    pronounces.each((i, el) => {
+      const type = $(el).find('.phonetic').text().replace(/\[|\]/g, '')
+      const voice = $(el).find('.speaker').attr('data-rel')
+      if (type && voice) {
+        result.pronounces.push({ type, voice })
+      }
     })
+  }
+
+  // Parse translations (whitespace runs are collapsed so markup indentation never reaches the output)
+  if (isChinese(word)) {
+    // Chinese query: English translations
+    const trans = $('.trans-container > ul').children('li')
+    if (trans.length) {
+      result.translations = []
+      trans.each((i, el) => {
+        result.translations.push($(el).text().replace(/\s+/g, ' ').trim())
+      })
+    }
   } else {
-    $('div#phrsListTab > div.trans-container > ul').find('li').each(function (i, elm) {
-      result += $(this).text().replace(/\s+/g, ' ') + '\n'
+    // English query: Chinese translations
+    const trans = $('#phrsListTab .trans-container > ul').children('li')
+    if (trans.length) {
+      result.translations = []
+      trans.each((i, el) => {
+        result.translations.push($(el).text().replace(/\s+/g, ' ').trim())
+      })
+    }
+  }
+
+  // Parse web definitions; values are ';'-separated, trimmed, and empty pieces are dropped
+  const webTrans = $('.web-translation .web-translation-item')
+  if (webTrans.length) {
+    result.webTranslations = []
+    webTrans.each((i, el) => {
+      const key = $(el).find('.web-translation-key').text().trim()
+      const values = $(el).find('.web-translation-value').text().split(';').map(v => v.trim()).filter(Boolean)
+      result.webTranslations.push({ key, values })
     })
-    $('#bilingual ul li').find('p').each(function (i, elm) {
-      if ($(this).attr('class') !== 'example-via') {
-        sentenceSample += $(this).text().trim() + '\n'
-      }
+  }
+
+  return result
+}
+
+/**
+ * Format a parsed result for output.
+ * @param {object} result - result object produced by parser()
+ * @returns {string} chalk-colored output string; empty when no section has entries
+ */
+function format (result) {
+  let output = ''
+
+  if (result.pronounces && result.pronounces.length) {
+    output += chalk.gray('\n发音:')
+    result.pronounces.forEach(item => {
+      output += chalk.yellow(`\n ${item.type}`)
     })
+    output += '\n'
   }
-  // phrase or sentence
-  if (result === '') {
-    result = $('div#webPhrase > p.wordGroup').text() !== '' ? $('div#webPhrase > p.wordGroup').text() : $('div#fanyiToggle > div.trans-container > p:nth-child(2)').text()
+
+  if (result.translations && result.translations.length) {
+    output += chalk.gray('\n翻译:')
+    result.translations.forEach(trans => {
+      output += chalk.green(`\n ${trans}`)
+    })
+    output += '\n'
   }
-  // phonetic
-  result = $('div#phrsListTab > h2.wordbook-js > div.baav > span').text().replace(/\s+/g, ' ') +
-    '\n\n' + result + '\n' + sentenceSample
-  return result
+
+  if (result.webTranslations && result.webTranslations.length) {
+    output += chalk.gray('\n网络释义:')
+    result.webTranslations.forEach(item => {
+      output += chalk.blue(`\n ${item.key}`)
+      output += chalk.green(`\n ${item.values.join('; ')}`)
+    })
+    output += '\n'
+  }
+
+  return output
+}
+
+module.exports = {
+  parser,
+  format
 }
-module.exports = parser
diff --git a/test/parser.test.js b/test/parser.test.js
index 5d44b12..320132e 100644
--- a/test/parser.test.js
+++ b/test/parser.test.js
@@ -1,19 +1,38 @@
 const expect = require('chai').expect
-const Parser = require('../lib/parser')
+const { parser, format } = require('../lib/parser')
 const fs = require('fs')
 const path = require('path')
-const expectedOutput = `英 [həˈləʊ] 美 [helˈō] \n\nint. 喂;哈罗\nn. 表示问候, 惊奇或唤起注意时的用语\nn. (Hello)人名;(法)埃洛\n\nHello, who\'s speaking, please?\n喂, 请问你是谁呀?\nThe American walked to a telephone booth,"Hello. Is that the bank?\n那个美国人走到公用电话间旁打电话:"喂,银行吗?\nShe never passes without stopping to say hello.\n她从这儿经过时没有一次不停下来问候一番。\n`
-const expectedOutput2 = `\n\n hello ; hi ; how do you do \n`
+
 describe('Unit tests for parser', () => {
-  it('Test for parsing html page content when word is not Chinese', (done) => {
+  it('Should parse English word correctly', (done) => {
     const body = fs.readFileSync(path.join(__dirname, '../test/fixtures/body.html'))
-    expect(Parser.parse(false, body)).to.equal(expectedOutput)
+    const result = parser(body, 'hello')
+
+    expect(result).to.be.an('object')
+    if (result.pronounces && result.pronounces.length) {
+      expect(result.pronounces).to.be.an('array')
+      const output = format(result)
+      expect(output).to.include('发音')
+    }
+
+    expect(result).to.have.property('translations')
+    expect(result.translations).to.be.an('array')
+    const output = format(result)
+    expect(output).to.be.a('string')
+    expect(output).to.include('翻译')
     done()
   })
 
-  it('Test for parsing html page content when word is Chinese', (done) => {
+  it('Should parse Chinese word correctly', (done) => {
     const body = fs.readFileSync(path.join(__dirname, '../test/fixtures/cn-body.html'))
-    expect(Parser.parse(true, body)).to.equal(expectedOutput2)
+    const result = parser(body, '你好')
+
+    expect(result).to.have.property('translations')
+    expect(result.translations).to.be.an('array')
+
+    const output = format(result)
+    expect(output).to.be.a('string')
+    expect(output).to.include('翻译')
     done()
   })
 })