Merge pull request #44 from kenshinji/chore/refactoring-parser

chore: refactor parser.js and update tests.
kenshinji · Oct 30, 2024 · 7748418
2 parents 41cb4f4 + 2c16b4f
commit 7748418
Show file tree

Hide file tree

Showing 2 changed files with 118 additions and 28 deletions.
diff --git a/lib/parser.js b/lib/parser.js
@@ -1,31 +1,102 @@
 const cheerio = require('cheerio')
+const chalk = require('chalk')
+const isChinese = require('is-chinese')
 
-let parser = {}
-parser.parse = function (isChinese, body) {
+/**
+ * 解析单词查询结果
+ * @param {string} body - HTML响应体
+ * @param {string} word - 查询的单词
+ * @returns {object} 解析后的结果对象
+ */
+function parser (body, word) {
 	const $ = cheerio.load(body)
-	let result = ''
-	let sentenceSample = ''
-	if (isChinese) {
-		$('div.trans-container > ul').find('p.wordGroup').each(function (i, elm) {
-			result += $(this).text().replace(/\s+/g, ' ')
+	const result = {}
+
+	// 解析音标
+	const pronounces = $('.pronounce')
+	if (pronounces.length) {
+		result.pronounces = []
+		pronounces.each((i, el) => {
+			const type = $(el).find('.phonetic').text().replace(/\[|\]/g, '')
+			const voice = $(el).find('.speaker').attr('data-rel')
+			if (type && voice) {
+				result.pronounces.push({ type, voice })
+			}
 		})
+	}
+
+	// 解析翻译内容
+	if (isChinese(word)) {
+		// 中文查英文
+		const trans = $('.trans-container > ul').children('li')
+		if (trans.length) {
+			result.translations = []
+			trans.each((i, el) => {
+				result.translations.push($(el).text())
+			})
+		}
 	} else {
-		$('div#phrsListTab > div.trans-container > ul').find('li').each(function (i, elm) {
-			result += $(this).text().replace(/\s+/g, ' ') + '\n'
+		// 英文查中文
+		const trans = $('#phrsListTab .trans-container > ul').children('li')
+		if (trans.length) {
+			result.translations = []
+			trans.each((i, el) => {
+				result.translations.push($(el).text().replace(/\s+/g, ' '))
+			})
+		}
+	}
+
+	// 解析网络释义
+	const webTrans = $('.web-translation .web-translation-item')
+	if (webTrans.length) {
+		result.webTranslations = []
+		webTrans.each((i, el) => {
+			const key = $(el).find('.web-translation-key').text()
+			const values = $(el).find('.web-translation-value').text().split(';')
+			result.webTranslations.push({ key, values })
 		})
-		$('#bilingual ul li').find('p').each(function (i, elm) {
-			if ($(this).attr('class') !== 'example-via') {
-				sentenceSample += $(this).text().trim() + '\n'
-			}
+	}
+
+	return result
+}
+
+/**
+ * 格式化输出结果
+ * @param {object} result - 解析后的结果对象
+ * @returns {string} 格式化后的输出字符串
+ */
+function format (result) {
+	let output = ''
+
+	if (result.pronounces && result.pronounces.length) {
+		output += chalk.gray('\n发音：')
+		result.pronounces.forEach(item => {
+			output += chalk.yellow(`\n  ${item.type}`)
 		})
+		output += '\n'
 	}
-	// phrase or sentence
-	if (result === '') {
-		result = $('div#webPhrase > p.wordGroup').text() !== '' ? $('div#webPhrase > p.wordGroup').text() : $('div#fanyiToggle > div.trans-container > p:nth-child(2)').text()
+
+	if (result.translations && result.translations.length) {
+		output += chalk.gray('\n翻译：')
+		result.translations.forEach(trans => {
+			output += chalk.green(`\n  ${trans}`)
+		})
+		output += '\n'
 	}
-	// phonetic
-	result = $('div#phrsListTab > h2.wordbook-js > div.baav > span').text().replace(/\s+/g, ' ') +
-       '\n\n' + result + '\n' + sentenceSample
-	return result
+
+	if (result.webTranslations && result.webTranslations.length) {
+		output += chalk.gray('\n网络释义：')
+		result.webTranslations.forEach(item => {
+			output += chalk.blue(`\n  ${item.key}`)
+			output += chalk.green(`\n    ${item.values.join('; ')}`)
+		})
+		output += '\n'
+	}
+
+	return output
+}
+
+module.exports = {
+	parser,
+	format
 }
-module.exports = parser
diff --git a/test/parser.test.js b/test/parser.test.js
@@ -1,19 +1,38 @@
 const expect = require('chai').expect
-const Parser = require('../lib/parser')
+const { parser, format } = require('../lib/parser')
 const fs = require('fs')
 const path = require('path')
-const expectedOutput = `英 [həˈləʊ] 美 [helˈō] \n\nint. 喂；哈罗\nn. 表示问候， 惊奇或唤起注意时的用语\nn. (Hello)人名；(法)埃洛\n\nHello, who\'s speaking, please?\n喂， 请问你是谁呀?\nThe American walked to a telephone booth,"Hello. Is that the bank?\n那个美国人走到公用电话间旁打电话:"喂,银行吗?\nShe never passes without stopping to say hello.\n她从这儿经过时没有一次不停下来问候一番。\n`
-const expectedOutput2 = `\n\n hello ; hi ; how do you do \n`
+
 describe('Unit tests for parser', () => {
-	it('Test for parsing html page content when word is not Chinese', (done) => {
+	it('Should parse English word correctly', (done) => {
 		const body = fs.readFileSync(path.join(__dirname, '../test/fixtures/body.html'))
-		expect(Parser.parse(false, body)).to.equal(expectedOutput)
+		const result = parser(body, 'hello')
+
+		expect(result).to.be.an('object')
+		if (result.pronounces && result.pronounces.length) {
+			expect(result.pronounces).to.be.an('array')
+			const output = format(result)
+			expect(output).to.include('发音')
+		}
+
+		expect(result).to.have.property('translations')
+		expect(result.translations).to.be.an('array')
+		const output = format(result)
+		expect(output).to.be.a('string')
+		expect(output).to.include('翻译')
 		done()
 	})
 
-	it('Test for parsing html page content when word is Chinese', (done) => {
+	it('Should parse Chinese word correctly', (done) => {
 		const body = fs.readFileSync(path.join(__dirname, '../test/fixtures/cn-body.html'))
-		expect(Parser.parse(true, body)).to.equal(expectedOutput2)
+		const result = parser(body, '你好')
+
+		expect(result).to.have.property('translations')
+		expect(result.translations).to.be.an('array')
+
+		const output = format(result)
+		expect(output).to.be.a('string')
+		expect(output).to.include('翻译')
 		done()
 	})
 })