Skip to content

Commit

Permalink
Merge pull request #44 from kenshinji/chore/refactoring-parser
Browse files Browse the repository at this point in the history
chore: refactor parser.js and update tests.
  • Loading branch information
kenshinji authored Oct 30, 2024
2 parents 41cb4f4 + 2c16b4f commit 7748418
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 28 deletions.
113 changes: 92 additions & 21 deletions lib/parser.js
Original file line number Diff line number Diff line change
@@ -1,31 +1,102 @@
const cheerio = require('cheerio')
const chalk = require('chalk')
const isChinese = require('is-chinese')

// NOTE(review): this span is taken from a rendered diff whose +/- markers are
// not visible. Lines flagged below appear to belong to the removed pre-refactor
// implementation and are interleaved with the new function — confirm against
// the repository before treating this text as runnable code.
// NOTE(review): the next two lines look like the removed `parser.parse` entry point.
let parser = {}
parser.parse = function (isChinese, body) {
/**
* Parse a word-lookup result page into a plain object.
*
* The returned object only carries the keys whose elements were found:
* - `pronounces`: array of `{ type, voice }` built from `.pronounce` elements
* - `translations`: array of strings taken from the translation list items
* - `webTranslations`: array of `{ key, values }` from `.web-translation-item`
*
* @param {string} body - HTML response body, handed to `cheerio.load`
* @param {string} word - the queried word; passed to `isChinese` to pick the selector branch
* @returns {object} the parsed result object
*/
function parser (body, word) {
const $ = cheerio.load(body)
// NOTE(review): the next five lines look like removed pre-refactor code
// (string-based `result` / `sentenceSample` accumulators).
let result = ''
let sentenceSample = ''
if (isChinese) {
$('div.trans-container > ul').find('p.wordGroup').each(function (i, elm) {
result += $(this).text().replace(/\s+/g, ' ')
const result = {}

// Parse phonetic transcriptions
const pronounces = $('.pronounce')
if (pronounces.length) {
result.pronounces = []
pronounces.each((i, el) => {
// Strip the square brackets from the `.phonetic` text.
const type = $(el).find('.phonetic').text().replace(/\[|\]/g, '')
const voice = $(el).find('.speaker').attr('data-rel')
// Entries missing either the phonetic text or the `data-rel` attribute are skipped.
if (type && voice) {
result.pronounces.push({ type, voice })
}
})
}

// Parse the translation content
if (isChinese(word)) {
// Chinese word looked up for English translations
const trans = $('.trans-container > ul').children('li')
if (trans.length) {
result.translations = []
trans.each((i, el) => {
result.translations.push($(el).text())
})
}
} else {
// NOTE(review): the next two lines look like removed pre-refactor code.
$('div#phrsListTab > div.trans-container > ul').find('li').each(function (i, elm) {
result += $(this).text().replace(/\s+/g, ' ') + '\n'
// English word looked up for Chinese translations
const trans = $('#phrsListTab .trans-container > ul').children('li')
if (trans.length) {
result.translations = []
trans.each((i, el) => {
// Collapse each run of whitespace into a single space.
result.translations.push($(el).text().replace(/\s+/g, ' '))
})
}
}

// Parse web (network) definitions
const webTrans = $('.web-translation .web-translation-item')
if (webTrans.length) {
result.webTranslations = []
webTrans.each((i, el) => {
const key = $(el).find('.web-translation-key').text()
// The value text is split on ';' into an array; pieces are not trimmed.
const values = $(el).find('.web-translation-value').text().split(';')
result.webTranslations.push({ key, values })
})
// NOTE(review): the next four lines look like removed pre-refactor code
// (bilingual example sentence collection).
$('#bilingual ul li').find('p').each(function (i, elm) {
if ($(this).attr('class') !== 'example-via') {
sentenceSample += $(this).text().trim() + '\n'
}
}

return result
}

/**
* Format a parsed result object as a printable string.
*
* Each section is emitted only when the matching array exists and is
* non-empty: a gray heading, one coloured line per entry, then a newline.
*
* NOTE(review): this span is taken from a rendered diff whose +/- markers are
* not visible; lines flagged below appear to be removed pre-refactor code.
*
* @param {object} result - the parsed result object (reads `pronounces`, `translations`, `webTranslations`)
* @returns {string} the formatted output string
*/
function format (result) {
let output = ''

// Pronunciation section: only `item.type` is printed, `item.voice` is not used here.
if (result.pronounces && result.pronounces.length) {
output += chalk.gray('\n发音:')
result.pronounces.forEach(item => {
output += chalk.yellow(`\n ${item.type}`)
})
output += '\n'
}
// NOTE(review): the next three lines look like removed pre-refactor code.
// phrase or sentence
if (result === '') {
result = $('div#webPhrase > p.wordGroup').text() !== '' ? $('div#webPhrase > p.wordGroup').text() : $('div#fanyiToggle > div.trans-container > p:nth-child(2)').text()

// Translation section.
if (result.translations && result.translations.length) {
output += chalk.gray('\n翻译:')
result.translations.forEach(trans => {
output += chalk.green(`\n ${trans}`)
})
output += '\n'
}
// NOTE(review): the next four lines look like removed pre-refactor code.
// phonetic
result = $('div#phrsListTab > h2.wordbook-js > div.baav > span').text().replace(/\s+/g, ' ') +
'\n\n' + result + '\n' + sentenceSample
return result

// Web definitions section: the key in blue, then its values re-joined with '; '.
if (result.webTranslations && result.webTranslations.length) {
output += chalk.gray('\n网络释义:')
result.webTranslations.forEach(item => {
output += chalk.blue(`\n ${item.key}`)
output += chalk.green(`\n ${item.values.join('; ')}`)
})
output += '\n'
}

return output
}

// Public API: `parser` and `format` are exposed as named exports.
module.exports = {
parser,
format
}
// NOTE(review): the line below looks like the removed pre-refactor export from
// the diff. If it were kept after the object export above, it would overwrite
// it and `format` would no longer be exported — confirm it is gone in the repo.
module.exports = parser
33 changes: 26 additions & 7 deletions test/parser.test.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,38 @@
const expect = require('chai').expect
const Parser = require('../lib/parser')
const { parser, format } = require('../lib/parser')
const fs = require('fs')
const path = require('path')
// NOTE(review): both constants look like removed pre-refactor fixtures (exact
// expected strings for the old `Parser.parse`). In the visible test code they
// are only used by assertions that also appear to be removed — confirm.
const expectedOutput = `英 [həˈləʊ] 美 [helˈō] \n\nint. 喂;哈罗\nn. 表示问候, 惊奇或唤起注意时的用语\nn. (Hello)人名;(法)埃洛\n\nHello, who\'s speaking, please?\n喂, 请问你是谁呀?\nThe American walked to a telephone booth,"Hello. Is that the bank?\n那个美国人走到公用电话间旁打电话:"喂,银行吗?\nShe never passes without stopping to say hello.\n她从这儿经过时没有一次不停下来问候一番。\n`
const expectedOutput2 = `\n\n hello ; hi ; how do you do \n`

// Unit tests for `parser` and `format`, run against HTML fixtures on disk.
// NOTE(review): this span is taken from a rendered diff whose +/- markers are
// not visible; lines flagged below appear to be removed pre-refactor test code.
describe('Unit tests for parser', () => {
// NOTE(review): the next line looks like the removed pre-refactor test title.
it('Test for parsing html page content when word is not Chinese', (done) => {
it('Should parse English word correctly', (done) => {
const body = fs.readFileSync(path.join(__dirname, '../test/fixtures/body.html'))
// NOTE(review): the next line looks like a removed assertion against the old `Parser.parse` API.
expect(Parser.parse(false, body)).to.equal(expectedOutput)
const result = parser(body, 'hello')

expect(result).to.be.an('object')
// Pronunciation checks run only when the fixture produced pronunciation entries.
if (result.pronounces && result.pronounces.length) {
expect(result.pronounces).to.be.an('array')
const output = format(result)
expect(output).to.include('发音')
}

expect(result).to.have.property('translations')
expect(result.translations).to.be.an('array')
const output = format(result)
expect(output).to.be.a('string')
expect(output).to.include('翻译')
done()
})

// NOTE(review): the next line looks like the removed pre-refactor test title.
it('Test for parsing html page content when word is Chinese', (done) => {
it('Should parse Chinese word correctly', (done) => {
const body = fs.readFileSync(path.join(__dirname, '../test/fixtures/cn-body.html'))
// NOTE(review): the next line looks like a removed assertion against the old `Parser.parse` API.
expect(Parser.parse(true, body)).to.equal(expectedOutput2)
const result = parser(body, '你好')

expect(result).to.have.property('translations')
expect(result.translations).to.be.an('array')

const output = format(result)
expect(output).to.be.a('string')
expect(output).to.include('翻译')
done()
})
})

0 comments on commit 7748418

Please sign in to comment.