diff --git a/.eslintrc.json b/.eslintrc.json index bef0e9e202..0ccafafbf4 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -30,6 +30,9 @@ "addons/xterm-addon-serialize/benchmark/tsconfig.json", "addons/xterm-addon-unicode11/src/tsconfig.json", "addons/xterm-addon-unicode11/test/tsconfig.json", + "addons/xterm-addon-unicode-graphemes/src/tsconfig.json", + "addons/xterm-addon-unicode-graphemes/test/tsconfig.json", + "addons/xterm-addon-unicode-graphemes/benchmark/tsconfig.json", "addons/xterm-addon-web-links/src/tsconfig.json", "addons/xterm-addon-web-links/test/tsconfig.json", "addons/xterm-addon-webgl/src/tsconfig.json", @@ -38,6 +41,7 @@ "sourceType": "module" }, "ignorePatterns": [ + "addons/*/src/third-party/*.ts", "**/inwasm-sdks/*", "**/typings/*.d.ts", "**/node_modules", diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb003eb7d8..eea525d063 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,6 +42,8 @@ jobs: ./addons/xterm-addon-serialize/out-test/* \ ./addons/xterm-addon-unicode11/out/* \ ./addons/xterm-addon-unicode11/out-test/* \ + ./addons/xterm-addon-unicode-graphemes/out/* \ + ./addons/xterm-addon-unicode-graphemes/out-test/* \ ./addons/xterm-addon-web-links/out/* \ ./addons/xterm-addon-web-links/out-test/* \ ./addons/xterm-addon-webgl/out/* \ @@ -68,6 +70,8 @@ jobs: yarn --frozen-lockfile yarn install-addons - name: Lint code + env: + NODE_OPTIONS: --max_old_space_size=4096 run: yarn lint - name: Lint API run: yarn lint-api diff --git a/addons/xterm-addon-unicode-graphemes/.gitignore b/addons/xterm-addon-unicode-graphemes/.gitignore new file mode 100644 index 0000000000..03c051b3c8 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/.gitignore @@ -0,0 +1,3 @@ +lib +node_modules +out-benchmark diff --git a/addons/xterm-addon-unicode-graphemes/.npmignore b/addons/xterm-addon-unicode-graphemes/.npmignore new file mode 100644 index 0000000000..b203232aff --- /dev/null +++ 
b/addons/xterm-addon-unicode-graphemes/.npmignore @@ -0,0 +1,29 @@ +# Blacklist - exclude everything except npm defaults such as LICENSE, etc +* +!*/ + +# Whitelist - lib/ +!lib/**/*.d.ts + +!lib/**/*.js +!lib/**/*.js.map + +!lib/**/*.css + +# Whitelist - src/ +!src/**/*.ts +!src/**/*.d.ts + +!src/**/*.js +!src/**/*.js.map + +!src/**/*.css + +# Blacklist - src/ test files +src/**/*.test.ts +src/**/*.test.d.ts +src/**/*.test.js +src/**/*.test.js.map + +# Whitelist - typings/ +!typings/*.d.ts diff --git a/addons/xterm-addon-unicode-graphemes/LICENSE b/addons/xterm-addon-unicode-graphemes/LICENSE new file mode 100644 index 0000000000..b6c38b1547 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2023, The xterm.js authors (https://github.com/xtermjs/xterm.js) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/addons/xterm-addon-unicode-graphemes/README.md b/addons/xterm-addon-unicode-graphemes/README.md
new file mode 100644
index 0000000000..d5922a6fd9
--- /dev/null
+++ b/addons/xterm-addon-unicode-graphemes/README.md
@@ -0,0 +1,24 @@
+## xterm-addon-unicode-graphemes
+
+⚠️ **This addon is currently experimental and may introduce unexpected and non-standard behavior**
+
+An addon providing enhanced Unicode support (including grapheme clustering) for xterm.js.
+
+The file `src/third-party/UnicodeProperties.ts` is generated and depends on the Unicode version. See [the unicode-properties project](https://github.com/PerBothner/unicode-properties) for credits and re-generation instructions.
+
+### Install
+
+```bash
+npm install --save xterm-addon-unicode-graphemes
+```
+
+### Usage
+
+```ts
+import { Terminal } from 'xterm';
+import { UnicodeGraphemesAddon } from 'xterm-addon-unicode-graphemes';
+
+const terminal = new Terminal();
+const unicodeGraphemesAddon = new UnicodeGraphemesAddon();
+terminal.loadAddon(unicodeGraphemesAddon);
+```
diff --git a/addons/xterm-addon-unicode-graphemes/benchmark/UnicodeGraphemeAddon.benchmark.ts b/addons/xterm-addon-unicode-graphemes/benchmark/UnicodeGraphemeAddon.benchmark.ts
new file mode 100644
index 0000000000..6bdd3a9267
--- /dev/null
+++ b/addons/xterm-addon-unicode-graphemes/benchmark/UnicodeGraphemeAddon.benchmark.ts
@@ -0,0 +1,78 @@
+/**
+ * Copyright (c) 2019 The xterm.js authors. All rights reserved.
+ * @license MIT
+ */
+
+import { perfContext, before, ThroughputRuntimeCase } from 'xterm-benchmark';
+
+import { spawn } from 'node-pty';
+import { Utf8ToUtf32, stringFromCodePoint } from 'common/input/TextDecoder';
+import { Terminal } from 'browser/Terminal';
+import { UnicodeGraphemeProvider } from 'UnicodeGraphemeProvider';
+
+function fakedAddonLoad(terminal: any): void {
+  // resembles what UnicodeGraphemesAddon.activate does under the hood
+  terminal.unicodeService.register(new UnicodeGraphemeProvider());
+  terminal.unicodeService.activeVersion = '15-graphemes';
+}
+
+perfContext('Terminal: ls -lR /usr/lib', () => {
+  let content = '';
+  let contentUtf8: Uint8Array;
+
+  before(async () => {
+    // grab output from "ls -lR /usr"
+    const p = spawn('ls', ['--color=auto', '-lR', '/usr/lib'], {
+      name: 'xterm-256color',
+      cols: 80,
+      rows: 25,
+      cwd: process.env.HOME,
+      env: process.env,
+      encoding: (null as unknown as string) // needs to be fixed in node-pty
+    });
+    const chunks: Buffer[] = [];
+    let length = 0;
+    p.on('data', data => {
+      chunks.push(data as unknown as Buffer);
+      length += data.length;
+    });
+    await new Promise<void>(resolve => p.on('exit', () => resolve()));
+    contentUtf8 = Buffer.concat(chunks, length);
+    // translate to content string
+    const buffer = new Uint32Array(contentUtf8.length);
+    const decoder = new Utf8ToUtf32();
+    const codepoints = decoder.decode(contentUtf8, buffer);
+    for (let i = 0; i < codepoints; ++i) {
+      content += stringFromCodePoint(buffer[i]);
+      // peek into content to force flat repr in v8
+      if (!(i % 10000000)) {
+        content[i];
+      }
+    }
+  });
+
+  perfContext('write/string/async', () => {
+    let terminal: Terminal;
+    before(() => {
+      terminal = new Terminal({ cols: 80, rows: 25, scrollback: 1000 });
+      fakedAddonLoad(terminal);
+    });
+    new ThroughputRuntimeCase('', async () => {
+      await new Promise<void>(res => terminal.write(content, res));
+      return { payloadSize: contentUtf8.length };
+    }, { fork: false }).showAverageThroughput();
+  });
+
+  perfContext('write/Utf8/async', () => {
+    let terminal: Terminal;
+    before(() => {
+      terminal = new Terminal({ cols: 80, rows: 25, scrollback: 1000 });
+      fakedAddonLoad(terminal);
+    });
+    new ThroughputRuntimeCase('', async () => {
+      // feed the raw UTF-8 bytes here; the string input is covered by the case above
+      await new Promise<void>(res => terminal.write(contentUtf8, res));
+      return { payloadSize: contentUtf8.length };
+    }, { fork: false }).showAverageThroughput();
+  });
+});
diff --git a/addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json b/addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json
new file mode 100644
index 0000000000..1333eac883
--- /dev/null
+++ b/addons/xterm-addon-unicode-graphemes/benchmark/benchmark.json
@@ -0,0 +1,19 @@
+{
+  "APP_PATH": ".benchmark",
+  "evalConfig": {
+    "tolerance": {
+      "*": [0.75, 1.5],
+      "*.dev": [0.01, 1.5],
+      "*.cv": [0.01, 1.5],
+      "EscapeSequenceParser.benchmark.js.*.averageThroughput.mean": [0.9, 5]
+    },
+    "skip": [
+      "*.median",
+      "*.runs",
+      "*.dev",
+      "*.cv",
+      "EscapeSequenceParser.benchmark.js.*.averageRuntime",
+      "Terminal.benchmark.js.*.averageRuntime"
+    ]
+  }
+}
diff --git a/addons/xterm-addon-unicode-graphemes/benchmark/tsconfig.json b/addons/xterm-addon-unicode-graphemes/benchmark/tsconfig.json
new file mode 100644
index 0000000000..9bc532d3fe
--- /dev/null
+++ b/addons/xterm-addon-unicode-graphemes/benchmark/tsconfig.json
@@ -0,0 +1,23 @@
+{
+  "compilerOptions": {
+    "lib": ["dom", "es6"],
+    "outDir": "../out-benchmark",
+    "types": ["../../../node_modules/@types/node"],
+    "moduleResolution": "node",
+    "strict": false,
+    "target": "es2015",
+    "module": "commonjs",
+    "baseUrl": ".",
+    "paths": {
+      "common/*": ["../../../src/common/*"],
+      "browser/*": ["../../../src/browser/*"],
+      "UnicodeGraphemeProvider": ["../src/UnicodeGraphemeProvider"]
+    }
+  },
+  "include": ["../**/*", "../../../typings/xterm.d.ts"],
+  "exclude": ["../../../**/*test.ts", "../../**/*api.ts"],
+  "references": [
+    { "path": "../../../src/common" },
+    { "path": "../../../src/browser" }
+  ]
+}
diff --git
a/addons/xterm-addon-unicode-graphemes/package.json b/addons/xterm-addon-unicode-graphemes/package.json
new file mode 100644
index 0000000000..38ea7271f2
--- /dev/null
+++ b/addons/xterm-addon-unicode-graphemes/package.json
@@ -0,0 +1,29 @@
+{
+  "name": "xterm-addon-unicode-graphemes",
+  "version": "0.1.0",
+  "author": {
+    "name": "The xterm.js authors",
+    "url": "https://xtermjs.org/"
+  },
+  "main": "lib/xterm-addon-unicode-graphemes.js",
+  "types": "typings/xterm-addon-unicode-graphemes.d.ts",
+  "repository": "https://github.com/xtermjs/xterm.js/tree/master/addons/xterm-addon-unicode-graphemes",
+  "license": "MIT",
+  "keywords": [
+    "terminal",
+    "xterm",
+    "xterm.js"
+  ],
+  "scripts": {
+    "build": "../../node_modules/.bin/tsc -p .",
+    "prepackage": "npm run build",
+    "package": "../../node_modules/.bin/webpack",
+    "prepublishOnly": "npm run package",
+    "benchmark": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json out-benchmark/benchmark/*benchmark.js",
+    "benchmark-baseline": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json --baseline out-benchmark/benchmark/*benchmark.js",
+    "benchmark-eval": "NODE_PATH=../../out:./out:./out-benchmark/ ../../node_modules/.bin/xterm-benchmark -r 5 -c benchmark/benchmark.json --eval out-benchmark/benchmark/*benchmark.js"
+  },
+  "peerDependencies": {
+    "xterm": "^5.0.0"
+  }
+}
diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts
new file mode 100644
index 0000000000..39fbec8407
--- /dev/null
+++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemeProvider.ts
@@ -0,0 +1,93 @@
+/**
+ * Copyright (c) 2023 The xterm.js authors. All rights reserved.
+ * @license MIT
+ */
+
+import { IUnicodeVersionProvider } from 'xterm';
+import { UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services';
+import { UnicodeService } from 'common/services/UnicodeService';
+import * as UC from './third-party/UnicodeProperties';
+
+/**
+ * Unicode version provider that looks up character data with `UC.getInfo`.
+ * Reports version '15-graphemes' when grapheme cluster handling is enabled
+ * and '15' when it is disabled.
+ */
+export class UnicodeGraphemeProvider implements IUnicodeVersionProvider {
+  public readonly version: string;
+  /** When true, characters of width class CHARWIDTH_EA_AMBIGUOUS count as 2 cells. */
+  public ambiguousCharsAreWide: boolean = false;
+  public readonly handleGraphemes: boolean;
+
+  constructor(handleGraphemes: boolean = true) {
+    this.version = handleGraphemes ? '15-graphemes' : '15';
+    this.handleGraphemes = handleGraphemes;
+  }
+
+  /** Shared result for the printable-ASCII fast path in charProperties. */
+  private static readonly _plainNarrowProperties: UnicodeCharProperties
+    = UnicodeService.createPropertyValue(UC.GRAPHEME_BREAK_Other, 1, false);
+
+  /**
+   * Computes the property value (state, width, join flag) for `codepoint`.
+   * @param codepoint The code point to classify.
+   * @param preceding The value returned for the preceding code point; 0 is
+   * treated as "nothing to join with".
+   * @returns The value built by `UnicodeService.createPropertyValue`.
+   */
+  public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties {
+    // Optimize the simple ASCII case, under the condition that
+    // UnicodeService.extractCharKind(preceding) === GRAPHEME_BREAK_Other
+    // (which also covers the case that preceding === 0).
+    if ((codepoint >= 32 && codepoint < 127) && (preceding >> 3) === 0) {
+      return UnicodeGraphemeProvider._plainNarrowProperties;
+    }
+
+    let charInfo = UC.getInfo(codepoint);
+    let w = UC.infoToWidthInfo(charInfo);
+    let shouldJoin = false;
+    if (w >= 2) {
+      // Treat emoji_presentation_selector as WIDE.
+      w = w === 3 || this.ambiguousCharsAreWide || codepoint === 0xfe0f ? 2 : 1;
+    } else {
+      w = 1;
+    }
+    if (preceding !== 0) {
+      const oldWidth = UnicodeService.extractWidth(preceding);
+      if (this.handleGraphemes) {
+        charInfo = UC.shouldJoin(UnicodeService.extractCharKind(preceding), charInfo);
+      } else {
+        // Without grapheme clustering only zero-width characters join the preceding
+        // cell. `w` was normalized to 1 or 2 above, so it cannot be used for this test.
+        charInfo = this.wcwidth(codepoint) === 0 ? 1 : 0;
+      }
+      shouldJoin = charInfo > 0;
+      if (shouldJoin) {
+        if (oldWidth > w) {
+          // A joined character keeps at least the width of what it joins.
+          w = oldWidth;
+        } else if (charInfo === 32) { // UC.GRAPHEME_BREAK_SAW_Regional_Pair (not exported)
+          w = 2;
+        }
+      }
+    }
+    return UnicodeService.createPropertyValue(charInfo, w, shouldJoin);
+  }
+
+  /**
+   * Context-free width of one code point: 0 for Extend and Prepend characters,
+   * 2 for wide ones (and ambiguous ones when `ambiguousCharsAreWide`), else 1.
+   */
+  public wcwidth(codepoint: number): UnicodeCharWidth {
+    const charInfo = UC.getInfo(codepoint);
+    const w = UC.infoToWidthInfo(charInfo);
+    const kind = (charInfo & UC.GRAPHEME_BREAK_MASK) >> UC.GRAPHEME_BREAK_SHIFT;
+    if (kind === UC.GRAPHEME_BREAK_Extend || kind === UC.GRAPHEME_BREAK_Prepend) {
+      return 0;
+    }
+    if (w >= 2 && (w === 3 || this.ambiguousCharsAreWide)) {
+      return 2;
+    }
+    return 1;
+  }
+}
diff --git a/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts
new file mode 100644
index 0000000000..80290edfd6
--- /dev/null
+++ b/addons/xterm-addon-unicode-graphemes/src/UnicodeGraphemesAddon.ts
@@ -0,0 +1,38 @@
+/**
+ * Copyright (c) 2023 The xterm.js authors. All rights reserved.
+ * @license MIT
+ *
+ * UnicodeVersionProvider for V15 with grapheme cluster handling.
+ */
+
+import { Terminal, ITerminalAddon, IUnicodeHandling } from 'xterm';
+import { UnicodeGraphemeProvider } from './UnicodeGraphemeProvider';
+
+
+export class UnicodeGraphemesAddon implements ITerminalAddon {
+  private _provider15Graphemes?: UnicodeGraphemeProvider;
+  private _provider15?: UnicodeGraphemeProvider;
+  private _unicode?: IUnicodeHandling;
+  private _oldVersion: string = '';
+
+  public activate(terminal: Terminal): void {
+    if (! this._provider15) {
+      this._provider15 = new UnicodeGraphemeProvider(false);
+    }
+    if (!
this._provider15Graphemes) { + this._provider15Graphemes = new UnicodeGraphemeProvider(true); + } + const unicode = terminal.unicode; + this._unicode = unicode; + unicode.register(this._provider15); + unicode.register(this._provider15Graphemes); + this._oldVersion = unicode.activeVersion; + unicode.activeVersion = '15-graphemes'; + } + + public dispose(): void { + if (this._unicode) { + this._unicode.activeVersion = this._oldVersion; + } + } +} diff --git a/addons/xterm-addon-unicode-graphemes/src/third-party/UnicodeProperties.ts b/addons/xterm-addon-unicode-graphemes/src/third-party/UnicodeProperties.ts new file mode 100644 index 0000000000..0ee147f85a --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/src/third-party/UnicodeProperties.ts @@ -0,0 +1,147 @@ +import UnicodeTrie from './unicode-trie'; +const trieRaw = "AAARAAAAAABwxwAAAb4LQfTtmw+sVmUdx58LL/ffe/kjzNBV80gW1F3yR+6CvbJiypoZa0paWmAWSluErSBbFtYkkuZykq6QamGJ4WRqo2kFGy6dYWtEq6G1MFAJbRbOVTQr+x7f5+x97q/n/3me87wXzm/3s+f/7/d7/p7znnvOlvGMbQM7wIPgEbAPHABPgcPgefAS+BfYwuv/F/Q2OulBxKcK6TMRPxu8FcwFbwcjYCFYDC4Cl4ArwNXgGvBJsA58UdBDwy+jbBO4La8DtoEd4H7wkNBuN+KPgn3gADgIngaHwFHwF/AyeAWMm4C+TGi3LdiJ/EnIex04A2RgFpgD5oKFYDG4CLwHXAo+IKSvAqt4/evA9bz9jWA6+Cq3dyvCP8HWNwX93wF38/ROcD94SCjP2+1B+BiPP4HwgOD/7xD/I08fRniMx48jPAFeBeuF+n29jE0G08FZvaPHYWZvh9mcEfAOjlhXx/qGfd2QvLO3zccmtMnzliC9lPt+GenD1nyMiK/LNf1cycs+gfAzPJ6vtxe4jhuQtx5sBLeA28G3eb3v8/Beif4HkPewxu5G6N/rMP4qfgEdvwZPgj+AZ8Cx3nYfxiE8Dk6AV0FfH/YEOB28AbwJDIPzQAtcAC4Gl/Z19F+J+NVCehWPr0b46b7RvixvdPg8yr7U10l/BfFN4La8DdgGdoAHwU/AI2AfOACeAofB8+AlcAKwfvyBKeCM/o7NrF9PXmdWv9/Ynot2I7ztIg8dF5I2a8i63CjZU+9Fm2Wcy4U4ZQVYyeOrwVoev57UuxHcJKRvFuJXgnU8/nUebtbYrKmpCUOx31P7UVNTU1NTU1NTU1OGLTz8Xr/77+W7+9vP0or0MxPMbXaizY8FW3sQ3wseB/t5/kGEh8DR/vbzwL8i/Af4Dy8fP8BYE0weaKenI/wV/DhrQG97JspngzlgLpgHzgPzwUhdVpfVZXVZXRa87HxwAVgQ4Pn5WEd85l5TUzOasvezFw/E3b/LoP9D4CpwrcTWWsGXNQOj748/G9k3G56d1KYxmbELwQbwKFiJvBM8nDWlHa5E+AOwCzwLzjkNeeB28NvTeB1OYyr0gQ1g99R23nGE50xj7MPgc+A+8K5Bxj4FHgB/G2z/T9XEzCZjd/S0WYX4Pc3/r/Nn5
I0f6qQXIP5x8ENwBMyYyNhHJ3b0pOCuLrBvM941NTU1JyNHEp+BrC8dMyalt1/m3uWfhmeULzRGp9d3wf0WZSN8+prCr60Wz09tuNmx35sl9Y825HXvRN39KNveaL8flb9f913kbec67kHeTsR3gYcH2uV7ED4m2HhCYi/X9ZuBzvuXv0f8iKIfx5B/XCg7gTgbVPdvAsomCuWnD45eK28UyvL3Jt+s0fU2TVnOXJQvJHUWIb0ELAWXgCt4+UcMumSsEtpch/g6ouMGpG/ieZsc9N/q4YsLd3D9WyPbsWEbfNgO7hN82TWY/n8xKbmsC3xQsYKf+7sjrx2TH+u4H3vhx+OO6+X9hmtXN7C/4r15EPaeBs9J7L7YBeeED/k7wn8fbIf/Rji+yVizmd4vW6bB19cb/PU9w7MxMA60bzPHgM8+zG623+OnzOf55yNc3Gw/k303wveBy3nZcoTXgNVgLfiCRNcG5N3SbIebwZ08fhe4l8d/BH7K4yI/4+HPwS/BAfBks+PzIaHuc3x+ivSL4GUyZ68I6fwZYRNMG2qnz+Th2QjfMtTx/1zE5w61nyN+Q7C3aKgdin1dgrylYBn4INdhGn/Z2FfFiqH01/SUXMvnPD+jC+j85N/RqRhR/DYaS6T+P09K1mD+vzW+5zVqqeVUl0wTz2lK8odJHRGXfBufdGLSoSo3+ZFJ6sl0qvJVNmhI4z4i06mrZ6uT1le1z5h5HE3tMiHPtQ5javu+ItMXUr/MXpmwmyRL3D6U7UwIMyYfczGu0qdqb2pbhcw4xQkhWQBMerrZ/liXrGTbsQwTwrEu4zSczKLrd7fCSKiKn+zSo8BWXMe8myXWOivrUxWi60OPoQ7VIasbQ0S/Ukk3rZVullNhHEL1rYoxUF0PTfm6elWJzq54ZsU4z11ohOy0oxT2izFqCNj4TesXcWZo6+Jfqr1O+1O1beqDagypj2J9F1u2daucj3Eknmq/6PaHrK7Mb1o35DiW1a/a76LuhlDXZX25SOz11S33ErKxDb2/fc/bFKI6axskn+4/W90u9mOtbRf7smsoTdvOfwoRz0t6DaP9k81v6P7Re5aUQudTd303rX+bZzBl97/KR7E+Xbux9lLI+aNr1PfaYLpPDiW2/vrYTX1drMIeXbMye6HXlw8292Jl7ZXxLxRlxXbcaH9drjFlxfa3Qozx8NWRi834lPVZbD+SmN7EJPzc9TVCSVXXDps9L+513b2J7fMu176V2YOhx1A3JrJ8KrLxUumpcu5j/lYT+2tzLRVDZmhjO442a1Clu0ox9VPVXzE/lcS4V0k1D6LI1pJsz8fct9SGbO5l/rmKzTlvsxdj3IvRtC2uv0t1fotltvd2VaCy5Sp5m0EhnZG4CCNxXZrWp/VUIrOjapfnNw11ZNI0V/GWzKNuxtzGKKTEtJeR0NVmpojbtBuW5On0u0is9ZMxvU8ZM+8vEyadtu10oqtP9Q4rcJEm85+Two/QkpGwjI6YkgkhtUfzZOW6fFVexuRri+qj9TJJHZkdmW5abiu0rs6uj2TMfmx06bISUj9tZ9Lja8dVQtox6WpxTJKfW3M4MSTmvU4sWy1CU6BF4jIfdNeDjHWuO1lCWIm2Jr2ixNZvklD2fP0Q6+vsmO4hqN1hJvfDtV5G8mTlsvau4qPP1a64L1skT6QYEzEtq0PzGZOfCbSdSmcKTP7Qs86Ej/1hEpelaV6IMdT5ayu2+nT9tmnnO746XbLxE8t0qOrYtJWhmk9bvaLfsrotRVw1PnR+bcafSUKZ6Mps7smobybJLH2R6WqRkJa1DHV0UmbfUcksiSF0HExSpp+uY0zbTklMaCm7blzEtg8h1rNMXNaYi05ZXsbC75sQ/4+aUxFV2jL50Q3jE0rK2rVtN09By8OHoo1vH2LPSdE323mr2sdu0pUZiDkWLRKWnfeQY6taKzHF9n/GPv8jd/0/egiRvYMR24fU79iY3s9Qva9RlYR8n8HHtq9fMcT1H
RWfdZXiHd9YInt/iI4PTaf+BimXKvdXYU+3hlRpHzs2dVK/cxhDn+xs0I2jzxjL5kpXz1VU72aLtkK/97sALKyQqu25SshvG6h08/cLrlKswRklKXvvXfa+pZt+y8nah5YUv2Oo/ap/X2URdRfico9K69hcp6r6XaCz5Wo/hs/iNTGF6N6tV92/9ZS0Wba9SlT3pKF/e6W674+x9ly+VRL73cPU8ygb31D3eSqfVd+iqET0y3YMYojoO11XqrTt2nPxmeq1HYeqxkmUMt8DiesjpoTSr+qDrD+qPZDiOZxMdH0pRPX8MFUfQtv0Xbs+a1a1NnRryNZ/2+tsaPG5ZoX0RXZei88yZGdo4UMPj/cwv/kMJboxLISuQbE+1VW12Mx7FWOrW3M9Hv7Y+uxyraPSo8B2TGPuLdOeZha+hBKf8Sjsm/oR+7pmsx/oeOraFWdXleeV6oyl41zm+mgSuq9C6ox1TsU8D+m4dwMmf8v2nz7Tm+fYfj7HV1K/x1HWjquvY+2dllxM64ue87Su772zzbXIVC+WxLZTRR9MdkMTypZNH1z6G0tUvoccwxA+hfLNdV+a7MaQqscztMi+7QnxDZXvd1dldWQOyMbApb1Jd2h91Ffx+y9Xfb7tClokboOvrRhrbVpFFO8z+65t2/u4su9MUx028znH01/TGVDmHAj13W1o+1USw+eUfYtpO+b82rRNsb6oPpV+1fdBqddB6n3WDXvdJDZrJ0QfQp6bsc/kqq4BIddHWXGdN1pmWveh58F1zYUW1zmOITHXWOg1XrZvZSWUf77tq1ofqear6muaT1lIQp3bofabSafJVlnfYo9B6LGr8uzz2Xchvzfw+T9PlgiV/A8="; + +declare const Buffer: any; +function _dec(s: string): Uint8Array { + if (typeof Buffer !== 'undefined') return Buffer.from(s, 'base64'); + const bs = atob(s); + const r = new Uint8Array(bs.length); + for (let i = 0; i < r.length; ++i) r[i] = bs.charCodeAt(i); + return r; +} + +const trieData = new UnicodeTrie(_dec(trieRaw)); +export const GRAPHEME_BREAK_MASK = 0xF; +export const GRAPHEME_BREAK_SHIFT = 0; +export const CHARWIDTH_MASK = 0x30; +export const CHARWIDTH_SHIFT = 4; + +// Values for the GRAPHEME_BREAK property +export const GRAPHEME_BREAK_Other = 0; // includes CR, LF, Control +export const GRAPHEME_BREAK_Prepend = 1; +export const GRAPHEME_BREAK_Extend = 2; +export const GRAPHEME_BREAK_Regional_Indicator = 3; +export const GRAPHEME_BREAK_SpacingMark = 4; +export const GRAPHEME_BREAK_Hangul_L = 5; +export const GRAPHEME_BREAK_Hangul_V = 6; +export const GRAPHEME_BREAK_Hangul_T = 7; +export const GRAPHEME_BREAK_Hangul_LV = 8; +export const GRAPHEME_BREAK_Hangul_LVT = 9; +export const GRAPHEME_BREAK_ZWJ = 10; +export const GRAPHEME_BREAK_ExtPic = 11; + +// Only used as return value from 
shouldJoin/shouldJoinBackwards. +// (Must be positive; distinct from other values; +// and become GRAPHEME_BREAK_Other when masked with GRAPHEME_BREAK_MASK.) +const GRAPHEME_BREAK_SAW_Regional_Pair = 32; + +export const CHARWIDTH_NORMAL = 0; +export const CHARWIDTH_FORCE_1COLUMN = 1; +export const CHARWIDTH_EA_AMBIGUOUS = 2; +export const CHARWIDTH_WIDE = 3; + +// In the following 'info' is an encoded value from trie.get(codePoint) + +// In the following 'info' is an encoded value from trie.get(codePoint) + +export function infoToWidthInfo(info: number): number { + return (info & CHARWIDTH_MASK) >> CHARWIDTH_SHIFT; +} + +export function infoToWidth(info: number, ambiguousIsWide = false): 0 | 1 |2 { + const v = infoToWidthInfo(info); + return v < CHARWIDTH_EA_AMBIGUOUS ? 1 + : v >= CHARWIDTH_WIDE || ambiguousIsWide ? 2 : 1; +} + +export function strWidth(str: string, preferWide: boolean): number { + let width = 0; + for (let i = 0; i < str.length;) { + const codePoint = str.codePointAt(i) as number; + width += infoToWidth(getInfo(codePoint), preferWide); + i += (codePoint <= 0xffff) ? 1 : 2; + } + return width; +} + +export function columnToIndexInContext(str: string, startIndex: number, column: number, preferWide: boolean): number { + let rv = 0; + for (let i = startIndex; ;) { + if (i >= str.length) + return i; + const codePoint = str.codePointAt(i) as number; + const w = infoToWidth(getInfo(codePoint), preferWide); + rv += w; + if (rv > column) + return i; + i += (codePoint <= 0xffff) ? 1 : 2; + } +} + + +// Test if should break between beforeState and afterCode. +// Return <= 0 if should break; > 0 if should join. +// 'beforeState' is the return value from the previous possible break; +// the value 0 is start of string. +// 'afterCode' is the GRAPHEME_BREAK_Xxx value for the following codepoint. 
+export function shouldJoin(beforeState: number, afterInfo: number): number { + let beforeCode = (beforeState & GRAPHEME_BREAK_MASK) >> GRAPHEME_BREAK_SHIFT; + let afterCode = (afterInfo & GRAPHEME_BREAK_MASK) >> GRAPHEME_BREAK_SHIFT; + if (_shouldJoin(beforeCode, afterCode)) { + if (afterCode === GRAPHEME_BREAK_Regional_Indicator) + return GRAPHEME_BREAK_SAW_Regional_Pair; + else + return afterCode + 16; + } else + return afterCode - 16; +} + +export function shouldJoinBackwards(beforeInfo: number, afterState: number): number { + let afterCode = (afterState & GRAPHEME_BREAK_MASK) >> GRAPHEME_BREAK_SHIFT; + let beforeCode = (beforeInfo & GRAPHEME_BREAK_MASK) >> GRAPHEME_BREAK_SHIFT; + if (_shouldJoin(beforeCode, afterCode)) { + if (beforeCode === GRAPHEME_BREAK_Regional_Indicator) + return GRAPHEME_BREAK_SAW_Regional_Pair; + else + return beforeCode + 16; + } else + return beforeCode - 16; +} + +/** Doesn't handle an odd number of RI characters. */ +function _shouldJoin(beforeCode: number, afterCode: number): boolean { + if (beforeCode >= GRAPHEME_BREAK_Hangul_L + && beforeCode <= GRAPHEME_BREAK_Hangul_LVT) { + if (beforeCode == GRAPHEME_BREAK_Hangul_L // GB6 + && (afterCode == GRAPHEME_BREAK_Hangul_L + || afterCode == GRAPHEME_BREAK_Hangul_V + || afterCode == GRAPHEME_BREAK_Hangul_LV + || afterCode == GRAPHEME_BREAK_Hangul_LVT)) + return true; + if ((beforeCode == GRAPHEME_BREAK_Hangul_LV // GB7 + || beforeCode == GRAPHEME_BREAK_Hangul_V) + && (afterCode == GRAPHEME_BREAK_Hangul_V + || afterCode == GRAPHEME_BREAK_Hangul_T)) + return true; + if ((beforeCode == GRAPHEME_BREAK_Hangul_LVT // GB8 + || beforeCode == GRAPHEME_BREAK_Hangul_T) + && afterCode == GRAPHEME_BREAK_Hangul_T) + return true; + } + if (afterCode == GRAPHEME_BREAK_Extend // GB9 + || afterCode == GRAPHEME_BREAK_ZWJ + || beforeCode == GRAPHEME_BREAK_Prepend // GB9a + || afterCode == GRAPHEME_BREAK_SpacingMark) // GB9b + return true; + if (beforeCode == GRAPHEME_BREAK_ZWJ // GB11 + && afterCode == 
GRAPHEME_BREAK_ExtPic) + return true; + if (afterCode == GRAPHEME_BREAK_Regional_Indicator // GB12, GB13 + && beforeCode == GRAPHEME_BREAK_Regional_Indicator) + return true; + return false; +} + +export function getInfo(codePoint: number): number { + return trieData.get(codePoint); +} diff --git a/addons/xterm-addon-unicode-graphemes/src/third-party/tiny-inflate.ts b/addons/xterm-addon-unicode-graphemes/src/third-party/tiny-inflate.ts new file mode 100644 index 0000000000..a8d2e8a4bd --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/src/third-party/tiny-inflate.ts @@ -0,0 +1,380 @@ +var TINF_OK = 0; +var TINF_DATA_ERROR = -3; + +class Tree { + table = new Uint16Array(16); /* table of code length counts */ + trans = new Uint16Array(288); /* code -> symbol translation table */ +}; + +class Data { + tag: number = 0; + bitcount: number = 0; + destLen: number = 0; + ltree: Tree; + dtree: Tree; + source: Uint8Array; + dest: Uint8Array; + sourceIndex: number = 0; + + constructor(source: Uint8Array, dest: Uint8Array) { + this.source = source; + this.dest = dest; + this.ltree = new Tree(); /* dynamic length/symbol tree */ + this.dtree = new Tree(); /* dynamic distance tree */ + } +} + +/* --------------------------------------------------- * + * -- uninitialized global data (static structures) -- * + * --------------------------------------------------- */ + +var sltree = new Tree(); +var sdtree = new Tree(); + +/* extra bits and base tables for length codes */ +var length_bits = new Uint8Array(30); +var length_base = new Uint16Array(30); + +/* extra bits and base tables for distance codes */ +var dist_bits = new Uint8Array(30); +var dist_base = new Uint16Array(30); + +/* special ordering of code length codes */ +var clcidx = new Uint8Array([ + 16, 17, 18, 0, 8, 7, 9, 6, + 10, 5, 11, 4, 12, 3, 13, 2, + 14, 1, 15 +]); + +/* used by tinf_decode_trees, avoids allocations every call */ +const code_tree = new Tree(); +const lengths = new Uint8Array(288 + 32); + +/* 
----------------------- * + * -- utility functions -- * + * ----------------------- */ + +/* build extra bits and base tables */ +function tinf_build_bits_base(bits: Uint8Array, base: Uint16Array, delta: number, first: number): void { + var i, sum; + + /* build bits table */ + for (i = 0; i < delta; ++i) bits[i] = 0; + for (i = 0; i < 30 - delta; ++i) bits[i + delta] = i / delta | 0; + + /* build base table */ + for (sum = first, i = 0; i < 30; ++i) { + base[i] = sum; + sum += 1 << bits[i]; + } +} + +/* build the fixed huffman trees */ +function tinf_build_fixed_trees(lt: Tree, dt: Tree): void { + var i; + + /* build fixed length tree */ + for (i = 0; i < 7; ++i) lt.table[i] = 0; + + lt.table[7] = 24; + lt.table[8] = 152; + lt.table[9] = 112; + + for (i = 0; i < 24; ++i) lt.trans[i] = 256 + i; + for (i = 0; i < 144; ++i) lt.trans[24 + i] = i; + for (i = 0; i < 8; ++i) lt.trans[24 + 144 + i] = 280 + i; + for (i = 0; i < 112; ++i) lt.trans[24 + 144 + 8 + i] = 144 + i; + + /* build fixed distance tree */ + for (i = 0; i < 5; ++i) dt.table[i] = 0; + + dt.table[5] = 32; + + for (i = 0; i < 32; ++i) dt.trans[i] = i; +} + +/* given an array of code lengths, build a tree */ +var offs = new Uint16Array(16); + +function tinf_build_tree(t: Tree, lengths: Uint8Array, off: number, num: number): void { + var i, sum; + + /* clear code length count table */ + for (i = 0; i < 16; ++i) t.table[i] = 0; + + /* scan symbol lengths, and sum code length counts */ + for (i = 0; i < num; ++i) t.table[lengths[off + i]]++; + + t.table[0] = 0; + + /* compute offset table for distribution sort */ + for (sum = 0, i = 0; i < 16; ++i) { + offs[i] = sum; + sum += t.table[i]; + } + + /* create code->symbol translation table (symbols sorted by code) */ + for (i = 0; i < num; ++i) { + if (lengths[off + i]) t.trans[offs[lengths[off + i]]++] = i; + } +} + +/* ---------------------- * + * -- decode functions -- * + * ---------------------- */ + +/* get one bit from source stream */ +function 
tinf_getbit(d: Data): number { + /* check if tag is empty */ + if (!d.bitcount--) { + /* load next tag */ + d.tag = d.source[d.sourceIndex++]; + d.bitcount = 7; + } + + /* shift bit out of tag */ + var bit = d.tag & 1; + d.tag >>>= 1; + + return bit; +} + +/* read a num bit value from a stream and add base */ +function tinf_read_bits(d: Data, num: number, base: number): number { + if (!num) + return base; + + while (d.bitcount < 24) { + d.tag |= d.source[d.sourceIndex++] << d.bitcount; + d.bitcount += 8; + } + + var val = d.tag & (0xffff >>> (16 - num)); + d.tag >>>= num; + d.bitcount -= num; + return val + base; +} + +/* given a data stream and a tree, decode a symbol */ +function tinf_decode_symbol(d: Data, t: Tree): number { + while (d.bitcount < 24) { + d.tag |= d.source[d.sourceIndex++] << d.bitcount; + d.bitcount += 8; + } + + var sum = 0, cur = 0, len = 0; + var tag = d.tag; + + /* get more bits while code value is above sum */ + do { + cur = 2 * cur + (tag & 1); + tag >>>= 1; + ++len; + + sum += t.table[len]; + cur -= t.table[len]; + } while (cur >= 0); + + d.tag = tag; + d.bitcount -= len; + + return t.trans[sum + cur]; +} + +/* given a data stream, decode dynamic trees from it */ +function tinf_decode_trees(d: Data, lt: Tree, dt: Tree): void { + var hlit, hdist, hclen; + var i, num, length; + + /* get 5 bits HLIT (257-286) */ + hlit = tinf_read_bits(d, 5, 257); + + /* get 5 bits HDIST (1-32) */ + hdist = tinf_read_bits(d, 5, 1); + + /* get 4 bits HCLEN (4-19) */ + hclen = tinf_read_bits(d, 4, 4); + + for (i = 0; i < 19; ++i) lengths[i] = 0; + + /* read code lengths for code length alphabet */ + for (i = 0; i < hclen; ++i) { + /* get 3 bits code length (0-7) */ + var clen = tinf_read_bits(d, 3, 0); + lengths[clcidx[i]] = clen; + } + + /* build code length tree */ + tinf_build_tree(code_tree, lengths, 0, 19); + + /* decode code lengths for the dynamic trees */ + for (num = 0; num < hlit + hdist;) { + var sym = tinf_decode_symbol(d, code_tree); + + switch 
(sym) { + case 16: + /* copy previous code length 3-6 times (read 2 bits) */ + var prev = lengths[num - 1]; + for (length = tinf_read_bits(d, 2, 3); length; --length) { + lengths[num++] = prev; + } + break; + case 17: + /* repeat code length 0 for 3-10 times (read 3 bits) */ + for (length = tinf_read_bits(d, 3, 3); length; --length) { + lengths[num++] = 0; + } + break; + case 18: + /* repeat code length 0 for 11-138 times (read 7 bits) */ + for (length = tinf_read_bits(d, 7, 11); length; --length) { + lengths[num++] = 0; + } + break; + default: + /* values 0-15 represent the actual code lengths */ + lengths[num++] = sym; + break; + } + } + + /* build dynamic trees */ + tinf_build_tree(lt, lengths, 0, hlit); + tinf_build_tree(dt, lengths, hlit, hdist); +} + +/* ----------------------------- * + * -- block inflate functions -- * + * ----------------------------- */ + +/* given a stream and two trees, inflate a block of data */ +function tinf_inflate_block_data(d: Data, lt: Tree, dt: Tree): number { + for (;;) { + var sym = tinf_decode_symbol(d, lt); + + /* check for end of block */ + if (sym === 256) { + return TINF_OK; + } + + if (sym < 256) { + d.dest[d.destLen++] = sym; + } else { + var length, dist, offs; + var i; + + sym -= 257; + + /* possibly get more bits from length code */ + length = tinf_read_bits(d, length_bits[sym], length_base[sym]); + + dist = tinf_decode_symbol(d, dt); + + /* possibly get more bits from distance code */ + offs = d.destLen - tinf_read_bits(d, dist_bits[dist], dist_base[dist]); + + /* copy match */ + for (i = offs; i < offs + length; ++i) { + d.dest[d.destLen++] = d.dest[i]; + } + } + } +} + +/* inflate an uncompressed block of data */ +function tinf_inflate_uncompressed_block(d: Data) { + var length, invlength; + var i; + + /* unread from bitbuffer */ + while (d.bitcount > 8) { + d.sourceIndex--; + d.bitcount -= 8; + } + + /* get length */ + length = d.source[d.sourceIndex + 1]; + length = 256 * length + d.source[d.sourceIndex]; + + /* 
get one's complement of length */ + invlength = d.source[d.sourceIndex + 3]; + invlength = 256 * invlength + d.source[d.sourceIndex + 2]; + + /* check length */ + if (length !== (~invlength & 0x0000ffff)) + return TINF_DATA_ERROR; + + d.sourceIndex += 4; + + /* copy block */ + for (i = length; i; --i) + d.dest[d.destLen++] = d.source[d.sourceIndex++]; + + /* make sure we start next block on a byte boundary */ + d.bitcount = 0; + + return TINF_OK; +} + +/* inflate stream from source to dest */ +function tinf_uncompress(source: Uint8Array, dest: Uint8Array) { + var d = new Data(source, dest); + var bfinal, btype, res; + + do { + /* read final block flag */ + bfinal = tinf_getbit(d); + + /* read block type (2 bits) */ + btype = tinf_read_bits(d, 2, 0); + + /* decompress block */ + switch (btype) { + case 0: + /* decompress uncompressed block */ + res = tinf_inflate_uncompressed_block(d); + break; + case 1: + /* decompress block with fixed huffman trees */ + res = tinf_inflate_block_data(d, sltree, sdtree); + break; + case 2: + /* decompress block with dynamic huffman trees */ + tinf_decode_trees(d, d.ltree, d.dtree); + res = tinf_inflate_block_data(d, d.ltree, d.dtree); + break; + default: + res = TINF_DATA_ERROR; + } + + if (res !== TINF_OK) + throw new Error('Data error'); + + } while (!bfinal); + + if (d.destLen < d.dest.length) { + if (typeof d.dest.slice === 'function') + return d.dest.slice(0, d.destLen); + else + return d.dest.subarray(0, d.destLen); + } + + return d.dest; +} + +/* -------------------- * + * -- initialization -- * + * -------------------- */ + +/* build fixed huffman trees */ +tinf_build_fixed_trees(sltree, sdtree); + +/* build extra bits and base tables */ +tinf_build_bits_base(length_bits, length_base, 4, 3); +tinf_build_bits_base(dist_bits, dist_base, 2, 1); + +/* fix a special case */ +length_bits[28] = 0; +length_base[28] = 258; + +export default tinf_uncompress diff --git 
a/addons/xterm-addon-unicode-graphemes/src/third-party/unicode-trie.ts b/addons/xterm-addon-unicode-graphemes/src/third-party/unicode-trie.ts new file mode 100644 index 0000000000..2125f04d67 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/src/third-party/unicode-trie.ts @@ -0,0 +1,134 @@ +import inflate from './tiny-inflate' + +// Shift size for getting the index-1 table offset. +const SHIFT_1 = 6 + 5; + +// Shift size for getting the index-2 table offset. +const SHIFT_2 = 5; + +// Difference between the two shift sizes, +// for getting an index-1 offset from an index-2 offset. 6=11-5 +const SHIFT_1_2 = SHIFT_1 - SHIFT_2; + +// Number of index-1 entries for the BMP. 32=0x20 +// This part of the index-1 table is omitted from the serialized form. +const OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1; + +// Number of entries in an index-2 block. 64=0x40 +const INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2; + +// Mask for getting the lower bits for the in-index-2-block offset. +const INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1; + +// Shift size for shifting left the index array values. +// Increases possible data size with 16-bit index values at the cost +// of compactability. +// This requires data blocks to be aligned by DATA_GRANULARITY. +const INDEX_SHIFT = 2; + +// Number of entries in a data block. 32=0x20 +const DATA_BLOCK_LENGTH = 1 << SHIFT_2; + +// Mask for getting the lower bits for the in-data-block offset. +const DATA_MASK = DATA_BLOCK_LENGTH - 1; + +// The part of the index-2 table for U+D800..U+DBFF stores values for +// lead surrogate code _units_ not code _points_. +// Values for lead surrogate code _points_ are indexed with this portion of the table. +// Length=32=0x20=0x400>>SHIFT_2. (There are 1024=0x400 lead surrogates.) +const LSCP_INDEX_2_OFFSET = 0x10000 >> SHIFT_2; +const LSCP_INDEX_2_LENGTH = 0x400 >> SHIFT_2; + +// Count the lengths of both BMP pieces. 
2080=0x820 +const INDEX_2_BMP_LENGTH = LSCP_INDEX_2_OFFSET + LSCP_INDEX_2_LENGTH; + +// The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820. +// Length 32=0x20 for lead bytes C0..DF, regardless of SHIFT_2. +const UTF8_2B_INDEX_2_OFFSET = INDEX_2_BMP_LENGTH; +const UTF8_2B_INDEX_2_LENGTH = 0x800 >> 6; // U+0800 is the first code point after 2-byte UTF-8 + +// The index-1 table, only used for supplementary code points, at offset 2112=0x840. +// Variable length, for code points up to highStart, where the last single-value range starts. +// Maximum length 512=0x200=0x100000>>SHIFT_1. +// (For 0x100000 supplementary code points U+10000..U+10ffff.) +// +// The part of the index-2 table for supplementary code points starts +// after this index-1 table. +// +// Both the index-1 table and the following part of the index-2 table +// are omitted completely if there is only BMP data. +const INDEX_1_OFFSET = UTF8_2B_INDEX_2_OFFSET + UTF8_2B_INDEX_2_LENGTH; + +// The alignment size of a data block. Also the granularity for compaction. 
+const DATA_GRANULARITY = 1 << INDEX_SHIFT; + +const isBigEndian = (new Uint8Array(new Uint32Array([0x12345678]).buffer)[0] === 0x12); + +class UnicodeTrie { + private data: Uint32Array; + private highStart: number; + private errorValue: number; + constructor(data: Uint8Array) { + // read binary format (header offsets are relative to this view, not to the start of its backing buffer) + + const view = new DataView(data.buffer, data.byteOffset, data.byteLength); + this.highStart = view.getUint32(0, true); + this.errorValue = view.getUint32(4, true); + let uncompressedLength = view.getUint32(8, true); + data = data.subarray(12); + + // double inflate the actual trie data + data = inflate(data, new Uint8Array(uncompressedLength)); + data = inflate(data, new Uint8Array(uncompressedLength)); + + if (isBigEndian) { + // swap bytes from little-endian + const len = data.length; + for (let i = 0; i < len; i += 4) { + // Exchange data[i] and data[i + 3]: + let x = data[i]; data[i] = data[i+3]; data[i+3] = x; + // Exchange data[i + 1] and data[i + 2]: + let y = data[i+1]; data[i+1] = data[i+2]; data[i+2] = y; + } + } + + this.data = new Uint32Array(data.buffer); + + } + + get(codePoint: number): number { + let index; + if ((codePoint < 0) || (codePoint > 0x10ffff)) { + return this.errorValue; + } + + if ((codePoint < 0xd800) || ((codePoint > 0xdbff) && (codePoint <= 0xffff))) { + // Ordinary BMP code point, excluding leading surrogates. + // BMP uses a single level lookup. BMP index starts at offset 0 in the index. + // data is stored in the index array itself. + index = (this.data[codePoint >> SHIFT_2] << INDEX_SHIFT) + (codePoint & DATA_MASK); + return this.data[index]; + } + + if (codePoint <= 0xffff) { + // Lead Surrogate Code Point. A Separate index section is stored for + // lead surrogate code units and code points. + // The main index has the code unit data. + // For this function, we need the code point data. 
+ index = (this.data[LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> SHIFT_2)] << INDEX_SHIFT) + (codePoint & DATA_MASK); + return this.data[index]; + } + + if (codePoint < this.highStart) { + // Supplemental code point, use two-level lookup. + index = this.data[(INDEX_1_OFFSET - OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> SHIFT_1)]; + index = this.data[index + ((codePoint >> SHIFT_2) & INDEX_2_MASK)]; + index = (index << INDEX_SHIFT) + (codePoint & DATA_MASK); + return this.data[index]; + } + + return this.data[this.data.length - DATA_GRANULARITY]; + } +} + +export default UnicodeTrie diff --git a/addons/xterm-addon-unicode-graphemes/src/tsconfig.json b/addons/xterm-addon-unicode-graphemes/src/tsconfig.json new file mode 100644 index 0000000000..60824fee94 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/src/tsconfig.json @@ -0,0 +1,33 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "es2021", + "lib": [ + "dom", + "es2021" + ], + "rootDir": ".", + "outDir": "../out", + "sourceMap": true, + "removeComments": true, + "strict": true, + "baseUrl": ".", + "paths": { + "common/*": [ + "../../../src/common/*" + ] + }, + "types": [ + "../../../node_modules/@types/mocha" + ] + }, + "include": [ + "./**/*", + "../../../typings/xterm.d.ts" + ], + "references": [ + { + "path": "../../../src/common" + } + ] +} diff --git a/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts new file mode 100644 index 0000000000..e13ad2921c --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/test/UnicodeGraphemesAddon.api.ts @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2019 The xterm.js authors. All rights reserved. 
+ * @license MIT + */ + +import { assert } from 'chai'; +import { openTerminal, launchBrowser } from '../../../out-test/api/TestUtils'; +import { Browser, Page } from '@playwright/test'; + +const APP = 'http://127.0.0.1:3001/test'; + +let browser: Browser; +let page: Page; +const width = 800; +const height = 600; + +describe('UnicodeGraphemesAddon', () => { + before(async function(): Promise { + browser = await launchBrowser(); + page = await (await browser.newContext()).newPage(); + await page.setViewportSize({ width, height }); + }); + + after(async () => { + await browser.close(); + }); + + beforeEach(async function(): Promise { + await page.goto(APP); + await openTerminal(page); + }); + async function evalWidth(str: string): Promise { + return page.evaluate(`window.term._core.unicodeService.getStringCellWidth('${str}')`); + } + const ourVersion = '15-graphemes'; + it('wcwidth V15 emoji test', async () => { + await page.evaluate(` + window.unicode = new UnicodeGraphemesAddon(); + window.term.loadAddon(window.unicode); + `); + // should have loaded '15-graphemes' + assert.deepEqual(await page.evaluate(`window.term.unicode.versions`), ['6', '15', '15-graphemes']); + // switch should not throw + await page.evaluate(`window.term.unicode.activeVersion = '${ourVersion}';`); + assert.equal(await page.evaluate(`window.term.unicode.activeVersion`), ourVersion); + assert.equal(await evalWidth('🤣🤣🤣🤣🤣🤣🤣🤣🤣🤣'), 20, + '10 emoji - width 10 in V6; 20 in V11 or later'); + assert.equal(await evalWidth('\u{1F476}\u{1F3FF}\u{1F476}'), 4, + 'baby with emoji modifier fitzpatrick type-6; baby'); + assert.equal(await evalWidth('\u{1F469}\u200d\u{1f469}\u200d\u{1f466}'), 2, + 'woman+zwj+woman+zwj+boy'); + assert.equal(await evalWidth('=\u{1F3CB}\u{FE0F}=\u{F3CB}\u{1F3FE}\u200D\u2640='), 7, + 'person lifting weights (plain, emoji); woman lighting weights, medium dark'); + assert.equal(await evalWidth('\u{1F469}\u{1F469}\u{200D}\u{1F393}\u{1F468}\u{1F3FF}\u{200D}\u{1F393}'), 6, + 'woman; 
woman student; man student dark'); + assert.equal(await evalWidth('\u{1f1f3}\u{1f1f4}/'), 3, + 'regional indicator symbol letters N and O, cluster'); + assert.equal(await evalWidth('\u{1f1f3}/\u{1f1f4}'), 3, + 'regional indicator symbol letters N and O, separated'); + assert.equal(await evalWidth('\u0061\u0301'), 1, + 'letter a with acute accent'); + assert.equal(await evalWidth('{\u1100\u1161\u11a8\u1100\u1161}'), 6, + 'Korean Jamo'); + assert.equal(await evalWidth('\uAC00=\uD685='), 6, + 'Hangul syllables (pre-composed)'); + assert.equal(await evalWidth('(\u26b0\ufe0e)'), 3, + 'coffin with text presentation'); + assert.equal(await evalWidth('(\u26b0\ufe0f)'), 4, + 'coffin with emoji presentation'); + assert.equal(await evalWidth(''), 16, + 'Égalité (using separate acute) emoij_presentation'); + }); +}); diff --git a/addons/xterm-addon-unicode-graphemes/test/tsconfig.json b/addons/xterm-addon-unicode-graphemes/test/tsconfig.json new file mode 100644 index 0000000000..4b3cb31cfd --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/test/tsconfig.json @@ -0,0 +1,35 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "es2015", + "lib": [ + "dom", + "es2015" + ], + "rootDir": ".", + "outDir": "../out-test", + "sourceMap": true, + "removeComments": true, + "strict": true, + "baseUrl": ".", + "paths": { + "common/*": [ + "../../../src/common/*" + ] + }, + "types": [ + "../../../node_modules/@types/mocha", + "../../../node_modules/@types/node", + "../../../out-test/api/TestUtils" + ] + }, + "include": [ + "./**/*", + "../../../typings/xterm.d.ts" + ], + "references": [ + { + "path": "../../../src/common" + } + ] +} diff --git a/addons/xterm-addon-unicode-graphemes/tsconfig.json b/addons/xterm-addon-unicode-graphemes/tsconfig.json new file mode 100644 index 0000000000..0e7b5c3502 --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/tsconfig.json @@ -0,0 +1,9 @@ +{ + "files": [], + "include": [], + "references": [ + { "path": "./src" }, + { "path": 
"./test" }, + { "path": "./benchmark" } + ] +} diff --git a/addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode-graphemes.d.ts b/addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode-graphemes.d.ts new file mode 100644 index 0000000000..e4a333504f --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/typings/xterm-addon-unicode-graphemes.d.ts @@ -0,0 +1,14 @@ +/** + * Copyright (c) 2023 The xterm.js authors. All rights reserved. + * @license MIT + */ + +import { Terminal, ITerminalAddon } from 'xterm'; + +declare module 'xterm-addon-unicode-graphemes' { + export class UnicodeGraphemesAddon implements ITerminalAddon { + constructor(); + public activate(terminal: Terminal): void; + public dispose(): void; + } +} diff --git a/addons/xterm-addon-unicode-graphemes/webpack.config.js b/addons/xterm-addon-unicode-graphemes/webpack.config.js new file mode 100644 index 0000000000..89abf53aad --- /dev/null +++ b/addons/xterm-addon-unicode-graphemes/webpack.config.js @@ -0,0 +1,38 @@ +/** + * Copyright (c) 2019 The xterm.js authors. All rights reserved. 
+ * @license MIT + */ + +const path = require('path'); + +const addonName = 'UnicodeGraphemesAddon'; +const mainFile = 'xterm-addon-unicode-graphemes.js'; + +module.exports = { + entry: `./out/${addonName}.js`, + devtool: 'source-map', + module: { + rules: [ + { + test: /\.js$/, + use: ["source-map-loader"], + enforce: "pre", + exclude: /node_modules/ + } + ] + }, + resolve: { + modules: ['./node_modules'], + extensions: [ '.js' ], + alias: { + common: path.resolve('../../out/common') + } + }, + output: { + filename: mainFile, + path: path.resolve('./lib'), + library: addonName, + libraryTarget: 'umd' + }, + mode: 'production' +}; diff --git a/addons/xterm-addon-unicode11/src/UnicodeV11.ts b/addons/xterm-addon-unicode11/src/UnicodeV11.ts index b616091ab3..c1ef08c169 100644 --- a/addons/xterm-addon-unicode11/src/UnicodeV11.ts +++ b/addons/xterm-addon-unicode11/src/UnicodeV11.ts @@ -4,8 +4,8 @@ */ import { IUnicodeVersionProvider } from 'xterm'; - -type CharWidth = 0 | 1 | 2; +import { UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services'; +import { UnicodeService } from 'common/services/UnicodeService'; const BMP_COMBINING = [ [0x0300, 0x036F], [0x0483, 0x0489], [0x0591, 0x05BD], @@ -210,12 +210,26 @@ export class UnicodeV11 implements IUnicodeVersionProvider { } } - public wcwidth(num: number): CharWidth { + public wcwidth(num: number): UnicodeCharWidth { if (num < 32) return 0; if (num < 127) return 1; - if (num < 65536) return table[num] as CharWidth; + if (num < 65536) return table[num] as UnicodeCharWidth; if (bisearch(num, HIGH_COMBINING)) return 0; if (bisearch(num, HIGH_WIDE)) return 2; return 1; } + + public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + let width = this.wcwidth(codepoint); + let shouldJoin = width === 0 && preceding !== 0; + if (shouldJoin) { + const oldWidth = UnicodeService.extractWidth(preceding); + if (oldWidth === 0) { + shouldJoin = false; + } else if (oldWidth > 
width) { + width = oldWidth; + } + } + return UnicodeService.createPropertyValue(0, width, shouldJoin); + } } diff --git a/addons/xterm-addon-unicode11/test/Unicode11Addon.api.ts b/addons/xterm-addon-unicode11/test/Unicode11Addon.api.ts index c7f3588f33..83c7a61d47 100644 --- a/addons/xterm-addon-unicode11/test/Unicode11Addon.api.ts +++ b/addons/xterm-addon-unicode11/test/Unicode11Addon.api.ts @@ -36,7 +36,7 @@ describe('Unicode11Addon', () => { window.term.loadAddon(window.unicode11); `); // should have loaded '11' - assert.deepEqual(await page.evaluate(`window.term.unicode.versions`), ['6', '11']); + assert.deepEqual((await page.evaluate(`window.term.unicode.versions`) as string[]).includes('11'), true); // switch should not throw await page.evaluate(`window.term.unicode.activeVersion = '11';`); assert.deepEqual(await page.evaluate(`window.term.unicode.activeVersion`), '11'); diff --git a/demo/client.ts b/demo/client.ts index 06949a8a25..9daaeacc9f 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -18,6 +18,7 @@ import { SerializeAddon } from '../addons/xterm-addon-serialize/out/SerializeAdd import { WebLinksAddon } from '../addons/xterm-addon-web-links/out/WebLinksAddon'; import { WebglAddon } from '../addons/xterm-addon-webgl/out/WebglAddon'; import { Unicode11Addon } from '../addons/xterm-addon-unicode11/out/Unicode11Addon'; +import { UnicodeGraphemesAddon } from '../addons/xterm-addon-unicode-graphemes/out/UnicodeGraphemesAddon'; import { LigaturesAddon } from '../addons/xterm-addon-ligatures/out/LigaturesAddon'; // Playwright/WebKit on Windows does not support WebAssembly https://stackoverflow.com/q/62311688/1156119 @@ -38,6 +39,7 @@ if ('WebAssembly' in window) { // import { WebLinksAddon } from 'xterm-addon-web-links'; // import { WebglAddon } from 'xterm-addon-webgl'; // import { Unicode11Addon } from 'xterm-addon-unicode11'; +// import { UnicodeGraphemesAddon } from 'xterm-addon-unicode-graphemes'; // import { LigaturesAddon } from 
'xterm-addon-ligatures'; // Pulling in the module's types relies on the above, it's looks a @@ -56,6 +58,7 @@ export interface IWindowWithTerminal extends Window { WebLinksAddon?: typeof WebLinksAddon; // eslint-disable-line @typescript-eslint/naming-convention WebglAddon?: typeof WebglAddon; // eslint-disable-line @typescript-eslint/naming-convention Unicode11Addon?: typeof Unicode11Addon; // eslint-disable-line @typescript-eslint/naming-convention + UnicodeGraphemesAddon?: typeof UnicodeGraphemesAddon; // eslint-disable-line @typescript-eslint/naming-convention LigaturesAddon?: typeof LigaturesAddon; // eslint-disable-line @typescript-eslint/naming-convention } declare let window: IWindowWithTerminal; @@ -67,7 +70,7 @@ let socket; let pid; let autoResize: boolean = true; -type AddonType = 'attach' | 'canvas' | 'fit' | 'image' | 'search' | 'serialize' | 'unicode11' | 'webLinks' | 'webgl' | 'ligatures'; +type AddonType = 'attach' | 'canvas' | 'fit' | 'image' | 'search' | 'serialize' | 'unicode11' | 'unicodeGraphemes' | 'webLinks' | 'webgl' | 'ligatures'; interface IDemoAddon { name: T; @@ -81,8 +84,9 @@ interface IDemoAddon { T extends 'serialize' ? typeof SerializeAddon : T extends 'webLinks' ? typeof WebLinksAddon : T extends 'unicode11' ? typeof Unicode11Addon : - T extends 'ligatures' ? typeof LigaturesAddon : - typeof WebglAddon + T extends 'unicodeGraphemes' ? typeof UnicodeGraphemesAddon : + T extends 'ligatures' ? typeof LigaturesAddon : + typeof WebglAddon ); instance?: ( T extends 'attach' ? AttachAddon : @@ -94,8 +98,9 @@ interface IDemoAddon { T extends 'webLinks' ? WebLinksAddon : T extends 'webgl' ? WebglAddon : T extends 'unicode11' ? typeof Unicode11Addon : - T extends 'ligatures' ? typeof LigaturesAddon : - never + T extends 'unicodeGraphemes' ? typeof UnicodeGraphemesAddon : + T extends 'ligatures' ? 
typeof LigaturesAddon : + never ); } @@ -109,6 +114,7 @@ const addons: { [T in AddonType]: IDemoAddon } = { webLinks: { name: 'webLinks', ctor: WebLinksAddon, canChange: true }, webgl: { name: 'webgl', ctor: WebglAddon, canChange: true }, unicode11: { name: 'unicode11', ctor: Unicode11Addon, canChange: true }, + unicodeGraphemes: { name: 'unicodeGraphemes', ctor: UnicodeGraphemesAddon, canChange: true }, ligatures: { name: 'ligatures', ctor: LigaturesAddon, canChange: true } }; @@ -178,6 +184,7 @@ const disposeRecreateButtonHandler: () => void = () => { addons.search.instance = undefined; addons.serialize.instance = undefined; addons.unicode11.instance = undefined; + addons.unicodeGraphemes.instance = undefined; addons.ligatures.instance = undefined; addons.webLinks.instance = undefined; addons.webgl.instance = undefined; @@ -226,6 +233,7 @@ if (document.location.pathname === '/test') { window.SearchAddon = SearchAddon; window.SerializeAddon = SerializeAddon; window.Unicode11Addon = Unicode11Addon; + window.UnicodeGraphemesAddon = UnicodeGraphemesAddon; window.LigaturesAddon = LigaturesAddon; window.WebLinksAddon = WebLinksAddon; window.WebglAddon = WebglAddon; @@ -244,6 +252,7 @@ if (document.location.pathname === '/test') { document.getElementById('ansi-colors').addEventListener('click', ansiColorsTest); document.getElementById('osc-hyperlinks').addEventListener('click', addAnsiHyperlink); document.getElementById('sgr-test').addEventListener('click', sgrTest); + document.getElementById('add-grapheme-clusters').addEventListener('click', addGraphemeClusters); document.getElementById('add-decoration').addEventListener('click', addDecoration); document.getElementById('add-overview-ruler').addEventListener('click', addOverviewRuler); document.getElementById('weblinks-test').addEventListener('click', testWeblinks); @@ -277,7 +286,7 @@ function createTerminal(): void { addons.serialize.instance = new SerializeAddon(); addons.fit.instance = new FitAddon(); 
addons.image.instance = new ImageAddon(); - addons.unicode11.instance = new Unicode11Addon(); + addons.unicodeGraphemes.instance = new UnicodeGraphemesAddon(); try { // try to start with webgl renderer (might throw on older safari/webkit) addons.webgl.instance = new WebglAddon(); } catch (e) { @@ -288,7 +297,7 @@ function createTerminal(): void { typedTerm.loadAddon(addons.image.instance); typedTerm.loadAddon(addons.search.instance); typedTerm.loadAddon(addons.serialize.instance); - typedTerm.loadAddon(addons.unicode11.instance); + typedTerm.loadAddon(addons.unicodeGraphemes.instance); typedTerm.loadAddon(addons.webLinks.instance); window.term = term; // Expose `term` to window for debugging purposes @@ -613,6 +622,9 @@ function initAddons(term: TerminalType): void { if (name === 'unicode11' && checkbox.checked) { term.unicode.activeVersion = '11'; } + if (name === 'unicodeGraphemes' && checkbox.checked) { + term.unicode.activeVersion = '15-graphemes'; + } if (name === 'search' && checkbox.checked) { addon.instance.onDidChangeResults(e => updateFindResults(e)); } @@ -648,6 +660,8 @@ function initAddons(term: TerminalType): void { }, 0); } else if (name === 'unicode11') { term.unicode.activeVersion = '11'; + } else if (name === 'unicodeGraphemes') { + term.unicode.activeVersion = '15-graphemes'; } else if (name === 'search') { addon.instance.onDidChangeResults(e => updateFindResults(e)); } @@ -662,7 +676,7 @@ function initAddons(term: TerminalType): void { addons.webgl.instance.textureAtlas.remove(); } else if (name === 'canvas') { addons.canvas.instance.textureAtlas.remove(); - } else if (name === 'unicode11') { + } else if (name === 'unicode11' || name === 'unicodeGraphemes') { term.unicode.activeVersion = '6'; } addon.instance!.dispose(); @@ -1113,6 +1127,24 @@ function getRandomSgr(): string { return randomSgrAttributes[Math.floor(Math.random() * randomSgrAttributes.length)]; } +function addGraphemeClusters(): void { + term.write('\n\n\r'); + 
term.writeln('🤣🤣🤣🤣🤣🤣🤣🤣🤣🤣 [Simple emoji v6: 10 cells, v15: 20 cells]'); + term.writeln('\u{1F476}\u{1F3FF}\u{1F476} [baby with emoji modifier fitzpatrick type-6; baby]'); + term.writeln('\u{1F469}\u200d\u{1f469}\u200d\u{1f466} [woman+zwj+woman+zwj+boy]'); + term.writeln('\u{1F64B}\u{1F64B}\u{200D}\u{2642}\u{FE0F} [person/man raising hand]'); + term.writeln('\u{1F3CB}\u{FE0F}=\u{1F3CB}\u{1F3FE}\u{200D}\u{2640}\u{FE0F} [person lifting weights emoji; woman lighting weights, medium dark]'); + term.writeln('\u{1F469}\u{1F469}\u{200D}\u{1F393}\u{1F468}\u{1F3FF}\u{200D}\u{1F393} [woman; woman student; man student dark]'); + term.writeln('\u{1f1f3}\u{1f1f4}_ [REGIONAL INDICATOR SYMBOL LETTER N and RI O]'); + term.writeln('\u{1f1f3}_\u{1f1f4} {RI N; underscore; RI O]'); + term.writeln('\u0061\u0301 [letter a with acute accent]'); + term.writeln('\u1100\u1161\u11A8=\u1100\u1161= [Korean Jamo]'); + term.writeln('\uAC00=\uD685= [Hangul syllables (pre-composed)]'); + term.writeln('(\u26b0\ufe0e) [coffin with text_presentation]'); + term.writeln('(\u26b0\ufe0f) [coffin with Emoji_presentation]'); + term.writeln(' [Égalité (using separate acute) emoij_presentation]'); +} + function addDecoration(): void { term.options['overviewRulerWidth'] = 15; const marker = term.registerMarker(1); diff --git a/demo/index.html b/demo/index.html index 7d7329a7f6..caff2ca213 100644 --- a/demo/index.html +++ b/demo/index.html @@ -97,6 +97,7 @@

Test

+
Decorations
diff --git a/src/common/InputHandler.ts b/src/common/InputHandler.ts index bbc9256ca2..afc41a3a01 100644 --- a/src/common/InputHandler.ts +++ b/src/common/InputHandler.ts @@ -10,13 +10,14 @@ import { CHARSETS, DEFAULT_CHARSET } from 'common/data/Charsets'; import { EscapeSequenceParser } from 'common/parser/EscapeSequenceParser'; import { Disposable } from 'common/Lifecycle'; import { StringToUtf32, stringFromCodePoint, Utf8ToUtf32 } from 'common/input/TextDecoder'; -import { DEFAULT_ATTR_DATA } from 'common/buffer/BufferLine'; +import { BufferLine, DEFAULT_ATTR_DATA } from 'common/buffer/BufferLine'; import { EventEmitter } from 'common/EventEmitter'; import { IParsingState, IEscapeSequenceParser, IParams, IFunctionIdentifier } from 'common/parser/Types'; import { NULL_CELL_CODE, NULL_CELL_WIDTH, Attributes, FgFlags, BgFlags, Content, UnderlineStyle } from 'common/buffer/Constants'; import { CellData } from 'common/buffer/CellData'; import { AttributeData } from 'common/buffer/AttributeData'; import { ICoreService, IBufferService, IOptionsService, ILogService, ICoreMouseService, ICharsetService, IUnicodeService, LogLevelEnum, IOscLinkService } from 'common/services/Services'; +import { UnicodeService } from 'common/services/UnicodeService'; import { OscHandler } from 'common/parser/OscParser'; import { DcsHandler } from 'common/parser/DcsParser'; import { IBuffer } from 'common/buffer/Types'; @@ -516,13 +517,10 @@ export class InputHandler extends Disposable implements IInputHandler { bufferRow.setCellFromCodePoint(this._activeBuffer.x - 1, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); } + let precedingJoinState = this._parser.precedingJoinState; for (let pos = start; pos < end; ++pos) { code = data[pos]; - // calculate print space - // expensive call, therefore we save width in line buffer - chWidth = this._unicodeService.wcwidth(code); - // get charset replacement character // charset is only defined for ASCII, therefore we only // search for an replacement 
char if code < 127 @@ -533,6 +531,12 @@ export class InputHandler extends Disposable implements IInputHandler { } } + const currentInfo = this._unicodeService.charProperties(code, precedingJoinState); + chWidth = UnicodeService.extractWidth(currentInfo); + const shouldJoin = UnicodeService.extractShouldJoin(currentInfo); + const oldWidth = shouldJoin ? UnicodeService.extractWidth(precedingJoinState) : 0; + precedingJoinState = currentInfo; + if (screenReaderMode) { this._onA11yChar.fire(stringFromCodePoint(code)); } @@ -540,34 +544,16 @@ export class InputHandler extends Disposable implements IInputHandler { this._oscLinkService.addLineToLink(this._getCurrentLinkId(), this._activeBuffer.ybase + this._activeBuffer.y); } - // insert combining char at last cursor position - // this._activeBuffer.x should never be 0 for a combining char - // since they always follow a cell consuming char - // therefore we can test for this._activeBuffer.x to avoid overflow left - if (!chWidth && this._activeBuffer.x) { - if (!bufferRow.getWidth(this._activeBuffer.x - 1)) { - // found empty cell after fullwidth, need to go 2 cells back - // it is save to step 2 cells back here - // since an empty cell is only set by fullwidth chars - bufferRow.addCodepointToCell(this._activeBuffer.x - 2, code); - } else { - bufferRow.addCodepointToCell(this._activeBuffer.x - 1, code); - } - continue; - } - // goto next line if ch would overflow // NOTE: To avoid costly width checks here, // the terminal does not allow a cols < 2. 
- if (this._activeBuffer.x + chWidth - 1 >= cols) { + if (this._activeBuffer.x + chWidth - oldWidth > cols) { // autowrap - DECAWM // automatically wraps to the beginning of the next line if (wraparoundMode) { - // clear left over cells to the right - while (this._activeBuffer.x < cols) { - bufferRow.setCellFromCodePoint(this._activeBuffer.x++, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); - } - this._activeBuffer.x = 0; + const oldRow = bufferRow; + let oldCol = this._activeBuffer.x - oldWidth; + this._activeBuffer.x = oldWidth; this._activeBuffer.y++; if (this._activeBuffer.y === this._activeBuffer.scrollBottom + 1) { this._activeBuffer.y--; @@ -582,6 +568,16 @@ export class InputHandler extends Disposable implements IInputHandler { } // row changed, get it again bufferRow = this._activeBuffer.lines.get(this._activeBuffer.ybase + this._activeBuffer.y)!; + if (oldWidth > 0 && bufferRow instanceof BufferLine) { + // Combining character widens 1 column to 2. + // Move old character to next line. + bufferRow.copyCellsFrom(oldRow as BufferLine, + oldCol, 0, oldWidth, false); + } + // clear left over cells to the right + while (oldCol < cols) { + oldRow.setCellFromCodePoint(oldCol++, 0, 1, curAttr.fg, curAttr.bg, curAttr.extended); + } } else { this._activeBuffer.x = cols - 1; if (chWidth === 2) { @@ -592,10 +588,27 @@ export class InputHandler extends Disposable implements IInputHandler { } } + // insert combining char at last cursor position + // this._activeBuffer.x should never be 0 for a combining char + // since they always follow a cell consuming char + // therefore we can test for this._activeBuffer.x to avoid overflow left + if (shouldJoin && this._activeBuffer.x) { + const offset = bufferRow.getWidth(this._activeBuffer.x - 1) ? 
1 : 2; + // if empty cell after fullwidth, need to go 2 cells back + // it is save to step 2 cells back here + // since an empty cell is only set by fullwidth chars + bufferRow.addCodepointToCell(this._activeBuffer.x - offset, + code, chWidth); + for (let delta = chWidth - oldWidth; --delta >= 0; ) { + bufferRow.setCellFromCodePoint(this._activeBuffer.x++, 0, 0, curAttr.fg, curAttr.bg, curAttr.extended); + } + continue; + } + // insert mode: move characters to right if (insertMode) { // right shift cells according to the width - bufferRow.insertCells(this._activeBuffer.x, chWidth, this._activeBuffer.getNullCell(curAttr), curAttr); + bufferRow.insertCells(this._activeBuffer.x, chWidth - oldWidth, this._activeBuffer.getNullCell(curAttr), curAttr); // test last cell - since the last cell has only room for // a halfwidth char any fullwidth shifted there is lost // and will be set to empty cell @@ -617,20 +630,8 @@ export class InputHandler extends Disposable implements IInputHandler { } } } - // store last char in Parser.precedingCodepoint for REP to work correctly - // This needs to check whether: - // - fullwidth + surrogates: reset - // - combining: only base char gets carried on (bug in xterm?) 
- if (end - start > 0) { - bufferRow.loadCell(this._activeBuffer.x - 1, this._workCell); - if (this._workCell.getWidth() === 2 || this._workCell.getCode() > 0xFFFF) { - this._parser.precedingCodepoint = 0; - } else if (this._workCell.isCombined()) { - this._parser.precedingCodepoint = this._workCell.getChars().charCodeAt(0); - } else { - this._parser.precedingCodepoint = this._workCell.content; - } - } + + this._parser.precedingJoinState = precedingJoinState; // handle wide chars: reset cell to the right if it is second cell of a wide char if (this._activeBuffer.x < cols && end - start > 0 && bufferRow.getWidth(this._activeBuffer.x) === 0 && !bufferRow.hasContent(this._activeBuffer.x)) { @@ -1576,9 +1577,8 @@ export class InputHandler extends Disposable implements IInputHandler { * If the character preceding REP is a control function or part of a control function, * the effect of REP is not defined by this Standard. * - * Since we propagate the terminal as xterm-256color we have to follow xterm's behavior: - * - fullwidth + surrogate chars are ignored - * - for combining chars only the base char gets repeated + * We extend xterm's behavior to allow repeating entire grapheme clusters. + * This isn't 100% xterm-compatible, but it seems saner and more useful. * - text attrs are applied normally * - wrap around is respected * - any valid sequence resets the carried forward char @@ -1592,16 +1592,29 @@ export class InputHandler extends Disposable implements IInputHandler { * (NOOP for any other sequence in between or NON ASCII characters). 
*/ public repeatPrecedingCharacter(params: IParams): boolean { - if (!this._parser.precedingCodepoint) { + const joinState = this._parser.precedingJoinState; + if (!joinState) { return true; } // call print to insert the chars and handle correct wrapping const length = params.params[0] || 1; - const data = new Uint32Array(length); - for (let i = 0; i < length; ++i) { - data[i] = this._parser.precedingCodepoint; - } - this.print(data, 0, data.length); + const chWidth = UnicodeService.extractWidth(joinState); + const x = this._activeBuffer.x - chWidth; + const bufferRow = this._activeBuffer.lines.get(this._activeBuffer.ybase + this._activeBuffer.y)!; + const text = bufferRow.getString(x); + const data = new Uint32Array(text.length * length); + let idata = 0; + for (let itext = 0; itext < text.length; ) { + const ch = text.codePointAt(itext) || 0; + data[idata++] = ch; + itext += ch > 0xffff ? 2 : 1; + } + let tlength = idata; + for (let i = 1; i < length; ++i) { + data.copyWithin(tlength, 0, idata); + tlength += idata; + } + this.print(data, 0, tlength); return true; } diff --git a/src/common/TestUtils.test.ts b/src/common/TestUtils.test.ts index 3e554d1018..fcf0c2cdff 100644 --- a/src/common/TestUtils.test.ts +++ b/src/common/TestUtils.test.ts @@ -3,7 +3,8 @@ * @license MIT */ -import { IBufferService, ICoreService, ILogService, IOptionsService, ITerminalOptions, ICoreMouseService, ICharsetService, IUnicodeService, IUnicodeVersionProvider, LogLevelEnum, IDecorationService, IInternalDecoration, IOscLinkService } from 'common/services/Services'; +import { IBufferService, ICoreService, ILogService, IOptionsService, ITerminalOptions, ICoreMouseService, ICharsetService, UnicodeCharProperties, UnicodeCharWidth, IUnicodeService, IUnicodeVersionProvider, LogLevelEnum, IDecorationService, IInternalDecoration, IOscLinkService } from 'common/services/Services'; +import { UnicodeService } from 'common/services/UnicodeService'; import { IEvent, EventEmitter } from 
'common/EventEmitter'; import { clone } from 'common/Clone'; import { DEFAULT_OPTIONS } from 'common/services/OptionsService'; @@ -168,7 +169,20 @@ export class MockUnicodeService implements IUnicodeService { public versions: string[] = []; public activeVersion: string = ''; public onChange: IEvent = new EventEmitter().event; - public wcwidth = (codepoint: number): number => this._provider.wcwidth(codepoint); + public wcwidth = (codepoint: number): UnicodeCharWidth => this._provider.wcwidth(codepoint); + public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + let width = this.wcwidth(codepoint); + let shouldJoin = width === 0 && preceding !== 0; + if (shouldJoin) { + const oldWidth = UnicodeService.extractWidth(preceding); + if (oldWidth === 0) { + shouldJoin = false; + } else if (oldWidth > width) { + width = oldWidth; + } + } + return UnicodeService.createPropertyValue(0, width, shouldJoin); + } public getStringCellWidth(s: string): number { throw new Error('Method not implemented.'); } diff --git a/src/common/Types.d.ts b/src/common/Types.d.ts index 4a9d70220d..fc8fdf4e61 100644 --- a/src/common/Types.d.ts +++ b/src/common/Types.d.ts @@ -234,7 +234,7 @@ export interface IBufferLine { loadCell(index: number, cell: ICellData): ICellData; setCell(index: number, cell: ICellData): void; setCellFromCodePoint(index: number, codePoint: number, width: number, fg: number, bg: number, eAttrs: IExtendedAttrs): void; - addCodepointToCell(index: number, codePoint: number): void; + addCodepointToCell(index: number, codePoint: number, width: number): void; insertCells(pos: number, n: number, ch: ICellData, eraseAttr?: IAttributeData): void; deleteCells(pos: number, n: number, fill: ICellData, eraseAttr?: IAttributeData): void; replaceCells(start: number, end: number, fill: ICellData, eraseAttr?: IAttributeData, respectProtect?: boolean): void; diff --git a/src/common/buffer/BufferLine.test.ts b/src/common/buffer/BufferLine.test.ts 
index 111aae036c..f2819aa8c4 100644 --- a/src/common/buffer/BufferLine.test.ts +++ b/src/common/buffer/BufferLine.test.ts @@ -431,7 +431,7 @@ describe('BufferLine', function(): void { describe('addCharToCell', () => { it('should set width to 1 for empty cell', () => { const line = new TestBufferLine(3, CellData.fromCharData([DEFAULT_ATTR, NULL_CELL_CHAR, NULL_CELL_WIDTH, NULL_CELL_CODE]), false); - line.addCodepointToCell(0, '\u0301'.charCodeAt(0)); + line.addCodepointToCell(0, '\u0301'.charCodeAt(0), 0); const cell = line.loadCell(0, new CellData()); // chars contains single combining char // width is set to 1 @@ -444,7 +444,7 @@ describe('BufferLine', function(): void { const cell = line .loadCell(0, new CellData()); cell.setFromCharData([123, 'e\u0301', 1, 'e\u0301'.charCodeAt(1)]); line.setCell(0, cell); - line.addCodepointToCell(0, '\u0301'.charCodeAt(0)); + line.addCodepointToCell(0, '\u0301'.charCodeAt(0), 0); line.loadCell(0, cell); // chars contains 3 chars // width is set to 1 @@ -457,7 +457,7 @@ describe('BufferLine', function(): void { const cell = line .loadCell(0, new CellData()); cell.setFromCharData([123, 'e', 1, 'e'.charCodeAt(1)]); line.setCell(0, cell); - line.addCodepointToCell(0, '\u0301'.charCodeAt(0)); + line.addCodepointToCell(0, '\u0301'.charCodeAt(0), 0); line.loadCell(0, cell); // chars contains 2 chars // width is set to 1 diff --git a/src/common/buffer/BufferLine.ts b/src/common/buffer/BufferLine.ts index de25dc2878..a268f2bce5 100644 --- a/src/common/buffer/BufferLine.ts +++ b/src/common/buffer/BufferLine.ts @@ -227,7 +227,7 @@ export class BufferLine implements IBufferLine { * onto a leading char. Since we already set the attrs * by the previous `setDataFromCodePoint` call, we can omit it here. 
*/ - public addCodepointToCell(index: number, codePoint: number): void { + public addCodepointToCell(index: number, codePoint: number, width: number): void { let content = this._data[index * CELL_SIZE + Cell.CONTENT]; if (content & Content.IS_COMBINED_MASK) { // we already have a combined string, simply add @@ -245,8 +245,12 @@ export class BufferLine implements IBufferLine { // simply set the data in the cell buffer with a width of 1 content = codePoint | (1 << Content.WIDTH_SHIFT); } - this._data[index * CELL_SIZE + Cell.CONTENT] = content; } + if (width) { + content &= ~Content.WIDTH_MASK; + content |= width << Content.WIDTH_SHIFT; + } + this._data[index * CELL_SIZE + Cell.CONTENT] = content; } public insertCells(pos: number, n: number, fillCellData: ICellData, eraseAttr?: IAttributeData): void { diff --git a/src/common/input/UnicodeV6.ts b/src/common/input/UnicodeV6.ts index bf63a18b22..83265f705a 100644 --- a/src/common/input/UnicodeV6.ts +++ b/src/common/input/UnicodeV6.ts @@ -2,9 +2,8 @@ * Copyright (c) 2019 The xterm.js authors. All rights reserved. 
* @license MIT */ -import { IUnicodeVersionProvider } from 'common/services/Services'; - -type CharWidth = 0 | 1 | 2; +import { IUnicodeVersionProvider, UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services'; +import { UnicodeService } from 'common/services/UnicodeService'; const BMP_COMBINING = [ [0x0300, 0x036F], [0x0483, 0x0486], [0x0488, 0x0489], @@ -121,12 +120,26 @@ export class UnicodeV6 implements IUnicodeVersionProvider { } } - public wcwidth(num: number): CharWidth { + public wcwidth(num: number): UnicodeCharWidth { if (num < 32) return 0; if (num < 127) return 1; - if (num < 65536) return table[num] as CharWidth; + if (num < 65536) return table[num] as UnicodeCharWidth; if (bisearch(num, HIGH_COMBINING)) return 0; if ((num >= 0x20000 && num <= 0x2fffd) || (num >= 0x30000 && num <= 0x3fffd)) return 2; return 1; } + + public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + let width = this.wcwidth(codepoint); + let shouldJoin = width === 0 && preceding !== 0; + if (shouldJoin) { + const oldWidth = UnicodeService.extractWidth(preceding); + if (oldWidth === 0) { + shouldJoin = false; + } else if (oldWidth > width) { + width = oldWidth; + } + } + return UnicodeService.createPropertyValue(0, width, shouldJoin); + } } diff --git a/src/common/parser/EscapeSequenceParser.ts b/src/common/parser/EscapeSequenceParser.ts index de20632248..b3d02768b6 100644 --- a/src/common/parser/EscapeSequenceParser.ts +++ b/src/common/parser/EscapeSequenceParser.ts @@ -230,7 +230,7 @@ export const VT500_TRANSITION_TABLE = (function (): TransitionTable { export class EscapeSequenceParser extends Disposable implements IEscapeSequenceParser { public initialState: number; public currentState: number; - public precedingCodepoint: number; + public precedingJoinState: number; // UnicodeCharProperties // buffers over several parse calls protected _params: Params; @@ -271,7 +271,7 @@ export class EscapeSequenceParser extends
Disposable implements IEscapeSequenceP this._params = new Params(); // defaults to 32 storable params/subparams this._params.addParam(0); // ZDM this._collect = 0; - this.precedingCodepoint = 0; + this.precedingJoinState = 0; // set default fallback handlers and handler lookup containers this._printHandlerFb = (data, start, end): void => { }; @@ -448,7 +448,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP this._params.reset(); this._params.addParam(0); // ZDM this._collect = 0; - this.precedingCodepoint = 0; + this.precedingJoinState = 0; // abort pending continuation from async handler // Here the RESET type indicates, that the next parse call will // ignore any saved stack, instead continues sync with next codepoint from GROUND @@ -610,7 +610,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP // cleanup before continuing with the main sync loop this._parseStack.state = ParserStackType.NONE; start = this._parseStack.chunkPos + 1; - this.precedingCodepoint = 0; + this.precedingJoinState = 0; this.currentState = this._parseStack.transition & TableAccess.TRANSITION_STATE_MASK; } } @@ -653,7 +653,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP case ParserAction.EXECUTE: if (this._executeHandlers[code]) this._executeHandlers[code](); else this._executeHandlerFb(code); - this.precedingCodepoint = 0; + this.precedingJoinState = 0; break; case ParserAction.IGNORE: break; @@ -688,7 +688,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP if (j < 0) { this._csiHandlerFb(this._collect << 8 | code, this._params); } - this.precedingCodepoint = 0; + this.precedingJoinState = 0; break; case ParserAction.PARAM: // inner loop: digits (0x30 - 0x39) and ; (0x3b) and : (0x3a) @@ -727,7 +727,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP if (jj < 0) { this._escHandlerFb(this._collect << 8 | code); } - 
this.precedingCodepoint = 0; + this.precedingJoinState = 0; break; case ParserAction.CLEAR: this._params.reset(); @@ -758,7 +758,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP this._params.reset(); this._params.addParam(0); // ZDM this._collect = 0; - this.precedingCodepoint = 0; + this.precedingJoinState = 0; break; case ParserAction.OSC_START: this._oscParser.start(); @@ -783,7 +783,7 @@ export class EscapeSequenceParser extends Disposable implements IEscapeSequenceP this._params.reset(); this._params.addParam(0); // ZDM this._collect = 0; - this.precedingCodepoint = 0; + this.precedingJoinState = 0; break; } this.currentState = transition & TableAccess.TRANSITION_STATE_MASK; diff --git a/src/common/parser/Types.d.ts b/src/common/parser/Types.d.ts index a1ea0ec262..2ed4acdcaf 100644 --- a/src/common/parser/Types.d.ts +++ b/src/common/parser/Types.d.ts @@ -146,11 +146,12 @@ export type PrintFallbackHandlerType = PrintHandlerType; */ export interface IEscapeSequenceParser extends IDisposable { /** - * Preceding codepoint to get REP working correctly. - * This must be set by the print handler as last action. - * It gets reset by the parser for any valid sequence beside REP itself. + * Preceding grapheme-join-state. + * Used for joining grapheme clusters across calls to `print`. + * Also used by REP to check if repeating a character is allowed. + * It gets reset by the parser for any valid sequence besides text. */ - precedingCodepoint: number; + precedingJoinState: number; // More specifically: UnicodeCharProperties /** * Reset the parser to its initial state (handlers are kept).
diff --git a/src/common/services/Services.ts b/src/common/services/Services.ts index b4ee5a762d..52c2a79fae 100644 --- a/src/common/services/Services.ts +++ b/src/common/services/Services.ts @@ -296,6 +296,29 @@ export interface IOscLinkService { getLinkData(linkId: number): IOscLinkData | undefined; } +/* + * Width and Grapheme_Cluster_Break properties of a character as a bit mask. + * + * bit 0: shouldJoin - should combine with preceding character. + * bit 1..2: wcwidth - see UnicodeCharWidth. + * bit 3..31: class of character (currently only 4 bits are used). + * This is used to determine grapheme clustering - i.e. which codepoints + * are to be combined into a single compound character. + * + * Use the UnicodeService static function createPropertyValue to create a + * UnicodeCharProperties; use extractShouldJoin, extractWidth, and + * extractCharKind to extract the components. + */ +export type UnicodeCharProperties = number; + +/** + * Width in columns of a character. + * In a CJK context, "half-width" characters (such as Latin) are width 1, + * while "full-width" characters (such as Kanji) are 2 columns wide. + * Combining characters (such as accents) are width 0. + */ +export type UnicodeCharWidth = 0 | 1 | 2; + export const IUnicodeService = createDecorator('UnicodeService'); export interface IUnicodeService { serviceBrand: undefined; @@ -311,13 +334,20 @@ export interface IUnicodeService { /** * Unicode version dependent */ - wcwidth(codepoint: number): number; + wcwidth(codepoint: number): UnicodeCharWidth; getStringCellWidth(s: string): number; + /** + * Return character width and type for grapheme clustering. + * If preceding != 0, it is the return code from the previous character; + * in that case the result specifies if the characters should be joined.
+ */ + charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties; } export interface IUnicodeVersionProvider { readonly version: string; - wcwidth(ucs: number): 0 | 1 | 2; + wcwidth(ucs: number): UnicodeCharWidth; + charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties; } export const IDecorationService = createDecorator('DecorationService'); diff --git a/src/common/services/UnicodeService.test.ts b/src/common/services/UnicodeService.test.ts index a2c4b6367f..01e3c0862e 100644 --- a/src/common/services/UnicodeService.test.ts +++ b/src/common/services/UnicodeService.test.ts @@ -12,6 +12,9 @@ class DummyProvider implements IUnicodeVersionProvider { public wcwidth(n: number): 0 | 1 | 2 { return 2; } + public charProperties(codepoint: number): number { + return UnicodeService.createPropertyValue(0, this.wcwidth(codepoint)); + } } describe('unicode provider', () => { diff --git a/src/common/services/UnicodeService.ts b/src/common/services/UnicodeService.ts index 4f3596d2d3..767eecaf95 100644 --- a/src/common/services/UnicodeService.ts +++ b/src/common/services/UnicodeService.ts @@ -2,9 +2,10 @@ * Copyright (c) 2019 The xterm.js authors. All rights reserved. 
* @license MIT */ + import { EventEmitter } from 'common/EventEmitter'; import { UnicodeV6 } from 'common/input/UnicodeV6'; -import { IUnicodeService, IUnicodeVersionProvider } from 'common/services/Services'; +import { IUnicodeService, IUnicodeVersionProvider, UnicodeCharProperties, UnicodeCharWidth } from 'common/services/Services'; export class UnicodeService implements IUnicodeService { public serviceBrand: any; @@ -16,6 +17,19 @@ export class UnicodeService implements IUnicodeService { private readonly _onChange = new EventEmitter(); public readonly onChange = this._onChange.event; + public static extractShouldJoin(value: UnicodeCharProperties): boolean { + return (value & 1) !== 0; + } + public static extractWidth(value: UnicodeCharProperties): UnicodeCharWidth { + return ((value >> 1) & 0x3) as UnicodeCharWidth; + } + public static extractCharKind(value: UnicodeCharProperties): number { + return value >> 3; + } + public static createPropertyValue(state: number, width: number, shouldJoin: boolean = false): UnicodeCharProperties { + return ((state & 0xffffff) << 3) | ((width & 3) << 1) | (shouldJoin?1:0); + } + constructor() { const defaultProvider = new UnicodeV6(); this.register(defaultProvider); @@ -51,12 +65,13 @@ export class UnicodeService implements IUnicodeService { /** * Unicode version dependent interface. 
*/ - public wcwidth(num: number): number { + public wcwidth(num: number): UnicodeCharWidth { return this._activeProvider.wcwidth(num); } public getStringCellWidth(s: string): number { let result = 0; + let precedingInfo = 0; const length = s.length; for (let i = 0; i < length; ++i) { let code = s.charCodeAt(i); @@ -79,8 +94,18 @@ export class UnicodeService implements IUnicodeService { result += this.wcwidth(second); } } - result += this.wcwidth(code); + const currentInfo = this.charProperties(code, precedingInfo); + let chWidth = UnicodeService.extractWidth(currentInfo); + if (UnicodeService.extractShouldJoin(currentInfo)) { + chWidth -= UnicodeService.extractWidth(precedingInfo); + } + result += chWidth; + precedingInfo = currentInfo; } return result; } + + public charProperties(codepoint: number, preceding: UnicodeCharProperties): UnicodeCharProperties { + return this._activeProvider.charProperties(codepoint, preceding); + } } diff --git a/test/api/TestUtils.ts b/test/api/TestUtils.ts index 702fcb2de2..288a3c0719 100644 --- a/test/api/TestUtils.ts +++ b/test/api/TestUtils.ts @@ -43,9 +43,19 @@ export async function timeout(ms: number): Promise { return new Promise(r => setTimeout(r, ms)); } -export async function openTerminal(page: playwright.Page, options: ITerminalOptions & ITerminalInitOnlyOptions = {}): Promise { +export async function openTerminal(page: playwright.Page, options: ITerminalOptions & ITerminalInitOnlyOptions = {}, testOptions: { loadUnicodeGraphemesAddon: boolean } = { loadUnicodeGraphemesAddon: true }): Promise { await page.evaluate(`window.term = new Terminal(${JSON.stringify({ allowProposedApi: true, ...options })})`); await page.evaluate(`window.term.open(document.querySelector('#terminal-container'))`); + + // HACK: This is a soft layer breaker that's temporarily included until unicode graphemes have + // more complete integration tests. 
See https://github.com/xtermjs/xterm.js/pull/4519#discussion_r1285234453 + if (testOptions.loadUnicodeGraphemesAddon) { + await page.evaluate(` + window.unicode = new UnicodeGraphemesAddon(); + window.term.loadAddon(window.unicode); + window.term.unicode.activeVersion = '15-graphemes'; + `); + } await page.waitForSelector('.xterm-rows'); } diff --git a/test/playwright/InputHandler.test.ts b/test/playwright/InputHandler.test.ts index e422bd1cbc..fb2eeb9ce5 100644 --- a/test/playwright/InputHandler.test.ts +++ b/test/playwright/InputHandler.test.ts @@ -255,18 +255,18 @@ test.describe('InputHandler Integration Tests', () => { `); await pollFor(ctx.page, () => getLinesAsArray(4), ['##', '##', '##', '######']); await pollFor(ctx.page, () => getCursor(), { col: 6, row: 3 }); - // should not repeat on fullwidth chars + // repeat on fullwidth chars await ctx.page.evaluate(` window.term.reset(); - window.term.write('¥\x1b[10b'); + window.term.write('¥\x1b[8b'); `); - await pollFor(ctx.page, () => getLinesAsArray(1), ['¥']); - // should repeat only base char of combining + await pollFor(ctx.page, () => getLinesAsArray(1), ['¥¥¥¥¥']); + // change from xterm: repeat grapheme cluster await ctx.page.evaluate(` window.term.reset(); - window.term.write('e\u0301\x1b[5b'); + window.term.write('e\u0301\x1b[2b'); `); - await pollFor(ctx.page, () => getLinesAsArray(1), ['e\u0301eeeee']); + await pollFor(ctx.page, () => getLinesAsArray(1), ['e\u0301e\u0301e\u0301']); // should wrap correctly await ctx.page.evaluate(` window.term.reset(); diff --git a/test/playwright/Terminal.test.ts b/test/playwright/Terminal.test.ts index 3a4e00f083..19f6eb315e 100644 --- a/test/playwright/Terminal.test.ts +++ b/test/playwright/Terminal.test.ts @@ -22,7 +22,7 @@ test.describe('API Integration Tests', () => { }); test('Proposed API check', async () => { - await openTerminal(ctx, { allowProposedApi: false }); + await openTerminal(ctx, { allowProposedApi: false }, { loadUnicodeGraphemesAddon: false }); 
await ctx.page.evaluate(` try { window.term.markers; diff --git a/test/playwright/TestUtils.ts b/test/playwright/TestUtils.ts index 18a1f1bba9..0a51757fac 100644 --- a/test/playwright/TestUtils.ts +++ b/test/playwright/TestUtils.ts @@ -351,7 +351,7 @@ class TerminalCoreProxy { } } -export async function openTerminal(ctx: ITestContext, options: ITerminalOptions | ITerminalInitOnlyOptions = {}): Promise { +export async function openTerminal(ctx: ITestContext, options: ITerminalOptions | ITerminalInitOnlyOptions = {}, testOptions: { loadUnicodeGraphemesAddon: boolean } = { loadUnicodeGraphemesAddon: true }): Promise { await ctx.page.evaluate(` if ('term' in window) { try { @@ -366,6 +366,15 @@ export async function openTerminal(ctx: ITestContext, options: ITerminalOptions window.term = new window.Terminal(${JSON.stringify({ allowProposedApi: true, ...options })}); window.term.open(document.querySelector('#terminal-container')); `); + // HACK: This is a soft layer breaker that's temporarily included until unicode graphemes have + // more complete integration tests. 
See https://github.com/xtermjs/xterm.js/pull/4519#discussion_r1285234453 + if (testOptions.loadUnicodeGraphemesAddon) { + await ctx.page.evaluate(` + window.unicode = new UnicodeGraphemesAddon(); + window.term.loadAddon(window.unicode); + window.term.unicode.activeVersion = '15-graphemes'; + `); + } await ctx.page.waitForSelector('.xterm-rows'); ctx.termHandle = await ctx.page.evaluateHandle('window.term'); await ctx.proxy.initTerm(); diff --git a/tsconfig.all.json b/tsconfig.all.json index 79ced9cc49..5d8af629c0 100644 --- a/tsconfig.all.json +++ b/tsconfig.all.json @@ -15,6 +15,7 @@ { "path": "./addons/xterm-addon-search" }, { "path": "./addons/xterm-addon-serialize" }, { "path": "./addons/xterm-addon-unicode11" }, + { "path": "./addons/xterm-addon-unicode-graphemes" }, { "path": "./addons/xterm-addon-web-links" }, { "path": "./addons/xterm-addon-webgl" } ] diff --git a/typings/xterm-headless.d.ts b/typings/xterm-headless.d.ts index 688e2eda98..99aa8237fe 100644 --- a/typings/xterm-headless.d.ts +++ b/typings/xterm-headless.d.ts @@ -1240,6 +1240,7 @@ declare module 'xterm-headless' { * Unicode version dependent wcwidth implementation. */ wcwidth(codepoint: number): 0 | 1 | 2; + charProperties(codepoint: number, preceding: number): number; } /** diff --git a/typings/xterm.d.ts b/typings/xterm.d.ts index 44402b848d..3f603b9e19 100644 --- a/typings/xterm.d.ts +++ b/typings/xterm.d.ts @@ -1773,6 +1773,7 @@ declare module 'xterm' { * Unicode version dependent wcwidth implementation. */ wcwidth(codepoint: number): 0 | 1 | 2; + charProperties(codepoint: number, preceding: number): number; } /**