Skip to content

Commit

Permalink
Update UnicodeParsingTests.ps1
Browse files Browse the repository at this point in the history
  • Loading branch information
ninmonkey committed Jan 8, 2024
1 parent 838d347 commit 4d570d3
Showing 1 changed file with 59 additions and 40 deletions.
99 changes: 59 additions & 40 deletions Pwsh/Unicode/UnicodeParsingTests.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,56 @@ $Uni = @{

' '.EnumerateRunes() | Format-Table


# Sample text: 'a', U+1F412 (monkey), 'c'. The monkey is outside the BMP, so in
# UTF-16 it is the surrogate pair d83d dc12 and the string holds 4 code units.
# Assigned before its first use so the matches below do not rely on session state.
$what = 'a🐒c'
$list = $what.ToCharArray()

# '.' consumes one UTF-16 code unit, so '^..' ends between the two surrogates
# and '^...' is the first pattern that captures the whole monkey.
[regex]::Match( $what, '^..')
[regex]::Match( $what, '^...')

# Groups Success Name Captures Index Length Value ValueSpan Hex
# ------ ------- ---- -------- ----- ------ ----- --------- ---
# {0} True 0 {0} 0 1 a 61 fffd
# {0} True 0 {0} 0 1 a🐒 61 1f412


# UTF-16LE bytes, first unpadded, then zero padded to two digits.
# The bare string literals in this section record the expected output of the
# statement right above them.
[Text.Encoding]::Unicode.GetBytes( $what ) | Join-String -Separator ' ' -FormatString ' {0:x}'

[Text.Encoding]::Unicode.GetBytes( $what ) | Join-String -Separator ' ' -FormatString '{0:x2}'
'61 00 3d d8 12 dc 63 00'

# index 1 is the high surrogate of the monkey
$list[1].ToInt64($Null) | ForEach-Object ToString 'x'
'd83d'

$list[0].ToInt64($Null) | ForEach-Object ToString 'x'
'61'

# a lone surrogate escape matches inside the pair
$what -match '\ud83d'
'True'

$what.EnumerateRunes()
[regex]::Match( $what, '\ud83d.') | Format-Table # '[2] = 🐒'
[regex]::Match( $what, '\ud83d\udc12') | Format-Table # '[2] = 🐒'

function CodepointAs16RegexLiteral {
    <#
    .SYNOPSIS
        Build a regex pattern that matches the text literally, written as UTF-16 code unit escapes
    .DESCRIPTION
        Every UTF-16 code unit of the input becomes one '\uXXXX' escape ( 4 lowercase hex digits ).
        A codepoint above U+FFFF therefore becomes two escapes, one per surrogate.
    .PARAMETER Text
        Text to convert. Any number of codepoints is accepted.
    .OUTPUTS
        [string] the escaped pattern. Empty input gives an empty pattern.
    .EXAMPLE
        > CodepointAs16RegexLiteral '🐒'
        # out: \ud83d\udc12
    .EXAMPLE
        > '🐒' -match ( CodepointAs16RegexLiteral '🐒')
        > 'a🐒b' -match (CodepointAs16RegexLiteral 'a🐒b')
        # out: true
    #>
    [OutputType('String')]
    param(
        [ArgumentCompletions('🐒')]
        [string]$Text = '🐒')

    # A [char] is one UTF-16 code unit, so formatting it as x4 yields the same digits
    # as byte-swapping the utf-16le bytes. When there are no chars nothing is emitted
    # and the result is '' instead of a dangling '\u', which the regex engine rejects.
    $escapes = foreach ($unit in $Text.ToCharArray()) {
        '\u{0:x4}' -f [int]$unit
    }
    return $escapes -join ''
}


function j.Hex {
<#
.SYNOPSIS
Expand Down Expand Up @@ -68,46 +118,6 @@ function j.Hex {
}


# Sample text: 'a', U+1F412 (monkey), 'c'. The monkey is outside the BMP, so in
# UTF-16 it is the surrogate pair d83d dc12 and the string holds 4 code units.
$what = 'a🐒c'
$list = $what.ToCharArray()

# UTF-16LE bytes, first unpadded, then zero padded to two digits.
# The bare string literals in this section record the expected output of the
# statement right above them.
[Text.Encoding]::Unicode.GetBytes( $what ) | Join-String -Separator ' ' -FormatString ' {0:x}'

[Text.Encoding]::Unicode.GetBytes( $what ) | Join-String -Separator ' ' -FormatString '{0:x2}'
'61 00 3d d8 12 dc 63 00'

# index 1 is the high surrogate of the monkey
$list[1].ToInt64($Null) | ForEach-Object ToString 'x'
'd83d'

$list[0].ToInt64($Null) | ForEach-Object ToString 'x'
'61'

# a lone surrogate escape matches inside the pair
$what -match '\ud83d'
'True'

$what.EnumerateRunes()
[regex]::Match( $what, '\ud83d.') | Format-Table # '[2] = 🐒'
[regex]::Match( $what, '\ud83d\udc12') | Format-Table # '[2] = 🐒'

function CodepointAs16RegexLiteral {
    <#
    .SYNOPSIS
        Build a regex pattern for a single codepoint, written as UTF-16 code unit escapes
    .DESCRIPTION
        Every UTF-16 code unit of the input becomes one '\uXXXX' escape ( 4 lowercase hex digits ).
        A codepoint above U+FFFF therefore becomes two escapes, one per surrogate.
    .PARAMETER Text
        Text to convert. For now it may hold at most one codepoint; more than one throws.
    .OUTPUTS
        [string] the escaped pattern. Empty input gives an empty pattern.
    .EXAMPLE
        in: 🐒
        > '🐒' -match ( CodepointAs16RegexLiteral '🐒')
        # out: true
    .EXAMPLE
        > CodepointAs16RegexLiteral '🐒'
        # out: \ud83d\udc12
    #>
    [OutputType('String')]
    param(
        [ArgumentCompletions('🐒')]
        [string]$Text = '🐒')

    # counts codepoints ( runes ), not chars: one surrogate pair still counts as 1
    if ( @($Text.EnumerateRunes().Value).count -gt 1 ) { throw "expected one codepoint for now" }

    # A [char] is one UTF-16 code unit, so formatting it as x4 yields the same digits
    # as byte-swapping the utf-16le bytes. When there are no chars nothing is emitted
    # and the result is '' instead of a dangling '\u', which the regex engine rejects.
    $escapes = foreach ($unit in $Text.ToCharArray()) {
        '\u{0:x4}' -f [int]$unit
    }
    return $escapes -join ''
}

# CodepointAs16Literal

Expand All @@ -117,6 +127,15 @@ function CodepointAs16RegexLiteral {
# ) -Force -ea 'silentlycontinue'

# πŸ±β€πŸ‘€

# Table view for regex Match objects: the built-in properties plus a computed
# 'Hex' column. 'Ctrl' is defined as a virtual property but is not listed in
# -Property, so it is not displayed.
# NOTE(review): Write-FormatView / Out-FormatData / Push-FormatData are presumably
# from the EzOut module, and j.Hex / Fcc are helpers defined elsewhere. Confirm both.
Write-FormatView -TypeName ([System.Text.RegularExpressions.Match]) -AutoSize -Property @(
    'Groups'
    'Success'
    'Name'
    'Captures'
    'Index'
    'Length'
    'Value'
    'ValueSpan'
    'Hex'
    # 'Ctrl'
) -VirtualProperty @{
    # codepoints of the matched text, rendered by j.Hex. '?.' skips a null Value
    'Hex'  = { ($_.Value)?.EnumerateRunes().value | j.Hex }
    'Ctrl' = { $_.Value | Fcc }
} |
    Out-FormatData |
    Push-FormatData
Write-FormatView -TypeName ([Text.Rune]) -Property @(
'Render'
'Hex'
Expand Down

0 comments on commit 4d570d3

Please sign in to comment.