From b04b04dd5f1a4d27e506335939a0b4dad0b000c0 Mon Sep 17 00:00:00 2001
From: iRon7
Date: Wed, 1 May 2024 11:49:49 +0200
Subject: [PATCH] Use ASCII

See: https://github.com/PowerShell/PSScriptAnalyzer/issues/1999
---
 UseASCII.Tests.ps1 | 110 +++++++++++++++++++++++++++++++++
 UseASCII.psm1      | 150 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 260 insertions(+)
 create mode 100644 UseASCII.Tests.ps1
 create mode 100644 UseASCII.psm1

diff --git a/UseASCII.Tests.ps1 b/UseASCII.Tests.ps1
new file mode 100644
index 0000000..5e09547
--- /dev/null
+++ b/UseASCII.Tests.ps1
@@ -0,0 +1,110 @@
+#Requires -Modules @{ModuleName="Pester"; ModuleVersion="5.0.0"}
+
+Describe 'UseASCII' {
+
+    BeforeAll {
+        # Resolve the rule module next to this test file so the tests do not depend on the current directory.
+        $RulePath = Join-Path $PSScriptRoot 'UseASCII.psm1'
+        # Only compose a unique .ps1 path: New-TemporaryFile would create a .tmp file that is never removed.
+        $TemporaryName = [System.IO.Path]::ChangeExtension([System.IO.Path]::GetRandomFileName(), '.ps1')
+        $TemporaryFile = Join-Path ([System.IO.Path]::GetTempPath()) $TemporaryName
+    }
+
+    Context 'Positives' {
+
+        It 'Smart characters' {
+            $Result = Invoke-ScriptAnalyzer -CustomRulePath $RulePath -ScriptDefinition { Write-Host 'coöperate' }.ToString()
+            $Result.RuleName | Should -Be 'PSUseASCII'
+            $Result.Severity | Should -Be 'Information'
+        }
+
+        It 'First line' {
+            $Result = Invoke-ScriptAnalyzer -CustomRulePath $RulePath -ScriptDefinition "Write-Host 'co$([char]0xF6)perate'"
+            $Result.Extent.StartLineNumber | Should -Be 1
+            $Result.Extent.StartColumnNumber | Should -Be 15
+        }
+
+        It 'Nested script block' {
+            $Result = Invoke-ScriptAnalyzer -CustomRulePath $RulePath -ScriptDefinition "& {`n    Write-Host 'co$([char]0xF6)perate'`n}"
+            @($Result).Count | Should -Be 1
+            $Result.Extent.StartLineNumber | Should -Be 2
+            $Result.Extent.StartColumnNumber | Should -Be 19
+        }
+
+        It 'Fix' {
+            Set-Content -LiteralPath $TemporaryFile -Encoding utf8 -NoNewline -Value {
+                <#
+                .SYNOPSIS
+                    Use ASCII test
+                .DESCRIPTION
+                    The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
+                    Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
+                    such as "coöperate", without which the letter sequence could be misinterpreted to be pronounced
+                #>
+
+                # [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'coöperate')]
+                Param()
+
+                Write-Host “test” –ForegroundColor ‘Red’ -BackgroundColor ‘Green’
+                Write-Host 'No-break space'
+            }.ToString()
+            Invoke-ScriptAnalyzer -Fix -CustomRulePath $RulePath -Path $TemporaryFile
+            Get-Content -Raw -LiteralPath $TemporaryFile | Should -Be {
+                <#
+                .SYNOPSIS
+                    Use ASCII test
+                .DESCRIPTION
+                    The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
+                    Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
+                    such as "cooperate", without which the letter sequence could be misinterpreted to be pronounced
+                #>
+
+                # [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'cooperate')]
+                Param()
+
+                Write-Host "test" -ForegroundColor 'Red' -BackgroundColor 'Green'
+                Write-Host 'No-break space'
+            }.ToString()
+        }
+
+        It 'Suppress' {
+            Set-Content -LiteralPath $TemporaryFile -Encoding utf8 -NoNewline -Value {
+                <#
+                .SYNOPSIS
+                    Use ASCII test
+                .DESCRIPTION
+                    The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
+                    Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
+                    such as "coöperate", without which the letter sequence could be misinterpreted to be pronounced
+                #>
+
+                [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'coöperate')]
+                Param()
+
+                Write-Host “test” –ForegroundColor ‘Red’ -BackgroundColor ‘Green’
+                Write-Host 'No-break space'
+            }.ToString()
+            Invoke-ScriptAnalyzer -Fix -CustomRulePath $RulePath -Path $TemporaryFile -ErrorAction SilentlyContinue
+            Get-Content -Raw -LiteralPath $TemporaryFile | Should -Be {
+                <#
+                .SYNOPSIS
+                    Use ASCII test
+                .DESCRIPTION
+                    The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
+                    Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
+                    such as "coöperate", without which the letter sequence could be misinterpreted to be pronounced
+                #>
+
+                [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'coöperate')]
+                Param()
+
+                Write-Host "test" -ForegroundColor 'Red' -BackgroundColor 'Green'
+                Write-Host 'No-break space'
+            }.ToString()
+        }
+    }
+
+    AfterAll {
+        if (Test-Path -LiteralPath $TemporaryFile) { Remove-Item -LiteralPath $TemporaryFile }
+    }
+}
\ No newline at end of file
diff --git a/UseASCII.psm1 b/UseASCII.psm1
new file mode 100644
index 0000000..882a35b
--- /dev/null
+++ b/UseASCII.psm1
@@ -0,0 +1,150 @@
+#Requires -Version 5.0
+
+function Measure-UseASCII {
+<#
+    .SYNOPSIS
+    Use ASCII characters
+    .DESCRIPTION
+    Validates that only ASCII characters are used and reveals the position of any violation.
+    Every violation carries a suggested correction that replaces the character by an ASCII substitute.
+    .PARAMETER ScriptBlockAst
+    The script block to validate. Only a root script block (one without a parent) is scanned.
+    .INPUTS
+    [System.Management.Automation.Language.ScriptBlockAst]
+    .OUTPUTS
+    [Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord]
+#>
+
+    [CmdletBinding()]
+    [OutputType([Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord])]
+    Param (
+        [Parameter(Mandatory = $true)]
+        [ValidateNotNullOrEmpty()]
+        [System.Management.Automation.Language.ScriptBlockAst]
+        $ScriptBlockAst
+    )
+    Begin {
+        # Emits one object per character above U+007F, holding the character, its 0-based offset
+        # in $Text and its 1-based line and column numbers.
+        function GetNonASCIIPositions ([String]$Text) {
+            $LF = [Char]0x0A
+            $DEL = [Char]0x7F
+            # The column is incremented before a character is examined, so it has to start at 0
+            # for the first character of the first line to be reported at column 1.
+            $LineNumber = 1; $ColumnNumber = 0
+            for ($Offset = 0; $Offset -lt $Text.Length; $Offset++) {
+                $Character = $Text[$Offset]
+                if ($Character -eq $LF) {
+                    $LineNumber++
+                    $ColumnNumber = 0
+                }
+                else {
+                    $ColumnNumber++
+                    if ($Character -gt $DEL) {
+                        [PSCustomObject]@{
+                            Character    = $Character
+                            Offset       = $Offset
+                            LineNumber   = $LineNumber
+                            ColumnNumber = $ColumnNumber
+                        }
+                    }
+                }
+            }
+        }
+
+        # Formats the code point of a character as four lowercase hexadecimal digits.
+        function CharToHex([Char]$Char) {
+            ([Int][Char]$Char).ToString('x4')
+        }
+
+        # Returns the ASCII replacement for a character: a fixed substitute for the listed spaces,
+        # dashes and quotes, otherwise the first character of its canonical decomposition,
+        # or '_' when that character is not ASCII either.
+        function SuggestedASCII([Char]$Char) {
+            switch ([Int]$Char) {
+                0x00A0 { ' ' }
+                0x1806 { '-' }
+                0x2010 { '-' }
+                0x2011 { '-' }
+                0x2012 { '-' }
+                0x2013 { '-' }
+                0x2014 { '-' }
+                0x2015 { '-' }
+                0x2016 { '-' } # NOTE(review): U+2016 is DOUBLE VERTICAL LINE rather than a dash - confirm
+                0x2212 { '-' }
+                0x2018 { "'" }
+                0x2019 { "'" }
+                0x201A { "'" }
+                0x201B { "'" }
+                0x201C { '"' }
+                0x201D { '"' }
+                0x201E { '"' }
+                0x201F { '"' }
+                Default {
+                    # FormD splits a precomposed letter into its base letter followed by combining marks.
+                    $ASCII = $Char.ToString().Normalize([System.Text.NormalizationForm]::FormD)[0]
+                    if ($ASCII -le 0x7F) { $ASCII } else { '_' }
+                }
+            }
+        }
+    }
+
+    Process {
+        # Lines and columns are counted from the start of Extent.Text, which only matches the file
+        # coordinates for the root script block. Nested script blocks are part of the root's text,
+        # so analyzing them again would repeat their violations at shifted positions.
+        if ($ScriptBlockAst.Parent) { return }
+
+        # The raw text is scanned instead of the tokens because, like the AST parser,
+        # the tokenizer doesn't capture (smart) quotes.
+        $Text = $ScriptBlockAst.Extent.Text
+        $Violations = GetNonASCIIPositions $Text
+        [Collections.Generic.List[Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord]]@(
+            Foreach ($Violation in $Violations) {
+                # Widen the violation to the run of word characters around it; that word is
+                # reported in the message and serves as the rule suppression id.
+                For ($i = $Violation.Offset - 1; $i -ge 0; $i--) { if ($Text[$i] -NotMatch '\w') { break } }
+                $Start = $i + 1
+                For ($i = $Violation.Offset + 1; $i -lt $Text.Length; $i++) { if ($Text[$i] -NotMatch '\w') { break } }
+                $Length = $i - $Start
+                $Word = $Text.SubString($Start, $Length)
+
+                $StartPosition = [System.Management.Automation.Language.ScriptPosition]::new(
+                    $Null,
+                    $Violation.LineNumber,
+                    $Violation.ColumnNumber,
+                    $Text
+                )
+                $EndPosition = [System.Management.Automation.Language.ScriptPosition]::new(
+                    $Null,
+                    $Violation.LineNumber,
+                    ($Violation.ColumnNumber + 1),
+                    $Text
+                )
+                $Extent = [System.Management.Automation.Language.ScriptExtent]::new($StartPosition, $EndPosition)
+                $Character = $Violation.Character
+                $UniCode = "U+$(CharToHex $Character)"
+                $SuggestedASCII = SuggestedASCII $Character
+                $AscCode = "U+$(CharToHex $SuggestedASCII)"
+                [Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord]@{
+                    Message              = "Non-ASCII character $UniCode found in: $Word"
+                    Extent               = $Extent
+                    RuleName             = 'PSUseASCII'
+                    Severity             = 'Information'
+                    RuleSuppressionID    = $Word
+                    SuggestedCorrections = [System.Collections.ObjectModel.Collection[Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.CorrectionExtent]](
+                        [Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.CorrectionExtent]::New(
+                            $Violation.LineNumber,
+                            $Violation.LineNumber,
+                            $Violation.ColumnNumber,
+                            ($Violation.ColumnNumber + 1),
+                            "$SuggestedASCII",
+                            "Replace '$Character' ($UniCode) with '$SuggestedASCII' ($AscCode)"
+                        )
+                    )
+                }
+            }
+        )
+    }
+}
+Export-ModuleMember -Function Measure-*