From b04b04dd5f1a4d27e506335939a0b4dad0b000c0 Mon Sep 17 00:00:00 2001
From: iRon7 <boderonald@gmail.com>
Date: Wed, 1 May 2024 11:49:49 +0200
Subject: [PATCH] Use ASCII

See: https://github.com/PowerShell/PSScriptAnalyzer/issues/1999
---
 UseASCII.Tests.ps1 |  93 ++++++++++++++++++++++++++++++++
 UseASCII.psm1      | 132 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 225 insertions(+)
 create mode 100644 UseASCII.Tests.ps1
 create mode 100644 UseASCII.psm1
diff --git a/UseASCII.Tests.ps1 b/UseASCII.Tests.ps1
new file mode 100644
index 0000000..5e09547
--- /dev/null
+++ b/UseASCII.Tests.ps1
@@ -0,0 +1,93 @@
+#Requires -Modules @{ModuleName="Pester"; ModuleVersion="5.0.0"}
+
+Describe 'UseASCII' {
+
+    BeforeAll { # Only a unique path is built here; the tests create the file with Set-Content, so no stray .tmp file is left behind
+        $TemporaryFile = Join-Path ([System.IO.Path]::GetTempPath()) "UseASCII.$([System.Guid]::NewGuid().ToString('N')).ps1"
+    }
+
+    Context 'Positives' {
+
+        It 'Smart characters' { # analyzes a one-line script whose string literal contains a non-ASCII letter
+            $Result = Invoke-ScriptAnalyzer -CustomRulePath .\UseASCII.psm1 -ScriptDefinition { Write-Host 'coöperate' }.ToString()
+            $Result.RuleName | Should -Be 'PSUseASCII' # the name the custom rule puts in its records
+            $Result.Severity | Should -Be 'Information' # the severity the custom rule puts in its records
+        }
+
+        It 'Fix' {
+            Set-Content -LiteralPath $TemporaryFile -Encoding utf8 -NoNewline -Value {
+                <#
+                    .SYNOPSIS
+                    Use ASCII test
+                    .DESCRIPTION
+                    The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
+                    Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
+                    such as "coöperate", without which the <oo> letter sequence could be misinterpreted to be pronounced
+                #>
+
+                # [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'coöperate')]
+                Param()
+
+                Write-Host “test” –ForegroundColor ‘Red’ -BackgroundColor ‘Green’
+                Write-Host 'No-break space'
+            }.ToString()
+            Invoke-ScriptAnalyzer -Fix -CustomRulePath .\UseASCII.psm1 -Path $TemporaryFile
+            Get-Content -Raw -Literal $TemporaryFile | Should -be {
+                <#
+                    .SYNOPSIS
+                    Use ASCII test
+                    .DESCRIPTION
+                    The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
+                    Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
+                    such as "cooperate", without which the <oo> letter sequence could be misinterpreted to be pronounced
+                #>
+
+                # [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'cooperate')]
+                Param()
+
+                Write-Host "test" -ForegroundColor 'Red' -BackgroundColor 'Green'
+                Write-Host 'No-break space'
+            }.ToString()
+        }
+
+        It 'Suppress' {
+            Set-Content -LiteralPath $TemporaryFile -Encoding utf8 -NoNewline -Value {
+                <#
+                    .SYNOPSIS
+                    Use ASCII test
+                    .DESCRIPTION
+                    The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
+                    Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
+                    such as "coöperate", without which the <oo> letter sequence could be misinterpreted to be pronounced
+                #>
+
+                [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'coöperate')]
+                Param()
+
+                Write-Host “test” –ForegroundColor ‘Red’ -BackgroundColor ‘Green’
+                Write-Host 'No-break space'
+            }.ToString()
+            Invoke-ScriptAnalyzer -Fix -CustomRulePath .\UseASCII.psm1 -Path $TemporaryFile -ErrorAction SilentlyContinue
+            Get-Content -Raw -Literal $TemporaryFile | Should -be {
+                <#
+                    .SYNOPSIS
+                    Use ASCII test
+                    .DESCRIPTION
+                    The main use of diacritics in Latin script is to change the sound-values of the letters to which they are added.
+                    Historically, English has used the diaeresis diacritic to indicate the correct pronunciation of ambiguous words,
+                    such as "coöperate", without which the <oo> letter sequence could be misinterpreted to be pronounced
+                #>
+
+                [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseAscii', 'coöperate')]
+                Param()
+
+                Write-Host "test" -ForegroundColor 'Red' -BackgroundColor 'Green'
+                Write-Host 'No-break space'
+            }.ToString()
+        }
+   }
+
+    AfterAll { # Removes the script file written by the tests so that repeated runs do not pile up temporary files
+        if (Test-Path -LiteralPath $TemporaryFile) { Remove-Item -LiteralPath $TemporaryFile }
+    }
+}
\ No newline at end of file
diff --git a/UseASCII.psm1 b/UseASCII.psm1
new file mode 100644
index 0000000..882a35b
--- /dev/null
+++ b/UseASCII.psm1
@@ -0,0 +1,143 @@
+#Requires -Version 5.0
+
+function Measure-UseASCII {
+<#
+    .SYNOPSIS
+    Use ASCII characters
+    .DESCRIPTION
+    Validates if only ASCII characters (U+0000..U+007F) are used and reveals the position of any
+    violation, along with a suggested ASCII replacement.
+    Only a root script block (one without a parent AST) is scanned: its extent text already
+    includes the text of every nested script block.
+    .PARAMETER ScriptBlockAst
+    The script block AST whose extent text is checked.
+    .INPUTS
+    [System.Management.Automation.Language.ScriptBlockAst]
+    .OUTPUTS
+    [Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord]
+#>
+
+    [CmdletBinding()]
+    [OutputType([Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord])]
+    Param (
+        [Parameter(Mandatory = $true)]
+        [ValidateNotNullOrEmpty()]
+        [System.Management.Automation.Language.ScriptBlockAst]
+        $ScriptBlockAst
+    )
+    Begin {
+        # Emits one object per character above U+007F, with its offset and 1-based line and column.
+        function GetNonASCIIPositions ([String]$Text) {
+            $LF  = [Char]0x0A
+            $DEL = [Char]0x7F
+            # The column is incremented before a character is examined, so it starts at 0: the
+            # first character of the first line is column 1, just like on every following line.
+            $LineNumber = 1; $ColumnNumber = 0
+            for ($Offset = 0; $Offset -lt $Text.Length; $Offset++) {
+                $Character = $Text[$Offset]
+                if ($Character -eq $LF) {
+                    $LineNumber++
+                    $ColumnNumber = 0
+                }
+                else {
+                    $ColumnNumber++
+                    if ($Character -gt $DEL) {
+                        [PSCustomObject]@{
+                            Character    = $Character
+                            Offset       = $Offset
+                            LineNumber   = $LineNumber
+                            ColumnNumber = $ColumnNumber
+                        }
+                    }
+                }
+            }
+        }
+
+        # Formats the code point of a character as four lowercase hexadecimal digits.
+        function CharToHex([Char]$Char) {
+            ([Int][Char]$Char).ToString('x4')
+        }
+
+        # Returns an ASCII substitute for a character: a look-alike for the listed typographic
+        # spaces, dashes and quotes, otherwise the first character of its decomposition or '_'.
+        function SuggestedASCII([Char]$Char) {
+            switch ([Int]$Char) {
+                0x00A0 { ' ' }
+                0x1806 { '-' }
+                0x2010 { '-' }
+                0x2011 { '-' }
+                0x2012 { '-' }
+                0x2013 { '-' }
+                0x2014 { '-' }
+                0x2015 { '-' }
+                0x2016 { '-' }   # NOTE(review): U+2016 is DOUBLE VERTICAL LINE, not a dash - confirm this mapping
+                0x2212 { '-' }
+                0x2018 { "'" }
+                0x2019 { "'" }
+                0x201A { "'" }
+                0x201B { "'" }
+                0x201C { '"' }
+                0x201D { '"' }
+                0x201E { '"' }
+                0x201F { '"' }
+                Default {
+                    # Canonical decomposition (FormD) puts the base letter first, e.g. 'o' for an 'o' with diaeresis.
+                    $ASCII = $Char.ToString().Normalize([System.text.NormalizationForm]::FormD)[0]
+                    if ($ASCII -le 0x7F) { $ASCII } else { '_' }
+                }
+            }
+        }
+    }
+
+    Process {
+        # The rule may be invoked for nested script blocks as well; their text is part of the root
+        # extent and their offsets are relative, so rescanning would duplicate and misplace records.
+        if ($ScriptBlockAst.Parent) { return }
+
+        # The raw text is scanned because the tokenizer doesn't capture (smart) quotes.
+        $Text = $ScriptBlockAst.Extent.Text
+        $Violations = GetNonASCIIPositions $Text
+        [Collections.Generic.List[Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord]]@(
+            Foreach ($Violation in $Violations) {
+                # Expand to the surrounding word; it is reported and serves as the suppression id.
+                For ($i = $Violation.Offset - 1; $i -ge 0; $i--) { if ($Text[$i] -NotMatch '\w') { break } }
+                $Start = $i + 1
+                For ($i = $Violation.Offset + 1; $i -lt $Text.Length; $i++) { if ($Text[$i] -NotMatch '\w') { break } }
+                $Word = $Text.SubString($Start, $i - $Start)
+
+                # A script position takes the text of its own line, not the whole script.
+                $LineStart = $Violation.Offset - $Violation.ColumnNumber + 1
+                $LineEnd = $Text.IndexOf([Char]0x0A, $Violation.Offset)
+                if ($LineEnd -lt 0) { $LineEnd = $Text.Length }
+                $Line = $Text.SubString($LineStart, $LineEnd - $LineStart)
+                $StartPosition = [System.Management.Automation.Language.ScriptPosition]::new(
+                    $Null, $Violation.LineNumber, $Violation.ColumnNumber, $Line)
+                $EndPosition = [System.Management.Automation.Language.ScriptPosition]::new(
+                    $Null, $Violation.LineNumber, ($Violation.ColumnNumber + 1), $Line)
+                $Extent = [System.Management.Automation.Language.ScriptExtent]::new($StartPosition, $EndPosition)
+                $Character = $Violation.Character
+                $UniCode   = "U+$(CharToHex $Character)"
+                $SuggestedASCII = SuggestedASCII $Character
+                $AscCode   = "U+$(CharToHex $SuggestedASCII)"
+                [Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord]@{
+                    Message              = "Non-ASCII character $UniCode found in: $Word"
+                    Extent               = $Extent
+                    RuleName             = 'PSUseASCII'
+                    Severity             = 'Information'
+                    RuleSuppressionID    = $Word
+                    SuggestedCorrections = [System.Collections.ObjectModel.Collection[Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.CorrectionExtent]](
+                        [Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.CorrectionExtent]::New(
+                            $Violation.LineNumber,
+                            $Violation.LineNumber,
+                            $Violation.ColumnNumber,
+                            ($Violation.ColumnNumber + 1),
+                            "$SuggestedASCII",
+                            "Replace '$Character' ($UniCode) with '$SuggestedASCII' ($AscCode)"
+                        )
+                    )
+                }
+            }
+        )
+    }
+}
+Export-ModuleMember -Function Measure-*