From e51f3c0c57e94027930603761a5571e03af4b460 Mon Sep 17 00:00:00 2001
From: Jake Bolton
Date: Sun, 10 Dec 2023 15:25:54 -0600
Subject: [PATCH] regex sketch balanced groups

---
Review note: in $re.Balance the group names (Open / Close-Open) were restored
and a leftover [^<>]* was corrected to [^()]*; the blob ids on the "index"
lines below predate these edits.

 .../Regex/Using-BalancedGroupDefintions.ipynb | 120 ++++++++++++++++++
 Pwsh/Regex/Using-BalancedGroupDefintions.ps1  |  64 ++++++++++
 2 files changed, 184 insertions(+)
 create mode 100644 Pwsh/Regex/Using-BalancedGroupDefintions.ipynb
 create mode 100644 Pwsh/Regex/Using-BalancedGroupDefintions.ps1

diff --git a/Pwsh/Regex/Using-BalancedGroupDefintions.ipynb b/Pwsh/Regex/Using-BalancedGroupDefintions.ipynb
new file mode 100644
index 0000000..79b2711
--- /dev/null
+++ b/Pwsh/Regex/Using-BalancedGroupDefintions.ipynb
@@ -0,0 +1,120 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### about\n",
+    "\n",
+    "Using Regex's with Balanced groups"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "dotnet_interactive": {
+     "language": "pwsh"
+    },
+    "polyglot_notebook": {
+     "kernelName": "pwsh"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\u001b[32;1mModuleType\u001b[0m\u001b[32;1m Version \u001b[0m \u001b[32;1;3mPreRelease\u001b[0m\u001b[32;1m Name \u001b[0m\u001b[32;1m ExportedCommands\u001b[0m\n",
+      "\u001b[32;1m----------\u001b[0m \u001b[32;1m------- \u001b[0m \u001b[32;1m----------\u001b[0m \u001b[32;1m---- \u001b[0m \u001b[32;1m----------------\u001b[0m\n",
+      "Script 0.7.8 irregular {Compress-Regex, Export-RegEx…\n",
+      "( path:\"c:\\foo\" && ( ext:ps1 dm:last2days ) )\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "Import-module irregular -passThru\n",
+    "$re = @{}\n",
+    "$Str = @'\n",
+    "( path:\"c:\\foo\" && ( ext:ps1 dm:last2days ) )\n",
+    "'@\n",
+    "\n",
+    "$Str"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "dotnet_interactive": {
+     "language": "pwsh"
+    },
+    "polyglot_notebook": {
+     "kernelName": "pwsh"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(?x)\r\n",
+      "^\r\n",
+      "# any of neither balanced character\r\n",
+      "[^()]*\r\n",
+      "\r\n",
+      "# open: lefty parens, closing: righty parens\r\n",
+      "\\(\r\n",
+      "(?.*)\r\n",
+      "\\)\r\n",
+      "\r\n",
+      "$\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "$re.Balance = @'\n",
+    "(?x)\n",
+    " ^\n",
+    " # any of neither balanced character\n",
+    " [^()]*\n",
+    "\n",
+    " # open: lefty parens, closing: righty parens\n",
+    " \\(\n",
+    " (?.*)\n",
+    " \\)\n",
+    "\n",
+    " $\n",
+    "'@\n",
+    "\n",
+    "$re.Balance"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".NET (PowerShell)",
+   "language": "PowerShell",
+   "name": ".net-pwsh"
+  },
+  "language_info": {
+   "name": "polyglot-notebook"
+  },
+  "polyglot_notebook": {
+   "kernelInfo": {
+    "defaultKernelName": "pwsh",
+    "items": [
+     {
+      "aliases": [],
+      "languageName": "pwsh",
+      "name": "pwsh"
+     }
+    ]
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Pwsh/Regex/Using-BalancedGroupDefintions.ps1 b/Pwsh/Regex/Using-BalancedGroupDefintions.ps1
new file mode 100644
index 0000000..6a1a665
--- /dev/null
+++ b/Pwsh/Regex/Using-BalancedGroupDefintions.ps1
@@ -0,0 +1,64 @@
+Import-Module irregular
+# new-Regex -Pattern (New-Regex -NotLiteralCharacter '(', ')') -Repeat *
+$Re = @{}
+$Ex = @{
+    First = '( ext:ps1 ( dm:last2weeks ))'
+}
+# scratch: one outer '( ... )' pair only. NOTE(review): '(?.*)' is not a valid .NET group; its <name> looks lost - confirm.
+$re.scratch = @'
+(?x)
+    ^
+    # any of neither balanced character
+    [^()]*
+
+    # open: lefty parens, closing: righty parens
+    \(
+    (?.*)
+    \)
+
+    $
+'@
+# Balance: parenthesis variant of the linked balancing-group example; (?(Open)(?!)) fails the match while any '(' is still unclosed.
+$re.Balance = @'
+(?x)
+^
+# https://regex101.com/r/dTWnVb/1
+# example: https://learn.microsoft.com/en-us/dotnet/standard/base-types/grouping-constructs-in-regular-expressions#balancing-group-definitions
+
+# any-non of neither balanced character
+[^()]*
+
+# open: lefty parens, closing: righty parens
+
+(?<Open> \( )
+# any-non
+[^()]*
+
+(
+    (?<Open> \( )
+    # any-non
+    [^()]*
+)+
+# Match a right parenthesis, assign the substring between the Open group and the current group to the Close group, and delete the definition of the Open group.
+#
+(?<Close-Open> \) )
+[^()]*
+(
+    (?<Close-Open> \) )
+    [^()]*
+)+
+
+(
+    (
+        (?<Open> \( )
+        [^()]*
+    )+
+    (
+        (?<Close-Open> \) )
+        [^()]*
+    )+
+)*
+(?(Open)(?!))
+$
+'@
+$re.Balance