From fdb4e9da4d035afc38f1941a84adf621fd8c2574 Mon Sep 17 00:00:00 2001 From: Zaid Date: Fri, 28 Aug 2020 20:14:51 +0200 Subject: [PATCH] Include FParsec source code and target netstandard2.0 --- NpgsqlFSharpAnalyzer.sln | 32 +- src/FParsec/AssemblyInfo.fs | 14 + src/FParsec/CharParsers.fs | 1617 +++++++ src/FParsec/CharParsers.fsi | 776 ++++ src/FParsec/Emit.fs | 575 +++ src/FParsec/Error.fs | 363 ++ src/FParsec/Error.fsi | 142 + src/FParsec/FParsec.fsproj | 32 + src/FParsec/Internals.fs | 404 ++ src/FParsec/Primitives.fs | 947 ++++ src/FParsec/Primitives.fsi | 422 ++ src/FParsec/Range.fs | 325 ++ src/FParsec/StaticMapping.fs | 839 ++++ src/FParsec/StaticMapping.fsi | 78 + src/FParsecCS/Buffer.cs | 233 + src/FParsecCS/CaseFoldTable.cs | 1557 +++++++ src/FParsecCS/CharSet.cs | 120 + src/FParsecCS/CharStream.cs | 3925 +++++++++++++++++ src/FParsecCS/CharStreamLT.cs | 1948 ++++++++ src/FParsecCS/Cloning.cs | 1981 +++++++++ src/FParsecCS/ErrorMessage.cs | 274 ++ src/FParsecCS/ErrorMessageList.cs | 111 + src/FParsecCS/Errors.cs | 120 + src/FParsecCS/FParsecCS.csproj | 15 + .../FastGenericEqualityERComparer.cs | 86 + src/FParsecCS/HexFloat.cs | 596 +++ src/FParsecCS/IdentifierValidator.cs | 709 +++ src/FParsecCS/ManyChars.cs | 255 ++ src/FParsecCS/OperatorPrecedenceParser.cs | 771 ++++ src/FParsecCS/Position.cs | 67 + src/FParsecCS/Properties/AssemblyInfo.cs | 32 + src/FParsecCS/Reply.cs | 79 + src/FParsecCS/StringBuffer.cs | 380 ++ src/FParsecCS/Strings.cs | 315 ++ src/FParsecCS/Text.cs | 679 +++ src/FParsecCS/UnmanagedMemoryPool.cs | 40 + .../NpgsqlFSharpAnalyzer.Core.fsproj | 1 + src/NpgsqlFSharpAnalyzer.Core/SqlAnalysis.fs | 7 +- .../NpgsqlFSharpParser.fsproj | 7 +- src/NpgsqlFSharpVs/NpgsqlFSharpVs.csproj | 22 +- src/NpgsqlFSharpVs/paket.references | 1 + .../NpgsqlFSharpAnalyzer.Tests.fsproj | 1 - 42 files changed, 20885 insertions(+), 13 deletions(-) create mode 100644 src/FParsec/AssemblyInfo.fs create mode 100644 src/FParsec/CharParsers.fs create mode 100644 
src/FParsec/CharParsers.fsi create mode 100644 src/FParsec/Emit.fs create mode 100644 src/FParsec/Error.fs create mode 100644 src/FParsec/Error.fsi create mode 100644 src/FParsec/FParsec.fsproj create mode 100644 src/FParsec/Internals.fs create mode 100644 src/FParsec/Primitives.fs create mode 100644 src/FParsec/Primitives.fsi create mode 100644 src/FParsec/Range.fs create mode 100644 src/FParsec/StaticMapping.fs create mode 100644 src/FParsec/StaticMapping.fsi create mode 100644 src/FParsecCS/Buffer.cs create mode 100644 src/FParsecCS/CaseFoldTable.cs create mode 100644 src/FParsecCS/CharSet.cs create mode 100644 src/FParsecCS/CharStream.cs create mode 100644 src/FParsecCS/CharStreamLT.cs create mode 100644 src/FParsecCS/Cloning.cs create mode 100644 src/FParsecCS/ErrorMessage.cs create mode 100644 src/FParsecCS/ErrorMessageList.cs create mode 100644 src/FParsecCS/Errors.cs create mode 100644 src/FParsecCS/FParsecCS.csproj create mode 100644 src/FParsecCS/FastGenericEqualityERComparer.cs create mode 100644 src/FParsecCS/HexFloat.cs create mode 100644 src/FParsecCS/IdentifierValidator.cs create mode 100644 src/FParsecCS/ManyChars.cs create mode 100644 src/FParsecCS/OperatorPrecedenceParser.cs create mode 100644 src/FParsecCS/Position.cs create mode 100644 src/FParsecCS/Properties/AssemblyInfo.cs create mode 100644 src/FParsecCS/Reply.cs create mode 100644 src/FParsecCS/StringBuffer.cs create mode 100644 src/FParsecCS/Strings.cs create mode 100644 src/FParsecCS/Text.cs create mode 100644 src/FParsecCS/UnmanagedMemoryPool.cs diff --git a/NpgsqlFSharpAnalyzer.sln b/NpgsqlFSharpAnalyzer.sln index 014c755..03e99e5 100644 --- a/NpgsqlFSharpAnalyzer.sln +++ b/NpgsqlFSharpAnalyzer.sln @@ -33,7 +33,11 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NpgsqlFSharpVs", "src\Npgsq EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "NpgsqlFSharpAnalyzer.Core", "src\NpgsqlFSharpAnalyzer.Core\NpgsqlFSharpAnalyzer.Core.fsproj", 
"{5964BB56-97B8-4FAE-9933-8113DB11438D}" EndProject -Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "NpgsqlFSharpParser", "src\NpgsqlFSharpParser\NpgsqlFSharpParser.fsproj", "{BC524F8E-6282-4E31-9A0E-29FCE38832E7}" +Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "NpgsqlFSharpParser", "src\NpgsqlFSharpParser\NpgsqlFSharpParser.fsproj", "{BC524F8E-6282-4E31-9A0E-29FCE38832E7}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FParsecCS", "src\FParsecCS\FParsecCS.csproj", "{C5EB813F-4278-4EE7-925B-6757BAD0FE9B}" +EndProject +Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "FParsec", "src\FParsec\FParsec.fsproj", "{9C8E7641-9DC8-470C-8009-71A747C01DC5}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -117,6 +121,30 @@ Global {BC524F8E-6282-4E31-9A0E-29FCE38832E7}.Release|x64.Build.0 = Release|Any CPU {BC524F8E-6282-4E31-9A0E-29FCE38832E7}.Release|x86.ActiveCfg = Release|Any CPU {BC524F8E-6282-4E31-9A0E-29FCE38832E7}.Release|x86.Build.0 = Release|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Debug|x64.ActiveCfg = Debug|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Debug|x64.Build.0 = Debug|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Debug|x86.ActiveCfg = Debug|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Debug|x86.Build.0 = Debug|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Release|Any CPU.Build.0 = Release|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Release|x64.ActiveCfg = Release|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Release|x64.Build.0 = Release|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Release|x86.ActiveCfg = Release|Any CPU + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B}.Release|x86.Build.0 = Release|Any CPU + 
{9C8E7641-9DC8-470C-8009-71A747C01DC5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Debug|x64.ActiveCfg = Debug|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Debug|x64.Build.0 = Debug|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Debug|x86.ActiveCfg = Debug|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Debug|x86.Build.0 = Debug|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Release|Any CPU.Build.0 = Release|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Release|x64.ActiveCfg = Release|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Release|x64.Build.0 = Release|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Release|x86.ActiveCfg = Release|Any CPU + {9C8E7641-9DC8-470C-8009-71A747C01DC5}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -129,6 +157,8 @@ Global {37577282-1289-40DB-AD3D-24499BD09DAE} = {C397A34C-84F1-49E7-AEBC-2F9F2B196216} {5964BB56-97B8-4FAE-9933-8113DB11438D} = {C397A34C-84F1-49E7-AEBC-2F9F2B196216} {BC524F8E-6282-4E31-9A0E-29FCE38832E7} = {C397A34C-84F1-49E7-AEBC-2F9F2B196216} + {C5EB813F-4278-4EE7-925B-6757BAD0FE9B} = {C397A34C-84F1-49E7-AEBC-2F9F2B196216} + {9C8E7641-9DC8-470C-8009-71A747C01DC5} = {C397A34C-84F1-49E7-AEBC-2F9F2B196216} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {BC821061-2FB3-4ABD-9FA1-044D4C59C475} diff --git a/src/FParsec/AssemblyInfo.fs b/src/FParsec/AssemblyInfo.fs new file mode 100644 index 0000000..25b6cec --- /dev/null +++ b/src/FParsec/AssemblyInfo.fs @@ -0,0 +1,14 @@ +namespace FParsec + +open System.Reflection +open System.Runtime.CompilerServices +open System.Runtime.InteropServices + +[] + +#if LOW_TRUST + [] + [] +#endif +[] +do () \ No newline at end of file diff --git 
a/src/FParsec/CharParsers.fs b/src/FParsec/CharParsers.fs new file mode 100644 index 0000000..44223a6 --- /dev/null +++ b/src/FParsec/CharParsers.fs @@ -0,0 +1,1617 @@ +// Copyright (c) Stephan Tolksdorf 2007-2019 +// License: Simplified BSD License. See accompanying documentation. + +[] +module FParsec.CharParsers + +open System.Diagnostics +open System.Text +open System.Text.RegularExpressions +open System.Runtime.CompilerServices // for MethodImplAttribute + +#if LOW_TRUST +#else +open Microsoft.FSharp.NativeInterop +#endif + +open FParsec +open FParsec.Internals +open FParsec.Error +open FParsec.Primitives + +#nowarn "9" // "Uses of this construct may result in the generation of unverifiable .NET IL code." +#nowarn "51" // "The address-of operator may result in non-verifiable code." + +// ================ +// Helper functions +// ================ + +[] +let EOS = '\uffff' + +let foldCase = Text.FoldCase : string -> string +let normalizeNewlines = Text.NormalizeNewlines + +let floatToHexString = HexFloat.DoubleToHexString +let floatOfHexString = HexFloat.DoubleFromHexString + +let float32ToHexString = HexFloat.SingleToHexString +let float32OfHexString = HexFloat.SingleFromHexString + +// ======================== +// Running parsers on input +// ======================== + +[] +type ParserResult<'Result,'UserState> = + | Success of 'Result * 'UserState * Position + | Failure of string * ParserError * 'UserState + with + member private t.StructuredFormatDisplay = + match t with + | Success(r,_,_) -> + if typeof<'Result> = typeof then "Success: ()" + else sprintf "Success: %A" r + | Failure(msg,_,_) -> + sprintf "Failure:\n%s" msg + +let internal applyParser (parser: Parser<'Result,'UserState>) (stream: CharStream<'UserState>) = + let reply = parser stream + if reply.Status = Ok then + Success(reply.Result, stream.UserState, stream.Position) + else + let error = ParserError(stream.Position, stream.UserState, reply.Error) + Failure(error.ToString(stream), error, 
stream.UserState) + +let runParserOnString (parser: Parser<'Result,'UserState>) (ustate: 'UserState) (streamName: string) (chars: string) = + CharStream.ParseString(chars, 0, chars.Length, applyParser parser, ustate, streamName) + +let runParserOnSubstring (parser: Parser<'Result,'UserState>) (ustate: 'UserState) (streamName: string) (chars: string) (index: int) length = + CharStream.ParseString(chars, index, length, applyParser parser, ustate, streamName) + +let runParserOnStream (parser: Parser<'Result,'UserState>) (ustate: 'UserState) (streamName: string) (byteStream: System.IO.Stream) (encoding: System.Text.Encoding) = +#if LOW_TRUST + let +#else + use +#endif + stream = new CharStream<'UserState>(byteStream, encoding) + stream.UserState <- ustate + stream.Name <- streamName + applyParser parser stream + +#if PCL +#else +let runParserOnFile (parser: Parser<'Result,'UserState>) (ustate: 'UserState) (path: string) (encoding: System.Text.Encoding) = +#if LOW_TRUST + let +#else + use +#endif + stream = new CharStream<'UserState>(path, encoding) + stream.UserState <- ustate + applyParser parser stream +#endif + +let run parser (string: string) = + runParserOnString parser () "" string + +// ======= +// Parsers +// ======= + +// ------------------------------------------------------------- +// Reading the input stream position and handling the user state +// ------------------------------------------------------------- + +let getPosition : Parser = + fun stream -> Reply(stream.Position) + +let getUserState : Parser<'u,'u> = + fun stream -> Reply(stream.UserState) + +let setUserState (newUserState: 'u) : Parser = + fun stream -> + stream.UserState <- newUserState + Reply(()) + +let updateUserState (f: 'u -> 'u) : Parser = + fun stream -> + stream.UserState <- f stream.UserState + Reply(()) + +let userStateSatisfies f : Parser = + fun stream -> + let status = if f stream.UserState then Ok else Error + Reply(status, (), NoErrorMessages) + +// -------------------- +// 
Parsing single chars +// -------------------- + +let newlineReturn result : Parser<_,'u> = + fun stream -> + if stream.SkipNewline() then Reply(result) + else Reply(Error, Errors.ExpectedNewline) + +let newline<'u> = newlineReturn '\n' : Parser<_,'u> +let skipNewline<'u> = newlineReturn () : Parser<_,'u> + +let unicodeNewlineReturn result : Parser<_,'u> = + fun stream -> + if stream.SkipUnicodeNewline() then Reply(result) + else Reply(Error, Errors.ExpectedNewline) + +let unicodeNewline<'u> = unicodeNewlineReturn '\n' : Parser<_,'u> +let skipUnicodeNewline<'u> = unicodeNewlineReturn () : Parser<_,'u> + +let internal charReturnE (c: char) result error : Parser<'a,'u> = + fun stream -> + if stream.Skip(c) then Reply(result) + else Reply(Error, error) + +let charReturn c result : Parser<'a,'u> = + match c with + | '\r' | '\n' -> newlineReturn result + | EOS -> invalidArg "c" "The char '\uffff' (EOS) is not a valid argument for the pchar/skipChar/charReturn parser. If you want to check for the end of the stream, consider using the `eof` parser." 
+ | _ -> charReturnE c result (expectedString (string c)) + +let pchar c = charReturn c c +let skipChar c = charReturn c () + + +/// returns true for chars '\u000E' - '\ufffe' +let inline internal isCertainlyNoNLOrEOS (c: char) = + // '\n' = '\u000A', '\r' = '\u000D' + unativeint c - 0xEun < unativeint EOS - 0xEun + +let anyChar : Parser = + fun stream -> + let c = stream.ReadCharOrNewline() + if c <> EOS then Reply(c) + else Reply(Error, Errors.ExpectedAnyChar) + +let skipAnyChar : Parser = + fun stream -> + if stream.ReadCharOrNewline() <> EOS then Reply(()) + else Reply(Error, Errors.ExpectedAnyChar) + + +// doesn't check for newlines or EOS +let +#if NOINLINE +#else + inline +#endif + internal fastInlineSatisfyE f error : Parser = + fun stream -> + let c = stream.Peek() + if f c then + stream.Skip() + Reply(c) + else + Reply(Error, error) + +let internal satisfyE f error : Parser = + fun stream -> + let mutable reply = Reply() + match stream.Peek() with + | c when isCertainlyNoNLOrEOS c -> + if f c then + stream.Skip() + reply.Status <- Ok + reply.Result <- c + else + reply.Error <- error + | '\r' | '\n' -> + if f '\n' then + stream.SkipNewline() |> ignore + reply.Status <- Ok + reply.Result <- '\n' + else + reply.Error <- error + | c -> + if c <> EOS && f c then + stream.Skip() + reply.Status <- Ok + reply.Result <- c + else + reply.Error <- error + reply + +let internal skipSatisfyE f error : Parser = + fun stream -> + let mutable reply = Reply() + match stream.Peek() with + | c when isCertainlyNoNLOrEOS c -> + if f c then + stream.Skip() + reply.Status <- Ok + else + reply.Error <- error + | '\r' | '\n' -> + if f '\n' then + stream.SkipNewline() |> ignore + reply.Status <- Ok + else + reply.Error <- error + | c -> + if c <> EOS && f c then + stream.Skip() + reply.Status <- Ok + else + reply.Error <- error + reply + +let satisfy f = satisfyE f NoErrorMessages +let satisfyL f label = satisfyE f (expected label) + +let skipSatisfy f = skipSatisfyE f 
NoErrorMessages +let skipSatisfyL f label = skipSatisfyE f (expected label) + + +let private charsToString (chars: seq) = +#if PCL + match box chars with +#else + match chars with +#endif + | :? string as str -> str + | _ -> new string(Array.ofSeq chars) + +let private stringToChars (str: string) = +#if PCL + match box str with + | :? seq as chars -> chars + | _ -> seq { for i = 0 to str.Length - 1 do yield str.[i] } +#else + str +#endif + + +let isAnyOf (chars: seq) = +#if LOW_TRUST + let cs = new CharSet(charsToString chars) + fun c -> cs.Contains(c) +#else + #if USE_STATIC_MAPPING_FOR_IS_ANY_OF + StaticMapping.createStaticCharIndicatorFunction false chars + #else + let cs = new CharSet(charsToString chars) + fun c -> cs.Contains(c) + #endif +#endif + +let isNoneOf (chars: seq) = +#if LOW_TRUST + let cs = new CharSet(charsToString chars) + fun c -> not (cs.Contains(c)) +#else + #if USE_STATIC_MAPPING_FOR_IS_ANY_OF + StaticMapping.createStaticCharIndicatorFunction true chars + #else + let cs = new CharSet(charsToString chars) + fun c -> not (cs.Contains(c)) + #endif +#endif + +let anyOf (chars: seq) = + let str = charsToString chars + let chars = stringToChars str // PCL workaround + satisfyE (isAnyOf chars) (Errors.ExpectedAnyCharIn(str)) + +let skipAnyOf (chars: seq) = + let str = charsToString chars + let chars = stringToChars str // PCL workaround + skipSatisfyE (isAnyOf chars) (Errors.ExpectedAnyCharIn(str)) + +let noneOf (chars: seq) = + let str = charsToString chars + let chars = stringToChars str // PCL workaround + satisfyE (isNoneOf chars) (Errors.ExpectedAnyCharNotIn(str)) + +let skipNoneOf (chars: seq) = + let str = charsToString chars + let chars = stringToChars str // PCL workaround + skipSatisfyE (isNoneOf chars) (Errors.ExpectedAnyCharNotIn(str)) + +let inline isAsciiUpper (c: char) = + uint32 c - uint32 'A' <= uint32 'Z' - uint32 'A' + +let inline isAsciiLower (c: char) = + uint32 c - uint32 'a' <= uint32 'z' - uint32 'a' + +let inline 
isAsciiLetter (c: char) = + let cc = uint32 c ||| uint32 ' ' + cc - uint32 'a' <= uint32 'z' - uint32 'a' + +let inline isUpper (c: char) = + isAsciiUpper c || (c > '\u007F' && System.Char.IsUpper(c)) + +let inline isLower (c: char) = + isAsciiLower c || (c > '\u007F' && System.Char.IsLower(c)) + +let inline isLetter (c: char) = + isAsciiLetter c || (c > '\u007F' && System.Char.IsLetter(c)) + +let inline isDigit (c: char) = + uint32 c - uint32 '0' <= uint32 '9' - uint32 '0' + +let inline isHex (c: char) = + let cc = uint32 c ||| uint32 ' ' + isDigit c || cc - uint32 'a' <= uint32 'f' - uint32 'a' + +let inline isOctal (c: char) = + uint32 c - uint32 '0' <= uint32 '7' - uint32 '0' + +let asciiUpper stream = fastInlineSatisfyE isAsciiUpper Errors.ExpectedAsciiUppercaseLetter stream +let asciiLower stream = fastInlineSatisfyE isAsciiLower Errors.ExpectedAsciiLowercaseLetter stream +let asciiLetter stream = fastInlineSatisfyE isAsciiLetter Errors.ExpectedAsciiLetter stream + +// unicode is the default for letters and ascii the default for numbers +let upper stream = fastInlineSatisfyE isUpper Errors.ExpectedUppercaseLetter stream +let lower stream = fastInlineSatisfyE isLower Errors.ExpectedLowercaseLetter stream +let letter stream = fastInlineSatisfyE isLetter Errors.ExpectedLetter stream + +let digit stream = fastInlineSatisfyE isDigit Errors.ExpectedDecimalDigit stream +let hex stream = fastInlineSatisfyE isHex Errors.ExpectedHexadecimalDigit stream +let octal stream = fastInlineSatisfyE isOctal Errors.ExpectedOctalDigit stream + +let tab stream = fastInlineSatisfyE ((=) '\t') Errors.ExpectedTab stream + +let spaces : Parser = + fun stream -> + stream.SkipWhitespace() |> ignore + Reply(()) + +let spaces1 : Parser = + fun stream -> + if stream.SkipWhitespace() then Reply(()) + else Reply(Error, Errors.ExpectedWhitespace) + +let unicodeSpaces : Parser = + fun stream -> + stream.SkipUnicodeWhitespace() |> ignore + Reply(()) + +let unicodeSpaces1 : Parser = + fun stream 
-> + if stream.SkipUnicodeWhitespace() then Reply(()) + else Reply(Error, Errors.ExpectedWhitespace) + +let eof : Parser= + fun stream -> + if stream.IsEndOfStream then Reply(()) + else Reply(Error, Errors.ExpectedEndOfInput) + + +// ------------------------ +// Parsing strings directly +// ------------------------ + +let internal newlineOrEOSCharInStringArg name (arg: string) i = + let msg2 = match arg.[i] with + |'\r'|'\n' -> " may not contain newline chars ('\r' or '\n')." + | EOS -> " may not contain the char '\uffff' (EOS)" + | _ -> failwith "newlineOrEOSCharInStringArg" + raise (System.ArgumentException(concat3 "The string argument to " name msg2)) + +let internal checkStringContainsNoNewlineOrEOSChar s name = + let i = findNewlineOrEOSChar s + if i >= 0 then newlineOrEOSCharInStringArg name s i + +let stringReturn s result : Parser<'a,'u> = + let inline checkNoNewlineOrEOSChar c i = + if not (isCertainlyNoNLOrEOS c) then + match c with + |'\r'|'\n'|EOS -> newlineOrEOSCharInStringArg "pstring/skipString/stringReturn" s i + | _ -> () + + let error = expectedString s + match s.Length with + | 0 -> preturn result + | 1 -> + let c = s.[0] + checkNoNewlineOrEOSChar c 0 + charReturnE c result error + | 2 -> + let c0, c1 = s.[0], s.[1] + checkNoNewlineOrEOSChar c0 0 + checkNoNewlineOrEOSChar c1 1 + let cs = TwoChars(c0, c1) + fun stream -> + if stream.Skip(cs) then Reply(result) + else Reply(Error, error) + | _ -> + checkStringContainsNoNewlineOrEOSChar s "pstring/skipString/stringReturn" + fun stream -> + if stream.Skip(s) then Reply(result) + else Reply(Error, error) + +let pstring s = stringReturn s s +let skipString s = stringReturn s () + +let pstringCI s : Parser = + checkStringContainsNoNewlineOrEOSChar s "pstringCI" + let error = expectedStringCI s + let cfs = foldCase s + fun stream -> + let index0 = stream.IndexToken + if stream.SkipCaseFolded(cfs) then + Reply(stream.ReadFrom(index0)) + else Reply(Error, error) + +let stringCIReturn (s: string) result : 
Parser<'a,'u> = + let error = expectedStringCI s + if s.Length = 1 then + let c = s.[0] + if not (isCertainlyNoNLOrEOS c) then + match c with '\r'|'\n'|EOS -> newlineOrEOSCharInStringArg "skipStringCI/stringCIReturn" s 0 | _ -> () + let cfc = Text.FoldCase(c) + fun stream -> + if stream.SkipCaseFolded(cfc) then Reply(result) + else Reply(Error, error) + else + checkStringContainsNoNewlineOrEOSChar s "skipStringCI/stringCIReturn" + let cfs = foldCase s + fun stream -> + if stream.SkipCaseFolded(cfs) then Reply(result) + else Reply(Error, error) + +let skipStringCI s = stringCIReturn s () + + +let anyString n : Parser = + let error = Errors.ExpectedAnySequenceOfNChars(n) + fun stream -> + let state = stream.State + let str = stream.ReadCharsOrNewlines(n, true) + if str.Length = n then Reply(str) + else + stream.BacktrackTo(state) + Reply(Error, error) + +let skipAnyString n : Parser = + let error = Errors.ExpectedAnySequenceOfNChars(n) + fun stream -> + let state = stream.State + if stream.SkipCharsOrNewlines(n) = n then Reply(()) + else + stream.BacktrackTo(state) + Reply(Error, error) + +let restOfLine skipNewline : Parser<_,_> = + fun stream -> + Reply(stream.ReadRestOfLine(skipNewline)) + +let skipRestOfLine skipNewline : Parser<_,_> = + fun stream -> + stream.SkipRestOfLine(skipNewline) + Reply(()) + +let charsTillString (s: string) skipString maxCount : Parser = + checkStringContainsNoNewlineOrEOSChar s "charsTillString" + if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative.")) + let error = Errors.CouldNotFindString(s) + fun stream -> + let mutable charsBeforeString = null + stream.SkipCharsOrNewlinesUntilString(s, maxCount, true, &charsBeforeString) |> ignore + if isNotNull charsBeforeString then + if skipString then stream.Skip(s.Length) + Reply(charsBeforeString) + else + Reply(Error, error) + +let charsTillStringCI (s: string) skipString maxCount : Parser = + checkStringContainsNoNewlineOrEOSChar s 
"charsTillStringCI" + if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative.")) + let cfs = foldCase s + let error = Errors.CouldNotFindCaseInsensitiveString(s) + fun stream -> + let mutable charsBeforeString = null + stream.SkipCharsOrNewlinesUntilCaseFoldedString(cfs, maxCount, true, &charsBeforeString) |> ignore + if isNotNull charsBeforeString then + if skipString then stream.Skip(s.Length) + Reply(charsBeforeString) + else + Reply(Error, error) + + +let skipCharsTillString (s: string) skipString maxCount : Parser = + checkStringContainsNoNewlineOrEOSChar s "skipCharsTillString" + if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative.")) + let error = Errors.CouldNotFindString(s) + fun stream -> + let mutable foundString = false + stream.SkipCharsOrNewlinesUntilString(s, maxCount, &foundString) |> ignore + if foundString then + if skipString then stream.Skip(s.Length) + Reply(()) + else + Reply(Error, error) + +let skipCharsTillStringCI (s: string) skipString maxCount : Parser = + checkStringContainsNoNewlineOrEOSChar s "skipCharsTillStringCI" + if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative.")) + let cfs = foldCase s + let error = Errors.CouldNotFindCaseInsensitiveString(s) + fun stream -> + let mutable foundString = false + stream.SkipCharsOrNewlinesUntilCaseFoldedString(cfs, maxCount, &foundString) |> ignore + if foundString then + if skipString then stream.Skip(s.Length) + Reply(()) + else + Reply(Error, error) + +let +#if NOINLINE +#else + inline +#endif + internal manySatisfyImpl require1 (f1: char -> bool) (f: char -> bool) error : Parser = + fun stream -> + let str = stream.ReadCharsOrNewlinesWhile(f1, f, true) + if not require1 || str.Length <> 0 then Reply(str) + else Reply(Error, error) + +let +#if NOINLINE +#else + inline +#endif + internal skipManySatisfyImpl require1 (f1: char -> bool) (f: char -> bool) error : 
Parser = + fun stream -> + let n = stream.SkipCharsOrNewlinesWhile(f1, f) + if not require1 || n <> 0 then Reply(()) + else Reply(Error, error) + +let manySatisfy2 f1 f = manySatisfyImpl false f1 f NoErrorMessages +let many1Satisfy2 f1 f = manySatisfyImpl true f1 f NoErrorMessages +let many1Satisfy2L f1 f label = manySatisfyImpl true f1 f (expected label) + +let skipManySatisfy2 f1 f = skipManySatisfyImpl false f1 f NoErrorMessages +let skipMany1Satisfy2 f1 f = skipManySatisfyImpl true f1 f NoErrorMessages +let skipMany1Satisfy2L f1 f label = skipManySatisfyImpl true f1 f (expected label) + +let manySatisfy f = manySatisfy2 f f +let many1Satisfy f = many1Satisfy2 f f +let many1SatisfyL f label = many1Satisfy2L f f label + +let skipManySatisfy f = skipManySatisfy2 f f +let skipMany1Satisfy f = skipMany1Satisfy2 f f +let skipMany1SatisfyL f label = skipMany1Satisfy2L f f label + + +let internal manyMinMaxSatisfy2E minCount maxCount f1 f error : Parser = + if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative.")) + if minCount > 0 then + fun stream -> + let str = stream.ReadCharsOrNewlinesWhile(f1, f, minCount, maxCount, true) + if str.Length <> 0 then Reply(str) + else Reply(Error, error) + else + fun stream -> + Reply(stream.ReadCharsOrNewlinesWhile(f1, f, 0, maxCount, true)) + +let internal skipManyMinMaxSatisfy2E minCount maxCount f1 f error : Parser = + if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative.")) + if minCount > 0 then + fun stream -> + let n = stream.SkipCharsOrNewlinesWhile(f1, f, minCount, maxCount) + if n <> 0 then Reply(()) + else Reply(Error, error) + else + fun stream -> + stream.SkipCharsOrNewlinesWhile(f1, f, 0, maxCount) |> ignore + Reply(()) + +let manyMinMaxSatisfy minCount maxCount f = manyMinMaxSatisfy2E minCount maxCount f f NoErrorMessages +let manyMinMaxSatisfyL minCount maxCount f label = manyMinMaxSatisfy2E minCount maxCount f f (expected label) 
+let manyMinMaxSatisfy2 minCount maxCount f1 f = manyMinMaxSatisfy2E minCount maxCount f1 f NoErrorMessages +let manyMinMaxSatisfy2L minCount maxCount f1 f label = manyMinMaxSatisfy2E minCount maxCount f1 f (expected label) + +let skipManyMinMaxSatisfy minCount maxCount f = skipManyMinMaxSatisfy2E minCount maxCount f f NoErrorMessages +let skipManyMinMaxSatisfyL minCount maxCount f label = skipManyMinMaxSatisfy2E minCount maxCount f f (expected label) +let skipManyMinMaxSatisfy2 minCount maxCount f1 f = skipManyMinMaxSatisfy2E minCount maxCount f1 f NoErrorMessages +let skipManyMinMaxSatisfy2L minCount maxCount f1 f label = skipManyMinMaxSatisfy2E minCount maxCount f1 f (expected label) + + +let internal regexE pattern error : Parser = + let regex = new Regex("\\A" + pattern, RegexOptions.Multiline ||| + RegexOptions.ExplicitCapture) + fun stream -> + let m = stream.Match(regex) + if m.Success then + let str = m.Value + if findNewlineOrEOSChar str < 0 then + if str.Length <> 0 then stream.Skip(str.Length) + Reply(str) + else + let nStr = normalizeNewlines str + let mutable nSkippedChars = 0 + let n = stream.SkipCharsOrNewlines(nStr.Length) + if n = nStr.Length then Reply(nStr) + else Reply(FatalError, messageError "Internal error in the regex parser. 
Please report this error to fparsec@quanttec.com.") + else Reply(Error, error) + +let regex pattern = regexE pattern (Errors.ExpectedStringMatchingRegex(pattern)) +let regexL pattern label = regexE pattern (expected label) + +type private IdFlags = IdentifierValidator.IdentifierCharFlags + +type IdentifierOptions(?isAsciiIdStart, ?isAsciiIdContinue, + #if PCL + #else + ?normalization, + ?normalizeBeforeValidation, + #endif + ?allowJoinControlChars, ?preCheckStart, ?preCheckContinue, ?allowAllNonAsciiCharsInPreCheck, ?label, ?invalidCharMessage) = + // we use match instead of defaultArg here, so that the function wrapper objects only get constructed when needed + let isAsciiIdStart = match isAsciiIdStart with Some v -> v | _ -> IdentifierValidator.IsXIdStartOrSurrogate + let isAsciiIdContinue = match isAsciiIdContinue with Some v -> v | _ -> IdentifierValidator.IsXIdContinueOrSurrogate +#if PCL +#else + let normalizationForm = defaultArg normalization (enum 0) + let normalizeBeforeValidation = defaultArg normalizeBeforeValidation false +#endif + let allowJoinControlChars = defaultArg allowJoinControlChars false + let expectedIdentifierError = expected (defaultArg label Strings.Identifier) + let invalidCharError = messageError (defaultArg invalidCharMessage Strings.IdentifierContainsInvalidCharacterAtIndicatedPosition) + let allowAllNonAsciiCharsInPreCheck = defaultArg allowAllNonAsciiCharsInPreCheck false + + let preCheckStart = if preCheckStart.IsSome then preCheckStart.Value + elif allowAllNonAsciiCharsInPreCheck then isAsciiIdStart + else Unchecked.defaultof<_> + let preCheckContinue = if preCheckContinue.IsSome then preCheckContinue.Value + elif allowAllNonAsciiCharsInPreCheck then isAsciiIdContinue + else Unchecked.defaultof<_> + + let asciiOptions = Array.zeroCreate 128 + do for i = 1 to 127 do + let c = char i + let mutable v = IdFlags.None + if isAsciiIdStart c then v <- v ||| IdFlags.NonContinue + if isAsciiIdContinue c then v <- v ||| IdFlags.Continue + if 
allowAllNonAsciiCharsInPreCheck then + if preCheckStart c then v <- v ||| IdFlags.PreCheckNonContinue + if preCheckContinue c then v <- v ||| IdFlags.PreCheckContinue + asciiOptions.[i] <- v + + let iv = new IdentifierValidator(asciiOptions) + do + #if PCL + #else + iv.NormalizationForm <- normalizationForm + iv.NormalizeBeforeValidation <- normalizeBeforeValidation + #endif + iv.AllowJoinControlCharsAsIdContinueChars <- allowJoinControlChars + + let preCheck1 = + if allowAllNonAsciiCharsInPreCheck then + fun c -> let i = int c + if i <= 0x7f then + // not (x = y) currently yields better code here than (x <> y) + not (asciiOptions.[int c] &&& IdFlags.PreCheckNonContinue = IdFlags.None) + else true + elif isNotNull preCheckStart then preCheckStart + else iv.IsIdStartOrSurrogateFunc + + let preCheck = + if allowAllNonAsciiCharsInPreCheck then + fun c -> let i = int c + if i <= 0x7f then + not (asciiOptions.[i] &&& IdFlags.PreCheckContinue = IdFlags.None) + else true + elif isNotNull preCheckContinue then preCheckContinue + else iv.IsIdContinueOrJoinControlOrSurrogateFunc + + member internal t.IdentifierValidator = iv + member internal t.PreCheck1 = preCheck1 + member internal t.PreCheck = preCheck + member internal t.ExpectedIdentifierError = expectedIdentifierError + member internal t.InvalidCharError = invalidCharError + +let identifier (identifierOptions: IdentifierOptions) : Parser = + let validator = identifierOptions.IdentifierValidator + let preCheck1 = identifierOptions.PreCheck1 + let preCheck = identifierOptions.PreCheck + let expectedIdentifierError = identifierOptions.ExpectedIdentifierError + let invalidCharError = identifierOptions.InvalidCharError + fun stream -> + let str = stream.ReadCharsOrNewlinesWhile(preCheck1, preCheck, true) + if str.Length <> 0 then + let mutable errorPos = 0 + let nstr = validator.ValidateAndNormalize(str, &errorPos) + if isNotNull nstr then Reply(nstr) + else + stream.Skip(errorPos - str.Length) + Reply(FatalError, 
invalidCharError) + else + Reply(Error, expectedIdentifierError) + +// ---------------------------------------------- +// Parsing strings with the help of other parsers +// ---------------------------------------------- + +let manyChars2 p1 p = ManyChars(p1, p).AsFSharpFunc +let manyChars p = manyChars2 p p + +let many1Chars2 p1 p = Many1Chars(p1, p).AsFSharpFunc +let many1Chars p = many1Chars2 p p + +let manyCharsTillApply2 p1 p endp f = ManyCharsTill(p1, p, endp, f).AsFSharpFunc +let manyCharsTillApply p endp f = manyCharsTillApply2 p p endp f +let manyCharsTill2 p1 p endp = manyCharsTillApply2 p1 p endp (fun str _ -> str) +let manyCharsTill p endp = manyCharsTill2 p p endp + +let many1CharsTillApply2 p1 p endp f = Many1CharsTill(p1, p, endp, f).AsFSharpFunc +let many1CharsTillApply p endp f = many1CharsTillApply2 p p endp f +let many1CharsTill2 p1 p endp = many1CharsTillApply2 p1 p endp (fun str _ -> str) +let many1CharsTill p endp = many1CharsTill2 p p endp + + + +let +#if NOINLINE +#else + inline +#endif + internal manyStringsImpl require1 (p1: Parser) (p: Parser) : Parser = + fun stream -> + let mutable stateTag = stream.StateTag + let mutable reply = p1 stream + if reply.Status = Ok then + let result1 = reply.Result + let mutable error = reply.Error + stateTag <- stream.StateTag + reply <- p stream + if reply.Status <> Ok then reply.Result <- result1 + else + let result2 = reply.Result + error <- reply.Error + stateTag <- stream.StateTag + reply <- p stream + if reply.Status <> Ok then reply.Result <- result1 + result2 + else + let result3 = reply.Result + error <- reply.Error + stateTag <- stream.StateTag + reply <- p stream + if reply.Status <> Ok then reply.Result <- concat3 result1 result2 result3 + else + let result4 = reply.Result + error <- reply.Error + stateTag <- stream.StateTag + reply <- p stream + if reply.Status <> Ok then reply.Result <- concat4 result1 result2 result3 result4 + else + let n = 2*(result1.Length + result2.Length + 
result3.Length + result4.Length) + reply.Result.Length + let sb = new StringBuilder(n) + sb.Append(result1).Append(result2).Append(result3).Append(result4).Append(reply.Result) |> ignore + error <- reply.Error + stateTag <- stream.StateTag + reply <- p stream + while reply.Status = Ok do + if stateTag = stream.StateTag then + raiseInfiniteLoopException "manyStrings" stream + error <- reply.Error + sb.Append(reply.Result) |> ignore + stateTag <- stream.StateTag + reply <- p stream + reply.Result <- sb.ToString() + // We assume that the string parser changes the state when it succeeds, + // so we don't need to merge more than 2 error message lists. + if stateTag = stream.StateTag then + if reply.Status = Error then + reply.Status <- Ok + if isNotNull error then + reply.Error <- mergeErrors error reply.Error + elif not require1 && reply.Status = Error && stateTag = stream.StateTag then + reply.Status <- Ok + reply.Result <- "" + reply + +let manyStrings2 p1 p = manyStringsImpl false p1 p +let manyStrings p = manyStrings2 p p +let many1Strings2 p1 p = manyStringsImpl true p1 p +let many1Strings p = many1Strings2 p p + +let +#if NOINLINE +#else + inline +#endif + internal stringsSepByImpl require1 (p: Parser) (sep: Parser) : Parser = + fun stream -> + let mutable stateTag = stream.StateTag + let mutable reply = p stream + if reply.Status = Ok then + let result1 = reply.Result + let mutable error = reply.Error + stateTag <- stream.StateTag + reply <- sep stream + if reply.Status <> Ok then + if stateTag = stream.StateTag then + if reply.Status = Error then + reply.Status <- Ok + reply.Result <- result1 + if isNotNull error then + reply.Error <- mergeErrors error reply.Error + else + // We assume that at least one of the parsers sep and p consume + // input when both are called consecutively and succeed. This + // way we only have to merge a maximum of 3 error message lists. 
+ let mutable result = null + let mutable error0 = error + let mutable stateTag0 = stateTag + let result2 = reply.Result + error <- reply.Error + stateTag <- stream.StateTag + reply <- p stream + if reply.Status = Ok then + let result3 = reply.Result + error0 <- error + stateTag0 <- stateTag + error <- reply.Error + stateTag <- stream.StateTag + reply <- sep stream + if reply.Status <> Ok then result <- concat3 result1 result2 result3 + else + let result4 = reply.Result + error0 <- error + stateTag0 <- stateTag + error <- reply.Error + stateTag <- stream.StateTag + reply <- p stream + if reply.Status = Ok then + let n = 2*(result1.Length + result2.Length + result3.Length + result4.Length) + reply.Result.Length + let sb = new StringBuilder(n) + sb.Append(result1).Append(result2).Append(result3).Append(result4) |> ignore + while reply.Status = Ok do + sb.Append(reply.Result) |> ignore + error0 <- error + stateTag0 <- stateTag + error <- reply.Error + stateTag <- stream.StateTag + reply <- sep stream + if reply.Status <> Ok then result <- sb.ToString() + else + sb.Append(reply.Result) |> ignore + if stateTag0 = stream.StateTag then + raiseInfiniteLoopException "stringsSepBy" stream + error0 <- error + stateTag0 <- stateTag + error <- reply.Error + stateTag <- stream.StateTag + reply <- p stream + if stateTag = stream.StateTag then + if isNotNull result && reply.Status = Error then + reply.Status <- Ok + reply.Result <- result + error <- mergeErrors error reply.Error + if stateTag0 = stateTag then + error <- mergeErrors error0 error + reply.Error <- error + elif not require1 && reply.Status = Error && stateTag = stream.StateTag then + reply.Status <- Ok + reply.Result <- "" + reply + +let stringsSepBy p sep = stringsSepByImpl false p sep +let stringsSepBy1 p sep = stringsSepByImpl true p sep + +let skipped (p: Parser) : Parser = + fun stream -> + let index0 = stream.IndexToken + let line0 = stream.Line + let reply = p stream + if reply.Status = Ok then + let str = 
stream.ReadFrom(index0) + let nstr = if line0 = stream.Line then str + else Text.NormalizeNewlines(str) + Reply(Ok, nstr, reply.Error) + else + Reply(reply.Status, reply.Error) + +let withSkippedString (f: string -> 'a -> 'b) (p: Parser<'a,'u>) : Parser<'b,'u> = + let optF = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f) + fun stream -> + let index0 = stream.IndexToken + let line0 = stream.Line + let reply = p stream + if reply.Status = Ok then + let str = stream.ReadFrom(index0) + let nstr = if line0 = stream.Line then str + else Text.NormalizeNewlines(str) + let result = optF.Invoke(nstr, reply.Result) + Reply(Ok, result, reply.Error) + else + Reply(reply.Status, reply.Error) + +// --------------- +// Parsing numbers +// --------------- + +[] +type NumberLiteralOptions = + | None = 0 + | AllowSuffix = 0b000000000001 + | AllowMinusSign = 0b000000000010 + | AllowPlusSign = 0b000000000100 + | AllowFraction = 0b000000001000 + | AllowFractionWOIntegerPart = 0b000000010000 + | AllowExponent = 0b000000100000 + | AllowHexadecimal = 0b000001000000 + | AllowBinary = 0b000010000000 + | AllowOctal = 0b000100000000 + | AllowInfinity = 0b001000000000 + | AllowNaN = 0b010000000000 + + | IncludeSuffixCharsInString = 0b100000000000 + + | DefaultInteger = 0b000111000110 + | DefaultUnsignedInteger = 0b000111000000 + | DefaultFloat = 0b011001101110 + +type internal NLO = NumberLiteralOptions + +[] +type NumberLiteralResultFlags = + | None = 0 + | SuffixLengthMask = 0b0000000000001111 + | HasMinusSign = 0b0000000000010000 + | HasPlusSign = 0b0000000000100000 + | HasIntegerPart = 0b0000000001000000 + | HasFraction = 0b0000000010000000 + | HasExponent = 0b0000000100000000 + | IsDecimal = 0b0000001000000000 + | IsHexadecimal = 0b0000010000000000 + | IsBinary = 0b0000100000000000 + | IsOctal = 0b0001000000000000 + | BaseMask = 0b0001111000000000 + | IsInfinity = 0b0010000000000000 + | IsNaN = 0b0100000000000000 + +type internal NLF = NumberLiteralResultFlags + +type NumberLiteral(string, 
info, suffixChar1, suffixChar2, suffixChar3, suffixChar4) = + member t.String = string + + member t.SuffixLength = int (info &&& NLF.SuffixLengthMask) + member t.SuffixChar1 = suffixChar1 + member t.SuffixChar2 = suffixChar2 + member t.SuffixChar3 = suffixChar3 + member t.SuffixChar4 = suffixChar4 + + member t.Info = info + + member t.HasMinusSign = int (info &&& NLF.HasMinusSign) <> 0 + member t.HasPlusSign = int (info &&& NLF.HasPlusSign) <> 0 + member t.HasIntegerPart = int (info &&& NLF.HasIntegerPart) <> 0 + member t.HasFraction = int (info &&& NLF.HasFraction) <> 0 + member t.HasExponent = int (info &&& NLF.HasExponent) <> 0 + member t.IsInteger = int (info &&& (NLF.HasFraction ||| NLF.HasExponent)) = 0 // HasIntegerPart must be set if HasFraction and HasExponent both aren't + member t.IsDecimal = int (info &&& NLF.IsDecimal) <> 0 + member t.IsHexadecimal = int (info &&& NLF.IsHexadecimal) <> 0 + member t.IsBinary = int (info &&& NLF.IsBinary) <> 0 + member t.IsOctal = int (info &&& NLF.IsOctal) <> 0 + member t.IsNaN = int (info &&& NLF.IsNaN) <> 0 + member t.IsInfinity = int (info &&& NLF.IsInfinity) <> 0 + + override t.Equals(other: obj) = + match other with + | :? 
NumberLiteral as other -> + t.String = other.String + && t.Info = other.Info + && t.SuffixChar1 = other.SuffixChar1 + && t.SuffixChar2 = other.SuffixChar2 + && t.SuffixChar3 = other.SuffixChar3 + && t.SuffixChar4 = other.SuffixChar4 + | _ -> false + + override t.GetHashCode() = + if isNotNull string then string.GetHashCode() else 0 + +let numberLiteralE (opt: NumberLiteralOptions) (errorInCaseNoLiteralFound: ErrorMessageList) (stream: CharStream<'u>) = + let index0 = stream.IndexToken + let stateTag = stream.StateTag + let mutable c = stream.Peek() + let mutable error = NoErrorMessages + let mutable flags = NLF.None + + if c = '-' && (opt &&& NLO.AllowMinusSign) <> NLO.None then + flags <- NLF.HasMinusSign + c <- stream.SkipAndPeek() + elif c = '+' && (opt &&& NLO.AllowPlusSign) <> NLO.None then + flags <- NLF.HasPlusSign + c <- stream.SkipAndPeek() + + let allowStartingPoint = NLO.AllowFraction ||| NLO.AllowFractionWOIntegerPart // for starting point both flags are required + + if isDigit c || (c = '.' && (opt &&& allowStartingPoint) = allowStartingPoint) then + let mutable c1 = '\u0000' + if c <> '0' + || (c1 <- stream.SkipAndPeek(); + c1 <= '9' + || (opt &&& (NLO.AllowBinary ||| NLO.AllowOctal ||| NLO.AllowHexadecimal)) = NLO.None + || ((int c1 ||| int ' ') = int 'e')) + then + flags <- flags ||| NLF.IsDecimal + if c <> '.' then + flags <- flags ||| NLF.HasIntegerPart + if c <> '0' then + c <- stream.SkipAndPeek() + else + c <- c1 + while isDigit c do + c <- stream.SkipAndPeek() + if c = '.' && (opt &&& NLO.AllowFraction) <> NLO.None then + flags <- flags ||| NLF.HasFraction + c <- stream.SkipAndPeek() + if isDigit c then + c <- stream.SkipAndPeek() + elif (flags &&& NLF.HasIntegerPart) = NLF.None then + // at least one digit before or after the . 
is required + error <- Errors.ExpectedDecimalDigit + while isDigit c do + c <- stream.SkipAndPeek() + if (int c ||| int ' ') = int 'e' && isNull error && (opt &&& NLO.AllowExponent) <> NLO.None then + flags <- flags ||| NLF.HasExponent + c <- stream.SkipAndPeek() + if c = '-' || c = '+' then + c <- stream.SkipAndPeek() + if not (isDigit c) then + error <- Errors.ExpectedDecimalDigit + while isDigit c do + c <- stream.SkipAndPeek() + else + match int c1 ||| int ' ' with + | 0x78 (* 'x' *) when (opt &&& NLO.AllowHexadecimal) <> NLO.None -> + flags <- flags ||| NLF.IsHexadecimal + c <- stream.SkipAndPeek() + if isHex c then + flags <- flags ||| NLF.HasIntegerPart + c <- stream.SkipAndPeek() + elif (opt &&& NLO.AllowFractionWOIntegerPart) = NLO.None then + // integer part required + error <- Errors.ExpectedHexadecimalDigit + while isHex c do + c <- stream.SkipAndPeek() + if c = '.' && isNull error && (opt &&& NLO.AllowFraction) <> NLO.None then + flags <- flags ||| NLF.HasFraction + c <- stream.SkipAndPeek() + if isHex c then + c <- stream.SkipAndPeek() + elif (flags &&& NLF.HasIntegerPart) = NLF.None then + // at least one digit before or after the . 
is required + error <- Errors.ExpectedHexadecimalDigit + while isHex c do + c <- stream.SkipAndPeek() + elif (flags &&& NLF.HasIntegerPart) = NLF.None then + // we neither have an integer part nor a fraction + error <- Errors.ExpectedHexadecimalDigit + if (int c ||| int ' ') = int 'p' && isNull error && (opt &&& NLO.AllowExponent) <> NLO.None then + flags <- flags ||| NLF.HasExponent + c <- stream.SkipAndPeek() + if c = '-' || c = '+' then + c <- stream.SkipAndPeek() + if not (isDigit c) then + error <- Errors.ExpectedDecimalDigit + while isDigit c do + c <- stream.SkipAndPeek() + | 0x6f (* 'o' *) when (opt &&& NLO.AllowOctal) <> NLO.None -> + flags <- flags ||| NLF.IsOctal + c <- stream.SkipAndPeek() + if isOctal c then + flags <- flags ||| NLF.HasIntegerPart + c <- stream.SkipAndPeek() + else + error <- Errors.ExpectedOctalDigit + while isOctal c do + c <- stream.SkipAndPeek() + | 0x62 (* 'b' *) when (opt &&& NLO.AllowBinary) <> NLO.None -> + flags <- flags ||| NLF.IsBinary + c <- stream.SkipAndPeek() + if c = '0' || c = '1' then + flags <- flags ||| NLF.HasIntegerPart + c <- stream.SkipAndPeek() + else + error <- Errors.ExpectedBinaryDigit + while c = '0' || c = '1' do + c <- stream.SkipAndPeek() + | _ -> + flags <- flags ||| (NLF.IsDecimal ||| NLF.HasIntegerPart) + c <- c1 + + if isNull error then + if (opt &&& NLO.AllowSuffix) = NLO.None || not (isAsciiLetter c) then + let str = stream.ReadFrom(index0) + Reply(NumberLiteral(str, flags, EOS, EOS, EOS, EOS)) + else + let mutable str = if (opt &&& NLO.IncludeSuffixCharsInString) <> NLO.None then null + else stream.ReadFrom(index0) + let mutable nSuffix = 1 + let mutable s1 = c + let mutable s2 = EOS + let mutable s3 = EOS + let mutable s4 = EOS + c <- stream.SkipAndPeek() + if isAsciiLetter c then + nSuffix <- 2 + s2 <- c + c <- stream.SkipAndPeek() + if isAsciiLetter c then + nSuffix <- 3 + s3 <- c + c <- stream.SkipAndPeek() + if isAsciiLetter c then + nSuffix <- 4 + s4 <- c + c <- stream.SkipAndPeek() + flags 
<- flags ||| (enum) nSuffix + if (opt &&& NLO.IncludeSuffixCharsInString) <> NLO.None then + str <- stream.ReadFrom(index0) + Reply(NumberLiteral(str, flags, s1, s2, s3, s4)) + else + Reply(Error, error) + else + let cc = int c ||| int ' ' + if + if cc = int 'i' then + (opt &&& NLO.AllowInfinity) <> NLO.None + && stream.SkipCaseFolded("inf") && (flags <- flags ||| NLF.IsInfinity + stream.SkipCaseFolded("inity") |> ignore + true) + elif cc = int 'n' then + (opt &&& NLO.AllowNaN) <> NLO.None + && stream.SkipCaseFolded("nan") && (flags <- flags ||| NLF.IsNaN + true) + else false + then + let str = stream.ReadFrom(index0) + Reply(NumberLiteral(str, flags, EOS, EOS, EOS, EOS)) + else + if flags &&& (NLF.HasMinusSign ||| NLF.HasPlusSign) <> NLF.None then + stream.Seek(index0) + stream.StateTag <- stateTag + Reply(Error, errorInCaseNoLiteralFound) + +let numberLiteral opt label = numberLiteralE opt (expected label) + +let pfloat : Parser = + fun stream -> + let reply = numberLiteralE NLO.DefaultFloat Errors.ExpectedFloatingPointNumber stream + if reply.Status = Ok then + let nl = reply.Result + try + let d = if nl.IsDecimal then + System.Double.Parse(nl.String, System.Globalization.CultureInfo.InvariantCulture) + elif nl.IsHexadecimal then + floatOfHexString nl.String + elif nl.IsInfinity then + if nl.HasMinusSign then System.Double.NegativeInfinity else System.Double.PositiveInfinity + else + System.Double.NaN + Reply(d) + with + | :? System.OverflowException -> + Reply(if nl.HasMinusSign then System.Double.NegativeInfinity else System.Double.PositiveInfinity) + | :? 
System.FormatException -> + stream.Skip(-nl.String.Length) + Reply(FatalError, messageError "The floating-point number has an invalid format (this error is unexpected, please report this error message to fparsec@quanttec.com).") + else + Reply(reply.Status, reply.Error) + +let internal parseUInt64 (c0: char) (stream: CharStream<'u>) (status: ReplyStatus byref) (error: ErrorMessageList byref) = + Debug.Assert(isDigit c0 && (status = Ok)) + + // we rely on the compiler eliminating inactive branches + let opt = NumberLiteralOptions.DefaultUnsignedInteger + let limit10 = 1844674407370955160UL //(System.UInt64.MaxValue - 9UL)/10UL + let maxDiv10 = 1844674407370955161UL //System.UInt64.MaxValue/10UL + let maxMod10 = 5u //System.UInt64.MaxValue%10UL + + let limit16 = 1152921504606846975UL //(System.UInt64.MaxValue - 15UL)/16UL + let maxDiv16 = 1152921504606846975UL //System.UInt64.MaxValue/16UL + let maxMod16 = 15u //System.UInt64.MaxValue%16UL + + let limit8 = 2305843009213693951UL //(System.UInt64.MaxValue - 7UL)/8UL + let maxDiv8 = 2305843009213693951UL //System.UInt64.MaxValue/8UL + let maxMod8 = 7u //System.UInt64.MaxValue%8UL + + let limit2 = 9223372036854775807UL //(System.UInt64.MaxValue - 1UL)/2UL + let maxDiv2 = 9223372036854775807UL //System.UInt64.MaxValue/2UL + let maxMod2 = 1u //System.UInt64.MaxValue%2UL + + let mutable n = 0UL + let mutable c = c0 + let c1 = stream.SkipAndPeek() + + if (opt &&& (NLO.AllowBinary ||| NLO.AllowOctal ||| NLO.AllowHexadecimal)) = NLO.None + || c <> '0' || c1 <= '9' + then + n <- uint64 (uint32 c - uint32 '0') + c <- c1 + while c >= '0' && c <= '9' do + let nc = uint32 c - uint32 '0' + if n <= limit10 || (maxMod10 < 9u && n = maxDiv10 && nc <= maxMod10) then + n <- 10UL*n + uint64 nc + c <- stream.SkipAndPeek() + else + status <- FatalError + c <- '!' 
// break + + else + let cc1 = uint32 c1 ||| uint32 ' ' + if (opt &&& NLO.AllowHexadecimal) <> NLO.None && cc1 = uint32 'x' then + c <- stream.SkipAndPeek() + let mutable nc = uint32 0 + if (let cc = uint32 c ||| uint32 ' ' + if c <= '9' then nc <- uint32 c - uint32 '0'; c >= '0' + else cc <= uint32 'f' && (nc <- cc - 0x57u; cc >= uint32 'a')) // 0x57u = uint32 'a' - 10u + then + n <- uint64 nc + c <- stream.SkipAndPeek() + while + (let cc = uint32 c ||| uint32 ' ' + if c <= '9' then nc <- uint32 c - uint32 '0'; c >= '0' + else cc <= uint32 'f' && (nc <- cc - 0x57u; cc >= uint32 'a')) + do + if n <= limit16 || (maxMod16 < 15u && n = maxDiv16 && nc <= maxMod16) then + n <- 16UL*n + uint64 nc + c <- stream.SkipAndPeek() + else + status <- FatalError + c <- '!' // break + else + status <- Error + error <- Errors.ExpectedHexadecimalDigit + + elif (opt &&& NLO.AllowOctal) <> NLO.None && cc1 = uint32 'o' then + c <- stream.SkipAndPeek() + let mutable nc = uint32 c - uint32 '0' + if nc = (nc &&& 7u) then + n <- uint64 nc + c <- stream.SkipAndPeek() + nc <- uint32 c - uint32 '0' + while nc = (nc &&& 7u) do + if n <= limit8 || (maxMod8 < 7u && n = maxDiv8 && nc <= maxMod8) then + n <- 8UL*n + uint64 nc + c <- stream.SkipAndPeek() + nc <- uint32 c - uint32 '0' + else + status <- FatalError + nc <- 11u // break + else + status <- Error + error <- Errors.ExpectedOctalDigit + + elif (opt &&& NLO.AllowBinary) <> NLO.None && cc1 = uint32 'b' then + c <- stream.SkipAndPeek() + let mutable nc = uint32 c - uint32 '0' + if nc = (nc &&& 1u) then + n <- uint64 nc + c <- stream.SkipAndPeek() + nc <- uint32 c - uint32 '0' + while nc = (nc &&& 1u) do + if n <= limit2 || (maxMod2 = 0u && n = maxDiv2 && nc = 0u) then + n <- 2UL*n + uint64 nc + c <- stream.SkipAndPeek() + nc <- uint32 c - uint32 '0' + else + status <- FatalError + nc <- 11u // break + else + status <- Error + error <- Errors.ExpectedBinaryDigit + // else c = 0 && not (isDigit c1) + n + +let internal parseUInt32 (c0: char) 
(stream: CharStream<'u>) (status: ReplyStatus byref) (error: ErrorMessageList byref) = + Debug.Assert(isDigit c0 && (status = Ok)) + + // we rely on the compiler eliminating inactive branches + let opt = NumberLiteralOptions.DefaultUnsignedInteger + let limit10 = 429496728u //(System.UInt32.MaxValue - 9u)/10u + let maxDiv10 = 429496729u //System.UInt32.MaxValue/10u + let maxMod10 = 5u //System.UInt32.MaxValue%10u + + let limit16 = 268435455u //(System.UInt32.MaxValue - 15u)/16u + let maxDiv16 = 268435455u //System.UInt32.MaxValue/16u + let maxMod16 = 15u //System.UInt32.MaxValue%16u + + let limit8 = 536870911u //(System.UInt32.MaxValue - 7u)/8u + let maxDiv8 = 536870911u //System.UInt32.MaxValue/8u + let maxMod8 = 7u //System.UInt32.MaxValue%8u + + let limit2 = 2147483647u //(System.UInt32.MaxValue - 1u)/2u + let maxDiv2 = 2147483647u //System.UInt32.MaxValue/2u + let maxMod2 = 1u //System.UInt32.MaxValue%2u + + let mutable n = 0u + let mutable c = c0 + let c1 = stream.SkipAndPeek() + + if (opt &&& (NLO.AllowBinary ||| NLO.AllowOctal ||| NLO.AllowHexadecimal)) = NLO.None + || c <> '0' || c1 <= '9' + then + n <- uint32 c - uint32 '0' + c <- c1 + while c >= '0' && c <= '9' do + let nc = uint32 c - uint32 '0' + if n <= limit10 || (maxMod10 < 9u && n = maxDiv10 && nc <= maxMod10) then + n <- 10u*n + nc + c <- stream.SkipAndPeek() + else + status <- FatalError + c <- '!' 
// break + + else + let cc1 = uint32 c1 ||| uint32 ' ' + if (opt &&& NLO.AllowHexadecimal) <> NLO.None && cc1 = uint32 'x' then + c <- stream.SkipAndPeek() + let mutable nc = uint32 0 + if (let cc = uint32 c ||| uint32 ' ' + if c <= '9' then nc <- uint32 c - uint32 '0'; c >= '0' + else cc <= uint32 'f' && (nc <- cc - 0x57u; cc >= uint32 'a')) // 0x57u = uint32 'a' - 10u + then + n <- uint32 nc + c <- stream.SkipAndPeek() + while + (let cc = uint32 c ||| uint32 ' ' + if c <= '9' then nc <- uint32 c - uint32 '0'; c >= '0' + else cc <= uint32 'f' && (nc <- cc - 0x57u; cc >= uint32 'a')) + do + if n <= limit16 || (maxMod16 < 15u && n = maxDiv16 && nc <= maxMod16) then + n <- 16u*n + nc + c <- stream.SkipAndPeek() + else + status <- FatalError + c <- '!' // break + else + status <- Error + error <- Errors.ExpectedHexadecimalDigit + + elif (opt &&& NLO.AllowOctal) <> NLO.None && cc1 = uint32 'o' then + c <- stream.SkipAndPeek() + let mutable nc = uint32 c - uint32 '0' + if nc = (nc &&& 7u) then + n <- uint32 nc + c <- stream.SkipAndPeek() + nc <- uint32 c - uint32 '0' + while nc = (nc &&& 7u) do + if n <= limit8 || (maxMod8 < 7u && n = maxDiv8 && nc <= maxMod8) then + n <- 8u*n + nc + c <- stream.SkipAndPeek() + nc <- uint32 c - uint32 '0' + else + status <- FatalError + nc <- 11u // break + else + status <- Error + error <- Errors.ExpectedOctalDigit + + elif (opt &&& NLO.AllowBinary) <> NLO.None && cc1 = uint32 'b' then + c <- stream.SkipAndPeek() + let mutable nc = uint32 c - uint32 '0' + if nc = (nc &&& 1u) then + n <- uint32 nc + c <- stream.SkipAndPeek() + nc <- uint32 c - uint32 '0' + while nc = (nc &&& 1u) do + if n <= limit2 || (maxMod2 = 0u && n = maxDiv2 && nc = 0u) then + n <- 2u*n + nc + c <- stream.SkipAndPeek() + nc <- uint32 c - uint32 '0' + else + status <- FatalError + nc <- 11u // break + else + status <- Error + error <- Errors.ExpectedBinaryDigit + // else c = 0 && not (isDigit c1) + n + +[] +let internal overflowError message = + if isNotNull message 
then messageError message // isNotNull prevents fsc from inlining the function + else NoErrorMessages + +let inline internal pint (opt: NumberLiteralOptions) (max: 'uint) (uint64_: 'uint -> uint64) (uint: int -> 'uint) (uint_: uint32 -> 'uint) (uint__: uint64 -> 'uint) (int: 'uint -> 'int) (int_: int -> 'int) (errorInCaseNoLiteralFound: ErrorMessageList) (outOfRangeError: ErrorMessageList) (stream: CharStream<'u>) = + // we rely on the compiler eliminating inactive branches after inlining + + let minusIsAllowed = (opt &&& NLO.AllowMinusSign) <> NLO.None + + let index = stream.IndexToken + let stateTag = stream.StateTag + let mutable c = stream.Peek() + + let mutable plusMinus1 = 1 + let mutable signPresent = false + if minusIsAllowed && c = '-' then + plusMinus1 <- -1 + signPresent <- true + c <- stream.SkipAndPeek() + elif (opt &&& NLO.AllowPlusSign) <> NLO.None && c = '+' then + signPresent <- true + c <- stream.SkipAndPeek() + + let mutable status = Ok + let mutable error = NoErrorMessages + let mutable result = Unchecked.defaultof<_> + if c >= '0' && c <= '9' then + let n = if uint64_ max <= uint64 System.UInt32.MaxValue then + uint_ (parseUInt32 c stream (&status) (&error)) + else + uint__ (parseUInt64 c stream (&status) (&error)) + let isUInt32Or64 = uint64_ max = uint64 System.UInt32.MaxValue || uint64_ max = System.UInt64.MaxValue + if status = Ok && (isUInt32Or64 || (n <= max || (minusIsAllowed && plusMinus1 = -1 && n = max + uint 1))) then + result <- if minusIsAllowed then int_ plusMinus1 * int n else int n + elif status <> Error then + status <- FatalError + stream.Seek(index) + stream.StateTag <- stateTag + error <- outOfRangeError + else + status <- Error + error <- errorInCaseNoLiteralFound + if signPresent then + stream.Seek(index) + stream.StateTag <- stateTag + Reply(status, result, error) + +let pint64 stream = pint NumberLiteralOptions.DefaultInteger (uint64 System.Int64.MaxValue) uint64 uint64 uint64 uint64 int64 int64 Errors.ExpectedInt64 
Errors.NumberOutsideOfInt64Range stream +let pint32 stream = pint NumberLiteralOptions.DefaultInteger (uint32 System.Int32.MaxValue) uint64 uint32 uint32 uint32 int32 int32 Errors.ExpectedInt32 Errors.NumberOutsideOfInt32Range stream + // fsc's optimizer seems to have problems with literals of small int types +let pint16 stream = pint NumberLiteralOptions.DefaultInteger ((*uint32 System.Int16.MaxValue*)0x7fffu) uint64 uint32 uint32 uint32 int16 int16 Errors.ExpectedInt16 Errors.NumberOutsideOfInt16Range stream +let pint8 stream = pint NumberLiteralOptions.DefaultInteger ((*uint32 System.SByte.MaxValue*)0x7fu) uint64 uint32 uint32 uint32 sbyte sbyte Errors.ExpectedInt8 Errors.NumberOutsideOfInt8Range stream + +let puint64 stream = pint NumberLiteralOptions.DefaultUnsignedInteger System.UInt64.MaxValue uint64 uint64 uint64 uint64 uint64 uint64 Errors.ExpectedUInt64 Errors.NumberOutsideOfUInt64Range stream +let puint32 stream = pint NumberLiteralOptions.DefaultUnsignedInteger System.UInt32.MaxValue uint64 uint32 uint32 uint32 uint32 uint32 Errors.ExpectedUInt32 Errors.NumberOutsideOfUInt32Range stream +let puint16 stream = pint NumberLiteralOptions.DefaultUnsignedInteger 0xffffu uint64 uint32 uint32 uint32 uint16 uint16 Errors.ExpectedUInt16 Errors.NumberOutsideOfUInt16Range stream +let puint8 stream = pint NumberLiteralOptions.DefaultUnsignedInteger 0xffu uint64 uint32 uint32 uint32 byte byte Errors.ExpectedUInt8 Errors.NumberOutsideOfUInt8Range stream + + + +// ------------------- +// Conditional parsing +// ------------------- + +let notFollowedByEof : Parser = + fun stream -> + if not (stream.IsEndOfStream) then Reply(()) + else Reply(Error, Errors.UnexpectedEndOfInput) + +let followedByNewline : Parser = + fun stream -> + match stream.Peek() with + |'\r' | '\n' -> Reply(()) + | _ -> Reply(Error, Errors.ExpectedNewline) + +let notFollowedByNewline : Parser = + fun stream -> + match stream.Peek() with + |'\r' | '\n' -> Reply(Error, Errors.UnexpectedNewline) + | _ 
-> Reply(()) + +let followedByString (str: string) : Parser = + checkStringContainsNoNewlineOrEOSChar str "followedByString" + let error = expectedString str + if str.Length = 1 then + let chr = str.[0] + fun stream -> + if stream.Match(chr) then Reply(()) + else Reply(Error, error) + else + fun stream -> + if stream.Match(str) then Reply(()) + else Reply(Error, error) + +let followedByStringCI str : Parser = + checkStringContainsNoNewlineOrEOSChar str "followedByStringCI" + let error = expectedStringCI str + if str.Length = 1 then + let cfChr = Text.FoldCase(str.[0]) + fun stream -> + if stream.MatchCaseFolded(cfChr) then Reply(()) + else Reply(Error, error) + else + let cfStr = foldCase str + fun stream -> + if stream.MatchCaseFolded(cfStr) then Reply(()) + else Reply(Error, error) + +let notFollowedByString str : Parser = + checkStringContainsNoNewlineOrEOSChar str "notFollowedByString" + let error = unexpectedString str + if str.Length = 1 then + let chr = str.[0] + fun stream -> + if not (stream.Match(chr)) then Reply(()) + else Reply(Error, error) + else + fun stream -> + if not (stream.Match(str)) then Reply(()) + else Reply(Error, error) + +let notFollowedByStringCI str : Parser = + checkStringContainsNoNewlineOrEOSChar str "notFollowedByStringCI" + let error = unexpectedStringCI str + if str.Length = 1 then + let cfChr = Text.FoldCase(str.[0]) + fun stream -> + if not (stream.MatchCaseFolded(cfChr)) then Reply(()) + else Reply(Error, error) + else + let cfStr = foldCase str + fun stream -> + if not (stream.MatchCaseFolded(cfStr)) then Reply(()) + else Reply(Error, error) + + +let inline private charDoesSatisfy f c = + match c with + | EOS -> Error + | _ -> if f (if c <> '\r' then c else '\n') then Ok else Error + +let inline private charDoesSatisfyNot f c = + match c with + | EOS -> Ok + | _ -> if not (f (if c <> '\r' then c else '\n')) then Ok else Error + +let previousCharSatisfies f : Parser = + fun stream -> + let status = charDoesSatisfy f 
(stream.Peek(-1)) + Reply(status, (), NoErrorMessages) + +let previousCharSatisfiesNot f : Parser = + fun stream -> + let status = charDoesSatisfyNot f (stream.Peek(-1)) + Reply(status, (), NoErrorMessages) + +let nextCharSatisfies f : Parser = + fun stream -> + let status = charDoesSatisfy f (stream.Peek()) + Reply(status, (), NoErrorMessages) + +let nextCharSatisfiesNot f : Parser = + fun stream -> + let status = charDoesSatisfyNot f (stream.Peek()) + Reply(status, (), NoErrorMessages) + +let next2CharsSatisfy f : Parser = + let optF = OptimizedClosures.FSharpFunc.Adapt(f) + fun stream -> + let cs = stream.Peek2() + let status = match cs.Char0, cs.Char1 with + | _, EOS + | EOS, _ -> Error + | '\r', '\n' -> + match stream.Peek(2u) with + | EOS -> Error + | c1 -> if optF.Invoke('\n', if c1 <> '\r' then c1 else '\n') + then Ok else Error + | c0, c1 -> + if optF.Invoke((if c0 <> '\r' then c0 else '\n'), + (if c1 <> '\r' then c1 else '\n')) + then Ok else Error + Reply(status, (), NoErrorMessages) + +let next2CharsSatisfyNot f : Parser = + let optF = OptimizedClosures.FSharpFunc.Adapt(f) + fun stream -> + let cs = stream.Peek2() + let status = match cs.Char0, cs.Char1 with + | _, EOS + | EOS, _ -> Ok + | '\r', '\n' -> + match stream.Peek(2u) with + | EOS -> Ok + | c1 -> if not (optF.Invoke('\n', if c1 <> '\r' then c1 else '\n')) + then Ok else Error + | c0, c1 -> + if not (optF.Invoke((if c0 <> '\r' then c0 else '\n'), + (if c1 <> '\r' then c1 else '\n'))) + then Ok else Error + Reply(status, (), NoErrorMessages) diff --git a/src/FParsec/CharParsers.fsi b/src/FParsec/CharParsers.fsi new file mode 100644 index 0000000..170e4af --- /dev/null +++ b/src/FParsec/CharParsers.fsi @@ -0,0 +1,776 @@ +// Copyright (c) Stephan Tolksdorf 2007-2011 +// License: Simplified BSD License. See accompanying documentation. 
+ +[] +module FParsec.CharParsers + +open System.Text.RegularExpressions + +open Error +open Primitives + +// ======================== +// Running parsers on input +// ======================== + +/// Values of this type are returned by the runParser functions (not by `Parser<_,_>` functions). +type ParserResult<'Result,'UserState> = + /// Success(result, userState, endPos) holds the result and the user state returned by a successful parser, + /// together with the position where the parser stopped. + | Success of 'Result * 'UserState * Position + /// Failure(errorAsString, error, userState) holds the parser error and the user state returned by a failing parser, + /// together with a string representation of the parser error. + | Failure of string * ParserError * 'UserState + +/// `runParserOnString p ustate streamName str` runs the parser `p` directly on the content of the string `str`, +/// starting with the initial user state `ustate`. The `streamName` is used in error messages to describe +/// the source of the input (e.g. a file path) and may be empty. +/// The parser's `Reply` is captured and returned as a `ParserResult` value. +val runParserOnString: Parser<'a,'u> -> 'u -> streamName: string -> string -> ParserResult<'a,'u> + +/// `runParserOnSubstring p ustate streamName str index count` runs the parser `p` directly on the content +/// of the string `str` between the indices `index` (inclusive) and `index + count` (exclusive), +/// starting with the initial user state `ustate`. The `streamName` is used in error messages to describe +/// the source of the input (e.g. a file path) and may be empty. +/// The parser's `Reply` is captured and returned as a `ParserResult` value.
+val runParserOnSubstring: Parser<'a,'u> -> 'u -> streamName: string -> string -> int -> int -> ParserResult<'a,'u> + +/// `runParserOnStream p ustate streamName stream encoding` runs the parser `p` on the content of +/// the `System.IO.Stream` `stream`, starting with the initial user state `ustate`. The `streamName` +/// is used in error messages to describe the source of the input (e.g. a file path) and may be empty. +/// In case no unicode byte order mark is found, the stream data is assumed to be encoded with the given `encoding`. +/// The parser's `Reply` is captured and returned as a `ParserResult` value. +val runParserOnStream: Parser<'a,'u> -> 'u -> streamName: string -> System.IO.Stream -> System.Text.Encoding -> ParserResult<'a,'u> + +#if PCL +#else +/// `runParserOnFile p ustate path encoding` runs the parser `p` on the content of the file +/// at the given `path`, starting with the initial user state `ustate`. +/// In case no unicode byte order mark is found, the file data is assumed to be encoded with the given `encoding`. +/// The parser's `Reply` is captured and returned as a `ParserResult` value. +val runParserOnFile: Parser<'a,'u> -> 'u -> path: string -> System.Text.Encoding -> ParserResult<'a,'u> +#endif + +/// `run parser str` is a convenient abbreviation for `runParserOnString parser () "" str`. +val run: Parser<'Result, unit> -> string -> ParserResult<'Result,unit> + + +// ======= +// Parsers +// ======= + + +// ------------------------------------------------------------- +// Reading the input stream position and handling the user state +// ------------------------------------------------------------- + +/// The parser `getPosition` returns the current position in the input Stream. +/// `getPosition` is equivalent to `fun stream -> Reply(stream.Position)`. +val getPosition: Parser + +/// The parser `getUserState` returns the current user state. +/// `getUserState` is equivalent to `fun stream -> Reply(stream.UserState)`. 
+val getUserState: Parser<'u,'u> + +/// The parser `setUserState u` sets the user state to `u`. +/// `setUserState u` is equivalent to `fun stream -> stream.UserState <- u; Reply(())`. +val setUserState: 'u -> Parser + +/// `updateUserState f` is equivalent to `fun stream -> stream.UserState <- f stream.UserState; Reply(())`. +val updateUserState: ('u -> 'u) -> Parser + +/// The parser `userStateSatisfies f` succeeds if `f` returns `true` +/// when applied to the current user state, otherwise it fails. +val userStateSatisfies: ('u -> bool) -> Parser + + +// -------------------- +// Parsing single chars +// -------------------- + +/// `pchar c` parses the char `c` and returns `c`. +/// If `c = '\r'` or `c = '\n'` then `pchar c` will parse any one newline ("\n", "\r\n" or "\r") and return `c`. +val pchar: char -> Parser + +/// `skipChar c` is an optimized implementation of `pchar c |>> ignore`. +val skipChar: char -> Parser + +/// `charReturn c x` is an optimized implementation of `pchar c >>% x`. +val charReturn: char -> 'a -> Parser<'a,'u> + +/// `anyChar` parses any single char or newline ("\n", "\r\n" or "\r"). +/// Returns the parsed char, or '\n' in case a newline was parsed. +val anyChar: Parser + +/// `skipAnyChar` is an optimized implementation of `anyChar |>> ignore`. +val skipAnyChar: Parser + + +/// `satisfy f` parses any one char or newline for which the predicate function `f` returns `true`. +/// It returns the parsed char. +/// Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. +/// Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called +/// with '\r' and `satisfy f` will never return the result '\r'. +val satisfy: (char -> bool) -> Parser + +/// `skipSatisfy f` is an optimized implementation of `satisfy f |>> ignore`. +val skipSatisfy: (char -> bool) -> Parser + +/// `satisfyL f label` is an optimized implementation of `satisfy f <?> label`.
+val satisfyL: (char -> bool) -> string -> Parser + +/// `skipSatisfyL f label` is an optimized implementation of `skipSatisfy f <?> label`. +val skipSatisfyL: (char -> bool) -> string -> Parser + + +/// `anyOf str` parses any char contained in the string `str`. It returns the parsed char. +/// If `str` contains the char '\n', `anyOf str` parses any newline ("\n", "\r\n" or "\r") +/// and returns it as '\n'. (Note that it does not make a difference whether or not +/// `str` contains '\r'; `anyOf str` will never return '\r'.) +val anyOf: seq -> Parser + +/// `skipAnyOf str` is an optimized implementation of `anyOf str |>> ignore`. +val skipAnyOf: seq -> Parser + +/// `noneOf str` parses any char not contained in the string `str`. It returns the parsed char. +/// If `str` does not contain the char '\n', `noneOf str` parses any newline ("\n", "\r\n" or "\r") +/// and returns it as '\n'. (Note that it does not make a difference whether or not +/// `str` contains '\r'; `noneOf str` will never return '\r'.) +val noneOf: seq -> Parser + +/// `skipNoneOf s` is an optimized implementation of `noneOf s |>> ignore`. +val skipNoneOf: seq -> Parser + + +/// Parses any char in the range 'A' - 'Z'. Returns the parsed char. +val asciiUpper: Parser + +/// Parses any char in the range 'a' - 'z'. Returns the parsed char. +val asciiLower: Parser + +/// Parses any char in the range 'a' - 'z' and 'A' - 'Z'. Returns the parsed char. +val asciiLetter: Parser + +/// Parses any UTF-16 uppercase letter char identified by `System.Char.IsUpper`. +/// Returns the parsed char. +val upper: Parser + +/// Parses any UTF-16 lowercase letter char identified by `System.Char.IsLower`. +/// Returns the parsed char. +val lower: Parser + +/// Parses any UTF-16 letter char identified by `System.Char.IsLetter`. +/// Returns the parsed char. +val letter: Parser + +/// Parses any char in the range '0' - '9'. Returns the parsed char.
+val digit: Parser + +/// Parses any char in the range '0' - '9', 'a' - 'f' and 'A' - 'F'. Returns the parsed char. +val hex: Parser + +/// Parses any char in the range '0' - '7'. Returns the parsed char. +val octal: Parser + +// predicate functions corresponding to the above parsers + +/// `isAnyOf str` returns a predicate function. +/// When this predicate function is applied to a char, it returns `true` if and only if the char is contained in `str`. +val isAnyOf: seq -> (char -> bool) +/// `isNoneOf str` returns a predicate function. +/// When this predicate function is applied to a char, it returns `true` if and only if the char is not contained in `str`. +val isNoneOf: seq -> (char -> bool) +/// Returns `true` for any char in the range 'A' - 'Z' and `false` for all other chars. +val inline isAsciiUpper: char -> bool +/// Returns `true` for any char in the range 'a' - 'z' and `false` for all other chars. +val inline isAsciiLower: char -> bool +/// Returns `true` for any char in the range 'a' - 'z', 'A' - 'Z' and `false` for all other chars. +val inline isAsciiLetter: char -> bool +/// `isUpper` is equivalent to `System.Char.IsUpper`. +val inline isUpper: char -> bool +/// `isLower` is equivalent to `System.Char.IsLower`. +val inline isLower: char -> bool +/// `isLetter` is equivalent to `System.Char.IsLetter`. +val inline isLetter: char -> bool +/// Returns `true` for any char in the range '0' - '9' and `false` for all other chars. +val inline isDigit: char -> bool +/// Returns `true` for any char in the range '0' - '9', 'a' - 'f', 'A' - 'F' and `false` for all other chars. +val inline isHex: char -> bool +/// Returns `true` for any char in the range '0' - '7' and `false` for all other chars. +val inline isOctal: char -> bool + + +// ------------------ +// Parsing whitespace +// ------------------ + +/// Parses the tab char '\t' and returns '\t'. Note that a tab char is treated like any other non-newline char: +/// the column number is incremented by (only) 1. 
+val tab: Parser + +/// Parses a newline ("\n", "\r\n" or "\r"). Returns '\n'. +/// Is equivalent to `pchar '\n'`. +val newline<'u> : Parser + +/// `skipNewline` is an optimized implementation of `newline |>> ignore`. +val skipNewline<'u> : Parser + +/// `newlineReturn x` is an optimized implementation of `newline >>% x`. +val newlineReturn: 'a -> Parser<'a,'u> + +/// Parses a unicode newline ("\n", "\r\n", "\r", "\u0085", "\u2028", or "\u2029"). +/// Returns '\n'. Note that this parser does not accept the formfeed char '\f' as a newline. +/// In contrast to most other parsers in FParsec this parser also increments +/// the internal line count for unicode newline characters other than '\n' and '\r'. +val unicodeNewline<'u> : Parser + +/// `skipUnicodeNewline` is an optimized implementation of `unicodeNewline |>> ignore`. +val skipUnicodeNewline<'u> : Parser + +/// `unicodeNewlineReturn x` is an optimized implementation of `unicodeNewline >>% x`. +val unicodeNewlineReturn: 'a -> Parser<'a,'u> + +/// Skips over any sequence of *zero* or more whitespaces (space (' '), tab ('\t') +/// or newline ("\n", "\r\n" or "\r")). +val spaces: Parser + +/// Skips over any sequence of *one* or more whitespaces (space (' '), tab ('\t') +/// or newline ("\n", "\r\n" or "\r")). +val spaces1: Parser + +/// Skips over any sequence of *zero* or more unicode whitespaces and +/// registers any unicode newline ("\n", "\r\n", "\r", "\u0085", "\u000C", +/// "\u2028" or "\u2029") as a newline. +val unicodeSpaces: Parser + +/// Skips over any sequence of *one* or more unicode whitespaces and +/// registers any unicode newline ("\n", "\r\n", "\r", "\u0085", "\u000C", +/// "\u2028" or "\u2029") as a newline. +val unicodeSpaces1: Parser + +/// The parser `eof` only succeeds at the end of the input. It never consumes input. +val eof: Parser + + +// ------------------------ +// Parsing strings directly +// ------------------------ + +/// `pstring str` parses the string `str` and returns `str`.
+/// It is an atomic parser: either it succeeds or it fails without consuming any input. +/// `str` may not contain newline chars ('\n' or '\r'). +val pstring: string -> Parser +/// `skipString str` is an optimized implementation of `pstring str |>> ignore`. +val skipString: string -> Parser +/// `stringReturn str x` is an optimized implementation of `pstring str >>% x`. +val stringReturn: string -> 'a -> Parser<'a,'u> + +/// `pstringCI str` parses any string that case-insensitively matches the string `str`. +/// It returns the *parsed* string. +/// `str` may not contain newline chars ('\n' or '\r'). +val pstringCI: string -> Parser +/// `skipStringCI str` is an optimized implementation of `pstringCI str |>> ignore`. +val skipStringCI: string -> Parser +/// `stringCIReturn str x` is an optimized implementation of `pstringCI str >>% x`. +val stringCIReturn: string -> 'a -> Parser<'a,'u> + +/// `anyString n` parses any sequence of `n` chars or newlines ("\n", "\r\n" or "\r"). +/// It returns the parsed string. In the returned string all newlines are normalized to "\n". +/// `anyString n` is an atomic parser: either it succeeds or it fails without consuming any input. +val anyString: int32 -> Parser +/// `skipAnyString n` is an optimized implementation of `anyString n |>> ignore`. +val skipAnyString: int32 -> Parser + +/// `restOfLine skipNewline` parses any chars before the end of the line +/// and, if `skipNewline` is `true`, skips to the beginning of the next line (if there is one). +/// It returns the parsed chars before the end of the line as a string (without a newline). +/// A line is terminated by a newline ("\n", "\r\n" or "\r") or the end of the input stream. +val restOfLine: bool -> Parser + +/// `skipRestOfLine skipNewline` is an optimized implementation of `restOfLine skipNewline |>> ignore`. 
+val skipRestOfLine: bool -> Parser + +/// `charsTillString str skipString maxCount` parses all chars before the first occurrence of the string `str` and, +/// if `skipString` is `true`, skips over `str`. It returns the parsed chars before the string. +/// If more than `maxCount` chars come before the first occurrence of `str`, the parser *fails after consuming* `maxCount` chars. +/// Newlines ("\n", "\r\n" or "\r") are counted as single chars and +/// in the returned string all newlines are normalized to "\n". +/// `charsTillString str skipString maxCount` throws an `ArgumentOutOfRangeException` if `maxCount` is negative. +val charsTillString: string -> skipString: bool -> maxCount: int -> Parser +/// `skipCharsTillString str skipString maxCount` is an optimized implementation of `charsTillString str skipString maxCount |>> ignore`. +val skipCharsTillString: string -> skipString: bool -> maxCount: int -> Parser + +/// `charsTillStringCI str skipString maxCount` parses all chars before the first case-insensitive occurrence of the string `str` and, +/// if `skipString` is `true`, skips over it. It returns the parsed chars before the string. +/// If more than `maxCount` chars come before the first case-insensitive occurrence of `str`, +/// the parser *fails* after consuming `maxCount` chars. +/// Newlines ("\n", "\r\n" or "\r") are counted as single chars and +/// in the returned string all newlines are normalized to "\n". +/// `charsTillStringCI str skipString maxCount` throws an `ArgumentOutOfRangeException` if `maxCount` is negative. +val charsTillStringCI: string -> skipString: bool -> maxCount: int -> Parser +/// `skipCharsTillStringCI str skipString maxCount` is an optimized implementation of `charsTillStringCI str skipString maxCount |>> ignore`. +val skipCharsTillStringCI: string -> skipString: bool -> maxCount: int -> Parser + +/// `manySatisfy f` parses a sequence of *zero* or more chars that satisfy the predicate function `f` +/// (i.e. chars for which `f` returns `true`). It returns the parsed chars as a string.
+/// +/// Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. +/// Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called +/// with '\r' and the string returned by `manySatisfy f` will never contain an '\r'. +val manySatisfy: (char -> bool) -> Parser +/// `manySatisfy2 f1 f` behaves like `manySatisfy f`, except that the +/// first char of the parsed string must satisfy `f1` instead of `f`. +val manySatisfy2: (char -> bool) -> (char -> bool) -> Parser +/// `skipManySatisfy f` is an optimized implementation of `manySatisfy f |>> ignore`. +val skipManySatisfy: (char -> bool) -> Parser +/// `skipManySatisfy2 f1 f` is an optimized implementation of `manySatisfy2 f1 f |>> ignore`. +val skipManySatisfy2: (char -> bool) -> (char -> bool) -> Parser + +/// `many1Satisfy f` parses a sequence of *one* or more chars that satisfy the predicate function `f` +/// (i.e. chars for which `f` returns `true`). It returns the parsed chars as a string. +/// If the first char does not satisfy `f`, this parser fails without consuming input. +/// +/// Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. +/// Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called +/// with '\r' and the string returned by `many1Satisfy f` will never contain an '\r'. +val many1Satisfy: (char -> bool) -> Parser +/// `many1Satisfy2 f1 f` behaves like `many1Satisfy f`, except that the +/// first char of the parsed string must satisfy `f1` instead of `f`. +val many1Satisfy2: (char -> bool) -> (char -> bool) -> Parser +/// `skipMany1Satisfy f` is an optimized implementation of `many1Satisfy f |>> ignore`. +val skipMany1Satisfy: (char -> bool) -> Parser +/// `skipMany1Satisfy2 f1 f` is an optimized implementation of `many1Satisfy2 f1 f |>> ignore`. +val skipMany1Satisfy2: (char -> bool) -> (char -> bool) -> Parser + +/// `many1SatisfyL f label` is an optimized implementation of `many1Satisfy f label`. 
+val many1SatisfyL: (char -> bool) -> string -> Parser +/// `many1Satisfy2L f1 f label` is an optimized implementation of `many1Satisfy2 f1 f label`. +val many1Satisfy2L: (char -> bool) -> (char -> bool) -> string -> Parser +/// `skipMany1SatisfyL f label` is an optimized implementation of `skipMany1Satisfy f label`. +val skipMany1SatisfyL: (char -> bool) -> string -> Parser +/// `skipMany1Satisfy2L f1 f label` is an optimized implementation of `skipMany1Satisfy2 f1 f label`. +val skipMany1Satisfy2L: (char -> bool) -> (char -> bool) -> string -> Parser + +/// `manyMinMaxSatisfy minCount maxCount f` parses a sequence of `minCount` or more chars that satisfy the +/// predicate function `f` (i.e. chars for which `f` returns `true`), but not more than `maxCount` chars. +/// It returns the parsed chars as a string. This parser is atomic, i.e. if the first `minCount` chars +/// do not all satisfy `f`, the parser fails without consuming any input. +/// +/// Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. +/// Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called with '\r' +/// and the string returned by `manyMinMaxSatisfy minCount maxCount f` will never contain an '\r'. +/// +/// `manyMinMaxSatisfy` throws an `ArgumentOutOfRangeException` if `maxCount` is negative. +val manyMinMaxSatisfy: int -> int -> (char -> bool) -> Parser +/// `manyMinMaxSatisfy2 minCount maxCount f1 f` behaves like `manyMinMaxSatisfy minCount maxCount f`, except that the first char of the parsed string must satisfy `f1` instead of `f`. +val manyMinMaxSatisfy2: int -> int -> (char -> bool) -> (char -> bool) -> Parser +/// `skipManyMinMaxSatisfy minCount maxCount f` is an optimized implementation of `manyMinMaxSatisfy minCount maxCount f |>> ignore`. 
+val skipManyMinMaxSatisfy: int -> int -> (char -> bool) -> Parser +/// `skipManyMinMaxSatisfy2 minCount maxCount f1 f` is an optimized implementation of `manyMinMaxSatisfy2 minCount maxCount f1 f |>> ignore`. +val skipManyMinMaxSatisfy2: int -> int -> (char -> bool) -> (char -> bool) -> Parser + +/// `manyMinMaxSatisfyL minCount maxCount f label` is an optimized implementation of `manyMinMaxSatisfy minCount maxCount f label`. +val manyMinMaxSatisfyL: int -> int -> (char -> bool) -> string -> Parser +/// `manyMinMaxSatisfy2L minCount maxCount f1 f label` is an optimized implementation of `manyMinMaxSatisfy2 minCount maxCount f1 f label`. +val manyMinMaxSatisfy2L: int -> int -> (char -> bool) -> (char -> bool) -> string -> Parser +/// `skipManyMinMaxSatisfyL minCount maxCount f label` is an optimized implementation of `skipManyMinMaxSatisfy minCount maxCount f label`. +val skipManyMinMaxSatisfyL: int -> int -> (char -> bool) -> string -> Parser +/// `skipManyMinMaxSatisfy2L minCount maxCount f1 f label` is an optimized implementation of `skipManyMinMaxSatisfy2 minCount maxCount f1 f label`. +val skipManyMinMaxSatisfy2L: int -> int -> (char -> bool) -> (char -> bool) -> string -> Parser + +/// `regex pattern` matches the .NET regular expression given by the string `pattern` on the chars +/// beginning at the current index in the input stream. It returns the string matched by the regular expression. +/// If the regular expression does not match, the parser fails without consuming input. +/// +/// The `System.Text.RegularExpressions.Regex` object that is internally used to match the pattern is constructed +/// with the `RegexOptions` `MultiLine` and `ExplicitCapture`. In order to ensure that the regular expression +/// can only match at the beginning of a string, "\\A" is automatically prepended to the pattern. +/// +/// Newline chars ('\r' and '\n') in the pattern are interpreted literally. 
+/// For example, an '\n' char in the pattern will only match "\n", not "\r" or "\r\n". +/// However, in the returned string all newlines ("\n", "\r\n" or "\r") are normalized to "\n". +/// +/// For large files the regular expression is *not* applied to a string containing *all* the remaining chars +/// in the stream. The number of chars that are guaranteed to be visible to the regular expression is specified +/// during construction of the `CharStream`. If one of the `runParser` functions is used to run the parser, +/// this number is 43690. +val regex: string -> Parser + +/// `regexL pattern label` is an optimized implementation of `regex pattern <?> label`. +val regexL: string -> string -> Parser + +type IdentifierOptions = + new: ?isAsciiIdStart: (char -> bool) * + ?isAsciiIdContinue: (char -> bool) * + #if PCL + #else + ?normalization: System.Text.NormalizationForm * + ?normalizeBeforeValidation: bool * + #endif + ?allowJoinControlChars: bool * + ?preCheckStart: (char -> bool) * + ?preCheckContinue: (char -> bool) * + ?allowAllNonAsciiCharsInPreCheck: bool * + ?label: string * + ?invalidCharMessage: string -> IdentifierOptions + +/// The `identifier` parser is a configurable parser for the XID identifier syntax +/// specified in Unicode Standard Annex #31. +val identifier: IdentifierOptions -> Parser + +// ---------------------------------------------- +// Parsing strings with the help of other parsers +// ---------------------------------------------- + +/// `manyChars cp` parses a sequence of *zero* or more chars with the char parser `cp`. +/// It returns the parsed chars as a string. +/// +/// `manyChars cp` is an optimized implementation of `many (attempt cp)` that returns +/// the chars as a string instead of a char list. The equivalence to `many (attempt p)` +/// instead of `many p` implies that `manyChars` never fails.
+val manyChars: Parser -> Parser +/// `manyChars2 cp1 cp` behaves like `manyChars cp`, except that it parses the first char with `cp1` instead of `cp`. +val manyChars2: Parser -> Parser -> Parser + +/// `many1Chars cp` parses a sequence of *one* or more chars with the char parser `cp`. +/// It returns the parsed chars as a string. +/// +/// `many1Chars cp` is an optimized implementation of `many1 (attempt cp)` that returns +/// the chars as a string instead of a char list. The equivalence to `many1 (attempt p)` +/// instead of `many1 p` implies that `many1Chars` never fails after consuming input. +val many1Chars: Parser -> Parser +/// `many1Chars2 cp1 cp` behaves like `many1Chars cp`, except that it parses the first char with `cp1` instead of `cp`. +val many1Chars2: Parser -> Parser -> Parser + +/// `manyCharsTill cp endp` parses chars with the char parser `cp` until the parser `endp` succeeds. +/// It stops after `endp` and returns the parsed chars as a string. +val manyCharsTill: Parser -> Parser<'b,'u> -> Parser +/// `manyCharsTill2 cp1 cp endp` behaves like `manyCharsTill cp endp`, except that it parses the first char with `cp1` instead of `cp`. +val manyCharsTill2: Parser -> Parser -> Parser<'b,'u> -> Parser + +/// `manyCharsTillApply cp endp f` parses chars with the char parser `cp` until the parser `endp` succeeds. +/// It stops after `endp` and returns the result of the function application `f str b`, +/// where `str` is the parsed string and `b` is the result returned by `endp`. +val manyCharsTillApply: Parser -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> +/// `manyCharsTillApply2 cp1 cp endp f` behaves like `manyCharsTillApply cp endp f`, except that it parses the first char with `cp1` instead of `cp`. +val manyCharsTillApply2: Parser -> Parser -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> + +/// `many1CharsTill cp endp` parses one char with the char parser `cp`. +/// Then it parses more chars with `cp` until the parser `endp` succeeds.
+/// It stops after `endp` and returns the parsed chars as a string. +/// +/// `many1CharsTill cp endp` is an optimized implementation of `pipe2 cp (manyCharsTill cp endp) (fun c1 str -> c1.ToString() + str)` +val many1CharsTill: Parser -> Parser<'b,'u> -> Parser +/// `many1CharsTill2 cp1 cp endp` behaves like `many1CharsTill cp endp`, except that it parses the first char with `cp1` instead of `cp`. +val many1CharsTill2: Parser -> Parser -> Parser<'b,'u> -> Parser + +/// `many1CharsTillApply cp endp` parses one char with the char parser `cp`. +/// Then it parses more chars with `cp` until the parser `endp` succeeds. +/// It stops after `endp` and returns the result of the function application `f str b`, +/// where `str` is the parsed string and `b` is result returned by `endp`. +val many1CharsTillApply: Parser -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> +/// `many1CharsTillApply2 cp1 cp endp` behaves like `many1CharsTillApply cp endp`, except that it parses the first char with `cp1` instead of `cp`. +val many1CharsTillApply2: Parser -> Parser -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> + +/// `manyStrings sp` parses a sequence of *zero* or more strings with the string parser `sp`. +/// It returns the strings in concatenated form. +/// `manyStrings sp` is an optimized implementation of `manyReduce (+) "" sp`. +val manyStrings: Parser -> Parser +/// `manyStrings2 sp1 sp` behaves like `manyStrings sp`, except that it parses the first string with `sp1` instead of `sp`. +val manyStrings2: Parser -> Parser -> Parser + +/// `many1Strings sp` parses a sequence of *one* or more strings with the string parser `sp`. +/// It returns the strings in concatenated form. +/// Note that `many1Strings sp` does not require the first string to be non-empty. +val many1Strings: Parser -> Parser +/// `many1Strings2 sp1 sp` behaves like `many1Strings sp`, except that it parses the first string with `sp1` instead of `sp`. 
+val many1Strings2: Parser -> Parser -> Parser + +/// `stringsSepBy sp sep` parses *zero* or more occurrences of `sp` separated by `sep`. +/// It returns the strings parsed by `sp` *and* `sep` in concatenated form. +val stringsSepBy: Parser -> Parser -> Parser + +/// `stringsSepBy1 sp sep` parses *one* or more occurrences of `sp` separated by `sep`. +/// It returns the strings parsed by `sp` *and* `sep` in concatenated form. +val stringsSepBy1: Parser -> Parser -> Parser + +/// `skipped p` applies the parser `p` and returns the chars skipped over by `p` as a string. +/// All newlines ("\r\n", "\r" or "\n") are normalized to "\n". +val skipped: Parser -> Parser + +/// `p |> withSkippedString f` applies the parser `p` and returns the result of `f str x`, +/// where `str` is the string skipped over by `p` and `x` is the result returned by `p`. +val withSkippedString: (string -> 'a -> 'b) -> Parser<'a,'u> -> Parser<'b,'u> + + +// --------------- +// Parsing numbers +// --------------- + +/// Encodes the various options of the `numberLiteral` parser. +[] +type NumberLiteralOptions = + | None = 0 + | AllowSuffix = 0b000000000001 + | AllowMinusSign = 0b000000000010 + | AllowPlusSign = 0b000000000100 + | AllowFraction = 0b000000001000 + | AllowFractionWOIntegerPart = 0b000000010000 + | AllowExponent = 0b000000100000 + | AllowHexadecimal = 0b000001000000 + | AllowBinary = 0b000010000000 + | AllowOctal = 0b000100000000 + | AllowInfinity = 0b001000000000 + | AllowNaN = 0b010000000000 + + | IncludeSuffixCharsInString = 0b100000000000 + + | DefaultInteger = 0b000111000110 + | DefaultUnsignedInteger = 0b000111000000 + | DefaultFloat = 0b011001101110 + +/// The return type of the `numberLiteral` parser. An instance contains the parsed +/// number literal and various bits of information about it. 
+/// Note that the `String` member contains the string literal without the suffix chars, +/// except if the `NumberLiteralOptions` passed to the `numberLiteral` parser have the +/// `IncludeSuffixCharsInString` flag set. +/// Any parsed suffix chars are always available through the `SuffixChar1` - `4` members. +type NumberLiteral = + new: string:string * info:NumberLiteralResultFlags + * suffixChar0: char * suffixChar1: char * suffixChar2: char * suffixChar3: char -> NumberLiteral + + /// The parsed number literal string. Only includes the parsed suffix chars if the + /// `NumberLiteralOptions` passed to the `numberLiteral` parser have the `IncludeSuffixCharsInString` flag set. + member String: string + /// Encodes various bits of information about the parsed number literal. + member Info: NumberLiteralResultFlags + + member SuffixLength: int + /// Returns the first suffix char, or EOS if no suffix char was parsed. + member SuffixChar1: char + /// Returns the second suffix char, or EOS if fewer than two suffix chars were parsed. + member SuffixChar2: char + /// Returns the third suffix char, or EOS if fewer than three suffix chars were parsed. + member SuffixChar3: char + /// Returns the fourth suffix char, or EOS if fewer than four suffix chars were parsed. + member SuffixChar4: char + + member HasMinusSign: bool + member HasPlusSign: bool + member HasIntegerPart: bool + member HasFraction: bool + member HasExponent: bool + member IsInteger: bool + member IsDecimal: bool + member IsHexadecimal: bool + member IsBinary: bool + member IsOctal: bool + member IsNaN: bool + member IsInfinity: bool + + override Equals: obj -> bool + override GetHashCode: unit -> int + +and /// Encodes various bits of information about a parsed number literal.
+ [] + NumberLiteralResultFlags = + | None = 0 + | SuffixLengthMask = 0b0000000000001111 + | HasMinusSign = 0b0000000000010000 + | HasPlusSign = 0b0000000000100000 + | HasIntegerPart = 0b0000000001000000 + | HasFraction = 0b0000000010000000 + | HasExponent = 0b0000000100000000 + | IsDecimal = 0b0000001000000000 + | IsHexadecimal = 0b0000010000000000 + | IsBinary = 0b0000100000000000 + | IsOctal = 0b0001000000000000 + | BaseMask = 0b0001111000000000 + | IsInfinity = 0b0010000000000000 + | IsNaN = 0b0100000000000000 + + +/// `numberLiteral options label` parses a number literal and returns the result in form +/// of a `NumberLiteral` value. The given `NumberLiteralOptions` argument determines the kind +/// of number literals accepted. The string `label` is used in the `Expected` error message +/// that is generated when the parser fails without consuming input. +/// +/// The parser fails without consuming input, if not at least one digit (including the 0 in the +/// format specifiers "0x" etc.) can be parsed. It fails after consuming input, if no decimal +/// digit comes after an exponent marker or no valid digit comes after a format specifier. +val numberLiteral: NumberLiteralOptions -> string + -> Parser + +/// `numberLiteralE` is an uncurried version of `numberLiteral` that can be used to +/// implement number parsers without having to construct a `numberLiteral` closure. +val numberLiteralE: NumberLiteralOptions -> errorInCaseNoLiteralFound: ErrorMessageList + -> CharStream<'u> -> Reply + +/// Parses a floating-point number in decimal or hexadecimal format. +/// The special values NaN and Inf(inity)? (case insensitive) are also recognized. 
+/// +/// The parser fails +/// without consuming input, if not at least one digit (including the '0' in "0x") can be parsed, +/// after consuming input, if no digit comes after an exponent marker or no hex digit comes after "0x", +/// after consuming input, if the value represented by the input string (after rounding) is greater than `System.Double.MaxValue` or less than `System.Double.MinValue`. +val pfloat: Parser + + +/// Parses an integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. +/// The parser fails +/// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, +/// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, +/// after consuming input, if the value represented by the input string is greater than `System.Int64.MaxValue` or less than `System.Int64.MinValue`. +val pint64: Parser + +/// Parses an integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. +/// The parser fails +/// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, +/// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, +/// after consuming input, if the value represented by the input string is greater than `System.Int32.MaxValue` or less than `System.Int32.MinValue`. +val pint32: Parser + +/// Parses an integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. +/// The parser fails +/// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) 
can be parsed, +/// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, +/// after consuming input, if the value represented by the input string is greater than `System.Int16.MaxValue` or less than `System.Int16.MinValue`. +val pint16: Parser + +/// Parses an integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. +/// The parser fails +/// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, +/// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, +/// after consuming input, if the value represented by the input string is greater than 127 or less than -128. +val pint8: Parser + +/// Parses an unsigned integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. +/// The parser fails +/// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, +/// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, +/// after consuming input, if the value represented by the input string is greater than `System.UInt64.MaxValue`. +val puint64: Parser + +/// Parses an unsigned integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. +/// The parser fails +/// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, +/// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, +/// after consuming input, if the value represented by the input string is greater than `System.UInt32.MaxValue`. +val puint32: Parser + +/// Parses an unsigned integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. 
+/// The parser fails +/// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, +/// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, +/// after consuming input, if the value represented by the input string is greater than `System.UInt16.MaxValue`. +val puint16: Parser + +/// Parses an unsigned integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. +/// The parser fails +/// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, +/// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, +/// after consuming input, if the value represented by the input string is greater than 255. +val puint8: Parser + + +// ------------------- +// Conditional parsing +// ------------------- + +/// `notFollowedByEof` is an optimized implementation of `notFollowedByL eof "end of input"`. +val notFollowedByEof: Parser + +/// `followedByNewline` is an optimized implementation of `followedByL newline "newline"`. +val followedByNewline: Parser + +/// `notFollowedByNewline` is an optimized implementation of `notFollowedByL newline "newline"`. +val notFollowedByNewline: Parser + +/// `followedByString str` is an optimized implementation of `followedByL (pstring str) ("'" + str + "'")`. +val followedByString: string -> Parser +/// `followedByStringCI str` is an optimized implementation of `followedByL (pstringCI str) ("'" + str + "'")`. +val followedByStringCI: string -> Parser +/// `notFollowedByString str` is an optimized implementation of `notFollowedByL (pstring str) ("'" + str + "'")`. +val notFollowedByString: string -> Parser +/// `notFollowedByStringCI str` is an optimized implementation of `notFollowedByL (pstringCI str) ("'" + str + "'")`. 
+val notFollowedByStringCI: string -> Parser + +/// `nextCharSatisfies f` is an optimized implementation of `followedBy (satisfy f)`. +val nextCharSatisfies: (char -> bool) -> Parser + +/// `nextCharSatisfiesNot f` is an optimized implementation of `notFollowedBy (satisfy f)`. +val nextCharSatisfiesNot: (char -> bool) -> Parser + +/// `next2CharsSatisfy f` succeeds if the predicate function `f` returns `true` +/// when applied to the next 2 chars in the input stream, otherwise it fails. +/// If there aren't 2 chars remaining in the input stream, this parser fails (as opposed to `next2CharsSatisfyNot`). +/// This parser never changes the parser state. +/// Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'. +/// If this parser fails, it returns no descriptive error message; hence it should only be +/// used together with parsers that take care of a potential error. +val next2CharsSatisfy: (char -> char -> bool) -> Parser + +/// `next2CharsSatisfyNot f` succeeds if the predicate function `f` returns `false` +/// when applied to the next 2 chars in the input stream, otherwise it fails. +/// If there aren't 2 chars remaining in the input stream, this parser succeeds (as opposed to `next2CharsSatisfy`). +/// This parser never changes the parser state. +/// Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'. +/// If this parser fails, it returns no descriptive error message; hence it should only be +/// used together with parsers that take care of a potential error. +val next2CharsSatisfyNot: (char -> char -> bool) -> Parser + +/// `previousCharSatisfies f` succeeds if the predicate function `f` returns `true` +/// when applied to the previous char in the stream, otherwise it fails. +/// If there is no previous char (because the stream is at the beginning), +/// this parser fails (as opposed to `previousCharSatisfiesNot`). +/// This parser never changes the parser state. 
+/// Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'. +/// If this parser fails, it returns no descriptive error message; hence it should only be +/// used together with parsers that take care of a potential error. +val previousCharSatisfies: (char -> bool) -> Parser + +/// `previousCharSatisfiesNot f` succeeds if the predicate function `f` returns `false` +/// when applied to the previous char in the stream, otherwise it fails. +/// If there is no previous char (because the stream is at the beginning), +/// this parser succeeds (as opposed to `previousCharSatisfies`). +/// This parser never changes the parser state. +/// Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'. +/// If this parser fails, it returns no descriptive error message; hence it should only be +/// used together with parsers that take care of a potential error. +val previousCharSatisfiesNot: (char -> bool) -> Parser + + + +// ================ +// Helper functions +// ================ + +/// `EOS` is equal to `CharStream<'u>.EndOfStreamChar`. +[] +val EOS: char = '\uffff';; + +/// `foldCase str` returns a case-folded version of `str` +/// with all chars mapped using the (non-Turkic) Unicode 1-to-1 case folding mappings +/// for chars below 0x10000. If the argument is `null`, `null` is returned. +val foldCase: string -> string + +/// `normalizeNewlines str` returns a version of `str` +/// with all occurrences of "\r\n" and "\r" replaced by "\n". +/// If the argument is `null`, `null` is returned. +val normalizeNewlines: string -> string + +/// Returns a hexadecimal string representation of the `float` argument. +val floatToHexString: float -> string + +/// Returns the `float` value represented by the given string in hexadecimal format. +/// Raises a `System.FormatException` in case the string representation is invalid. +/// Raises a `System.OverflowException` if the (absolute) value is too large to be represented by a `float`. 
+val floatOfHexString: string -> float + +/// Returns a hexadecimal string representation of the `float32` argument. +val float32ToHexString: float32 -> string + +/// Returns the `float32` value represented by the given string in hexadecimal format. +/// Raises a `System.FormatException` in case the string representation is invalid. +/// Raises a `System.OverflowException` if the (absolute) value is too large to be represented by a `float32`. +val float32OfHexString: string -> float32 diff --git a/src/FParsec/Emit.fs b/src/FParsec/Emit.fs new file mode 100644 index 0000000..e7ff4f7 --- /dev/null +++ b/src/FParsec/Emit.fs @@ -0,0 +1,575 @@ +// Copyright (c) Stephan Tolksdorf 2010-2011 +// License: Simplified BSD License. See accompanying documentation. + +module internal FParsec.Emit + +#if LOW_TRUST +#else + +open System.Diagnostics +open System.Reflection +open System.Reflection.Emit +open System.Collections.Generic + +open Microsoft.FSharp.NativeInterop + +open FParsec.Internals +open FParsec.Range + +#nowarn "9" // "Uses of this construct may result in the generation of unverifiable .NET IL code." + +let mutable private assemblyBuilder = null +let mutable private moduleBuilder = null +let private createTypeBuilderSyncRoot = new obj() + +let createTypeBuilder name args parent (interfaces : System.Type[]) = + lock createTypeBuilderSyncRoot (fun _ -> + if isNull moduleBuilder then + let assemblyName = new AssemblyName("FParsec.Emitted") + let access = + #if DEBUG + AssemblyBuilderAccess.RunAndSave + #else + AssemblyBuilderAccess.Run + #endif + assemblyBuilder <- System.Threading.Thread.GetDomain().DefineDynamicAssembly(assemblyName, access) + moduleBuilder <- assemblyBuilder.DefineDynamicModule("FParsec.Emitted" + #if DEBUG + , "FParsec.Emitted.dll" + #else + #endif + ) + moduleBuilder.DefineType("FParsec.Emitted." 
+ name, args, parent, interfaces) + ) + +#if DEBUG +let saveEmitAssembly fileName = assemblyBuilder.Save(fileName) +#endif + +// Does anyone have an idea why the .NET System.Reflection.Emit.OpCode +// is implemented as a gigantic struct? (It has a size of ~36 bytes!) + + +let loadI4 (ilg: ILGenerator) (i: int32) = + // For run-time-only code generation it probably makes little difference + // whether we optimize the size of the IL, but we do it anyway. + match i with + | -1 -> ilg.Emit(OpCodes.Ldc_I4_M1) + | 0 -> ilg.Emit(OpCodes.Ldc_I4_0) + | 1 -> ilg.Emit(OpCodes.Ldc_I4_1) + | 2 -> ilg.Emit(OpCodes.Ldc_I4_2) + | 3 -> ilg.Emit(OpCodes.Ldc_I4_3) + | 4 -> ilg.Emit(OpCodes.Ldc_I4_4) + | 5 -> ilg.Emit(OpCodes.Ldc_I4_5) + | 6 -> ilg.Emit(OpCodes.Ldc_I4_6) + | 7 -> ilg.Emit(OpCodes.Ldc_I4_7) + | 8 -> ilg.Emit(OpCodes.Ldc_I4_8) + | _ -> + let i1 = int8 i + if i <> int32 i1 then ilg.Emit(OpCodes.Ldc_I4, i) + else ilg.Emit(OpCodes.Ldc_I4_S, i1) + +let loadI8 (ilg: ILGenerator) (i: int64) = + let i4 = int32 i + if i <> int64 i4 then ilg.Emit(OpCodes.Ldc_I8, i) + else + loadI4 ilg i4 + ilg.Emit(OpCodes.Conv_I8) + +let loadU8 (ilg: ILGenerator) (i: uint64) = + if i > uint64 System.UInt32.MaxValue then ilg.Emit(OpCodes.Ldc_I8, int64 i) + else + loadI4 ilg (int32 i) + ilg.Emit(OpCodes.Conv_U8) + +let loadI (ilg: ILGenerator) (i: nativeint) = + if sizeof = 4 then + ilg.Emit(OpCodes.Ldc_I4, int32 i) + else + ilg.Emit(OpCodes.Ldc_I8, int64 i) + ilg.Emit(OpCodes.Conv_I) + +let loadU (ilg: ILGenerator) (i: unativeint) = + if sizeof = 4 then + ilg.Emit(OpCodes.Ldc_I4, int32 i) + else + ilg.Emit(OpCodes.Ldc_I8, int64 i) + ilg.Emit(OpCodes.Conv_U) + +let private createLoaderForPrimitiveConstantsImpl (ty: System.Type) (ilg: ILGenerator) : ('T -> unit) = + let ty = if ty.IsEnum then System.Enum.GetUnderlyingType(ty) else ty + + if ty = typeof< int32> then fun x -> loadI4 ilg (box x :?> int32) + elif ty = typeof then fun x -> loadI4 ilg (int32 (box x :?> uint32)) + elif ty = typeof then 
fun x -> loadI8 ilg (box x :?> int64) + elif ty = typeof then fun x -> loadU8 ilg (box x :?> uint64) + elif ty = typeof< int16> then fun x -> loadI4 ilg (int32 (box x :?> int16)) + elif ty = typeof then fun x -> loadI4 ilg (int32 (box x :?> uint16)) + elif ty = typeof then fun x -> loadI4 ilg (int32 (box x :?> char)) + elif ty = typeof< int8> then fun x -> loadI4 ilg (int32 (box x :?> int8)) + elif ty = typeof then fun x -> loadI4 ilg (int32 (box x :?> uint8)) + elif ty = typeof then + fun x -> ilg.Emit(if box x :?> bool then OpCodes.Ldc_I4_1 else OpCodes.Ldc_I4_0) + elif ty = typeof then fun x -> ilg.Emit(OpCodes.Ldc_R8, (box x :?> double)) + elif ty = typeof then fun x -> ilg.Emit(OpCodes.Ldc_R4, (box x :?> float32)) + elif ty = typeof then fun x -> loadI ilg (box x :?> nativeint) + elif ty = typeof then fun x -> loadU ilg (box x :?> unativeint) + else invalidArg "ty" "Invalid type argument." + +let createLoaderForPrimitiveConstants<'T> ilg : ('T -> unit) = + createLoaderForPrimitiveConstantsImpl typeof<'T> ilg + +let createLoaderForBoxedPrimitiveConstants (ty: System.Type) ilg : (obj -> unit) = + createLoaderForPrimitiveConstantsImpl ty ilg + + +let emitRangeCheck branchIfInRange (ilg: ILGenerator) (label: Label) minValue maxValue (range: Range) = + Debug.Assert(minValue <= range.Min && range.Max <= maxValue) + if minValue = range.Min && range.Max = maxValue then + ilg.Emit(OpCodes.Pop) + if branchIfInRange then + ilg.Emit(OpCodes.Br, label) + elif range.Min = range.Max then + loadI4 ilg range.Min + if branchIfInRange then + ilg.Emit(OpCodes.Beq, label) + else + ilg.Emit(OpCodes.Bne_Un, label) + elif minValue = range.Min then + // we only have to check the right bound + loadI4 ilg range.Max + if branchIfInRange then + ilg.Emit(OpCodes.Ble, label) + else + ilg.Emit(OpCodes.Bgt, label) + elif range.Max = maxValue then + // we only have to check the left bound + loadI4 ilg range.Min + if branchIfInRange then + ilg.Emit(OpCodes.Bge, label) + else + 
ilg.Emit(OpCodes.Blt, label) + else + // we have to check both bounds + if range.Min <> 0 then + loadI4 ilg range.Min + ilg.Emit(OpCodes.Sub) + loadI4 ilg (range.Max - range.Min) + if branchIfInRange then + ilg.Emit(OpCodes.Ble_Un, label) // unsigned comparison + else + ilg.Emit(OpCodes.Bgt_Un, label) // unsigned comparison + +let emitBranchIfOutOfRange ilg label minValue maxValue range = + emitRangeCheck false ilg label minValue maxValue range + +let emitBranchIfInRange ilg label minValue maxValue range = + emitRangeCheck true ilg label minValue maxValue range + +let emitRangeTest pushFalseIfInRange (ilg: ILGenerator) minValue maxValue (range: Range) = + Debug.Assert(minValue <= range.Min && range.Max <= maxValue) + + let emitNot() = + ilg.Emit(OpCodes.Ldc_I4_0) + ilg.Emit(OpCodes.Ceq) + + if minValue = range.Min && range.Max = maxValue then + ilg.Emit(OpCodes.Pop) + if pushFalseIfInRange then + ilg.Emit(OpCodes.Ldc_I4_0) + else + ilg.Emit(OpCodes.Ldc_I4_1) + elif range.Min = range.Max then + loadI4 ilg range.Min + ilg.Emit(OpCodes.Ceq) + if pushFalseIfInRange then emitNot() + elif minValue = range.Min then + // we only have to check the right bound + loadI4 ilg range.Max + ilg.Emit(OpCodes.Cgt) + if not pushFalseIfInRange then emitNot() + elif range.Max = maxValue then + // we only have to check the left bound + loadI4 ilg range.Min + ilg.Emit(OpCodes.Clt) + if not pushFalseIfInRange then emitNot() + else + // we have to check both bounds + if range.Min <> 0 then + loadI4 ilg range.Min + ilg.Emit(OpCodes.Sub) + loadI4 ilg (range.Max - range.Min) + ilg.Emit(OpCodes.Cgt_Un) // unsigned comparison + if not pushFalseIfInRange then emitNot() + +let emitTwoRangeTest (ilg: ILGenerator) (loadVar: ILGenerator -> unit) inverse minValue maxValue (range1: Range) (range2: Range) = + assert (range1.Max < range2.Min && range1.Max + 1 < range2.Min) + let needOuterRangeCheck = minValue < range1.Min || range2.Max < maxValue + let w = sizeof*8 + if needOuterRangeCheck && (maxValue 
- minValue < w) then + // use a simple bit vector test: + // (bits >> (var - off)) & 1 + let off = if minValue > 0 && maxValue < w then 0 else minValue + let mutable bits = if inverse then unativeint -1n else 0un + for r in [range1; range2] do + for i in r.Min .. r.Max do + let b = i - off + if inverse then + bits <- bits ^^^ (1un <<< b) + else + bits <- bits ||| (1un <<< b) + loadU ilg bits + loadVar ilg + if off <> 0 then + loadI4 ilg off + ilg.Emit(OpCodes.Sub) + ilg.Emit(OpCodes.Shr_Un) + ilg.Emit(OpCodes.Ldc_I4_1) + ilg.Emit(OpCodes.And) + elif not needOuterRangeCheck + || (range1.Max + 2 = range2.Min && range1.Min <> range1.Max && range2.Min <> range2.Max) + then + if needOuterRangeCheck then + loadVar ilg + emitRangeTest inverse ilg minValue maxValue (Range(range1.Min, range2.Max)) + loadVar ilg + emitRangeTest (not inverse) ilg minValue maxValue (Range(range1.Max + 1, range2.Min - 1)) + if needOuterRangeCheck then + if inverse then + ilg.Emit(OpCodes.Or) + else + ilg.Emit(OpCodes.And) + else + loadVar ilg + emitRangeTest inverse ilg minValue maxValue range1 + loadVar ilg + emitRangeTest inverse ilg minValue maxValue range2 + if inverse then + ilg.Emit(OpCodes.And) + else + ilg.Emit(OpCodes.Or) + + +type TempLocals(ilg: ILGenerator) = + let mutable intLocal = null + let mutable boolLocal = null + + /// used by emitSetMembershipTest (and indirectly by emitSwitch) + member t.GetIntLocal() = + if isNull intLocal then + intLocal <- ilg.DeclareLocal(typeof) + intLocal + + /// used by emitSwitch + member t.GetBoolLocal() = + if isNull boolLocal then + boolLocal <- ilg.DeclareLocal(typeof) + boolLocal + +/// flag used for testing purposes +let mutable noBitVectorTests = false + +let emitSetMembershipTest (ilg: ILGenerator) + (loadVar: ILGenerator -> unit) (storeResult: ILGenerator -> unit) + (temps: TempLocals) + lengthCap densityThreshold + minValue maxValue + inverse (ranges: Range[]) = + + checkRangesAreValidSortedAndUnconnected ranges + + let endLabel = 
ilg.DefineLabel() + let outOfRangeLabel = ilg.DefineLabel() + + let emitBitVectorTest minValue maxValue iBegin iEnd = + let first, last = ranges.[iBegin].Min, ranges.[iEnd - 1].Max + // set up bit vector in unmanaged memory + let w = sizeof*8 + // save a subtraction if it doesn't cost too much memory + let off = if first > 0 && (last < w || (first < 3*w && (last >= first + w))) then 0 + else first + + let lastMinusOff = uint32 (last - off) + if lastMinusOff > uint32 System.Int32.MaxValue then + raise (System.ArgumentException("The ranges span width is too large.")) + + let length = int (lastMinusOff/uint32 w + 1u) + if uint32 length * uint32 w > uint32 System.Int32.MaxValue then + raise (System.ArgumentException("The ranges span width is too large.")) + + let mutable stackVar = 0un + let ptr = if length = 1 then NativePtr.ofNativeInt (NativePtr.toNativeInt &&stackVar) + else NativePtr.ofNativeInt (UnmanagedMemoryPool.Allocate(length*sizeof)) + + // fill bit vector ptr.[0..length - 1] + let r = ranges.[iBegin] + let mutable rMin, rMax = r.Min - off, r.Max - off + let mutable i = iBegin + 1 + if not inverse then + for j = 0 to length - 1 do + let mutable n = 0un + let j1w = (j + 1)*w + while rMin < j1w do + n <- n ||| (1un <<< rMin%w) + if rMin < rMax then rMin <- rMin + 1 + elif i < iEnd then + let r = ranges.[i] + rMin <- r.Min - off; rMax <- r.Max - off + i <- i + 1 + else rMin <- System.Int32.MaxValue // break + NativePtr.set ptr j n + else + for j = 0 to length - 1 do + let mutable n = unativeint -1n + let j1w = (j + 1)*w + while rMin < j1w do + n <- n ^^^ (1un <<< rMin%w) + if rMin < rMax then rMin <- rMin + 1 + elif i < iEnd then + let r = ranges.[i] + rMin <- r.Min - off; rMax <- r.Max - off + i <- i + 1 + else rMin <- System.Int32.MaxValue // break + NativePtr.set ptr j n + + let intTemp = temps.GetIntLocal() + + // t = (uint32)(x - off) + loadVar ilg + if off <> 0 then + loadI4 ilg off + ilg.Emit(OpCodes.Sub) + ilg.Emit(OpCodes.Stloc, intTemp) + + // if (t 
> (uint32)(last - off)) goto outOfRangeLabel + if minValue < off || length*w <= maxValue - off then + ilg.Emit(OpCodes.Ldloc, intTemp) + loadI4 ilg (last - off) + ilg.Emit(OpCodes.Bgt_Un, outOfRangeLabel) + + if length = 1 then + // x = *ptr + loadU ilg stackVar + else + // x = *(ptr + t/w) + loadU ilg (unativeint (NativePtr.toNativeInt ptr)) + ilg.Emit(OpCodes.Ldloc, intTemp) + loadI4 ilg w + ilg.Emit(OpCodes.Div_Un) + loadI4 ilg sizeof + ilg.Emit(OpCodes.Mul) + ilg.Emit(OpCodes.Add) + ilg.Emit(OpCodes.Ldind_I) + + // result = (x >> t%w) & 1 + ilg.Emit(OpCodes.Ldloc, intTemp) + if length > 1 then + loadI4 ilg w + ilg.Emit(OpCodes.Rem_Un) + ilg.Emit(OpCodes.Shr_Un) + ilg.Emit(OpCodes.Ldc_I4_1) + ilg.Emit(OpCodes.And) + storeResult ilg + ilg.Emit(OpCodes.Br, endLabel) + + let emitRangeTest inverse minValue maxValue (range: Range) = + loadVar ilg + emitRangeTest inverse ilg minValue maxValue range + storeResult ilg + ilg.Emit(OpCodes.Br, endLabel) + + let emitTwoRangeTest inverse minValue maxValue range1 range2 = + emitTwoRangeTest ilg loadVar inverse minValue maxValue range1 range2 + storeResult ilg + ilg.Emit(OpCodes.Br, endLabel) + + let rec emitRegion minValue maxValue iBegin iEnd = + Debug.Assert(iBegin < iEnd && minValue <= ranges.[iBegin].Min && ranges.[iEnd - 1].Max <= maxValue) + + match iEnd - iBegin with + | 0 -> failwith "emitSetMembershipTest.emitRegion" + | 1 -> emitRangeTest inverse minValue maxValue ranges.[iBegin] + | 2 -> emitTwoRangeTest inverse minValue maxValue ranges.[iBegin] ranges.[iBegin + 1] + | _ -> // at least 3 ranges + if not noBitVectorTests + && density lengthCap ranges iBegin iEnd >= densityThreshold + then + emitBitVectorTest minValue maxValue iBegin iEnd + else + let i, pivotAroundRangeMax = findPivot ranges iBegin iEnd + let label = ilg.DefineLabel() + let r = ranges.[i] + loadVar ilg + if pivotAroundRangeMax then + loadI4 ilg r.Max + ilg.Emit(OpCodes.Bgt, label) + emitRegion minValue r.Max iBegin (i + 1) + ilg.MarkLabel(label) + 
emitRegion (r.Max + 1) maxValue (i + 1) iEnd + else + loadI4 ilg r.Min + ilg.Emit(OpCodes.Blt, label) + emitRegion r.Min maxValue i iEnd + ilg.MarkLabel(label) + emitRegion minValue (r.Min - 1) iBegin i + + if ranges.Length <> 0 then + emitRegion minValue maxValue 0 ranges.Length + + ilg.MarkLabel(outOfRangeLabel) + if inverse then + ilg.Emit(OpCodes.Ldc_I4_1) + else + ilg.Emit(OpCodes.Ldc_I4_0) + storeResult ilg + ilg.MarkLabel(endLabel) + + + +let emitSwitch (ilg: ILGenerator) (loadVar: ILGenerator -> unit) (temps: TempLocals) + lengthCap densityThreshold + minValue maxValue + (defaultLabel: Label) (ranges: Range[]) (labels: Label[]) = + Debug.Assert(ranges.Length = labels.Length) + checkLabelRangesAreValidSortedAndUnconnected ranges labels + + let emitJumpTable (* minValue maxValue *) iBegin iEnd = + // We can't optimize the range check of the switch statement, + // so we have no use for minValue and maxValue arguments. + // (In LLVM we could use the 'unreachable' instruction for optimizing the range check.) 
+ Debug.Assert(iBegin + 2 <= iEnd) + let first = ranges.[iBegin].Min + let off = first + let length = + let last = ranges.[iEnd - 1].Max + let lastMinusOff = last - off + if uint32 lastMinusOff >= uint32 System.Int32.MaxValue then + raise (System.ArgumentException("The ranges span width is too large.")) + lastMinusOff + 1 // length <= Int32.MaxValue + + let jt = Array.zeroCreate length + let mutable j = 0 + for i = iBegin to iEnd - 1 do + let r = ranges.[i] + let rMin, rMax = r.Min - off, r.Max - off + while j < rMin do + jt.[j] <- defaultLabel + j <- j + 1 + let label = labels.[i] + while j <= rMax do + jt.[j] <- label + j <- j + 1 + + loadVar ilg + if off <> 0 then + loadI4 ilg off + ilg.Emit(OpCodes.Sub) + ilg.Emit(OpCodes.Switch, jt) + ilg.Emit(OpCodes.Br, defaultLabel) + + let emitBranchIfInRange2 label (defaultLabel: Label) minValue maxValue (range: Range) = + if minValue < range.Min || range.Max < maxValue then + loadVar ilg + emitBranchIfInRange ilg label minValue maxValue range + ilg.Emit(OpCodes.Br, defaultLabel) + else + ilg.Emit(OpCodes.Br, label) + + let emitBranchIfInRange label minValue maxValue (range: Range) = + loadVar ilg + emitBranchIfInRange ilg label minValue maxValue range + + let emitBranchIfOutOfRange label minValue maxValue (range: Range) = + if minValue < range.Min || range.Max < maxValue then + loadVar ilg + emitBranchIfOutOfRange ilg label minValue maxValue range + + let rec emitRegion minValue maxValue iBegin iEnd = + Debug.Assert(iBegin < iEnd && minValue <= ranges.[iBegin].Min && ranges.[iEnd - 1].Max <= maxValue) + + let pivotAroundRange i pivotAroundRangeMax = + let label = ilg.DefineLabel() + let r = ranges.[i] + loadVar ilg + if pivotAroundRangeMax then + loadI4 ilg r.Max + ilg.Emit(OpCodes.Bgt, label) + emitRegion minValue r.Max iBegin (i + 1) + ilg.MarkLabel(label) + emitRegion (r.Max + 1) maxValue (i + 1) iEnd + else + loadI4 ilg r.Min + ilg.Emit(OpCodes.Blt, label) + emitRegion r.Min maxValue i iEnd + ilg.MarkLabel(label) + 
emitRegion minValue (r.Min - 1) iBegin i + + match iEnd - iBegin with + | 0 -> + failwith "emitSwitch.emitRegion" + | 1 -> + emitBranchIfInRange2 labels.[iBegin] defaultLabel minValue maxValue ranges.[iBegin] + | 2 -> + let r1, r2 = ranges.[iBegin], ranges.[iBegin + 1] + let l1, l2 = labels.[iBegin], labels.[iBegin + 1] + if l1 = l2 then + Debug.Assert(r1.Max + 1 < r2.Min) + //emitBranchIfOutOfRange defaultLabel minValue maxValue (Range(r1.Min, r2.Max)) + //emitBranchIfInRange2 defaultLabel l1 r1.Min r2.Max (Range(r1.Max + 1, r2.Min - 1)) + emitTwoRangeTest ilg loadVar false minValue maxValue r1 r2 + ilg.Emit(OpCodes.Brtrue, l1) + ilg.Emit(OpCodes.Br, defaultLabel) + else + let rangesAreConnected = r1.Max + 1 = r2.Min + let checkLeft, checkRight = minValue < r1.Min, r2.Max < maxValue + if rangesAreConnected && ((checkLeft && checkRight) || (not checkLeft && not checkRight)) then + emitBranchIfOutOfRange defaultLabel minValue maxValue (Range(r1.Min, r2.Max)) + // If 64-bit .NET JIT can substitute both of the branches emitted below with + // the code at the destination, it chooses to substitute the first. Hence, + // we put the more likely case first (assuming that values are + // uniformly distributed on {minValue ... maxValue}). + // (The 32-bit .NET JIT (version 4) doesn't yet seem to seriously attempt + // a code block reordering optimization.) 
+ if uint32 (r1.Max - r1.Min) >= uint32 (r2.Max - r2.Min) then + emitBranchIfInRange2 l1 l2 r1.Min r2.Max r1 + else + emitBranchIfInRange2 l2 l1 r1.Min r2.Max r2 + else + if (if rangesAreConnected then checkRight (* not checkLeft *) + else uint32 (r1.Max - r1.Min) >= uint32 (r2.Max - r2.Min)) + then + emitBranchIfInRange l1 minValue maxValue r1 + let minRightValue = if checkLeft then minValue else r1.Max + 1 + emitBranchIfInRange2 l2 defaultLabel minRightValue maxValue r2 + else + emitBranchIfInRange l2 minValue maxValue r2 + let maxLeftValue = if checkRight then maxValue else r2.Min - 1 + emitBranchIfInRange2 l1 defaultLabel minValue maxLeftValue r1 + | _ -> // at least 3 ranges + let allLabelsAreIdentical = + let label = labels.[iBegin] + let mutable i = iBegin + 1 + while i < iEnd && label.Equals(labels.[i]) do i <- i + 1 + i = iEnd + if allLabelsAreIdentical then + let bl = temps.GetBoolLocal() + // emitSetMembershipTest doesn't use GetBoolLocal itself + emitSetMembershipTest ilg loadVar (fun ilg -> ilg.Emit(OpCodes.Stloc, bl)) temps + (lengthCap*8) (densityThreshold/32.) + minValue maxValue + false ranges.[iBegin..(iEnd - 1)] + ilg.Emit(OpCodes.Ldloc, bl) + ilg.Emit(OpCodes.Brtrue, labels.[iBegin]) + ilg.Emit(OpCodes.Br, defaultLabel) + elif density lengthCap ranges iBegin iEnd >= densityThreshold then + emitJumpTable iBegin iEnd + else + let i, pivotAroundRangeMax = findPivot ranges iBegin iEnd + pivotAroundRange i pivotAroundRangeMax + + if ranges.Length <> 0 then + emitRegion minValue maxValue 0 ranges.Length + else + ilg.Emit(OpCodes.Br, defaultLabel) + +#endif \ No newline at end of file diff --git a/src/FParsec/Error.fs b/src/FParsec/Error.fs new file mode 100644 index 0000000..8b5d90a --- /dev/null +++ b/src/FParsec/Error.fs @@ -0,0 +1,363 @@ +// Copyright (c) Stephan Tolksdorf 2007-2011 +// License: BSD-style. See accompanying documentation. 
+ +[] +module FParsec.Error + +//open FParsec + +open System.Diagnostics +open System.Globalization +open System.IO +open FParsec.Internals + +// Unfortunately, F# currently doesn't support active patterns with more than 7 +// cases, so we have to use partial patterns. + +type Expected = ErrorMessage.Expected +type ExpectedString = ErrorMessage.ExpectedString +type ExpectedStringCI = ErrorMessage.ExpectedCaseInsensitiveString +type Unexpected = ErrorMessage.Unexpected +type UnexpectedString = ErrorMessage.UnexpectedString +type UnexpectedStringCI = ErrorMessage.UnexpectedCaseInsensitiveString +type Message = ErrorMessage.Message +type NestedError = ErrorMessage.NestedError +type CompoundError = ErrorMessage.CompoundError +type OtherErrorMessage = ErrorMessage.Other + +let (|Expected|_|) (msg: ErrorMessage) = + if msg.Type = ErrorMessageType.Expected then Some msg.String else None + +let (|ExpectedString|_|) (msg: ErrorMessage) = + if msg.Type = ErrorMessageType.ExpectedString then Some msg.String else None + +let (|ExpectedStringCI|_|) (msg: ErrorMessage) = + if msg.Type = ErrorMessageType.ExpectedCaseInsensitiveString then Some msg.String else None + +let (|Unexpected|_|) (msg: ErrorMessage) = + if msg.Type = ErrorMessageType.Unexpected then Some msg.String else None + +let (|UnexpectedString|_|) (msg: ErrorMessage) = + if msg.Type = ErrorMessageType.UnexpectedString then Some msg.String else None + +let (|UnexpectedStringCI|_|) (msg: ErrorMessage) = + if msg.Type = ErrorMessageType.UnexpectedCaseInsensitiveString then Some msg.String else None + +let (|Message|_|) (msg: ErrorMessage) = + if msg.Type = ErrorMessageType.Message then Some msg.String else None + +let (|NestedError|_|) (msg: ErrorMessage) = + if msg.Type = ErrorMessageType.NestedError then + let ne = msg :?> ErrorMessage.NestedError + Some((ne.Position, ne.UserState, ne.Messages)) + else + None + +let (|CompoundError|_|) (msg: ErrorMessage) = + if msg.Type = ErrorMessageType.CompoundError then + let 
ce = msg :?> ErrorMessage.CompoundError + Some((ce.LabelOfCompound, ce.NestedErrorPosition, ce.NestedErrorUserState, ce.NestedErrorMessages)) + else + None + +let (|OtherErrorMessage|_|) (msg: ErrorMessage) = + if msg.Type = ErrorMessageType.Other then + let om = msg :?> ErrorMessage.Other + Some om.Data + else + None + +[] +let NoErrorMessages = null : ErrorMessageList + +let (|ErrorMessageList|NoErrorMessages|) (error: ErrorMessageList) = + if isNotNull error then ErrorMessageList(error.Head, error.Tail) + else NoErrorMessages + +let inline isSingleErrorMessageOfType (ty: ErrorMessageType) (error: ErrorMessageList) = + isNotNull error && error.Head.Type = ty && isNull error.Tail + +let expected label = ErrorMessageList(ErrorMessage.Expected(label)) +let expectedString str = ErrorMessageList(ErrorMessage.ExpectedString(str)) +let expectedStringCI str = ErrorMessageList(ErrorMessage.ExpectedCaseInsensitiveString(str)) +let unexpected label = ErrorMessageList(ErrorMessage.Unexpected(label)) +let unexpectedString str = ErrorMessageList(ErrorMessage.UnexpectedString(str)) +let unexpectedStringCI str = ErrorMessageList(ErrorMessage.UnexpectedCaseInsensitiveString(str)) +let messageError msg = ErrorMessageList(ErrorMessage.Message(msg)) +let otherError obj = ErrorMessageList(ErrorMessage.Other(obj : obj)) + +let nestedError (stream: CharStream<'u>) (error: ErrorMessageList) = + (* + // manually inlined: + match error with + | ErrorMessageList(NestedError _, NoErrorMessages) -> error + | _ -> ErrorMessageList(NestedError(stream.Position, stream.UserState, error), NoErrorMessages) + *) + if error |> isSingleErrorMessageOfType ErrorMessageType.NestedError + then error + else ErrorMessageList(ErrorMessage.NestedError(stream.Position, stream.UserState, error)) + +let compoundError label (stream: CharStream<'u>) (error: ErrorMessageList) = + // manually inlined: + (* + match error with + | ErrorMessageList(NestedError(pos, ustate, msgs), NoErrorMessages) -> + 
ErrorMessageList(CompoundError(label, pos, ustate, msgs), NoErrorMessages) + | _ -> ErrorMessageList(CompoundError(label, stream.Position, stream.UserState, error), NoErrorMessages) + *) + if error |> isSingleErrorMessageOfType ErrorMessageType.NestedError + then + let ne = error.Head :?> ErrorMessage.NestedError + ErrorMessageList(ErrorMessage.CompoundError(label, ne.Position, ne.UserState, ne.Messages)) + else + ErrorMessageList(ErrorMessage.CompoundError(label, stream.Position, stream.UserState, error)) + +let +#if NOINLINE +#else + inline +#endif + mergeErrors errorMessages1 errorMessages2 = ErrorMessageList.Merge(errorMessages1, errorMessages2) + +/// the default position printer +let internal printPosition (tw: System.IO.TextWriter) (p: Position) (indent: string) (columnWidth: int) = + tw.Write(indent) + tw.WriteLine(Strings.ErrorPosition(p)) + +let internal printErrorPosition (tabSize: int) (lw: LineWrapper) (stream: CharStream<'u>) (p: Position) = + /// writes the string with all whitespace chars replaced with ' ' + let writeStringWithSimplifiedWhitespace (tw: TextWriter) (s: string) = + let mutable i0 = 0 + for i = 0 to s.Length - 1 do + let c = s.[i] + if Text.IsWhitespace(c) then + if i0 < i then + tw.Write(s.Substring(i0, i - i0)) + tw.Write(' ') + i0 <- i + 1 + if i0 < s.Length then + if i0 = 0 then tw.Write(s) + else tw.Write(s.Substring(i0, s.Length - i0)) + + let sn = getLineSnippet stream p (lw.ColumnWidth - lw.Indentation.Length) tabSize lw.WriterIsMultiCharGraphemeSafe + let str = sn.String + + lw.PrintLine(Strings.ErrorPosition(p, sn.UnaccountedNewlines, sn.Column, sn.Utf16Column)) + + let msgs = ResizeArray<_>() + if sn.LineContainsTabsBeforeIndex then + let mutable msg = Strings.ColumnCountAssumesTabStopDistanceOfNChars(tabSize) + if sn.Column <> sn.Utf16Column then // only add the UTF-16 note when that count differs from the tab-expanded column + msg <- msg + Strings.Utf16ColumnCountOnlyCountsEachTabAs1Char + msgs.Add(msg) + + if str.Length > 0 then + let tw = lw.TextWriter + tw.Write(lw.Indentation) + 
writeStringWithSimplifiedWhitespace tw str + tw.WriteLine() + tw.Write(lw.Indentation) + if sn.TextElementIndex > 0 then + tw.Write(new string(' ', sn.TextElementIndex)) + tw.Write('^') + let d = sn.Index - sn.TextElementIndex + if d <> 0 && not lw.WriterIsMultiCharGraphemeSafe then + if d > 1 then + tw.Write(new string('-', d - 1)) + tw.Write('^') + msgs.Add(Strings.ExactPositionBetweenCaretsDependsOnDisplayUnicodeCapabilities) + tw.WriteLine() + + if sn.Index < str.Length then + let i = sn.Index + let c = str.[i] + if System.Char.IsSurrogate(c) then + if Text.IsHighSurrogate(c) then + if i + 1 < str.Length && Text.IsLowSurrogate(str.[i + 1]) then + msgs.Add(Strings.ErrorOccurredAtBeginningOfSurrogatePair(str.Substring(i, 2))) + else + msgs.Add(Strings.CharAtErrorPositionIsIsolatedHighSurrogate(c)) + else // low surrogate + if i > 0 && Text.IsHighSurrogate(str.[i - 1]) then + msgs.Add(Strings.ErrorOccurredAtSecondCharInSurrogatePair(str.Substring(i - 1, 2))) + else + msgs.Add(Strings.CharAtErrorPositionIsIsolatedLowSurrogate(c)) + elif i > 0 then + let c1 = str.[i - 1] + if Text.IsHighSurrogate(c1) then + msgs.Add(Strings.CharBeforeErrorPositionIsIsolatedHighSurrogate(c1)) + elif Text.IsLowSurrogate(c1) then + msgs.Add(Strings.CharBeforeErrorPositionIsIsolatedLowSurrogate(c1)) + else + if p.Index = stream.IndexOfLastCharPlus1 then msgs.Add(Strings.ErrorOccurredAtEndOfInputStream) + elif str.Length = 0 then msgs.Add(Strings.ErrorOccurredOnAnEmptyLine) + else msgs.Add(Strings.ErrorOccurredAtEndOfLine) + + if sn.LengthOfTextElement > 1 && (sn.LengthOfTextElement > 2 || not (System.Char.IsSurrogate(str.[sn.Index]))) then + let n = sn.Index - sn.IndexOfTextElement + 1 + let te = str.Substring(sn.IndexOfTextElement, sn.LengthOfTextElement) + msgs.Add(Strings.ErrorOccurredAtNthCharInCombiningCharacterSequence(n, te)) + elif sn.IsBetweenCRAndLF then + msgs.Add(Strings.ErrorOccurredAtSecondCharInNewline) + + if sn.UnaccountedNewlines > 0 then + let n = 
sn.UnaccountedNewlines + msgs.Add(Strings.InputContainsAtLeastNUnaccountedNewlines(n)) + + if msgs.Count = 1 then lw.PrintLine(Strings.Note, msgs.[0]) + elif msgs.Count > 1 then + let ind = lw.Indentation + let ind2 = ind + " " + lw.PrintLine(Strings.Note) + for msg in msgs do + lw.Print("* ") + lw.Indentation <- ind2 + lw.PrintLine(msg) + lw.Indentation <- ind + +[] +type ParserError(position: Position, userState: obj, messages: ErrorMessageList) = + do if isNull position then nullArg "pos" + + let defaultColumnWidth = 79 + let defaultIndentation = "" + let defaultIndentationIncrement = " " + let defaultTabSize = 8 + + member t.Position = position + member t.UserState = userState + member T.Messages = messages + + override t.ToString() = + use sw = new System.IO.StringWriter() + t.WriteTo(sw) + sw.ToString() + + member t.ToString(streamWhereErrorOccurred: CharStream<'u>) = + use sw = new System.IO.StringWriter() + t.WriteTo(sw, streamWhereErrorOccurred) + sw.ToString() + + member t.WriteTo(textWriter: System.IO.TextWriter, + ?positionPrinter: (System.IO.TextWriter -> Position -> string -> int -> unit), + ?columnWidth: int, ?initialIndentation: string, ?indentationIncrement: string) = + + let positionPrinter = defaultArg positionPrinter printPosition + let columnWidth = defaultArg columnWidth defaultColumnWidth + let ind = defaultArg initialIndentation defaultIndentation + let indIncrement = defaultArg indentationIncrement defaultIndentationIncrement + let lw = new LineWrapper(textWriter, columnWidth, Indentation = ind) + t.WriteTo(lw, positionPrinter, indIncrement) + + member t.WriteTo(textWriter: System.IO.TextWriter, + streamWhereErrorOccurred: CharStream<'u>, + ?tabSize: int, + ?columnWidth: int, ?initialIndentation: string, ?indentationIncrement: string) = + + let originalStreamName = t.Position.StreamName + let getStream = fun (pos: Position) -> if pos.StreamName = originalStreamName then streamWhereErrorOccurred else null + t.WriteTo(textWriter, getStream, 
?tabSize = tabSize, ?columnWidth = columnWidth, ?initialIndentation = initialIndentation, ?indentationIncrement = indentationIncrement) + + member t.WriteTo(textWriter: System.IO.TextWriter, + getStream: (Position -> CharStream<'u>), + ?tabSize: int, + ?columnWidth: int, ?initialIndentation: string, ?indentationIncrement: string) = + + let columnWidth = defaultArg columnWidth defaultColumnWidth + let ind = defaultArg initialIndentation defaultIndentation + let indIncrement = defaultArg indentationIncrement defaultIndentationIncrement + let tabSize = defaultArg tabSize defaultTabSize + let lw = new LineWrapper(textWriter, columnWidth, Indentation = ind) + let positionPrinter = + fun tw position indent columnWidth -> + let stream = getStream position + if isNotNull stream then + printErrorPosition tabSize lw stream position + else + printPosition lw.TextWriter position indent columnWidth + t.WriteTo(lw, positionPrinter, indIncrement) + + member private t.WriteTo(lw: LineWrapper, + positionPrinter: System.IO.TextWriter -> Position -> string -> int -> unit, + indentationIncrement: string) = + + let rec printMessages (position: Position) (msgs: ErrorMessageList) = + positionPrinter lw.TextWriter position lw.Indentation lw.ColumnWidth + let nra() = new ResizeArray<_>() + let expectedA, unexpectedA, messageA, nestedA, compoundA = nra(), nra(), nra(), nra(), nra() + let mutable otherCount = 0 + for msg in ErrorMessageList.ToSortedArray(msgs) do + match msg.Type with + | ErrorMessageType.Expected -> expectedA.Add(msg.String) + | ErrorMessageType.ExpectedString -> expectedA.Add(Strings.Quote(msg.String)) + | ErrorMessageType.ExpectedCaseInsensitiveString -> expectedA.Add(Strings.QuoteCaseInsensitive(msg.String)) + | ErrorMessageType.Unexpected -> unexpectedA.Add(msg.String) + | ErrorMessageType.UnexpectedString -> unexpectedA.Add(Strings.Quote(msg.String)) + | ErrorMessageType.UnexpectedCaseInsensitiveString -> unexpectedA.Add(Strings.QuoteCaseInsensitive(msg.String)) + | 
ErrorMessageType.Message -> messageA.Add(msg.String) + | ErrorMessageType.NestedError -> + let ne = msg :?> ErrorMessage.NestedError + nestedA.Add((ne.Position, ne.Messages)) + | ErrorMessageType.CompoundError -> + if not (isNullOrEmpty msg.String) then expectedA.Add(msg.String) + let ce = msg :?> ErrorMessage.CompoundError + compoundA.Add((ce.String, ce.NestedErrorPosition, ce.NestedErrorMessages)) + | ErrorMessageType.Other -> + otherCount <- otherCount + 1 + | _ -> + failwith "printMessages" + + let printArray title (a: ResizeArray) (sep: string) = + lw.Print(title, " ") + let n = a.Count + for i = 0 to n - 3 do + lw.Print(a.[i], ", ") + if n > 1 then lw.Print(a.[n - 2], sep) + if n > 0 then lw.Print(a.[n - 1]) + lw.Newline() + if expectedA.Count > 0 then + printArray Strings.Expecting expectedA Strings.Or + if unexpectedA.Count > 0 then + printArray Strings.Unexpected unexpectedA Strings.And + let ind = lw.Indentation + let indInd = ind + indentationIncrement + if messageA.Count > 0 then + if expectedA.Count > 0 || unexpectedA.Count > 0 then + lw.PrintLine(Strings.OtherErrors) + lw.Indentation <- indInd + for m in messageA do + lw.PrintLine(m) + if expectedA.Count > 0 || unexpectedA.Count > 0 then + lw.Indentation <- ind + for label, pos2, msgs2 in compoundA do + lw.Newline() + lw.PrintLine(Strings.CompoundCouldNotBeParsedBecause(label)) + lw.Indentation <- indInd + printMessages pos2 msgs2 + lw.Indentation <- ind + for pos2, msgs2 in nestedA do + lw.Newline() + lw.PrintLine(Strings.ParserBacktrackedAfter) + lw.Indentation <- indInd + printMessages pos2 msgs2 + lw.Indentation <- ind + if expectedA.Count = 0 && unexpectedA.Count = 0 && messageA.Count = 0 + && compoundA.Count = 0 && nestedA.Count = 0 + then + lw.PrintLine(Strings.UnknownErrors) + printMessages position messages + + override t.Equals(value: obj) = + referenceEquals (t :> obj) value + || match value with + | null -> false + | :? 
ParserError as other -> + t.Position = other.Position + && t.Messages = other.Messages + && t.UserState = other.UserState + | _ -> false + + override t.GetHashCode() = t.Position.GetHashCode() ^^^ hash t.Messages + +let inline internal raiseInfiniteLoopException name stream = + raise (FParsec.Internal.ParserCombinatorInInfiniteLoopHelper.CreateException(name, stream)) diff --git a/src/FParsec/Error.fsi b/src/FParsec/Error.fsi new file mode 100644 index 0000000..59de760 --- /dev/null +++ b/src/FParsec/Error.fsi @@ -0,0 +1,142 @@ +// Copyright (c) Stephan Tolksdorf 2007-2011 +// License: Simplified BSD License. See accompanying documentation. + +[] +module FParsec.Error + +type Expected = ErrorMessage.Expected +type ExpectedString = ErrorMessage.ExpectedString +type ExpectedStringCI = ErrorMessage.ExpectedCaseInsensitiveString +type Unexpected = ErrorMessage.Unexpected +type UnexpectedString = ErrorMessage.UnexpectedString +type UnexpectedStringCI = ErrorMessage.UnexpectedCaseInsensitiveString +type Message = ErrorMessage.Message +type NestedError = ErrorMessage.NestedError +type CompoundError = ErrorMessage.CompoundError +type OtherErrorMessage = ErrorMessage.Other + +val (|Expected|_|): ErrorMessage -> string option +val (|ExpectedString|_|): ErrorMessage -> string option +val (|ExpectedStringCI|_|): ErrorMessage -> string option +val (|Unexpected|_|): ErrorMessage -> string option +val (|UnexpectedString|_|): ErrorMessage -> string option +val (|UnexpectedStringCI|_|): ErrorMessage -> string option +val (|Message|_|): ErrorMessage -> string option +val (|NestedError|_|): ErrorMessage -> (Position * obj * ErrorMessageList) option +val (|CompoundError|_|): ErrorMessage -> (string * Position * obj * ErrorMessageList) option +val (|OtherErrorMessage|_|): ErrorMessage -> obj option + +[] +val NoErrorMessages: ErrorMessageList = null;; +val (|ErrorMessageList|NoErrorMessages|): ErrorMessageList -> Choice + +val inline isSingleErrorMessageOfType: ErrorMessageType -> 
ErrorMessageList -> bool + +/// `expected label` creates an `ErrorMessageList` with a single `Expected label` message. +val expected: string -> ErrorMessageList +/// `expectedString str` creates an `ErrorMessageList` with a single `ExpectedString str` message. +val expectedString: string -> ErrorMessageList +/// `expectedStringCI str` creates an `ErrorMessageList` with a single `ExpectedStringCI str` message. +val expectedStringCI: string -> ErrorMessageList + +/// `unexpected label` creates an `ErrorMessageList` with a single `Unexpected label` message. +val unexpected: string -> ErrorMessageList +/// `unexpectedString str` creates an `ErrorMessageList` with a single `UnexpectedString str` message. +val unexpectedString: string -> ErrorMessageList +/// `unexpectedStringCI str` creates an `ErrorMessageList` with a single `UnexpectedStringCI str` message. +val unexpectedStringCI: string -> ErrorMessageList + +/// `messageError msg` creates an `ErrorMessageList` with a single `Message msg` message. +val messageError: string -> ErrorMessageList + +/// `otherError o` creates an `ErrorMessageList` with a single `OtherErrorMessage o` message. +val otherError: obj -> ErrorMessageList + +/// `nestedError stream msgs` creates an `ErrorMessageList` with a single `NestedError(stream.Position, stream.UserState, msgs)` message, +/// except if `msgs` is already an `ErrorMessageList` with a single `NestedError(_, _, _)` message, +/// in which case `msgs` is returned instead. +val nestedError: CharStream<'u> -> ErrorMessageList -> ErrorMessageList + +/// `compoundError label stream msgs` creates an `ErrorMessageList` with a single `CompoundError(label, stream.Position, stream.UserState, msgs)` message, +/// except if `msgs` is an `ErrorMessageList` with a single `NestedError(pos2, ustate2, msgs2)` message, +/// in which case an `ErrorMessageList` with a single `CompoundError(label, pos2, ustate2, msgs2)` message is returned instead.
+val compoundError: string -> CharStream<'u> -> ErrorMessageList -> ErrorMessageList + +/// `mergeErrors error1 error2` is equivalent to `ErrorMessageList.Merge(error1, error2)`. +val +#if NOINLINE +#else + inline +#endif + mergeErrors: ErrorMessageList -> ErrorMessageList -> ErrorMessageList + +/// Represents a simple container type that brings together the position, user state and error messages of a parser error. +[] +type ParserError = + new: Position * userState:obj * ErrorMessageList -> ParserError + + member Position: Position + member UserState: obj + member Messages: ErrorMessageList + + /// Returns a string representation of the `ParserError`. + override ToString: unit -> string + + /// Returns a string representation of the `ParserError`. + /// + /// The given `CharStream` must contain the content of the original `CharStream` + /// for which this `ParserError` was generated (at the original indices). + /// + /// For each error location the printed position information is augmented + /// with the line of text surrounding the error position, together with a '^'-marker + /// pointing to the exact location of the error in the input stream. + member ToString: streamWhereErrorOccurred: CharStream<'u> -> string + + /// Writes a string representation of the `ParserError` to the given `TextWriter` value. + /// + /// The given `CharStream` must contain the content of the original `CharStream` + /// for which this `ParserError` was generated (at the original indices). + /// + /// For each error location the printed position information is augmented + /// with the line of text surrounding the error position, together with a '^'-marker + /// pointing to the exact location of the error in the input stream. 
+ member WriteTo: textWriter: System.IO.TextWriter + * streamWhereErrorOccurred: CharStream<'u> + * ?tabSize: int + * ?columnWidth: int + * ?initialIndention: string * ?indentionIncrement: string + -> unit + + /// Writes a string representation of the `ParserError` to the given `TextWriter` value. + /// + /// For each error position `getStream` is called with the `Position` of the error. + /// The returned `CharStream` must be `null` or contain the content of the `CharStream` for which + /// the error was generated (at the original indices). + /// + /// If `getStream` returns a non-null `CharStream`, the printed error position information is + /// augmented with the line of text surrounding the error position, together with a '^'-marker + /// pointing to the exact location of the error in the input stream. + member WriteTo: textWriter: System.IO.TextWriter + * getStream: (Position -> CharStream<'u>) + * ?tabSize: int + * ?columnWidth: int + * ?initialIndention: string * ?indentionIncrement: string + -> unit + + /// Writes a string representation of the `ParserError` to the given `TextWriter` value. + /// + /// The format of the position information can be customized by specifying the `positionPrinter` + /// argument. The given function is expected to print a representation of the passed `Position` value + /// to the passed `TextWriter` value. If possible, it should indent text lines with the passed string + /// and take into account the maximum column count (including indention) passed as the last argument.
+ member WriteTo: textWriter: System.IO.TextWriter + * ?positionPrinter: (System.IO.TextWriter -> Position -> string -> int -> unit) + * ?columnWidth: int + * ?initialIndention: string * ?indentionIncrement: string + -> unit + + override Equals: obj -> bool + override GetHashCode: unit -> int + + +val inline internal raiseInfiniteLoopException: string -> CharStream -> 'a \ No newline at end of file diff --git a/src/FParsec/FParsec.fsproj b/src/FParsec/FParsec.fsproj new file mode 100644 index 0000000..731ce34 --- /dev/null +++ b/src/FParsec/FParsec.fsproj @@ -0,0 +1,32 @@ + + + + netstandard2.0 + LOW_TRUST + true + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/FParsec/Internals.fs b/src/FParsec/Internals.fs new file mode 100644 index 0000000..521f7a9 --- /dev/null +++ b/src/FParsec/Internals.fs @@ -0,0 +1,404 @@ +// Copyright (c) Stephan Tolksdorf 2009-2011 +// License: Simplified BSD License. See accompanying documentation. + +[] +module FParsec.Internals + +open System.Diagnostics + +// The following functions are defined using inline IL to help fsc generate code +// the JIT knows better how to optimize. +// Should F# stop supporting inline IL outside the standard library, you can switch +// to the commented out alternatives (which by then will probably be just as efficient). 
+let inline referenceEquals<'a when 'a : not struct> (x: 'a) (y: 'a) = + LanguagePrimitives.PhysicalEquality x y +let inline isNull<'a when 'a : not struct> (x: 'a) = + referenceEquals (box x) null +let inline isNotNull<'a when 'a : not struct> (x: 'a) = + not (isNull x) + +let inline isNullOrEmpty (s: string) = System.String.IsNullOrEmpty(s) + +// the F# compiler doesn't yet "fuse" multiple '+' string concatenations into one, as the C# compiler does +let inline concat3 (a: string) (b: string) (c: string) = System.String.Concat(a, b, c) +let inline concat4 (a: string) (b: string) (c: string) (d: string) = System.String.Concat(a, b, c, d) +let inline concat5 (a: string) (b: string) (c: string) (d: string) (e: string) = System.String.Concat([|a;b;c;d;e|]) +let inline concat6 (a: string) (b: string) (c: string) (d: string) (e: string) (f: string) = System.String.Concat([|a;b;c;d;e;f|]) +let inline concat7 (a: string) (b: string) (c: string) (d: string) (e: string) (f: string) (g: string) = System.String.Concat([|a;b;c;d;e;f;g|]) + +let findNewlineOrEOSChar = Text.FindNewlineOrEOSChar + +let getSortedUniqueValues (s: seq<_>) = + let a = Array.ofSeq s + if a.Length = 0 then a + else + Array.sortInPlace a + let mutable previous = a.[0] + let mutable n = 1 + for i = 1 to a.Length - 1 do + let c = a.[i] + if c <> previous then n <- n + 1 + previous <- c + if n = a.Length then a + else + let b = Array.zeroCreate n + let mutable i = 0 + for j = 0 to b.Length - 1 do + let c = a.[i] + b.[j] <- c + i <- i + 1 + while i < a.Length && a.[i] = c do i <- i + 1 + b + +/// A primitive pretty printer. +type LineWrapper(tw: System.IO.TextWriter, columnWidth: int, writerIsMultiCharGraphemeSafe: bool) = + do if columnWidth < 1 then invalidArg "columnWidth" "columnWidth must be positive." 
+ + let mutable indentation = "" + let mutable maxSpace = columnWidth + let mutable space = columnWidth + let mutable afterNewline = true + let mutable afterSpace = false + + new (tw: System.IO.TextWriter, columnWidth: int) = + new LineWrapper(tw, columnWidth, + #if PCL + true) + #else + not tw.Encoding.IsSingleByte) + #endif + + member t.TextWriter = tw + member t.ColumnWidth = columnWidth + member t.WriterIsMultiCharGraphemeSafe = writerIsMultiCharGraphemeSafe + + member t.Indentation + with get() = indentation + and set (s: string) = + let s = if s.Length <= columnWidth - 1 then s + else s.Substring(0, columnWidth - 1) // guarantee maxSpace >= 1 + indentation <- s + maxSpace <- columnWidth - s.Length + if afterNewline then space <- maxSpace + + member t.Newline() = + tw.WriteLine() + afterNewline <- true + afterSpace <- false + space <- maxSpace + + member t.Space() = + afterSpace <- true + + member t.Print(s: string) = + if isNotNull s then + let mutable start = 0 + for i = 0 to s.Length - 1 do + let c = s.[i] + if (if c <= ' ' then c = ' ' || (c >= '\t' && c <= '\r') + else c >= '\u0085' && (c = '\u0085' || c = '\u2028' || c = '\u2029')) + then // any ' ', tab or newlines + if start < i then + t.Write(s.Substring(start, i - start)) + t.Space() + start <- i + 1 + if start < s.Length then + if start = 0 then t.Write(s) + else t.Write(s.Substring(start, s.Length - start)) + + member t.Print(s1, s2) = t.Print(s1); t.Print(s2) + member t.Print(s1, s2, s3) = t.Print(s1); t.Print(s2); t.Print(s3) + member t.PrintLine(s: string) = t.Print(s); t.Newline() + member t.PrintLine(s1: string, s2: string) = t.Print(s1); t.Print(s2); t.Newline() + member t.PrintLine(s1: string, s2: string, s3: string) = t.Print(s1); t.Print(s2); t.Print(s3); t.Newline() + + member private t.Write(s: string) = + Debug.Assert(s.Length > 0) + if afterNewline then + tw.Write(indentation) + afterNewline <- false + let n = if writerIsMultiCharGraphemeSafe then Text.CountTextElements(s) else 
s.Length + match afterSpace with + | true when n + 1 <= space -> + tw.Write(' ') + tw.Write(s) + space <- space - 1 - n + afterSpace <- false + | false when n <= space -> + tw.Write(s) + space <- space - n + | _ when s.Length <= maxSpace -> + tw.WriteLine() + tw.Write(indentation) + tw.Write(s) + space <- maxSpace - n + afterSpace <- false + | _ -> + t.Break(s) + + /// breaks a string into multiple lines along text element boundaries. + member private t.Break(s: string) = + Debug.Assert(s.Length > 0 && not afterNewline) + if afterSpace then + afterSpace <- false + if space > 1 then + tw.Write(' ') + space <- space - 1 + else + tw.WriteLine() + tw.Write(indentation) + space <- maxSpace + elif space = 0 then + tw.WriteLine() + tw.Write(indentation) + space <- maxSpace + let te = System.Globalization.StringInfo.GetTextElementEnumerator(s) + te.MoveNext() |> ignore + Debug.Assert(te.ElementIndex = 0) + if writerIsMultiCharGraphemeSafe then + let mutable startIndex = 0 + while te.MoveNext() do + space <- space - 1 + if space = 0 then + let index = te.ElementIndex + tw.WriteLine(s.Substring(startIndex, index - startIndex)) + tw.Write(indentation) + space <- maxSpace + startIndex <- index + space <- space - 1 + tw.Write(s.Substring(startIndex, s.Length - startIndex)) + else + // We don't break up text elements, but when we fit string pieces into lines we + // use UTF-16 lengths instead of text element counts (in order to support displays + // that have problems with combining character sequences). 
+ let mutable startIndex = 0 + let mutable lastIndex = 0 + while te.MoveNext() do + let index = te.ElementIndex + let count = index - startIndex + if count < space then + lastIndex <- index + elif count = space || lastIndex <= startIndex then + tw.WriteLine(s.Substring(startIndex, count)) + tw.Write(indentation) + space <- maxSpace + startIndex <- index + else + tw.WriteLine(s.Substring(startIndex, lastIndex - startIndex)) + tw.Write(indentation) + space <- maxSpace + startIndex <- lastIndex + let index = s.Length + let count = index - startIndex + if count <= space then + tw.Write(s.Substring(startIndex, count)) + space <- space - count + elif lastIndex <= startIndex then + tw.WriteLine(s.Substring(startIndex, index - startIndex)) + space <- maxSpace + afterNewline <- true + else + tw.WriteLine(s.Substring(startIndex, lastIndex - startIndex)) + tw.Write(indentation) + tw.Write(s.Substring(lastIndex, index - lastIndex)) + space <- maxSpace - (index - lastIndex) + if space < 0 then + tw.WriteLine() + space <- maxSpace + afterNewline <- true + + +type LineSnippet = { + String: string + TextElementIndex: int + Index: int + IndexOfTextElement: int + LengthOfTextElement: int + UnaccountedNewlines: int + Column: int64 + Utf16Column: int64 // the UTF16 tabs are only counted as 1 char + LineContainsTabsBeforeIndex: bool + IsBetweenCRAndLF: bool +} + +let getLineSnippet (stream: CharStream<'u>) (p: Position) (space: int) (tabSize: int) multiCharGraphemeSafe = + Debug.Assert(space > 0 && tabSize > 0) + Debug.Assert(p.Index >= stream.IndexOfFirstChar && p.Index <= stream.IndexOfLastCharPlus1) + + let isCombiningChar (s: string) = + match System.Globalization.CharUnicodeInfo.GetUnicodeCategory(s, 0) with + | System.Globalization.UnicodeCategory.NonSpacingMark + | System.Globalization.UnicodeCategory.SpacingCombiningMark + | System.Globalization.UnicodeCategory.EnclosingMark + | System.Globalization.UnicodeCategory.Surrogate + -> true + | _ -> false + + let 
isUnicodeNewlineOrEos c = + match c with + | '\n' | '\r'| '\u0085'| '\u2028'| '\u2029' + | '\uffff' -> true + | _ -> false + + // we restrict the maximum column count, so that we don't accidentally + // completely reread a multi-gigabyte file when it has no newlines + let maxColForColCount = 1000 + let maxExtraChars = 32 + let colTooLarge = p.Column > int64 maxColForColCount + + let oldState = stream.State + + let mutable index = p.Index + stream.Seek(index) // throws if index is too small + if index <> stream.Index then + raise (System.ArgumentException("The error position lies beyond the end of the stream.")) + let isBetweenCRAndLF = stream.Peek() = '\n' && stream.Peek(-1) = '\r' + if isBetweenCRAndLF then + stream.Skip(-1) + index <- index - 1L + else + let mutable c = stream.Peek() + let mutable n = 2*space + maxExtraChars + // skip to end of line, but not over more than n chars + while not (isUnicodeNewlineOrEos c) && n <> 0 do + c <- stream.SkipAndPeek() + n <- n - 1 + if not (isUnicodeNewlineOrEos c) then + n <- maxExtraChars + while isCombiningChar (stream.PeekString(2)) && n <> 0 do + stream.Skip() |> ignore + n <- n - 1 + let endIndexToken = stream.IndexToken + + stream.Seek(index) + let lineBegin = index - p.Column + 1L + // use SkipAndPeek instead of Skip, so that we can't move past the beginning of the stream + stream.SkipAndPeek(if not colTooLarge then -(int32 p.Column - 1) else -(maxColForColCount - 1)) |> ignore + if colTooLarge then + let mutable n = if p.Column > int64 System.Int32.MaxValue then maxExtraChars + else min maxExtraChars (int32 p.Column - maxColForColCount) + while isCombiningChar (stream.PeekString(2)) && n <> 0 do + stream.SkipAndPeek(-1) |> ignore + n <- n - 1 + let mutable beginIndex = stream.Index + let mutable columnOffset = beginIndex - lineBegin + let mutable idx = int (index - beginIndex) + + let beginIndexToken = stream.IndexToken + stream.Seek(endIndexToken) + let mutable str = stream.ReadFrom(beginIndexToken) + + // we're 
done with the stream now + stream.BacktrackTo(oldState) + + let mutable lastLineBeginIdx = 0 + let mutable unaccountedNLs = 0 + let mutable mayContainMultiCharGraphemes = false + let mutable nTabs = 0 + + for i = 0 to str.Length - 1 do + let c = str.[i] + if c >= ' ' then + if c >= '\u0300' then + mayContainMultiCharGraphemes <- true + elif c = '\t' then + nTabs <- nTabs + 1 + elif c = '\n' || (c = '\r' && (i + 1 >= str.Length || str.[i + 1] <> '\n')) then + // there can be no newline after idx + lastLineBeginIdx <- i + 1 + unaccountedNLs <- unaccountedNLs + 1 + mayContainMultiCharGraphemes <- false + nTabs <- 0 + + if unaccountedNLs <> 0 then + str <- str.Substring(lastLineBeginIdx) + idx <- idx - lastLineBeginIdx + columnOffset <- 0L + + let utf16Column = columnOffset + int64 (idx + 1) + let mutable lineContainsTabsBeforeIndex = false + if nTabs > 0 then // replace tabs with spaces + let mutable off = if columnOffset = 0L then 0 + else int32 (columnOffset%(int64 tabSize)) + let sb = new System.Text.StringBuilder(str.Length + nTabs*tabSize) + let mutable i0 = 0 + let mutable idxIncr = 0 + for i = 0 to str.Length - 1 do + if str.[i] = '\t' then + if i > i0 then sb.Append(str, i0, i - i0) |> ignore + let n = tabSize - (off + i)%tabSize + sb.Append(' ', n) |> ignore + off <- off + (n - 1) + if i < idx then + lineContainsTabsBeforeIndex <- true + idxIncr <- idxIncr + (n - 1) + i0 <- i + 1 + if i0 < str.Length then sb.Append(str, i0, str.Length - i0) |> ignore + str <- sb.ToString() + idx <- idx + idxIncr + + let clip nBefore nAfter = + let mutable nBefore, nAfter = nBefore, nAfter + let mutable diff = nBefore + nAfter + 1 - space + if diff > 0 then + let d = nBefore - nAfter + if d > 0 then + let dd = min diff d + nBefore <- nBefore - dd + diff <- diff - dd + elif d < 0 then + let dd = min diff -d + nAfter <- nAfter - dd + diff <- diff - dd + if diff <> 0 then + if diff%2 = 0 then + nBefore <- nBefore - diff/2 + nAfter <- nAfter - diff/2 + else + nBefore <- nBefore - 
diff/2 + nAfter <- nAfter - diff/2 - 1 + nBefore, nAfter + + if not mayContainMultiCharGraphemes then + let nBefore, nAfter = clip idx (if idx < str.Length then str.Length - idx - 1 else 0) + {String = str.Substring(idx - nBefore, nBefore + nAfter + (if idx < str.Length then 1 else 0)) + Index = nBefore + TextElementIndex = nBefore + IndexOfTextElement = nBefore + LengthOfTextElement = 1 + UnaccountedNewlines = unaccountedNLs + Column = columnOffset + int64 (idx + 1) + Utf16Column = utf16Column + LineContainsTabsBeforeIndex = lineContainsTabsBeforeIndex + IsBetweenCRAndLF = isBetweenCRAndLF} + else + let indices = System.Globalization.StringInfo.ParseCombiningCharacters(str) + let mutable idxIdx = 0 // the indices index of the text element containing the str char at idx + while idxIdx < indices.Length && indices.[idxIdx] < idx do idxIdx <- idxIdx + 1 + if (if idxIdx < indices.Length then indices.[idxIdx] > idx else idx < str.Length) then idxIdx <- idxIdx - 1 + let col = columnOffset + int64 (idxIdx + 1) + let teIdx = if idxIdx < indices.Length then indices.[idxIdx] else str.Length + let teLength = (if idxIdx + 1 < indices.Length then indices.[idxIdx + 1] else str.Length) - teIdx + let mutable nBefore, nAfter = clip idxIdx (if idxIdx = indices.Length then 0 else indices.Length - idxIdx - 1) + let mutable strBegin = let ii = idxIdx - nBefore in if ii < indices.Length then indices.[ii] else str.Length + let mutable strEnd = let ii = idxIdx + nAfter + 1 in if ii < indices.Length then indices.[ii] else str.Length + if not multiCharGraphemeSafe then + while strEnd - strBegin > space && (nBefore > 0 || nAfter > 0) do + if nBefore > nAfter then + nBefore <- nBefore - 1 + strBegin <- indices.[idxIdx - nBefore] + else + nAfter <- nAfter - 1 + strEnd <- indices.[idxIdx + nAfter + 1] + {String = str.Substring(strBegin, strEnd - strBegin) + Index = idx - strBegin + TextElementIndex = nBefore + IndexOfTextElement = teIdx - strBegin + LengthOfTextElement = teLength + 
UnaccountedNewlines = unaccountedNLs + Column = col + Utf16Column = utf16Column + LineContainsTabsBeforeIndex = lineContainsTabsBeforeIndex + IsBetweenCRAndLF = isBetweenCRAndLF} + + diff --git a/src/FParsec/Primitives.fs b/src/FParsec/Primitives.fs new file mode 100644 index 0000000..257de76 --- /dev/null +++ b/src/FParsec/Primitives.fs @@ -0,0 +1,947 @@ +// Copyright (c) Stephan Tolksdorf 2007-2011 +// License: Simplified BSD License. See accompanying documentation. + +[] +module FParsec.Primitives + +open FParsec.Internals +open FParsec.Error + +[] +let Ok = ReplyStatus.Ok +[] +let Error = ReplyStatus.Error +[] +let FatalError = ReplyStatus.FatalError + +type Parser<'a, 'u> = CharStream<'u> -> Reply<'a> + +// The `PrimitiveTests.Reference` module contains simple (but inefficient) +// reference implementations of most of the functions below. + +// ================================= +// Parser primitives and combinators +// ================================= + +let preturn x : Parser<_,_> = fun stream -> Reply(x) +let pzero : Parser<_,_> = fun stream -> Reply() + +// --------------------------- +// Chaining and piping parsers +// --------------------------- + +let (>>=) (p: Parser<'a,'u>) (f: 'a -> Parser<'b,'u>) = + match box f with + // optimization for uncurried functions + | :? 
OptimizedClosures.FSharpFunc<'a, CharStream<'u>, Reply<'b>> as optF -> + fun stream -> + let reply1 = p stream + if reply1.Status = Ok then + if isNull reply1.Error then + // in separate branch because the JIT can produce better code for a tail call + optF.Invoke(reply1.Result, stream) + else + let stateTag1 = stream.StateTag + let mutable reply2 = optF.Invoke(reply1.Result, stream) + if stateTag1 = stream.StateTag then + reply2.Error <- mergeErrors reply2.Error reply1.Error + reply2 + else + Reply(reply1.Status, reply1.Error) + | _ -> + fun stream -> + let reply1 = p stream + if reply1.Status = Ok then + let p2 = f reply1.Result + if isNull reply1.Error then + // in separate branch because the JIT can produce better code for a tail call + p2 stream + else + let stateTag1 = stream.StateTag + let mutable reply2 = p2 stream + if stateTag1 = stream.StateTag then + reply2.Error <- mergeErrors reply2.Error reply1.Error + reply2 + else + Reply(reply1.Status, reply1.Error) + +let (>>%) (p: Parser<'a,'u>) x = + fun stream -> + let reply = p stream + Reply(reply.Status, x, reply.Error) + +let (>>.) (p: Parser<'a,'u>) (q: Parser<'b,'u>) = + fun stream -> + let mutable reply1 = p stream + if reply1.Status = Ok then + if isNull reply1.Error then + // in separate branch because the JIT can produce better code for a tail call + q stream + else + let stateTag1 = stream.StateTag + let mutable reply2 = q stream + if stateTag1 = stream.StateTag then + reply2.Error <- mergeErrors reply2.Error reply1.Error + reply2 + else + Reply(reply1.Status, reply1.Error) + +let (.>>) (p: Parser<'a,'u>) (q: Parser<'b,'u>) = + fun stream -> + let mutable reply1 = p stream + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let reply2 = q stream + let error = if isNull reply1.Error then reply2.Error + elif stateTag1 <> stream.StateTag then reply2.Error + else mergeErrors reply2.Error reply1.Error + reply1.Error <- error + reply1.Status <- reply2.Status + reply1 + + +let (.>>.) 
(p: Parser<'a,'u>) (q: Parser<'b,'u>) = + fun stream -> + let reply1 = p stream + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let reply2 = q stream + let error = if stateTag1 <> stream.StateTag then reply2.Error + else mergeErrors reply1.Error reply2.Error + let result = if reply2.Status = Ok then (reply1.Result, reply2.Result) + else Unchecked.defaultof<_> + Reply(reply2.Status, result, error) + else + Reply(reply1.Status, reply1.Error) + +let between (popen: Parser<_,'u>) (pclose: Parser<_,'u>) (p: Parser<_,'u>) = + fun stream -> + let reply1 = popen stream + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let mutable reply2 = p stream + if reply2.Status = Ok then + let stateTag2 = stream.StateTag + let reply3 = pclose stream + let error = if stateTag2 <> stream.StateTag then reply3.Error + else + let error2 = mergeErrors reply2.Error reply3.Error + if stateTag1 <> stateTag2 then error2 + else mergeErrors reply1.Error error2 + reply2.Error <- error + reply2.Status <- reply3.Status + reply2 + else + let error = if stateTag1 <> stream.StateTag then reply2.Error + else mergeErrors reply1.Error reply2.Error + reply2.Error <- error + reply2 + else + Reply(reply1.Status, reply1.Error) + +let (|>>) (p: Parser<'a,'u>) f = + fun stream -> + let reply = p stream + Reply(reply.Status, + (if reply.Status = Ok then f reply.Result else Unchecked.defaultof<_>), + reply.Error) + +let pipe2 (p1: Parser<'a,'u>) (p2: Parser<'b,'u>) f = + let optF = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f) + fun stream -> + let mutable reply = Reply() + let reply1 = p1 stream + let mutable error = reply1.Error + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let reply2 = p2 stream + error <- if stateTag1 <> stream.StateTag then reply2.Error + else mergeErrors error reply2.Error + if reply2.Status = Ok then + reply.Result <- optF.Invoke(reply1.Result, reply2.Result) + reply.Status <- Ok + else reply.Status <- reply2.Status + else reply.Status <- 
reply1.Status + reply.Error <- error + reply + +let pipe3 (p1: Parser<'a,'u>) (p2: Parser<'b,'u>) (p3: Parser<'c,'u>) f = + let optF = OptimizedClosures.FSharpFunc<_,_,_,_>.Adapt(f) + fun stream -> + let mutable reply = Reply() + let reply1 = p1 stream + let mutable error = reply1.Error + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let reply2 = p2 stream + error <- if stateTag1 <> stream.StateTag then reply2.Error + else mergeErrors error reply2.Error + if reply2.Status = Ok then + let stateTag2 = stream.StateTag + let reply3 = p3 stream + error <- if stateTag2 <> stream.StateTag then reply3.Error + else mergeErrors error reply3.Error + if reply3.Status = Ok then + reply.Result <- optF.Invoke(reply1.Result, reply2.Result, reply3.Result) + reply.Status <- Ok + else reply.Status <- reply3.Status + else reply.Status <- reply2.Status + else reply.Status <- reply1.Status + reply.Error <- error + reply + +let pipe4 (p1: Parser<'a,'u>) (p2: Parser<'b,'u>) (p3: Parser<'c,'u>) (p4: Parser<'d,'u>) f = + let optF = OptimizedClosures.FSharpFunc<_,_,_,_,_>.Adapt(f) + fun stream -> + let mutable reply = Reply() + let reply1 = p1 stream + let mutable error = reply1.Error + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let reply2 = p2 stream + error <- if stateTag1 <> stream.StateTag then reply2.Error + else mergeErrors error reply2.Error + if reply2.Status = Ok then + let stateTag2 = stream.StateTag + let reply3 = p3 stream + error <- if stateTag2 <> stream.StateTag then reply3.Error + else mergeErrors error reply3.Error + if reply3.Status = Ok then + let stateTag3 = stream.StateTag + let reply4 = p4 stream + error <- if stateTag3 <> stream.StateTag then reply4.Error + else mergeErrors error reply4.Error + if reply4.Status = Ok then + reply.Result <- optF.Invoke(reply1.Result, reply2.Result, reply3.Result, reply4.Result) + reply.Status <- Ok + else reply.Status <- reply4.Status + else reply.Status <- reply3.Status + else reply.Status <- 
reply2.Status + else reply.Status <- reply1.Status + reply.Error <- error + reply + +let pipe5 (p1: Parser<'a,'u>) (p2: Parser<'b,'u>) (p3: Parser<'c,'u>) (p4: Parser<'d,'u>) (p5: Parser<'e,'u>) f = + let optF = OptimizedClosures.FSharpFunc<_,_,_,_,_,_>.Adapt(f) + fun stream -> + let mutable reply = Reply() + let reply1 = p1 stream + let mutable error = reply1.Error + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let reply2 = p2 stream + error <- if stateTag1 <> stream.StateTag then reply2.Error + else mergeErrors error reply2.Error + if reply2.Status = Ok then + let stateTag2 = stream.StateTag + let reply3 = p3 stream + error <- if stateTag2 <> stream.StateTag then reply3.Error + else mergeErrors error reply3.Error + if reply3.Status = Ok then + let stateTag3 = stream.StateTag + let reply4 = p4 stream + error <- if stateTag3 <> stream.StateTag then reply4.Error + else mergeErrors error reply4.Error + if reply4.Status = Ok then + let stateTag4 = stream.StateTag + let reply5 = p5 stream + error <- if stateTag4 <> stream.StateTag then reply5.Error + else mergeErrors error reply5.Error + if reply5.Status = Ok then + reply.Result <- optF.Invoke(reply1.Result, reply2.Result, reply3.Result, reply4.Result, reply5.Result) + reply.Status <- Ok + else reply.Status <- reply5.Status + else reply.Status <- reply4.Status + else reply.Status <- reply3.Status + else reply.Status <- reply2.Status + else reply.Status <- reply1.Status + reply.Error <- error + reply + + +// ----------------------------------------------- +// Parsing alternatives and recovering from errors +// ----------------------------------------------- + +let (<|>) (p1: Parser<'a,'u>) (p2: Parser<'a,'u>) : Parser<'a,'u> = + fun stream -> + let mutable stateTag = stream.StateTag + let mutable reply = p1 stream + if reply.Status = Error && stateTag = stream.StateTag then + let error = reply.Error + reply <- p2 stream + if stateTag = stream.StateTag then + reply.Error <- mergeErrors reply.Error error 
+ reply + +let choice (ps: seq<Parser<'a,'u>>) = + match ps with + | :? (Parser<'a,'u>[]) as ps -> + if ps.Length = 0 then pzero + else + fun stream -> + let stateTag = stream.StateTag + let mutable error = NoErrorMessages + let mutable reply = ps.[0] stream + let mutable i = 1 + while reply.Status = Error && stateTag = stream.StateTag && i < ps.Length do + error <- mergeErrors error reply.Error + reply <- ps.[i] stream + i <- i + 1 + if stateTag = stream.StateTag then + error <- mergeErrors error reply.Error + reply.Error <- error + reply + | :? (Parser<'a,'u> list) as ps -> + match ps with + | [] -> pzero + | hd::tl -> + fun stream -> + let stateTag = stream.StateTag + let mutable error = NoErrorMessages + let mutable hd, tl = hd, tl + let mutable reply = hd stream + while reply.Status = Error && stateTag = stream.StateTag + && (match tl with + | h::t -> hd <- h; tl <- t; true + | _ -> false) + do + error <- mergeErrors error reply.Error + reply <- hd stream + if stateTag = stream.StateTag then + error <- mergeErrors error reply.Error + reply.Error <- error + reply + | _ -> fun stream -> + use iter = ps.GetEnumerator() + if iter.MoveNext() then + let stateTag = stream.StateTag + let mutable error = NoErrorMessages + let mutable reply = iter.Current stream + while reply.Status = Error && stateTag = stream.StateTag && iter.MoveNext() do + error <- mergeErrors error reply.Error + reply <- iter.Current stream + if stateTag = stream.StateTag then + error <- mergeErrors error reply.Error + reply.Error <- error + reply + else + Reply() + + +let choiceL (ps: seq<Parser<'a,'u>>) label : Parser<_,_> = + let error = expected label + match ps with + | :? 
(Parser<'a,'u>[]) as ps -> + if ps.Length = 0 then + fun stream -> Reply(Error, error) + else + fun stream -> + let stateTag = stream.StateTag + let mutable reply = ps.[0] stream + let mutable i = 1 + while reply.Status = Error && stateTag = stream.StateTag && i < ps.Length do + reply <- ps.[i] stream + i <- i + 1 + if stateTag = stream.StateTag then + reply.Error <- error + reply + | :? (Parser<'a,'u> list) as ps -> + match ps with + | [] -> fun stream -> Reply(Error, error) + | hd::tl -> + fun stream -> + let stateTag = stream.StateTag + let mutable hd, tl = hd, tl + let mutable reply = hd stream + while reply.Status = Error && stateTag = stream.StateTag + && (match tl with + | h::t -> hd <- h; tl <- t; true + | _ -> false) + do + reply <- hd stream + if stateTag = stream.StateTag then + reply.Error <- error + reply + | _ -> fun stream -> + use iter = ps.GetEnumerator() + if iter.MoveNext() then + let stateTag = stream.StateTag + let mutable reply = iter.Current stream + while reply.Status = Error && stateTag = stream.StateTag && iter.MoveNext() do + reply <- iter.Current stream + if stateTag = stream.StateTag then + reply.Error <- error + reply + else + Reply(Error, error) + +let (<|>%) (p: Parser<'a,'u>) x : Parser<'a,'u> = + fun stream -> + let stateTag = stream.StateTag + let mutable reply = p stream + if reply.Status = Error && stateTag = stream.StateTag then + reply.Result <- x + reply.Status <- Ok + reply + +let opt (p: Parser<'a,'u>) : Parser<'a option,'u> = + fun stream -> + let stateTag = stream.StateTag + let reply = p stream + if reply.Status = Ok then + Reply(Ok, Some reply.Result, reply.Error) + else + // None is represented as null + let status = if reply.Status = Error && stateTag = stream.StateTag then Ok else reply.Status + Reply(status, reply.Error) + +let optional (p: Parser<'a,'u>) : Parser<unit,'u> = + fun stream -> + let stateTag = stream.StateTag + let reply = p stream + let status = if reply.Status = Error && stateTag = stream.StateTag then Ok 
else reply.Status + Reply(status, (), reply.Error) + +let attempt (p: Parser<'a,'u>) : Parser<'a,'u> = + fun stream -> + // state is only declared mutable so it can be passed by ref, it won't be mutated + let mutable state = CharStreamState(stream) // = stream.State (manually inlined) + let mutable reply = p stream + if reply.Status <> Ok then + if state.Tag <> stream.StateTag then + reply.Error <- nestedError stream reply.Error + reply.Status <- Error // turns FatalErrors into Errors + stream.BacktrackTo(&state) // passed by ref as a (slight) optimization + elif reply.Status = FatalError then + reply.Status <- Error + reply + +let (>>=?) (p: Parser<'a,'u>) (f: 'a -> Parser<'b,'u>) : Parser<'b,'u> = + let optF = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f) + fun stream -> + // state is only declared mutable so it can be passed by ref, it won't be mutated + let mutable state = CharStreamState(stream) // = stream.State (manually inlined) + let reply1 = p stream + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let mutable reply2 = optF.Invoke(reply1.Result, stream) + if stateTag1 = stream.StateTag then + let error = mergeErrors reply1.Error reply2.Error + if reply2.Status <> Error || stateTag1 = state.Tag then + reply2.Error <- error + else + reply2.Error <- nestedError stream error + stream.BacktrackTo(&state) // passed by ref as a (slight) optimization + reply2 + else + Reply(reply1.Status, reply1.Error) + +let (>>?) 
(p: Parser<'a,'u>) (q: Parser<'b,'u>) : Parser<'b,'u> = + fun stream -> + // state is only declared mutable so it can be passed by ref, it won't be mutated + let mutable state = CharStreamState(stream) // = stream.State (manually inlined) + let reply1 = p stream + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let mutable reply2 = q stream + if stateTag1 = stream.StateTag then + let error = mergeErrors reply1.Error reply2.Error + if reply2.Status <> Error || stateTag1 = state.Tag then + reply2.Error <- error + else + reply2.Error <- nestedError stream error + stream.BacktrackTo(&state) // passed by ref as a (slight) optimization + reply2 + else + Reply(reply1.Status, reply1.Error) + +let (.>>.?) (p: Parser<'a,'u>) (q: Parser<'b,'u>) : Parser<'a*'b,'u> = + fun stream -> + // state is only declared mutable so it can be passed by ref, it won't be mutated + let mutable state = CharStreamState(stream) // = stream.State (manually inlined) + let reply1 = p stream + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let mutable reply2 = q stream + if stateTag1 = stream.StateTag then + let error = mergeErrors reply1.Error reply2.Error + if reply2.Status <> Error || stateTag1 = state.Tag then + reply2.Error <- error + else + reply2.Error <- nestedError stream error + stream.BacktrackTo(&state) // passed by ref as a (slight) optimization + let result = if reply2.Status = Ok then (reply1.Result, reply2.Result) + else Unchecked.defaultof<_> + Reply(reply2.Status, result, reply2.Error) + else + Reply(reply1.Status, reply1.Error) + +let (.>>?) 
(p: Parser<'a,'u>) (q: Parser<'b,'u>) : Parser<'a,'u> = + fun stream -> + // state is only declared mutable so it can be passed by ref, it won't be mutated + let mutable state = CharStreamState(stream) // = stream.State (manually inlined) + let mutable reply1 = p stream + if reply1.Status = Ok then + let stateTag1 = stream.StateTag + let reply2 = q stream + if stateTag1 = stream.StateTag then + let error = mergeErrors reply1.Error reply2.Error + if reply2.Status <> Error || stateTag1 = state.Tag then + reply1.Error <- error + reply1.Status <- reply2.Status + else + reply1.Error <- nestedError stream error + stream.BacktrackTo(&state) // passed by ref as a (slight) optimization + reply1.Status <- Error + else + reply1.Error <- reply2.Error + reply1.Status <- reply2.Status + reply1 + + +// ------------------------------------- +// Conditional parsing and looking ahead +// ------------------------------------- + +let notEmpty (p: Parser<'a,'u>) : Parser<'a,'u> = + fun stream -> + let stateTag = stream.StateTag + let mutable reply = p stream + if stateTag = stream.StateTag && reply.Status = Ok then + reply.Status <- Error + reply + +// REVIEW: should `followedBy` use the error messages generated by `p`? 
+ +let internal followedByE (p: Parser<'a,'u>) error : Parser<unit,'u> = + fun stream -> + // state is only declared mutable so it can be passed by ref, it won't be mutated + let mutable state = CharStreamState(stream) // = stream.State (manually inlined) + let reply = p stream + if state.Tag <> stream.StateTag then + stream.BacktrackTo(&state) // passed by ref as a (slight) optimization + if reply.Status = Ok then Reply(()) + else Reply(Error, error) + +let followedBy p = followedByE p NoErrorMessages +let followedByL p label = followedByE p (expected label) + +let internal notFollowedByE (p: Parser<'a,'u>) error : Parser<unit,'u> = + fun stream -> + // state is only declared mutable so it can be passed by ref, it won't be mutated + let mutable state = CharStreamState(stream) // = stream.State (manually inlined) + let reply = p stream + if state.Tag <> stream.StateTag then + stream.BacktrackTo(&state) // passed by ref as a (slight) optimization + if reply.Status <> Ok then Reply(()) + else Reply(Error, error) + +let notFollowedBy p = notFollowedByE p NoErrorMessages +let notFollowedByL p label = notFollowedByE p (unexpected label) + +let lookAhead (p: Parser<'a,'u>) : Parser<'a,'u> = + fun stream -> + // state is only declared mutable so it can be passed by ref, it won't be mutated + let mutable state = CharStreamState(stream) // = stream.State (manually inlined) + let mutable reply = p stream + if reply.Status = Ok then + reply.Error <- NoErrorMessages + if state.Tag <> stream.StateTag then + stream.BacktrackTo(&state) // passed by ref as a (slight) optimization + else + if state.Tag <> stream.StateTag then + reply.Error <- nestedError stream reply.Error + stream.BacktrackTo(&state) + reply.Status <- Error // turn FatalErrors into normal Errors + reply + + +// -------------------------- +// Customizing error messages +// -------------------------- + +let (<?>) (p: Parser<'a,'u>) label : Parser<'a,'u> = + let error = expected label + fun stream -> + let stateTag = stream.StateTag + 
let mutable reply = p stream + if stateTag = stream.StateTag then + reply.Error <- error + reply + +let (<??>) (p: Parser<'a,'u>) label : Parser<'a,'u> = + let expErr = expected label + fun stream -> + // state is only declared mutable so it can be passed by ref, it won't be mutated + let mutable state = CharStreamState(stream) // = stream.State (manually inlined) + let mutable reply = p stream + if reply.Status = Ok then + if state.Tag = stream.StateTag then + reply.Error <- expErr + else + if state.Tag = stream.StateTag then + (* + // manually inlined: + let error = match reply.Error with + | ErrorMessageList(NestedError(pos, userState, msgs), NoErrorMessages) + -> ErrorMessageList(CompoundError(label, pos, userState, msgs), NoErrorMessages) + | _ -> expErr + *) + let error = if reply.Error |> isSingleErrorMessageOfType ErrorMessageType.NestedError then + let ne = reply.Error.Head :?> NestedError + ErrorMessageList(CompoundError(label, ne.Position, ne.UserState, ne.Messages)) + else expErr + reply.Error <- error + else + reply.Error <- compoundError label stream reply.Error + stream.BacktrackTo(&state) // we backtrack ... + reply.Status <- FatalError // ... so we need to make sure normal parsing doesn't continue + reply + +let fail msg : Parser<'a,'u> = + let error = messageError msg + fun stream -> Reply(Error, error) + +let failFatally msg : Parser<'a,'u> = + let error = messageError msg + fun stream -> Reply(FatalError, error) + +// ----------------- +// Parsing sequences +// ----------------- + +let tuple2 p1 p2 = p1 .>>. 
p2 +let tuple3 p1 p2 p3 = pipe3 p1 p2 p3 (fun a b c -> (a, b, c)) +let tuple4 p1 p2 p3 p4 = pipe4 p1 p2 p3 p4 (fun a b c d -> (a, b, c, d)) +let tuple5 p1 p2 p3 p4 p5 = pipe5 p1 p2 p3 p4 p5 (fun a b c d e -> (a, b, c, d, e)) + +let parray n (p: Parser<'a,'u>) = + if n = 0 then preturn [||] + else + fun stream -> + let mutable reply = p stream + let mutable error = reply.Error + let mutable newReply = Reply() + if reply.Status = Ok then + let mutable xs = Array.zeroCreate n + xs.[0] <- reply.Result + let mutable i = 1 + while i < n do + let mutable stateTag = stream.StateTag + reply <- p stream + error <- if stateTag <> stream.StateTag then reply.Error + else mergeErrors error reply.Error + if reply.Status = Ok then + xs.[i] <- reply.Result + i <- i + 1 + else + i <- n // break + newReply.Result <- xs // we set the result even if there was an error + newReply.Error <- error + newReply.Status <- reply.Status + newReply + +let skipArray n (p: Parser<'a,'u>) = + if n = 0 then preturn () + else + fun stream -> + let mutable reply = p stream + let mutable error = reply.Error + let mutable newReply = Reply() + if reply.Status = Ok then + let mutable i = 1 + while i < n do + let mutable stateTag = stream.StateTag + reply <- p stream + error <- if stateTag <> stream.StateTag then reply.Error + else mergeErrors error reply.Error + if reply.Status = Ok then + i <- i + 1 + else + i <- n // break + // () is represented as null + newReply.Error <- error + newReply.Status <- reply.Status + newReply + +[] +type Inline = + +#if NOINLINE + static member +#else + [] + static member inline +#endif + Many(stateFromFirstElement, + foldState, + resultFromState, + elementParser: Parser<_,_>, + ?firstElementParser: Parser<_,_>, + ?resultForEmptySequence) : Parser<_,_> = + fun stream -> + let mutable stateTag = stream.StateTag + let firstElementParser = match firstElementParser with Some p -> p | _ -> elementParser + let mutable reply = firstElementParser stream + if reply.Status = Ok then 
+ let mutable xs = stateFromFirstElement reply.Result + let mutable error = reply.Error + stateTag <- stream.StateTag + reply <- elementParser stream + while reply.Status = Ok do + if stateTag = stream.StateTag then + raiseInfiniteLoopException "many" stream + xs <- foldState xs reply.Result + error <- reply.Error + stateTag <- stream.StateTag + reply <- elementParser stream + if reply.Status = Error && stateTag = stream.StateTag then + error <- mergeErrors error reply.Error + Reply(Ok, resultFromState xs, error) + else + error <- if stateTag <> stream.StateTag then reply.Error + else mergeErrors error reply.Error + Reply(reply.Status, error) + else + match resultForEmptySequence with + | Some _ (* if we bind f here, fsc won't be able to inline it *) + when reply.Status = Error && stateTag = stream.StateTag -> + Reply(Ok, (match resultForEmptySequence with Some f -> f() | _ -> Unchecked.defaultof<_>), reply.Error) + | _ -> + Reply(reply.Status, reply.Error) + +#if NOINLINE + static member +#else + [] + static member inline +#endif + SepBy(stateFromFirstElement, + foldState, + resultFromState, + elementParser: Parser<_,_>, + separatorParser: Parser<_,_>, + ?firstElementParser: Parser<_,'u>, + ?resultForEmptySequence, + ?separatorMayEndSequence) : Parser<_,'u> = + fun stream -> + let mutable stateTag = stream.StateTag + let firstElementParser = match firstElementParser with Some p -> p | _ -> elementParser + let mutable reply = firstElementParser stream + if reply.Status = Ok then + let mutable xs = stateFromFirstElement reply.Result + let mutable error = reply.Error + stateTag <- stream.StateTag + let mutable sepReply = separatorParser stream + let mutable sepStateTag = stream.StateTag + while sepReply.Status = Ok && (reply <- elementParser stream; reply.Status = Ok) do + xs <- foldState xs sepReply.Result reply.Result + if sepStateTag <> stream.StateTag then + error <- reply.Error + elif stateTag <> sepStateTag then + error <- mergeErrors sepReply.Error reply.Error 
+ else + raiseInfiniteLoopException "sep(End)By" stream + stateTag <- stream.StateTag + sepReply <- separatorParser stream + sepStateTag <- stream.StateTag + if sepReply.Status = Error && stateTag = sepStateTag then + Reply(Ok, resultFromState xs, mergeErrors error sepReply.Error) + else + match separatorMayEndSequence with + | Some true when reply.Status = Error && sepStateTag = stream.StateTag -> + error <- mergeErrors (if stateTag <> sepStateTag then sepReply.Error + else mergeErrors error sepReply.Error) reply.Error + Reply(Ok, resultFromState xs, error) + | _ when reply.Status <> Ok -> + error <- if sepStateTag <> stream.StateTag then reply.Error + else + let error2 = mergeErrors sepReply.Error reply.Error + if stateTag <> sepStateTag then error2 + else mergeErrors error error2 + Reply(reply.Status, error) + | _ -> + let error = if stateTag <> sepStateTag then sepReply.Error + else mergeErrors error sepReply.Error + Reply(sepReply.Status, error) + else + match resultForEmptySequence with + | Some _ (* if we bind f here, fsc won't be able to inline it *) + when reply.Status = Error && stateTag = stream.StateTag -> + Reply(Ok, (match resultForEmptySequence with Some f -> f() | _ -> Unchecked.defaultof<_>), reply.Error) + | _ -> + Reply(reply.Status, reply.Error) + +#if NOINLINE + static member +#else + [] + static member inline +#endif + ManyTill(stateFromFirstElement, + foldState, + resultFromStateAndEndParserResult, + elementParser: Parser<_,_>, + endParser: Parser<_,_>, + ?firstElementParser: Parser<_,_>, + ?resultForEmptySequence) : Parser<_,_> = + fun stream -> + // This is really, really ugly, but it does the job, + // and it does it about as efficiently as it can be done here. 
+ let firstElementParser = match firstElementParser with Some p -> p | _ -> elementParser + match resultForEmptySequence with + | None -> // require at least one element + let mutable reply = firstElementParser stream + if reply.Status = Ok then + // ------------------------------------------------------------------ + // the following code is duplicated in the match branch below + let mutable xs = stateFromFirstElement reply.Result + let mutable error = reply.Error + let mutable stateTag = stream.StateTag + let mutable endReply = endParser stream + while endReply.Status = Error && stateTag = stream.StateTag do + endReply.Status <- enum System.Int32.MinValue + reply <- elementParser stream + if reply.Status = Ok then + if stateTag = stream.StateTag then + raiseInfiniteLoopException "manyTill" stream + xs <- foldState xs reply.Result + error <- reply.Error + stateTag <- stream.StateTag + endReply <- endParser stream + if endReply.Status = Ok then + error <- if stateTag <> stream.StateTag then endReply.Error + else mergeErrors error endReply.Error + Reply(Ok, resultFromStateAndEndParserResult xs endReply.Result, error) + elif endReply.Status = enum System.Int32.MinValue then + error <- if stateTag <> stream.StateTag then reply.Error + else mergeErrors (mergeErrors error endReply.Error) reply.Error + Reply(reply.Status, error) + else + error <- if stateTag <> stream.StateTag then endReply.Error + else mergeErrors error endReply.Error + Reply(endReply.Status, error) + // ------------------------------------------------------------------ + else + Reply(reply.Status, reply.Error) + | Some _ -> + let mutable stateTag = stream.StateTag + let mutable endReply = endParser stream + if endReply.Status = Error && stateTag = stream.StateTag then + let mutable reply = firstElementParser stream + if reply.Status = Ok then + // ------------------------------------------------------------------ + // the following code is duplicated in the match branch above + let mutable xs = 
// -----------------
// Sequence combinators
// -----------------
// Every combinator below is an instantiation of one of the `Inline` helpers
// (`Inline.Many`, `Inline.SepBy`, `Inline.ManyTill`). The first three arguments are
//   1. how to build the initial state from the first element,
//   2. how to fold a further element into the state,
//   3. how to turn the final state into the result.
// The variants without a `1` in their name additionally pass `resultForEmptySequence`;
// the `...1` variants omit it.

// List-building variants cons each result onto the state (so the state is in reverse
// order) and reverse the list once at the end.
let many      p = Inline.Many((fun x -> [x]), (fun xs x -> x::xs), List.rev, p, resultForEmptySequence = fun () -> [])
let many1     p = Inline.Many((fun x -> [x]), (fun xs x -> x::xs), List.rev, p)

// Skipping variants use `unit` as the state and discard every element result.
let skipMany  p = Inline.Many((fun _ -> ()), (fun _ _ -> ()), (fun xs -> xs), p, resultForEmptySequence = fun () -> ())
let skipMany1 p = Inline.Many((fun _ -> ()), (fun _ _ -> ()), (fun xs -> xs), p)

// `sepBy` family: like `many`, but the fold function also receives the separator
// result, which is ignored here.
let sepBy         p sep = Inline.SepBy((fun x -> [x]), (fun xs _ x -> x::xs), List.rev, p, sep, resultForEmptySequence = fun () -> [])
let sepBy1        p sep = Inline.SepBy((fun x -> [x]), (fun xs _ x -> x::xs), List.rev, p, sep)

let skipSepBy     p sep = Inline.SepBy((fun _ -> ()), (fun _ _ _ -> ()), (fun xs -> xs), p, sep, resultForEmptySequence = fun () -> ())
let skipSepBy1    p sep = Inline.SepBy((fun _ -> ()), (fun _ _ _ -> ()), (fun xs -> xs), p, sep)

// `sepEndBy` family: identical to the `sepBy` family except for
// `separatorMayEndSequence = true`.
let sepEndBy      p sep = Inline.SepBy((fun x -> [x]), (fun xs _ x -> x::xs), List.rev, p, sep, separatorMayEndSequence = true, resultForEmptySequence = fun () -> [])
let sepEndBy1     p sep = Inline.SepBy((fun x -> [x]), (fun xs _ x -> x::xs), List.rev, p, sep, separatorMayEndSequence = true)

let skipSepEndBy  p sep = Inline.SepBy((fun _ -> ()), (fun _ _ _ -> ()), (fun xs -> xs), p, sep, separatorMayEndSequence = true, resultForEmptySequence = fun () -> ())
let skipSepEndBy1 p sep = Inline.SepBy((fun _ -> ()), (fun _ _ _ -> ()), (fun xs -> xs), p, sep, separatorMayEndSequence = true)

// `manyTill` family: the result function also receives the result of `endp`,
// which all four variants ignore.
let manyTill       p endp = Inline.ManyTill((fun x -> [x]), (fun xs x -> x::xs), (fun xs _ -> List.rev xs), p, endp, resultForEmptySequence = fun _ -> [])
let many1Till      p endp = Inline.ManyTill((fun x -> [x]), (fun xs x -> x::xs), (fun xs _ -> List.rev xs), p, endp)

let skipManyTill   p endp = Inline.ManyTill((fun _ -> ()), (fun _ _ -> ()), (fun _ _ -> ()), p, endp, resultForEmptySequence = fun _ -> ())
let skipMany1Till  p endp = Inline.ManyTill((fun _ -> ()), (fun _ _ -> ()), (fun _ _ -> ()), p, endp)

// Left-associative operator chain: the state is the value computed so far, and each
// parsed operator `f` is applied immediately to that value and the next operand.
let chainl1 p op =
    Inline.SepBy((fun x0 -> x0), (fun x f y -> f x y), (fun x -> x), p, op)

// Like `chainl1`, but falls back to the value `x` via `<|>%`.
let chainl p op x = chainl1 p op <|>% x

// Right-associative operator chain: nothing can be applied until the whole sequence
// has been parsed, so the state is a reversed list of (operator, operand) pairs.
// The first operand has no preceding operator and is paired with a default (null)
// operator that is never invoked.
let chainr1 p op =
    Inline.SepBy(elementParser = p, separatorParser = op,
                 stateFromFirstElement = (fun x0 -> [(Unchecked.defaultof<_>, x0)]),
                 foldState = (fun acc op x -> (op, x)::acc),
                 resultFromState = function // is called with (op, y) list in reverse order
                                   | ((op, y)::tl) ->
                                       // `op` is the operator that precedes `y`; `x` is the
                                       // operand to the left of that operator.
                                       let rec calc op y lst =
                                           match lst with
                                           | (op2, x)::tl -> calc op2 (op x y) tl
                                           | [] -> y // op is null
                                       calc op y tl
                                   | [] -> // shouldn't happen
                                       failwith "chainr1")

// Like `chainr1`, but falls back to the value `x` via `<|>%`.
let chainr p op x = chainr1 p op <|>% x


// ------------------------------
// Computation expression syntax
// ------------------------------

/// Builder type behind the `parse { ... }` computation expression.
[<Sealed>]
type ParserCombinator() =
    // Delay: `f` is only invoked once the resulting parser is applied to a stream.
    member t.Delay(f:(unit -> Parser<'a,'u>)) = fun stream -> (f()) stream
    member t.Return(x) = preturn x
    member t.Bind(p, f) = p >>= f
    member t.Zero() : Parser<'a,'u> = pzero
    member t.ReturnFrom(p: Parser<'a,'u>) = p
    // no Combine member on purpose
    // TryWith: an exception raised while running `p` is passed to `cf`, and the parser
    // returned by `cf` is then applied to the same stream object.
    member t.TryWith(p:Parser<'a,'u>, cf:(exn -> Parser<'a,'u>)) =
        fun stream ->
            (try p stream with e -> (cf e) stream)
    // TryFinally: `ff` runs after `p`, whether `p` returns or throws.
    member t.TryFinally(p:Parser<'a,'u>, ff:(unit -> unit)) =
        fun stream ->
            try p stream finally ff ()

/// The builder instance used as `parse { ... }`.
let parse = ParserCombinator()


// ----------------------
// Other helper functions
// ----------------------

// Returns a parser together with the reference cell it forwards to. Until the cell is
// assigned, applying the parser raises an exception with the message below.
let createParserForwardedToRef() =
    let dummyParser = fun stream -> failwith "a parser created with createParserForwardedToRef was not initialized"
    let r = ref dummyParser
    // `r.Value` instead of the `!` operator, which is deprecated since F# 6.
    (fun stream -> r.Value stream), r : Parser<_,'u> * Parser<_,'u> ref
/// The type of the parser functions supported by FParsec combinators.
type Parser<'Result, 'UserState> = CharStream<'UserState> -> Reply<'Result>

// =================================
// Parser primitives and combinators
// =================================

// Two basic primitives that are only seldom used directly in user code:

/// The parser `preturn x` always succeeds with the result `x` (without changing the parser state).
/// `preturn x` is defined as `fun stream -> Reply(x)`.
val preturn: 'a -> Parser<'a,'u>

/// The parser `pzero` always fails with an empty error message list, i.e. an unspecified error.
/// `pzero` is defined as `fun stream -> Reply(Error, NoErrorMessages)`.
val pzero: Parser<'a,'u>

// ---------------------------
// Chaining and piping parsers
// ---------------------------

/// The parser `p >>= f` first applies the parser `p` to the input, then applies the function `f`
/// to the result returned by `p` and finally applies the parser returned by `f` to the input.
val (>>=): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>

/// The parser `p >>% x` applies the parser `p` and returns the result `x`.
val (>>%): Parser<'a,'u> -> 'b -> Parser<'b,'u>

/// The parser `p1 >>. p2` applies the parsers `p1` and `p2` in sequence and returns the result of `p2`.
val (>>.): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>

/// The parser `p1 .>> p2` applies the parsers `p1` and `p2` in sequence and returns the result of `p1`.
val (.>>): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u>

/// The parser `p1 .>>. p2` applies the parsers `p1` and `p2` in sequence and returns the results in a tuple.
val (.>>.): Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>

/// The parser `between popen pclose p` applies the parsers `popen`, `p` and `pclose` in sequence.
/// It returns the result of `p`.
val between: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'c,'u>

/// The parser `p |>> f` applies the parser `p` and
/// returns the result `f x`, where `x` is the result returned by `p`.
val (|>>): Parser<'a,'u> -> ('a -> 'b) -> Parser<'b,'u>

/// The parser `pipe2 p1 p2 f` applies the parsers `p1` and `p2` in sequence.
/// It returns the result `f a b`, where `a` and `b` are the results returned by `p1` and `p2`.
val pipe2: Parser<'a,'u> -> Parser<'b,'u> -> ('a -> 'b -> 'c) -> Parser<'c,'u>

/// The parser `pipe3 p1 p2 p3 f` applies the parsers `p1`, `p2` and `p3` in sequence.
/// It returns the result `f a b c`, where `a`, `b` and `c` are the results returned by `p1`, `p2` and `p3`.
val pipe3: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> ('a -> 'b -> 'c -> 'd) -> Parser<'d,'u>

/// The parser `pipe4 p1 p2 p3 p4 f` applies the parsers `p1`, `p2`, `p3` and `p4` in sequence.
/// It returns the result `f a b c d`, where `a`, `b`, `c` and `d` are the results returned by `p1`, `p2`, `p3` and `p4`.
val pipe4: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> ('a -> 'b -> 'c -> 'd -> 'e) -> Parser<'e,'u>

/// The parser `pipe5 p1 p2 p3 p4 p5 f` applies the parsers `p1`, `p2`, `p3`, `p4` and `p5` in sequence.
/// It returns the result of the function application `f a b c d e`, where `a`, `b`, `c`, `d` and `e` are the results returned by `p1`, `p2`, `p3`, `p4` and `p5`.
val pipe5: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> Parser<'e,'u> -> ('a -> 'b -> 'c -> 'd -> 'e -> 'f) -> Parser<'f, 'u>


// -----------------------------------------------
// Parsing alternatives and recovering from errors
// -----------------------------------------------

/// The parser `p1 <|> p2` first applies the parser `p1`.
/// If `p1` succeeds, the result of `p1` is returned.
/// If `p1` fails with a non-fatal error and *without changing the parser state*,
/// the parser `p2` is applied.
/// Note: The stream position is part of the parser state, so if `p1` fails after consuming input,
/// `p2` will not be applied.
val (<|>): Parser<'a,'u> -> Parser<'a,'u> -> Parser<'a,'u>

/// The parser `choice ps` is an optimized implementation of `p1 <|> p2 <|> ... <|> pn`,
/// where `p1` ... `pn` are the parsers in the sequence `ps`.
val choice: seq<Parser<'a,'u>> -> Parser<'a,'u>

/// The parser `choiceL ps label` is an optimized implementation of `choice ps <?> label`.
val choiceL: seq<Parser<'a,'u>> -> string -> Parser<'a,'u>

/// The parser `p <|>% x` is an optimized implementation of `p <|> preturn x`.
val (<|>%): Parser<'a,'u> -> 'a -> Parser<'a,'u>

/// The parser `opt p` parses an optional occurrence of `p` as an option value.
/// `opt p` is an optimized implementation of `(p |>> Some) <|>% None`.
val opt: Parser<'a,'u> -> Parser<'a option,'u>

/// The parser `optional p` skips over an optional occurrence of `p`.
/// `optional p` is an optimized implementation of `(p >>% ()) <|>% ()`.
val optional: Parser<'a,'u> -> Parser<unit,'u>


/// The parser `attempt p` applies the parser `p`.
/// If `p` fails after changing the parser state or with a fatal error,
/// `attempt p` will backtrack to the original parser state and report a non-fatal error.
val attempt: Parser<'a,'u> -> Parser<'a,'u>

/// The parser `p >>=? f` behaves like `p >>= f`, except that it will backtrack to the beginning
/// if the parser returned by `f` fails with a non-fatal error and without changing the parser state,
/// even if `p` has changed the parser state.
val (>>=?): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>

/// The parser `p1 >>? p2` behaves like `p1 >>. p2`, except that it will backtrack
/// to the beginning if `p2` fails with a non-fatal error and without changing the parser state,
/// even if `p1` has changed the parser state.
val (>>?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>

/// The parser `p1 .>>? p2` behaves like `p1 .>> p2`, except that it will backtrack
/// to the beginning if `p2` fails with a non-fatal error and without changing the parser state,
/// even if `p1` has changed the parser state.
val (.>>?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u>

/// The parser `p1 .>>.? p2` behaves like `p1 .>>. p2`, except that it will backtrack
/// to the beginning if `p2` fails with a non-fatal error and without changing the parser state,
/// even if `p1` has changed the parser state.
val (.>>.?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>

// -------------------------------------
// Conditional parsing and looking ahead
// -------------------------------------

/// The parser `notEmpty p` behaves like `p`,
/// except that it fails when `p` succeeds without consuming input
/// or changing the parser state in any other way.
val notEmpty: Parser<'a,'u> -> Parser<'a,'u>

/// The parser `followedBy p` succeeds if the parser `p` succeeds at the current position.
/// Otherwise it fails with a non-fatal error. This parser never changes the parser state.
/// If the parser `followedBy p` fails, it returns no descriptive error message.
/// Hence it should only be used together with other parsers that take care of a potential error.
/// Alternatively, `followedByL p label` can be used to ensure a more descriptive error message.
val followedBy: Parser<'a,'u> -> Parser<unit,'u>

/// The parser `followedByL p label` behaves like `followedBy p`,
/// except that it returns an `Expected label` error message when the parser `p` fails.
val followedByL: Parser<'a,'u> -> string -> Parser<unit,'u>

/// The parser `notFollowedBy p` succeeds if the parser `p` fails to parse at the current position.
/// Otherwise it fails with a non-fatal error. This parser never changes the parser state.
/// If the parser `notFollowedBy p` fails, it returns no descriptive error message.
/// Hence it should only be used together with other parsers that take care of a potential error.
/// Alternatively, `notFollowedByL p label` can be used to ensure a more descriptive error message.
val notFollowedBy: Parser<'a,'u> -> Parser<unit,'u>

/// The parser `notFollowedByL p label` behaves like `notFollowedBy p`,
/// except that it returns an `Unexpected label` error message when it fails,
/// i.e. when the parser `p` succeeds.
val notFollowedByL: Parser<'a,'u> -> string -> Parser<unit,'u>

/// The parser `lookAhead p` parses `p` and restores the original parser state afterwards.
/// In case `p` fails after changing the parser state, the error messages are wrapped in a `NestedError`.
/// If it succeeds, any error messages are discarded. Fatal errors are turned into normal errors.
val lookAhead: Parser<'a,'u> -> Parser<'a,'u>


// --------------------------
// Customizing error messages
// --------------------------

/// The parser `p <?> label` applies the parser `p`. If `p` does not change the parser state
/// (usually because `p` failed), the error messages are replaced with `expected label`.
val (<?>): Parser<'a,'u> -> string -> Parser<'a,'u>

/// The parser `p <??> label` behaves like `p <?> label`, except that when `p` fails
/// after changing the parser state (for example, because `p` consumes input before it fails),
/// a `CompoundError` message is generated with both the given string `label` and the
/// error messages generated by `p`.
val (<??>): Parser<'a,'u> -> string -> Parser<'a,'u>

/// The parser `fail msg` always fails with a `messageError msg`.
/// The error message will be displayed together with other error messages generated for
/// the same input position.
val fail: string -> Parser<'a,'u>

/// The parser `failFatally msg` always fails with a `messageError msg`. It signals a
/// FatalError, so that no error recovery is attempted (except via backtracking constructs).
val failFatally: string -> Parser<'a,'u>

// -----------------
// Parsing sequences
// -----------------

/// The parser `tuple2 p1 p2` applies the parsers `p1` and `p2` in sequence and
/// returns the results in a tuple.
/// `tuple2 p1 p2` is defined as `p1 .>>. p2`.
val tuple2: Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>

/// The parser `tuple3 p1 p2 p3` applies the parsers `p1`, `p2` and `p3` in sequence and
/// returns the results in a tuple.
val tuple3: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<('a * 'b * 'c),'u>

/// The parser `tuple4 p1 p2 p3 p4` applies the parsers `p1`, `p2`, `p3` and `p4` in sequence and
/// returns the results in a tuple.
val tuple4: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> Parser<('a * 'b * 'c * 'd),'u>

/// The parser `tuple5 p1 p2 p3 p4 p5` applies the parsers `p1`, `p2`, `p3`, `p4` and `p5` in sequence and
/// returns the results in a tuple.
val tuple5: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> Parser<'e,'u> -> Parser<('a * 'b * 'c * 'd * 'e),'u>


// p{n}

/// The parser `parray n p` parses `n` occurrences of `p` and
/// returns the results in an array.
/// For example, `parray 3 p` is equivalent to `pipe3 p p p (fun a b c -> [|a;b;c|])`.
val parray: int -> Parser<'a,'u> -> Parser<'a[],'u>

/// The parser `skipArray n p` is an optimized implementation of `parray n p |>> ignore`.
val skipArray: int -> Parser<'a,'u> -> Parser<unit,'u>


// p*

/// The parser `many p` repeatedly applies the parser `p` until `p` fails.
/// It returns a list of the results returned by `p`.
/// At the end of the sequence `p` must fail without changing the parser state and without
/// signalling a `FatalError`, otherwise `many p` will fail with the error reported by `p`.
/// `many p` tries to guard against an infinite loop by throwing an exception
/// if `p` succeeds without changing the parser state.
val many: Parser<'a,'u> -> Parser<'a list,'u>

/// The parser `skipMany p` is an optimized implementation of `many p |>> ignore`.
val skipMany: Parser<'a,'u> -> Parser<unit,'u>


// p+

/// The parser `many1 p` behaves like `many p`, except that it requires `p` to succeed at least one time.
/// `many1 p` is an optimized implementation of `pipe2 p (many p) (fun hd tl -> hd::tl)`.
val many1: Parser<'a,'u> -> Parser<'a list,'u>

/// The parser `skipMany1 p` is an optimized implementation of `many1 p |>> ignore`.
val skipMany1: Parser<'a,'u> -> Parser<unit,'u>


// (p (sep p)*)?

/// The parser `sepBy p sep` parses *zero* or more occurrences of `p` separated by `sep`
/// (in EBNF notation: `(p (sep p)*)?`).
val sepBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipSepBy p sep` is an optimized implementation of `sepBy p sep |>> ignore`.
val skipSepBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>


// p (sep p)*

/// The parser `sepBy1 p sep` parses *one* or more occurrences of `p` separated by `sep`
/// (in EBNF notation: `p (sep p)*`).
val sepBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipSepBy1 p sep` is an optimized implementation of `sepBy1 p sep |>> ignore`.
val skipSepBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>


// (p (sep p)* sep?)?

/// The parser `sepEndBy p sep` parses *zero* or more occurrences of `p` separated and
/// optionally ended by `sep` (in EBNF notation: `(p (sep p)* sep?)?`).
/// It returns a list of the results returned by `p`.
val sepEndBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipSepEndBy p sep` is an optimized implementation of `sepEndBy p sep |>> ignore`.
val skipSepEndBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>


// p (sep p)* sep?

/// The parser `sepEndBy1 p sep` parses *one* or more occurrences of `p` separated and
/// optionally ended by `sep` (in EBNF notation: `p (sep p)* sep?`).
/// It returns a list of the results returned by `p`.
val sepEndBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipSepEndBy1 p sep` is an optimized implementation of `sepEndBy1 p sep |>> ignore`.
val skipSepEndBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>


/// The parser `manyTill p endp` repeatedly applies the parser `p`
/// for as long as `endp` fails (without changing the parser state).
/// It returns a list of the results returned by `p`.
val manyTill: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipManyTill p endp` is an optimized implementation of `manyTill p endp |>> ignore`.
val skipManyTill: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

/// The parser `many1Till p endp` behaves like `manyTill p endp`, except that it requires `p` to succeed at least one time.
/// `many1Till p endp` is an optimized implementation of `pipe2 p (manyTill p endp) (fun hd tl -> hd::tl)`.
val many1Till: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipMany1Till p endp` is an optimized implementation of `many1Till p endp |>> ignore`.
val skipMany1Till: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>


/// Generic building blocks for sequence parsers. Each member takes the functions that
/// create, fold and finalize the accumulated state together with the parsers involved.
/// Unless `NOINLINE` is defined, the members are declared `inline`.
[<Sealed>]
type Inline =

#if NOINLINE
  static member
#else
  [<NoDynamicInvocation>]
  static member inline
#endif
                       Many: stateFromFirstElement: ('T -> 'State)
                           * foldState: ('State -> 'T -> 'State)
                           * resultFromState: ('State -> 'Result)
                           * elementParser: Parser<'T,'U>
                           * ?firstElementParser: Parser<'T,'U>
                           * ?resultForEmptySequence: (unit -> 'Result)
                          -> Parser<'Result,'U>

#if NOINLINE
  static member
#else
  [<NoDynamicInvocation>]
  static member inline
#endif
                       SepBy: stateFromFirstElement: ('T -> 'State)
                            * foldState: ('State -> 'Separator -> 'T -> 'State)
                            * resultFromState: ('State -> 'Result)
                            * elementParser: Parser<'T,'U>
                            * separatorParser: Parser<'Separator,'U>
                            * ?firstElementParser: Parser<'T,'U>
                            * ?resultForEmptySequence: (unit -> 'Result)
                            * ?separatorMayEndSequence: bool
                           -> Parser<'Result,'U>

#if NOINLINE
  static member
#else
  [<NoDynamicInvocation>]
  static member inline
#endif
                       ManyTill: stateFromFirstElement: ('T -> 'State)
                               * foldState: ('State -> 'T -> 'State)
                               * resultFromStateAndEnd: ('State -> 'E -> 'Result)
                               * elementParser: Parser<'T,'U>
                               * endParser: Parser<'E,'U>
                               * ?firstElementParser: Parser<'T,'U>
                               * ?resultForEmptySequence: ('E -> 'Result)
                              -> Parser<'Result,'U>

// (((p op p) op p) ... op p)

/// The parser `chainl1 p op` parses one or more occurrences of `p` separated by `op`
/// (in EBNF notation: `p (op p)*`).
/// It returns the value obtained by *left* associative application of all functions
/// returned by `op` to the results returned by `p`,
/// i.e. `f_n (... (f_2 (f_1 x_1 x_2) x_3) ...) x_n+1`,
/// where `f_1` to `f_n` are the functions returned by the parser `op` and
/// `x_1` to `x_n+1` are the values returned by `p`. If only a single occurrence
/// of `p` and no occurrence of `op` is parsed, the result of `p` is returned directly.
val chainl1: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> Parser<'a,'u>

/// The parser `chainl p op defVal` is equivalent to `chainl1 p op <|>% defVal`.
val chainl: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> 'a -> Parser<'a,'u>


// (p op ... (p op (p op p)))

/// The parser `chainr1 p op` parses one or more occurrences of `p` separated by `op`
/// (in EBNF notation: `p (op p)*`).
/// It returns the value obtained by *right* associative application of all functions
/// returned by `op` to the results returned by `p`,
/// i.e. `f_1 x_1 (f_2 x_2 (... (f_n x_n x_n+1) ...))`,
/// where `f_1` to `f_n` are the functions returned by the parser `op` and
/// `x_1` to `x_n+1` are the values returned by `p`. If only a single occurrence
/// of `p` and no occurrence of `op` is parsed, the result of `p` is returned directly.
val chainr1: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> Parser<'a,'u>

/// The parser `chainr p op defVal` is equivalent to `chainr1 p op <|>% defVal`.
val chainr: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> 'a -> Parser<'a,'u>


// ------------------------------
// Computation expression syntax
// ------------------------------

/// The type of the "builder object" that can be used to build parsers with
/// F#'s "computation expression" syntax a.k.a. "workflow" syntax.
[<Sealed>]
type ParserCombinator =
    new : unit -> ParserCombinator
    member Delay: f:(unit -> Parser<'a,'u>) -> Parser<'a,'u>
    member Return: 'a -> Parser<'a,'u>
    member Bind: Parser<'a,'u>*('a -> Parser<'b,'u>) -> Parser<'b,'u>
    member Zero: unit -> Parser<'a,'u>
    member ReturnFrom: Parser<'a,'u> -> Parser<'a,'u>
    // no Combine member on purpose
    member TryWith: p:Parser<'a,'u> * cf:(exn -> Parser<'a,'u>) -> Parser<'a,'u>
    member TryFinally: p:Parser<'a,'u>* ff:(unit -> unit) -> Parser<'a,'u>

/// The builder object for building parsers using F#'s computation expression syntax.
val parse : ParserCombinator


// ----------------------
// Other helper functions
// ----------------------

// a helper function for defining mutually recursive parser values

/// `let p, pRef = createParserForwardedToRef()` creates a parser `p` that forwards all
/// calls to the parser in the reference cell `pRef`. Initially, `pRef` holds a reference
/// to a dummy parser that raises an exception on any invocation.
val createParserForwardedToRef: unit -> Parser<'a,'u> * Parser<'a,'u> ref
+ +namespace FParsec + +#if LOW_TRUST + // we don't need the Range code in LOW_TRUST builds +#else + +type Range = struct + val Min: int + val Max: int + new (min, max) = assert (min <= max) + {Min = min; Max = max} +end + +[] +module internal Range = + open System.Collections.Generic + open FParsec.Internals + + let int32Max = System.Int32.MaxValue + + let createInvalidRangeException() = + System.ArgumentException("A range passed as an argument is invalid.") + + let checkRangesAreValidSortedAndUnconnected (ranges: Range[]) = + if ranges.Length <> 0 then + let r = ranges.[0] + if r.Min > r.Max then raise (createInvalidRangeException()) + let mutable prevMax = r.Max + for i = 1 to ranges.Length - 1 do + let r = ranges.[i] + if r.Min > r.Max then raise (createInvalidRangeException()) + if prevMax = int32Max || prevMax + 1 >= r.Min then + invalidArg "ranges" "The ranges must be sorted and neither overlapping nor immediately adjacent." + prevMax <- r.Max + + let checkLabelRangesAreValidSortedAndUnconnected (ranges: Range[]) (labels: System.Reflection.Emit.Label[]) = + if ranges.Length <> labels.Length then + invalidArg "labels" "The range and label arrays must have the same lengths." + if ranges.Length <> 0 then + let r = ranges.[0] + if r.Min > r.Max then raise (createInvalidRangeException()) + let mutable prevMax = r.Max + for i = 1 to ranges.Length - 1 do + let r = ranges.[i] + if r.Min > r.Max then raise (createInvalidRangeException()) + if prevMax = int32Max then + invalidArg "ranges" "The ranges must be sorted and non-overlapping." + if prevMax + 1 >= r.Min then + if prevMax + 1 = r.Min then + if labels.[i - 1].Equals(labels.[i]) then + raise (System.ArgumentException("Ranges with the same associated label must not be immediately adjacent.")) + else + invalidArg "ranges" "The ranges must be sorted and non-overlapping." 
+ prevMax <- r.Max + + let rangeComparer = {new Comparer() with + member t.Compare(r1, r2) = compare r1.Min r2.Min} + + let sortAndMergeRanges allowOverlappingRanges (ranges: Range[]) = + if ranges.Length = 0 then [||] + else + System.Array.Sort(ranges, rangeComparer) + let mutable connected = 0 + let r = ranges.[0] + if r.Min > r.Max then raise (createInvalidRangeException()) + let mutable prevMax = r.Max + for i = 1 to ranges.Length - 1 do + let r = ranges.[i] + if r.Min > r.Max then raise (createInvalidRangeException()) + if prevMax < r.Min then + if prevMax + 1 = r.Min then + connected <- connected + 1 + prevMax <- r.Max + elif allowOverlappingRanges then + connected <- connected + 1 + if prevMax < r.Max then + prevMax <- r.Max + else + invalidArg "ranges" "The value ranges must be non-overlapping." + + if connected = 0 then ranges + else + let rs = Array.zeroCreate (ranges.Length - connected) + let mutable j = 0 + for r in ranges do + if j = 0 || prevMax <> int32Max && prevMax + 1 < r.Min then + prevMax <- r.Max + rs.[j] <- r + j <- j + 1 + elif prevMax < r.Max then + prevMax <- r.Max + rs.[j - 1] <- Range(rs.[j - 1].Min, r.Max) + rs + + /// If the comparer is not null, adjacent ranges with the same value are merged. + let sortAndMergeKeyValueRanges (cmp: EqualityComparer<'T>) (keyValueRanges: seq) = + // 'T could potentially be a large value type, + // so we are trying to avoid copying 'T values where possible. 
+ let rvs = Array.ofSeq keyValueRanges + if rvs.Length = 0 then [||], [||] + else + System.Array.Sort(rvs, {new Comparer() with + member t.Compare((r1, _), (r2, _)) = compare r1.Min r2.Min}) + let mutable connected = 0 + let (r, _) as rv = rvs.[0] + if r.Min > r.Max then raise (createInvalidRangeException()) + let mutable prevMax = r.Max + let mutable prevRV = rv + for i = 1 to rvs.Length - 1 do + let (r, _) as rv = rvs.[i] + if r.Min > r.Max then raise (createInvalidRangeException()) + if prevMax >= r.Min then + invalidArg "keyValueRanges" "The ranges must be non-overlapping." + if prevMax + 1 = r.Min && isNotNull cmp && cmp.Equals(snd prevRV, snd rv) then + connected <- connected + 1 + prevMax <- r.Max + prevRV <- rv + let n = rvs.Length - connected + let rs, vs = Array.zeroCreate n, Array.zeroCreate n + if connected = 0 then + for i = 0 to rvs.Length - 1 do + let rv = rvs.[i] + rs.[i] <- fst rv + vs.[i] <- snd rv + else + let mutable j = 0 + for ((r, _) as rv) in rvs do + if j = 0 || not (prevMax + 1 = r.Min && cmp.Equals(snd prevRV, snd rv)) then + rs.[j] <- r + vs.[j] <- snd rv + j <- j + 1 + else + rs.[j - 1] <- Range(rs.[j - 1].Min, r.Max) + prevMax <- r.Max + prevRV <- rv + rs, vs + + let mergeSortedKeyLabelRanges (keys: int[]) (labels: System.Reflection.Emit.Label[]) = + if keys.Length <> labels.Length then + invalidArg "keys" "The key and label arrays must have the same lengths." + if keys.Length = 0 then [||], [||] + else + let mutable prevKey = keys.[0] + let mutable connected = 0 + for i = 1 to keys.Length - 1 do + let key = keys.[i] + if key <= prevKey then + invalidArg "keys" "The keys must be sorted and distinct." 
+ if key = prevKey + 1 && labels.[i] = labels.[i - 1] then + connected <- connected + 1 + prevKey <- key + if connected = 0 then + (keys |> Array.map (fun k -> Range(k, k))), labels + else + let ranges = Array.zeroCreate (keys.Length - connected) + let newLabels = Array.zeroCreate (keys.Length - connected) + let mutable i = 0 + for j = 0 to ranges.Length - 1 do + let label = labels.[i] + newLabels.[j] <- label + let first = keys.[i] + let mutable last = first + i <- i + 1 + while i < keys.Length && keys.[i] = last + 1 + && labels.[i] = label + do last <- last + 1 + i <- i + 1 + ranges.[j] <- Range(first, last) + ranges, newLabels + + /// Duplicate values are allowed. + let collectSortAndMergeRanges (values: seq) = + use iter = values.GetEnumerator() + if not (iter.MoveNext()) then [||] + else + let ranges = ResizeArray<_>() + let rec loop sorted min max = + if iter.MoveNext() then + let k = iter.Current + if max <> int32Max && max + 1 = k then loop sorted min k + else + ranges.Add(Range(min, max)) + loop (sorted && max < k) k k + else + ranges.Add(Range(min, max)) + sorted + let value = iter.Current + let sorted = loop true value value + let ranges = ranges.ToArray() + if sorted then ranges + else sortAndMergeRanges true ranges + + /// ranges, values = collectSortAndMergeKeyValueRanges (cmp: EqualityComparer<'T>) (keyValues: seq) + /// Duplicate keys are not allowed. + /// If the comparer is not null, consecutive keys with the same value are combined. + let collectSortAndMergeKeyValueRanges (cmp: EqualityComparer<'T>) (keyValues: seq) = + // 'T could potentially be a large value type, + // so we are trying to avoid copying 'T values where possible. 
+ let kvs = Array.ofSeq keyValues + System.Array.Sort(kvs, {new Comparer() with + member t.Compare((k1, _), (k2,_)) = compare k1 k2}) + if kvs.Length = 0 then [||], [||] + else + let mutable prevKey, _ = kvs.[0] + for i = 1 to kvs.Length - 1 do + let k, _ = kvs.[i] + if k = prevKey then + invalidArg "keyValues" "The sequence contains a duplicate key." + prevKey <- k + if isNull cmp then + let ranges = Array.zeroCreate kvs.Length + let values = Array.zeroCreate kvs.Length + for i = 0 to kvs.Length - 1 do + let k, _ as kv = kvs.[i] + ranges.[i] <- Range(k, k) + values.[i] <- snd kv + ranges, values + else + let ranges = ResizeArray<_>() + let mutable kv = kvs.[0] + let mutable i = 0 + while i < kvs.Length do + let kv0 = kv + let mutable k = fst kv + i <- i + 1 + while i < kvs.Length && (kv <- kvs.[i] + k + 1 = fst kv && cmp.Equals(snd kv0, snd kv)) + do k <- k + 1 + i <- i + 1 + ranges.Add(Range(fst kv0, k)) + let ranges = ranges.ToArray() + let values = Array.zeroCreate ranges.Length + let mutable j = 0 + for i = 0 to ranges.Length - 1 do + let r = ranges.[i] + values.[i] <- snd kvs.[j] + j <- j + (r.Max - r.Min + 1) + ranges, values + + /// sumOfLengths (ranges: Range[]) (iBegin: int) (iEnd: int) + /// precondition: iBegin < iEnd, ranges must be sorted and non-overlapping + let sumOfLengths (ranges: Range[]) iBegin iEnd = + assert (iBegin < iEnd) + // since the ranges are sorted non-overlapping, their sum is <= UInt32.MaxValue + 1 + let mutable n = uint32 (iEnd - iBegin) + for i = iBegin to iEnd - 1 do + let r = ranges.[i] + n <- n + uint32 (r.Max - r.Min) + if n <> 0u then double n + else double System.UInt32.MaxValue + 1. 
// n has overflown by exactly 1 + + /// sumOfCappedLengths (lengthCap: int32) (ranges: Range[]) (iBegin: int) (iEnd: int) + /// precondition: iBegin < iEnd, ranges must be sorted and non-overlapping + /// a lengthCap <= 0 is interpreted as a lengthCap of 2^32 + let sumOfCappedLengths lengthCap (ranges: Range[]) iBegin iEnd = + assert (iBegin < iEnd) + // since the ranges are sorted non-overlapping, their sum is <= UInt32.MaxValue + 1 + let lengthCapM1 = if lengthCap > 0 then uint32 (lengthCap - 1) else System.UInt32.MaxValue + let mutable n = uint32 (iEnd - iBegin) + for i = iBegin to iEnd - 1 do + let r = ranges.[i] + n <- n + min (uint32 (r.Max - r.Min)) lengthCapM1 + if n <> 0u then double n + else double System.UInt32.MaxValue + 1. // n has overflown by exactly 1 + + /// density lengthCap (ranges: Range[]) iBegin iEnd + /// precondition: iBegin < iEnd, ranges must be sorted and non-overlapping + let density lengthCap (ranges: Range[]) iBegin iEnd = + assert (iBegin < iEnd) + let n = sumOfCappedLengths lengthCap ranges iBegin iEnd + let d = double ranges.[iEnd - 1].Max - double ranges.[iBegin].Min + 1. + n/d + + /// rangeIndex, pivotAroundRangeMax = findPivot (ranges: Range[]) iBegin iEnd + /// precondition: iBegin < iEnd, ranges must be sorted and non-overlapping + let findPivot (ranges: Range[]) iBegin iEnd = + assert (iBegin < iEnd) + // the pivot heuristic is based on Korobeynikov (2007), http://llvm.org/pubs/2007-05-31-Switch-Lowering.pdf + let mutable first, last = double ranges.[iBegin].Min, double ranges.[iEnd - 1].Max + let mutable pivot, pivotAroundPreviousRangeMax = iBegin, false + let mutable sumLeft, sumRight = 0., sumOfLengths ranges iBegin iEnd + let sumHalf = sumRight*0.5 + let mutable maxQuality, maxDistanceToMiddle = -1., sumRight + let r = ranges.[iBegin] + let mutable nextMin, nextMax = double r.Min, double r.Max + for i = iBegin + 1 to iEnd - 1 do + let prevMax = nextMax + let prevLength = nextMax - nextMin + 1. 
+ sumLeft <- sumLeft + prevLength + sumRight <- sumRight - prevLength + let r = ranges.[i] + nextMin <- double r.Min + nextMax <- double r.Max + let logDistance = System.Math.Log(nextMin - prevMax) + let leftDensity = sumLeft/(prevMax - first + 2.) // add 2 instead of 1 to decrease the quality + let rightDensity = sumRight/(last - nextMin + 2.) // of the two most extreme possible pivot points + let quality = (leftDensity + rightDensity)*logDistance + if quality >= maxQuality then // equal quality is only accepted below if the split is closer to the middle + let distanceToMiddle = System.Math.Abs(sumLeft - sumHalf); + if quality > maxQuality || distanceToMiddle < maxDistanceToMiddle then + maxQuality <- quality + maxDistanceToMiddle <- distanceToMiddle + pivot <- i + pivotAroundPreviousRangeMax <- sumLeft >= sumRight + if pivotAroundPreviousRangeMax then + (pivot - 1), true + else + pivot, false + + let rec findInSortedNonOverlappingRanges (ranges: Range[]) value = // binary search; returns the index of the range containing value + let rec loop iFirst iLast = + if iFirst <= iLast then + let middle = int ((uint32 (iFirst + iLast))/2u) // midpoint computed in unsigned arithmetic + let middleRange = ranges.[middle] + if value < middleRange.Min then loop iFirst (middle - 1) + elif value > middleRange.Max then loop (middle + 1) iLast + else middle + else ~~~iFirst // not found: the bitwise complement of iFirst, which is always negative + loop 0 (ranges.Length - 1) + +#endif + diff --git a/src/FParsec/StaticMapping.fs b/src/FParsec/StaticMapping.fs new file mode 100644 index 0000000..e2c6372 --- /dev/null +++ b/src/FParsec/StaticMapping.fs @@ -0,0 +1,839 @@ +// Copyright (c) Stephan Tolksdorf 2010-2012 +// License: Simplified BSD License. See accompanying documentation. + +module FParsec.StaticMapping + +#if LOW_TRUST +#else +open System.Reflection +open System.Reflection.Emit +open System.Runtime.Serialization +open System.Diagnostics +open System.Collections.Generic +open System.Threading + +open FParsec +open FParsec.Internals +open FParsec.Range +open FParsec.Emit + +/// Unsafe because it doesn't constrain the type argument to reference types. 
+let private UnsafeReferenceEqualityComparer<'T> = + { new EqualityComparer<'T>() with + override t.Equals(x, y) = obj.ReferenceEquals(x, y) + override t.GetHashCode(x) = System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(x) + } + +type PhysicalEqualityComparer<'T> private () = + static let instanceOrNull = + let t = typeof<'T> + if not t.IsValueType then + UnsafeReferenceEqualityComparer<'T> + elif t.IsEnum || typeof>.IsAssignableFrom(t) then + EqualityComparer<'T>.Default + else + null + + static member InstanceOrNull = instanceOrNull + +let mutable private staticMappingCounter = 0 + +let private createStaticMappingTypeBuilder<'TIn,'TOut>() = + let name = "StaticMapping" + (string (Interlocked.Increment(&staticMappingCounter))) + let tb = createTypeBuilder + name + (TypeAttributes.Public ||| TypeAttributes.Sealed ||| TypeAttributes.Class) + typeof> null + let mb = tb.DefineMethod("Invoke", + MethodAttributes.Public ||| MethodAttributes.HideBySig ||| MethodAttributes.Virtual, + CallingConventions.HasThis, + typeof<'TOut>, [|typeof<'TIn>|]) + tb, mb.GetILGenerator() + +let createStaticMappingAssertException() = + System.Exception("An internal assert check in FParsec.StaticMapping failed. Please report this error to fparsec@quanttec.com. (The Data member of the exception object contains the information needed to reproduce the error.)") + +let internal defaultMappingLengthCap = 32 +let internal defaultMappingDensityThreshold = 0.4 +let internal defaultIndicatorLengthCap = 32*8 +let internal defaultIndicatorDensityThreshold = 0.4/32. + +let internal createStaticIntIndicatorFunctionImpl<'TInt when 'TInt : struct> + lengthCap densityThreshold minValue maxValue invert ranges : ('TInt -> bool) = + + if not (typeof<'TInt> = typeof || typeof<'TInt> = typeof) then + failwith "Only char and int are supported as input types." 
+ + let tb, ilg = createStaticMappingTypeBuilder<'TInt, bool>() + + let resultLocal = ilg.DeclareLocal(typeof<bool>) // local 0 + emitSetMembershipTest ilg + (fun ilg -> ilg.Emit(OpCodes.Ldarg_1)) // loads var + (fun ilg -> ilg.Emit(OpCodes.Stloc_0)) // stores result + (TempLocals(ilg)) + lengthCap densityThreshold + minValue maxValue + invert ranges + ilg.Emit(OpCodes.Ldloc_0) + ilg.Emit(OpCodes.Ret) + + let t = tb.CreateType() + let indicator = FormatterServices.GetUninitializedObject(t) :?> ('TInt -> bool) // the instance is created without running a constructor + +#if DEBUG_STATIC_MAPPING + // saveEmitAssembly "FParsec.Emitted.dll" + + let raiseException key : unit = + let e = createStaticMappingAssertException() + e.Data.["Argument"] <- key + e.Data.["IsInverted"] <- invert + e.Data.["Ranges"] <- ranges + raise e + + let findKeyinRanges = + (if typeof<'TInt> = typeof<char> then + (box (fun (key: char) -> findInSortedNonOverlappingRanges ranges (int key))) + else + (box (findInSortedNonOverlappingRanges ranges)) + ) :?> ('TInt -> int) + + fun key -> // cross-checks the emitted indicator against a binary search over ranges + let b1 = indicator key + let b2_ = findKeyinRanges key >= 0 + let b2 = if invert then not b2_ else b2_ + if b1 <> b2 then raiseException key + b1 +#else + indicator +#endif + +let createStaticCharIndicatorFunction invert (charsInSet: seq<char>) = + let ranges = collectSortAndMergeRanges (charsInSet |> Seq.map (fun c -> int c)) + createStaticIntIndicatorFunctionImpl<char> + defaultIndicatorLengthCap defaultIndicatorDensityThreshold + 0 0xffff + invert ranges + +let createStaticCharRangeIndicatorFunction invert (rangesInSet: seq<Range>) = + let ranges = sortAndMergeRanges true (Array.ofSeq rangesInSet) + if ranges.Length <> 0 && (ranges.[0].Min < 0 || ranges.[ranges.Length - 1].Max > 0xffff) then // bounds are only inspected when a range exists; unparenthesized, an empty array was indexed at -1 + invalidArg "charRanges" "A range contains values outside the range of valid UTF-16 char values (0 - 0xffff)." 
+ createStaticIntIndicatorFunctionImpl + defaultIndicatorLengthCap defaultIndicatorDensityThreshold + 0 0xffff + invert ranges + +let createStaticIntIndicatorFunction invert (valuesInSet: seq) = + let ranges = collectSortAndMergeRanges valuesInSet + createStaticIntIndicatorFunctionImpl + defaultIndicatorLengthCap defaultIndicatorDensityThreshold + System.Int32.MinValue System.Int32.MaxValue + invert ranges + +let createStaticIntRangeIndicatorFunction invert (rangesInSet: seq) = + let ranges = sortAndMergeRanges true (Array.ofSeq rangesInSet) + createStaticIntIndicatorFunctionImpl + defaultIndicatorLengthCap defaultIndicatorDensityThreshold + System.Int32.MinValue System.Int32.MaxValue + invert ranges + + +let internal createStaticIntMappingImpl + lengthCap densityThreshold + minKey maxKey + (defaultValue: 'T) (ranges: Range[]) (values: 'T[]) : (int -> 'T) = + assert (ranges.Length = values.Length) + + if ranges.Length = 0 then fun _ -> defaultValue + else + let physicalEqualityComparer = PhysicalEqualityComparer<'T>.InstanceOrNull + let T = typeof<'T> + if T = typeof then + let values = box values :?> bool[] + let defaultValue = box defaultValue :?> bool + box (createStaticIntIndicatorFunctionImpl + (lengthCap*(defaultIndicatorLengthCap/defaultMappingLengthCap)) + (densityThreshold*(defaultIndicatorDensityThreshold/defaultMappingDensityThreshold)) + minKey maxKey + defaultValue ranges) :?> (int -> 'T) + else + let tb, ilg = createStaticMappingTypeBuilder() + + let isPrimitive = T.IsPrimitive || T.IsEnum + let loadConstant = if isPrimitive then createLoaderForPrimitiveConstants ilg + else Unchecked.defaultof<_> + // local 0 + let resultOrIndexLocal = ilg.DeclareLocal(if isPrimitive then T else typeof) + + let defaultLabel = ilg.DefineLabel() + let returnLabel = ilg.DefineLabel() + + let labels = Array.zeroCreate ranges.Length + + let mutable needToEmit = null + let mutable needToEmitCount = 0 + let physicalEqualityComparer = 
PhysicalEqualityComparer<'T>.InstanceOrNull + + if isNull physicalEqualityComparer then + for i = 0 to labels.Length - 1 do + labels.[i] <- ilg.DefineLabel() + else + // we don't need to emit multiple case handlers for identical values + needToEmit <- Array.zeroCreate values.Length + let valueLabels = Dictionary<'T,Label>(values.Length, physicalEqualityComparer) + for i = 0 to values.Length - 1 do + let value = values.[i] + let mutable label = Unchecked.defaultof<_> + if not (valueLabels.TryGetValue(value, &label)) then + needToEmit.[i] <- true + label <- ilg.DefineLabel() + valueLabels.Add(value, label) + labels.[i] <- label + needToEmitCount <- valueLabels.Count + if needToEmitCount = values.Length then + needToEmit <- null + + emitSwitch ilg + (fun ilg -> ilg.Emit(OpCodes.Ldarg_1)) // loads key + (TempLocals(ilg)) + lengthCap densityThreshold + minKey maxKey + defaultLabel ranges labels + + let returnedValues = if isPrimitive || isNull needToEmit then null + else Array.zeroCreate needToEmitCount + let mutable returnedValuesCount = 0 + + for i = 0 to labels.Length - 1 do + if isNull needToEmit || needToEmit.[i] then + ilg.MarkLabel(labels.[i]) + if isPrimitive then + loadConstant (values.[i]) + else + if isNotNull returnedValues then + returnedValues.[returnedValuesCount] <- values.[i] + loadI4 ilg returnedValuesCount + returnedValuesCount <- returnedValuesCount + 1 + ilg.Emit(OpCodes.Stloc_0) + ilg.Emit(OpCodes.Br, returnLabel) + + // return default value + let defaultValueIsNull = not T.IsValueType && isNull (box defaultValue) + ilg.MarkLabel(defaultLabel) + if isPrimitive then + loadConstant defaultValue + ilg.Emit(OpCodes.Stloc_0) + else + if defaultValueIsNull then + ilg.Emit(OpCodes.Ldnull) + else + ilg.Emit(OpCodes.Ldarg_0) + ilg.Emit(OpCodes.Ldfld, tb.DefineField("DefaultValue", T, FieldAttributes.Public)) + ilg.Emit(OpCodes.Ret) + + // return result + ilg.MarkLabel(returnLabel) + if isPrimitive then + ilg.Emit(OpCodes.Ldloc_0) + else + // We could store 
all the values in individual fields to avoid the bounds check + // and indirect load, but that probably wouldn't be worth the additional + // code generation (and garbage collection?) costs (except for tiny mappings). + ilg.Emit(OpCodes.Ldarg_0) + ilg.Emit(OpCodes.Ldfld, tb.DefineField("Values", values.GetType(), FieldAttributes.Public)) + ilg.Emit(OpCodes.Ldloc_0) + ilg.Emit(OpCodes.Ldelem, T) + ilg.Emit(OpCodes.Ret) + + let t = tb.CreateType() + let mapping = FormatterServices.GetUninitializedObject(t) :?> (int -> 'T) + if not isPrimitive then + // we can't use the previously used Fieldbuilders here, because SetValue is not implemented in FieldBuilders + if not defaultValueIsNull then t.GetField("DefaultValue").SetValue(mapping, defaultValue) + t.GetField("Values").SetValue(mapping, if isNotNull returnedValues then returnedValues else values) + + #if DEBUG_STATIC_MAPPING + //saveEmitAssembly "FParsec.Emitted.dll" + + if isNull physicalEqualityComparer then mapping + else + let raiseException key : unit = + let e = createStaticMappingAssertException() + e.Data.["Argument"] <- key + e.Data.["Ranges"] <- ranges + e.Data.["Values"] <- values + e.Data.["DefaultValue"] <- defaultValue + raise e + + fun key -> + let value = mapping key + let index = findInSortedNonOverlappingRanges ranges key + if index >= 0 then + if not (physicalEqualityComparer.Equals(value, values.[index])) then raiseException key + else + if not (physicalEqualityComparer.Equals(value, defaultValue)) then raiseException key + value + #else + mapping + #endif + + +let internal filterOutDefaultValueRanges (comparer: EqualityComparer<_>) (ranges: Range[]) (values: _[]) defaultValue = + if isNull comparer then ranges, values + else + let mutable n = 0 + for v in values do + if comparer.Equals(v, defaultValue) then n <- n + 1 + if n = 0 then ranges, values + else + let N = values.Length - n + let newRanges, newValues = Array.zeroCreate N, Array.zeroCreate N + let mutable j = 0 + for i = 0 to 
values.Length - 1 do + let v = values.[i] + if not (comparer.Equals(v, defaultValue)) then + newValues.[j] <- v + newRanges.[j] <- ranges.[i] + j <- j + 1 + newRanges, newValues + +// we need to use #seq instead of seq here to prevent the F# compiler +// from unnecessarily wrapping the returned function value + +let createStaticIntMapping (defaultValue: 'T) (keyValues: #seq) = + let valueComparer = PhysicalEqualityComparer<'T>.InstanceOrNull + let ranges, values = collectSortAndMergeKeyValueRanges valueComparer keyValues + let ranges, values = filterOutDefaultValueRanges valueComparer ranges values defaultValue + createStaticIntMappingImpl + defaultMappingLengthCap defaultMappingDensityThreshold + System.Int32.MinValue System.Int32.MaxValue + defaultValue ranges values + +let createStaticIntRangeMapping (defaultValue: 'T) (keyValues: #seq) = + let valueComparer = PhysicalEqualityComparer<'T>.InstanceOrNull + let ranges, values = sortAndMergeKeyValueRanges valueComparer keyValues + let ranges, values = filterOutDefaultValueRanges valueComparer ranges values defaultValue + createStaticIntMappingImpl + defaultMappingLengthCap defaultMappingDensityThreshold + System.Int32.MinValue System.Int32.MaxValue + defaultValue ranges values + +type private IntType = U2 + | U4 + | U8 + +[] +type Subtree(stringIndex: int, index: int, count: int) = struct + member t.StringIndex = stringIndex + member t.Index = index + member t.Count = count // must be greater 0 +end + +type SubtreeEqualityComparer<'T>(stringValues: (string*'T)[], valueComparer: EqualityComparer<'T>) = + inherit EqualityComparer() + + override t.Equals(subtree1: Subtree, subtree2: Subtree) = + let aligned = subtree1.StringIndex%2 = subtree2.StringIndex%2 // our string comparison code assumes an identical 4-byte-alignment + let count = subtree1.Count + count = subtree2.Count + && (let mutable i = 0 + while uint32 i < uint32 count do + let string1, value1 = stringValues.[subtree1.Index + i] + let string2, value2 = 
stringValues.[subtree2.Index + i] + let remaining = string1.Length - subtree1.StringIndex + if remaining = string2.Length - subtree2.StringIndex + && (aligned || remaining <= 1) + && valueComparer.Equals(value1, value2) + && System.String.CompareOrdinal(string1, subtree1.StringIndex, + string2, subtree2.StringIndex, remaining) = 0 + then i <- i + 1 + else i <- System.Int32.MinValue // break + i = count) + + override t.GetHashCode(subtree: Subtree) = + subtree.Count ^^^ valueComparer.GetHashCode(snd stringValues.[subtree.Index]) + +let createStaticStringMapping (defaultValue: 'T) (keyValues: #seq) : (string -> 'T) = + let T = typeof<'T> + + let physicalEqualityComparer = PhysicalEqualityComparer<'T>.InstanceOrNull + + let kvs = Array.ofSeq keyValues + System.Array.Sort(kvs, {new Comparer() with + member t.Compare((k1, _), (k2, _)) = System.String.CompareOrdinal(k1, k2)}) + + let mutable previousKey = null + for (key, _) in kvs do + if isNull key then invalidArg "keyValues" "The string keys must not be null." + if key = previousKey then invalidArg "keyValues" "The strings keys must be different." 
+ previousKey <- key + + match kvs.Length with + | 0 -> fun str -> + let throwIfStringIsNull = str.Length + defaultValue + | 1 -> let key, value = kvs.[0] + fun str -> + let throwIfStringIsNull = str.Length + if str = key then value else defaultValue + | _ -> + let mutable i0 = if fst kvs.[0] = "" then 1 else 0 + + let getMinMaxLength iBegin iEnd = + assert (iBegin < iEnd) + let firstKey, _ = kvs.[iBegin] + let mutable minLength = firstKey.Length + let mutable maxLength = minLength + for i = iBegin + 1 to iEnd - 1 do + let key, _ = kvs.[i] + let length = key.Length + minLength <- min length minLength + maxLength <- max length maxLength + minLength, maxLength + + let minLength, maxLength = getMinMaxLength i0 kvs.Length + + let findIndexOfFirstCharAfterCommonPrefix startIndex iBegin iEnd minKeyLength = + let rec loop index = + if index = minKeyLength then index + else + let c = (fst kvs.[iBegin]).[index] + let rec keysEqualAtX i = + if i = iEnd then true + elif (fst kvs.[i]).[index] <> c then false + else keysEqualAtX (i + 1) + if not (keysEqualAtX (iBegin + 1)) then index + else loop (index + 1) + loop startIndex + + let prefixLength = findIndexOfFirstCharAfterCommonPrefix 0 i0 kvs.Length minLength + + // sort by first char after common prefix, then by length, then lexicographical + System.Array.Sort(kvs, {new Comparer() with + member t.Compare((k1, _), (k2, _)) = + if k1.Length > prefixLength && k2.Length > prefixLength then + let d = int k1.[prefixLength] - int k2.[prefixLength] + if d <> 0 then d + else + let d = k1.Length - k2.Length + if d <> 0 then d + else System.String.CompareOrdinal(k1, k2) + else + k1.Length - k2.Length}) + + let tb, ilg = createStaticMappingTypeBuilder() + + let isPrimitive = T.IsPrimitive || T.IsEnum + + let physicalEqualityComparer = PhysicalEqualityComparer<'T>.InstanceOrNull + let loadConstant = if isPrimitive then createLoaderForPrimitiveConstants ilg + else Unchecked.defaultof<_> + + let lengthLocal = ilg.DeclareLocal(typeof) + let 
loadLength() = ilg.Emit(OpCodes.Ldloc_0) + let storeLength() = ilg.Emit(OpCodes.Stloc_0) + + let charPointerType = typeof.MakePointerType() + let charPointerLocal = ilg.DeclareLocal(charPointerType) + let loadPtr() = ilg.Emit(OpCodes.Ldloc_1) + let storePtr() = ilg.Emit(OpCodes.Stloc_1) + + // Declaring the following local as int instead of char improves + // code generation on the 64-bit JIT. + let chLocal = ilg.DeclareLocal(typeof) + let loadCh = fun (_: ILGenerator) -> ilg.Emit(OpCodes.Ldloc_2) + let storeCh() = ilg.Emit(OpCodes.Stloc_2) + + let resultOrIndexLocal = ilg.DeclareLocal(if isPrimitive then T else typeof) + let loadResult() = ilg.Emit(OpCodes.Ldloc_3) + let storeResult() = ilg.Emit(OpCodes.Stloc_3) + + let stringLocal = ilg.DeclareLocal(typeof, true) // pinned string + let storeString() = ilg.Emit(OpCodes.Stloc_S, 4uy) + + // set up local variables + ilg.Emit(OpCodes.Ldarg_1) // load string argument + ilg.Emit(OpCodes.Dup) + ilg.Emit(OpCodes.Dup) + storeString() // pins string + // accessing .Length triggers null reference exception if string is null + ilg.EmitCall(OpCodes.Call, typeof.GetMethod("get_Length"), null) + storeLength() + ilg.Emit(OpCodes.Conv_I) + ilg.EmitCall(OpCodes.Call, typeof.GetMethod("get_OffsetToStringData"), null) + ilg.Emit(OpCodes.Add) + storePtr() + + let defaultLabel = ilg.DefineLabel() + let returnLabel = ilg.DefineLabel() + + // some helper functions + + let dereferenceAndIncrementPtr intType doIncrement = + loadPtr() + if doIncrement then + ilg.Emit(OpCodes.Dup) + loadI4 ilg (match intType with + | U2 -> 1*sizeof + | U4 -> 2*sizeof + | U8 -> 4*sizeof) + ilg.Emit(OpCodes.Add) + storePtr() + match intType with + | U2 -> ilg.Emit(OpCodes.Ldind_U2) + | U4 -> ilg.Emit(OpCodes.Ldind_U4) + | U8 -> ilg.Emit(OpCodes.Ldind_I8) + + let incrementPtrByNumberOfChars i = + loadPtr() + loadI4 ilg (i*sizeof) + ilg.Emit(OpCodes.Add) + storePtr() + + let returnedValueIndices = if isPrimitive then null else ResizeArray<_>(kvs.Length) + let 
returnValue i = + if isPrimitive then + loadConstant (snd kvs.[i]) + else + loadI4 ilg (returnedValueIndices.Count) + returnedValueIndices.Add(i) + storeResult() + ilg.Emit(OpCodes.Br, returnLabel) + + let longKeyData = ref (new ResizeArray<_>(), null, null, null) + + /// Emit a call to FParsec.Buffer.Equal helper function to compare + /// a long segment of the input string. + let emitLongStringComparison dataIndex dataLength isFinal = + let data, fieldBuilder, methodInfo, pinnedDataLocal = !longKeyData + let mutable f, m, pdl = fieldBuilder, methodInfo, pinnedDataLocal + if isNull f then + f <- tb.DefineField("longKeyData", typeof, FieldAttributes.Public) + let ptrType = typeof.MakePointerType() + m <- typeof.GetMethod("Equals", [|ptrType; ptrType; typeof|]) + pdl <- ilg.DeclareLocal(typeof, true) + longKeyData:= (data, f, m, pdl) + + ilg.Emit(OpCodes.Ldarg_0) + ilg.Emit(OpCodes.Ldfld, f) + ilg.Emit(OpCodes.Dup) + ilg.Emit(OpCodes.Stloc_S, pdl) // pin data array + loadI4 ilg dataIndex + ilg.Emit(OpCodes.Ldelema, typeof) + ilg.Emit(OpCodes.Conv_I) + + loadPtr() + if not isFinal then + incrementPtrByNumberOfChars (dataLength*2) + loadI4 ilg dataLength + ilg.EmitCall(OpCodes.Call, m, null) + ilg.Emit(OpCodes.Ldnull) + ilg.Emit(OpCodes.Stloc_S, pdl) // unpin data array + ilg.Emit(OpCodes.Brfalse, defaultLabel) + + let emitStringComparison (key: string) idx length isFinal = + if length > 0 then + let mutable idx, length = idx, length + if idx%2 = 1 then + // align ptr to 4-byte boundary + // (this assumes that the first char in a string is aligned) + dereferenceAndIncrementPtr U2 (not isFinal || length > 1) + loadI4 ilg (int key.[idx]) + ilg.Emit(OpCodes.Bne_Un, defaultLabel) + idx <- idx + 1 + length <- length - 1 + + if length > sizeof*4 then + // store string data into longStringData + let data, _, _, _ = !longKeyData + let dataIndex = data.Count + while length >= 2 do + // if necessary we will swap the byte order of the whole data array + // when we assign it to 
the longKeyData field + let v = uint32 key.[idx] ||| (uint32 key.[idx + 1] <<< 16) + data.Add(v) + idx <- idx + 2 + length <- length - 2 + if isFinal && length = 1 then + data.Add(uint32 key.[idx]) + length <- 0 + // emit call to string comparison function + emitLongStringComparison dataIndex (data.Count - dataIndex) isFinal + else + #if UNALIGNED_READS + if sizeof = 8 then + while length >= 4 || (isFinal && length = 3) do + dereferenceAndIncrementPtr U8 (not isFinal || length > 4) + let v = (uint64 key.[idx] ) + ||| (uint64 key.[idx + 1] <<< 16) + ||| (uint64 key.[idx + 2] <<< 32) + ||| (if length > 3 then uint64 key.[idx + 3] <<< 48 else 0UL) + let v = if System.BitConverter.IsLittleEndian then v + else Buffer.SwapByteOrder(v) + loadU8 ilg v + ilg.Emit(OpCodes.Bne_Un, defaultLabel) + idx <- idx + 4 + length <- length - 4 + #endif + while length >= 2 || (isFinal && length = 1) do // NOTE(review): a final odd char is compared with a 4-byte read, presumably relying on the string's zero terminator - confirm + dereferenceAndIncrementPtr U4 (not isFinal || length > 2) + let v = if length = 1 then int key.[idx] + else int key.[idx] ||| (int key.[idx + 1] <<< 16) + let v = if System.BitConverter.IsLittleEndian then v + else int (Buffer.SwapByteOrder(uint32 v)) + loadI4 ilg v + ilg.Emit(OpCodes.Bne_Un, defaultLabel) + idx <- idx + 2 + length <- length - 2 + if length > 0 then + Debug.Assert(not isFinal) + dereferenceAndIncrementPtr U2 true + loadI4 ilg (int key.[idx]) + ilg.Emit(OpCodes.Bne_Un, defaultLabel) + + let subtreeLabels = if isNull physicalEqualityComparer then null // NOTE(review): appears to be shadowed by a later subtreeLabels binding before it is used - confirm + else System.Collections.Generic.Dictionary(SubtreeEqualityComparer<'T>(kvs, physicalEqualityComparer)) + + // Partitions the key pairs iBegin..(iEnd - 1) into branches with identical "branch-key". + // Returns [|iBegin, i2, ..., iN, iEnd|], [|getBranchKey (fst kvs.[iBegin]), getBranchKey (fst kvs.[i2]), ..., getBranchKey (fst kvs.[iN])|] + // where iBegin .. iN are the indices where the branches start. 
+ let getBranchIndicesAndKeys (iBegin: int) iEnd getBranchKey = + let mutable n = 0 + let indices, keys = new ResizeArray(iEnd - iBegin), new ResizeArray(iEnd - iBegin) + indices.Add(iBegin) + let mutable prevKey : int = getBranchKey (fst kvs.[iBegin]) + keys.Add(prevKey) + for i = iBegin + 1 to iEnd - 1 do + let key = getBranchKey (fst kvs.[i]) + if key <> prevKey then + prevKey <- key + indices.Add(i) + keys.Add(key) + indices.Add(iEnd) // the indices array has one element more + indices.ToArray(), keys.ToArray() + + // Returns labels for the subtrees given by the branchIndices and the subtreeStringIndex, + // and an array with bools indicating whether the respective label was newly created. + // If the dictionary already contains a label for an equivalent subtree, that label is returned; + // otherwise, a new label is created. + let getBranchLabels (subtreeLabels: Dictionary) subtreeStringIndex (branchIndices: int[]) = + assert (branchIndices.Length >= 2 && branchIndices.[0] < branchIndices.[1]) + let n = branchIndices.Length - 1 + let isNewLabel = Array.zeroCreate n + let labels = Array.zeroCreate n + if isNull subtreeLabels then + for i = 0 to n - 1 do + isNewLabel.[i] <- true + labels.[i] <- ilg.DefineLabel() + else + let mutable iBegin = branchIndices.[0] + for j = 1 to branchIndices.Length - 1 do + let iEnd = branchIndices.[j] + let subtree = Subtree(subtreeStringIndex, iBegin, iEnd - iBegin) + iBegin <- iEnd + + let b = j - 1 + let mutable label = Unchecked.defaultof<_> + if subtreeLabels.TryGetValue(subtree, &label) then + labels.[b] <- label + else + isNewLabel.[b] <- true + let label = ilg.DefineLabel() + labels.[b] <- label + subtreeLabels.Add(subtree, label) + labels, isNewLabel + + let tempLocals = new TempLocals(ilg) + + // Assumes keys in iBegin..(iEnd - 1) are sorted by the branch-key returned by getBranchKey. 
+ let switch getBranchKey loadVar minVarValue maxVarValue subtreeLabels iBegin iEnd subtreeStringIndex emitBranchIter = + let branchIndices, branchKeys = getBranchIndicesAndKeys iBegin iEnd getBranchKey + let branchLabels, isNewLabel = getBranchLabels subtreeLabels subtreeStringIndex branchIndices + let switchRanges, switchLabels = mergeSortedKeyLabelRanges branchKeys branchLabels + emitSwitch ilg loadVar tempLocals + defaultMappingLengthCap defaultMappingDensityThreshold + minVarValue maxVarValue + defaultLabel switchRanges switchLabels + for i = 0 to isNewLabel.Length - 1 do + if isNewLabel.[i] then + ilg.MarkLabel(branchLabels.[i]) + emitBranchIter branchIndices.[i] branchIndices.[i + 1] + + let subtreeEqualityComparer = if isNull physicalEqualityComparer then Unchecked.defaultof<_> + else SubtreeEqualityComparer<'T>(kvs, physicalEqualityComparer) + let subtreeLabels = if isNull physicalEqualityComparer then null + else Dictionary(subtreeEqualityComparer) + + let rec emitSubtree length idx iBegin iEnd = + assert ( iBegin < iEnd + && kvs.[iBegin..(iEnd - 1)] + |> Array.map (fun (k,_) -> k.Length) + |> Array.forall ((=) length)) + let idx1 = findIndexOfFirstCharAfterCommonPrefix idx iBegin iEnd length + if idx <> idx1 then + emitStringComparison (fst kvs.[iBegin]) idx (idx1 - idx) (idx1 = length) + if idx1 = length then + assert (iBegin + 1 = iEnd) + returnValue iBegin + else + let mutable emit = true + if idx <> idx1 && isNotNull subtreeLabels then + let subtree = Subtree(idx1, iBegin, iEnd - iBegin) + let mutable label = Unchecked.defaultof<_> + if subtreeLabels.TryGetValue(subtree, &label) then + // an equivalent subtree has already been handled elsewhere + ilg.Emit(OpCodes.Br, label) // jump to that code + emit <- false + else + let label = ilg.DefineLabel() + ilg.MarkLabel(label) + subtreeLabels.Add(subtree, label) + + if emit then + dereferenceAndIncrementPtr U2 (idx1 + 1 < length) + storeCh() + switch (fun str -> int str.[idx1]) loadCh 0 0xffff + (if idx1 + 
1 < length || isNull subtreeLabels then subtreeLabels // we want to keep the switch branches local + else Dictionary(subtreeEqualityComparer)) // when they only contain a return statement + iBegin iEnd + (idx1 + 1) (emitSubtree length (idx1 + 1)) + + let emitMaxLengthSubtree stringIndex iBegin iEnd = + loadLength() + loadI4 ilg maxLength + ilg.Emit(OpCodes.Bne_Un, defaultLabel) + emitSubtree maxLength stringIndex iBegin iEnd + + Debug.Assert(i0 < kvs.Length) + + if i0 <> 0 then // first key is empty + let label = ilg.DefineLabel() + loadLength() + ilg.Emit(OpCodes.Brtrue, label) + returnValue 0 + ilg.MarkLabel(label) + + if minLength = maxLength then + emitMaxLengthSubtree 0 i0 kvs.Length + else // at least two non-empty keys with different lengths + let checkMinLength() = + loadLength() + loadI4 ilg minLength + ilg.Emit(OpCodes.Blt, defaultLabel) + + if prefixLength <> 0 then + checkMinLength() + emitStringComparison (fst kvs.[i0]) 0 prefixLength false + if prefixLength = minLength then + let label = ilg.DefineLabel() + loadLength() + loadI4 ilg minLength + ilg.Emit(OpCodes.Bne_Un, label) + returnValue i0 + ilg.MarkLabel(label) + i0 <- i0 + 1 + + else // prefixLength = 0 + if i0 = 0 && (fst kvs.[0]).[0] = '\u0000' then + // If a key contains a zero as the first char, we can't avoid + // the following length check (which we otherwise don't need for + // the switch because of the null termination of strings). 
+ checkMinLength() + + if prefixLength + 1 = maxLength then // prefixLength <> 0 + emitMaxLengthSubtree prefixLength i0 kvs.Length + else + let topLevelTreeLabels = if isNull subtreeEqualityComparer then null + else Dictionary(subtreeEqualityComparer) + // switch over char after prefix + dereferenceAndIncrementPtr U2 (prefixLength + 1 < maxLength) + storeCh() + switch (fun str -> int str.[prefixLength]) loadCh 0 0xffff + topLevelTreeLabels + i0 kvs.Length + (prefixLength + 1) + (fun iBegin iEnd -> + // switch over length + switch (fun str -> str.Length) (fun ilg -> loadLength()) 0 System.Int32.MaxValue + subtreeLabels + iBegin iEnd + (prefixLength + 1) + (fun iBegin iEnd -> + emitSubtree (fst kvs.[iBegin]).Length (prefixLength + 1) iBegin iEnd)) + + // return default value + let defaultValueIsNull = not T.IsValueType && isNull (box defaultValue) + ilg.MarkLabel(defaultLabel) + if isPrimitive then + loadConstant defaultValue + storeResult() + else + if defaultValueIsNull then + ilg.Emit(OpCodes.Ldnull) + else + ilg.Emit(OpCodes.Ldarg_0) + ilg.Emit(OpCodes.Ldfld, tb.DefineField("DefaultValue", T, FieldAttributes.Public)) + ilg.Emit(OpCodes.Ret) + + // return result + ilg.MarkLabel(returnLabel) + if isPrimitive then + loadResult() + else + // We could store all the values in individual fields to avoid the bounds check + // and indirect load, but that probably wouldn't be worth the additional + // code generation (and garbage collection?) costs (except for tiny mappings). 
+ ilg.Emit(OpCodes.Ldarg_0) + ilg.Emit(OpCodes.Ldfld, tb.DefineField("Values", typeof<'T[]>, FieldAttributes.Public)) + loadResult() + ilg.Emit(OpCodes.Ldelem, T) + ilg.Emit(OpCodes.Ret) + + // compile type + let t = tb.CreateType() + // instantiate type + let mapping = FormatterServices.GetUninitializedObject(t) :?> (string -> 'T) + if not isPrimitive then + // we can't use the previously used Fieldbuilders here, because SetValue is not implemented in FieldBuilders + if not defaultValueIsNull then t.GetField("DefaultValue").SetValue(mapping, defaultValue) + let values = Array.zeroCreate returnedValueIndices.Count + let mutable j = 0 + for i in returnedValueIndices do + values.[j] <- snd kvs.[i] + j <- j + 1 + t.GetField("Values").SetValue(mapping, values) + + let data, _, _, _ = !longKeyData + if data.Count <> 0 then + let dataArray = data.ToArray() + if not (System.BitConverter.IsLittleEndian) then + FParsec.Buffer.SwapByteOrder(dataArray) + t.GetField("longKeyData").SetValue(mapping, dataArray) + + + #if DEBUG_STATIC_MAPPING + // saveEmitAssembly "FParsec.Emitted.dll" + + if isNull physicalEqualityComparer then mapping + else + let dict = new System.Collections.Generic.Dictionary(kvs.Length) + for k, v in kvs do + dict.Add(k, v) + let errorHandler (key: string) : unit = + let e = new System.Exception("An internal assert check in FParsec.StaticMapping.createStringMapping failed. Please report this error to fparsec@quanttec.com. 
(The Data member of the exception object contains the information needed to reproduce the error.)") + e.Data.["Argument"] <- key + e.Data.["KeysValues"] <- dict + e.Data.["DefaultValue"] <- defaultValue + raise e + + fun key -> + let mutable value = Unchecked.defaultof<_> + if not (dict.TryGetValue(key, &value)) then value <- defaultValue + let value2 = mapping key + if not (physicalEqualityComparer.Equals(value, value2)) then errorHandler key + value + #else + mapping + #endif + +#endif \ No newline at end of file diff --git a/src/FParsec/StaticMapping.fsi b/src/FParsec/StaticMapping.fsi new file mode 100644 index 0000000..8b740ad --- /dev/null +++ b/src/FParsec/StaticMapping.fsi @@ -0,0 +1,78 @@ +// Copyright (c) Stephan Tolksdorf 2010-2011 +// License: Simplified BSD License. See accompanying documentation. + +module FParsec.StaticMapping + +#if LOW_TRUST +#else + +/// `createStaticCharIndicatorFunction invert charsInSet` +/// creates an optimized indicator function for the chars specified by the `charsInSet` sequence. +/// If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) +/// if and only if it is called with a char contained in `charsInSet`. +val createStaticCharIndicatorFunction: + invert: bool -> charsInSet: seq<char> -> (char -> bool) + +/// `createStaticCharRangeIndicatorFunction invert rangesInSet` +/// creates an optimized indicator function for the chars in the ranges specified by the `rangesInSet` sequence. +/// If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) if and only if it is +/// called with a char contained in at least one of the ranges of `rangesInSet`. +val createStaticCharRangeIndicatorFunction: + invert: bool -> rangesInSet: seq<Range> -> (char -> bool) + +/// `createStaticIntIndicatorFunction invert valuesInSet` +/// creates an optimized indicator function for the integers specified by the `valuesInSet` sequence. 
+/// If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) if and only if it is +/// called with an integer contained in `valuesInSet`. +val createStaticIntIndicatorFunction: + invert: bool -> valuesInSet: seq<int> -> (int -> bool) + +/// `createStaticIntRangeIndicatorFunction invert rangesInSet` +/// creates an optimized indicator function for the integers in the ranges specified by the `rangesInSet` sequence. +/// If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) if and only if it is +/// called with an `int` contained in at least one of the ranges of `rangesInSet`. +val createStaticIntRangeIndicatorFunction: + invert: bool -> rangesInSet: seq<Range> -> (int -> bool) + +/// `createStaticIntMapping defaultValue keyValues` +/// creates an optimized mapping function that maps integer keys to values. +/// The `keyValues` sequence specifies the key-value pairs for the mapping. +/// All keys not specified in `keyValues` are mapped to `defaultValue`. +val createStaticIntMapping: + defaultValue: 'T -> keyValues: #seq<int*'T> -> (int -> 'T) + +/// `createStaticIntRangeMapping defaultValue keyValues` +/// creates an optimized mapping function that maps integer key ranges to values. +/// The `keyValues` sequence specifies the range-value pairs for the mapping. +/// All keys not contained in one of the ranges in `keyValues` are mapped to `defaultValue`. +val createStaticIntRangeMapping: + defaultValue: 'T -> keyValues: #seq<Range*'T> -> (int -> 'T) + +/// `createStaticStringMapping defaultValue keyValues` +/// creates an optimized mapping function that maps string keys to values. +/// The `keyValues` sequence specifies the key-value pairs for the mapping. +/// All keys not specified in `keyValues` are mapped to `defaultValue`. A `null` key is not supported. 
+val createStaticStringMapping: + defaultValue: 'T -> keyValues: #seq<string*'T> -> (string -> 'T) + + +val internal filterOutDefaultValueRanges: + comparer: System.Collections.Generic.EqualityComparer<'T> + -> ranges: Range[] + -> values: 'T[] + -> defaultValue: 'T + -> Range[]*'T[] + +val internal createStaticIntIndicatorFunctionImpl<'TInt when 'TInt : struct> : + lengthCap: int -> densityThreshold: double + -> minValue: int -> maxValue: int + -> invert: bool -> ranges: Range[] + -> ('TInt -> bool) + +val internal createStaticIntMappingImpl: + lengthCap: int -> densityThreshold: double + -> minKey: int -> maxKey: int + -> defaultValue: 'T -> ranges: Range[] -> values: 'T[] + -> (int -> 'T) + +#endif \ No newline at end of file diff --git a/src/FParsecCS/Buffer.cs b/src/FParsecCS/Buffer.cs new file mode 100644 index 0000000..7967da7 --- /dev/null +++ b/src/FParsecCS/Buffer.cs @@ -0,0 +1,233 @@ +// Copyright (c) Stephan Tolksdorf 2007-2010 +// License: Simplified BSD License. See accompanying documentation. + +using System; + +using System.Diagnostics; + +namespace FParsec { + +public static class Buffer { + +#if !LOW_TRUST + +/// Calculates: end - begin.
+/// Precondition: 2^31 > end - begin >= 0.
+internal static unsafe uint PositiveDistance(char* begin, char* end) { /* the distance is taken in bytes, converted to uint, then halved because a char occupies two bytes */ + return (uint)((byte*)end - (byte*)begin)/2; +} + +/// Calculates: end - begin.
+/// Precondition: end - begin >= 0.
+internal static unsafe long PositiveDistance64(char* begin, char* end) { /* byte distance as an unsigned 64-bit value, halved because a char occupies two bytes */ + return (long)((ulong)((byte*)end - (byte*)begin)/2); +} + +// Probably for pedagogical reasons there is no System.Buffer.BlockCopy +// that takes pointers, hence we are forced to write our own version. + +/// Copies size bytes from src to dst. Correctly handles overlapped memory blocks. +static internal unsafe void Copy(byte* dst, byte* src, int size) { /* copies forward, or backward from the end (label Reverse) when dst starts inside the source range */ + if (size < 0) throw new ArgumentOutOfRangeException("size", "The size must be non-negative."); + + // C# doesn't support native ints and the 32-bit .NET JIT can't optimize the + // 64-comparison into a single 32-bit one, so we have to get our hands dirty... + + // goto Reverse if src < dst && dst - src < size + if (sizeof(IntPtr) == 4) { + if (unchecked((uint)(dst - src)) < (uint)size) goto Reverse; + } else { + if (unchecked((ulong)(dst - src)) < (ulong)size) goto Reverse; + } + +#if UNALIGNED_READS + // with UNALIGNED_READS we don't require identical 2-byte alignment + if (((uint)dst & 1) == ((uint)src & 1)) { +#else + if (((uint)dst & 3) == ((uint)src & 3)) { +#endif + // the pointers have identical byte (and 2-byte) alignment + + // align dst + if (((uint)dst & 1) != 0 && size != 0) { + *dst = *src; + ++src; ++dst; --size; + } + if (((uint)dst & 2) != 0 && size >= 2) { + *((short*)dst) = *((short*)src); + src += 2; dst += 2; size -= 2; + } + + for (; size >= 16; size -= 16) { + ((int*)dst)[0] = ((int*)src)[0]; + ((int*)dst)[1] = ((int*)src)[1]; + ((int*)dst)[2] = ((int*)src)[2]; + ((int*)dst)[3] = ((int*)src)[3]; + src += 16; dst += 16; + } + if ((size != 0)) { + if ((size & 8) != 0) { + ((int*)dst)[0] = ((int*)src)[0]; + ((int*)dst)[1] = ((int*)src)[1]; + src += 8; dst += 8; + } + if ((size & 4) != 0) { + *((int*)dst) = *((int*)src); + src += 4; dst += 4; + } + if ((size & 2) != 0) { + *((short*)dst) = *((short*)src); + src += 2; dst += 2; + } + if ((size & 1) != 0) { + *dst = *src; + } + } + return; + } else { + // backup path for 
pointers with different byte (or 2-byte) alignment + for (; size != 0; --size) { + *dst = *src; + ++src; ++dst; + } + return; + } + +Reverse: + src += size; dst += size; +#if UNALIGNED_READS + // with UNALIGNED_READS we don't require identical 2-byte alignment + if (((uint)dst & 1) == ((uint)src & 1)) { +#else + if (((uint)dst & 3) == ((uint)src & 3)) { +#endif + // the pointers have identical byte (and 2-byte) alignment + + // align dst + if (((uint)dst & 1) != 0 && size != 0) { + --src; --dst; --size; + *dst = *src; + } + if (((uint)dst & 2) != 0 && size >= 2) { + src -= 2; dst -= 2; size -= 2; + *((short*)dst) = *((short*)src); + } + for (; size >= 16; size -= 16) { + src -= 16; dst -= 16; + ((int*)dst)[3] = ((int*)src)[3]; + ((int*)dst)[2] = ((int*)src)[2]; + ((int*)dst)[1] = ((int*)src)[1]; + ((int*)dst)[0] = ((int*)src)[0]; + } + if ((size & 0xf) != 0) { + if ((size & 8) != 0) { + src -= 8; dst -= 8; + ((int*)dst)[1] = ((int*)src)[1]; + ((int*)dst)[0] = ((int*)src)[0]; + } + if ((size & 4) != 0) { + src -= 4; dst -= 4; + *((int*)dst) = *((int*)src); + } + if ((size & 2) != 0) { + src -= 2; dst -= 2; + *((short*)dst) = *((short*)src); + } + if ((size & 1) != 0) { + src -= 1; dst -= 1; + *dst = *src; + } + } + return; + } else { + // backup path for pointers with different byte (or 2-byte) alignment + for (; size != 0; --size) { + --src; --dst; + *dst = *src; + } + return; + } +} + +#endif + +internal static uint SwapByteOrder(uint value) { /* returns value with its four bytes in reverse order */ + return (((value << 24) | (value >> 8)) & 0xff00ff00U) + | (((value << 8) | (value >> 24)) & 0x00ff00ffU); +} + +internal static ulong SwapByteOrder(ulong value) { /* returns value with its eight bytes in reverse order */ + return (((value << 56) | (value >> 8)) & 0xff000000ff000000UL) + | (((value << 8) | (value >> 56)) & 0x000000ff000000ffUL) + | (((value << 40) | (value >> 24)) & 0x00ff000000ff0000UL) + | (((value << 24) | (value >> 40)) & 0x0000ff000000ff00UL); +} + +internal static void SwapByteOrder(uint[] array) { /* reverses the byte order of every element in place */ + for (int i = 0; i < array.Length; ++i) { + var v = 
array[i]; + array[i] = (((v << 24) | (v >> 8)) & 0xff00ff00U) + | (((v << 8) | (v >> 24)) & 0x00ff00ffU); + } +} + +#if !LOW_TRUST + +internal static unsafe void SwapByteOrder(uint* buffer, uint length) { /* reverses the byte order of length uints in place */ + for (uint i = 0; i < length; ++i) { /* unsigned index: an int index would wrap to a negative offset for length > int.MaxValue */ + var v = buffer[i]; + buffer[i] = (((v << 24) | (v >> 8)) & 0xff00ff00U) + | (((v << 8) | (v >> 24)) & 0x00ff00ffU); + } +} + +#endif + +#if LOW_TRUST + +internal static byte[] CopySubarray(byte[] array, int index, int length) { /* returns a new array holding length bytes of array, starting at index */ + var subArray = new byte[length]; + System.Buffer.BlockCopy(array, index, subArray, 0, length); + return subArray; +} + +internal static uint[] CopyUIntsStoredInLittleEndianByteArray(byte[] src, int srcIndex, int srcLength) { /* srcIndex and srcLength count bytes; the uints are byte-swapped on big-endian hosts */ + Debug.Assert(srcLength%sizeof(uint) == 0); + var subArray = new uint[srcLength/sizeof(uint)]; + System.Buffer.BlockCopy(src, srcIndex, subArray, 0, srcLength); + if (!BitConverter.IsLittleEndian) SwapByteOrder(subArray); + return subArray; +} + +#endif + +#if !LOW_TRUST +// used by StaticMapping.createStaticStringMapping +public static unsafe bool Equals(uint* ptr1, uint* ptr2, uint length) { /* true iff the length uints at ptr1 and ptr2 are pairwise equal */ + Debug.Assert(length >= 0); /* always true: length is unsigned */ + for (; length >= 4; length -= 4) { + if ( ptr1[0] != ptr2[0] + || ptr1[1] != ptr2[1] + || ptr1[2] != ptr2[2] + || ptr1[3] != ptr2[3]) goto ReturnFalse; + ptr1 += 4; + ptr2 += 4; + } + if ((length & 2) != 0) { + if ( ptr1[0] != ptr2[0] + || ptr1[1] != ptr2[1]) goto ReturnFalse; + ptr1 += 2; + ptr2 += 2; + } + if ((length & 1) != 0) { + if (ptr1[0] != ptr2[0]) goto ReturnFalse; + } + return true; +ReturnFalse: + return false; +} +#endif + +} + +} \ No newline at end of file diff --git a/src/FParsecCS/CaseFoldTable.cs b/src/FParsecCS/CaseFoldTable.cs new file mode 100644 index 0000000..d21b0bd --- /dev/null +++ b/src/FParsecCS/CaseFoldTable.cs @@ -0,0 +1,1557 @@ +// Copyright (c) Stephan Tolksdorf 2009-2012 +// License: Simplified BSD License. See accompanying documentation. 
+ +using System; +using System.Runtime.InteropServices; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; + +namespace FParsec { + +internal static class CaseFoldTable { /* table with one entry per 16-bit char: the char itself unless oneToOneMappings lists a replacement */ +#if LOW_TRUST + public static readonly char[] FoldedChars = CreateFoldedCharsArray(); + + private static char[] CreateFoldedCharsArray() { + Debug.Assert(oneToOneMappings.Length%2 == 0); + var table = new char[0x10000]; + for (int i = 0; i < table.Length; ++i) + table[i] = (char)i; /* start from the identity mapping */ + for (int i = oneToOneMappings.Length - 2; i >= 0; i -= 2) /* the string holds (source, replacement) pairs; walking backwards lets the first pair win if a source char occurs twice */ + table[oneToOneMappings[i]] = oneToOneMappings[i + 1]; + return table; + } +#else + public static readonly char[] FoldedCharsArray = new char[0x10000]; + + public static readonly unsafe char* FoldedChars = Initialize(); + internal static GCHandle FoldedCharsHandle; // assigned by Initialize + + private static unsafe char* Initialize() { + // initialize FoldedCharsArray + int n = oneToOneMappings.Length; + Debug.Assert(n%2 == 0); + fixed (char* chars = FoldedCharsArray) + fixed (char* mappings = oneToOneMappings) { + var uints = (uint*) chars; /* identity-fill through uint stores: two chars per store, eight chars per loop iteration */ + uint c0 = BitConverter.IsLittleEndian ? 0x10000u : 0x1u; /* chars 0 and 1 packed in memory order */ + for (int i = 0; i < 0x10000/2; i += 4) { + uints[i ] = c0; + uints[i + 1] = c0 + 0x20002u; + uints[i + 2] = c0 + 0x40004u; + uints[i + 3] = c0 + 0x60006u; + c0 = unchecked(c0 + 0x80008u); /* overflows after the final iteration, hence unchecked */ + } + for (int i = n - 2; i >= 0; i -= 2) /* same backwards pair walk as in the LOW_TRUST variant */ + chars[mappings[i]] = mappings[i + 1]; + } + + // We pin an array on the managed heap instead of using Marshal.AllocHGlobal + // because we normally want it to be alive for as long as the AppDomain exists + // but not necessarily as long as the process lives. + // The table is large enough to be allocated on the large object heap, + // so pinning it is a no-op and the GC is not affected. 
+ FoldedCharsHandle = GCHandle.Alloc(FoldedCharsArray, GCHandleType.Pinned); + return (char*)FoldedCharsHandle.AddrOfPinnedObject(); + } +#endif + + private const string oneToOneMappings = "\u0041\u0061\u0042\u0062\u0043\u0063\u0044\u0064\u0045\u0065\u0046\u0066\u0047\u0067\u0048\u0068\u0049\u0069\u004A\u006A\u004B\u006B\u004C\u006C\u004D\u006D\u004E\u006E\u004F\u006F\u0050\u0070\u0051\u0071\u0052\u0072\u0053\u0073\u0054\u0074\u0055\u0075\u0056\u0076\u0057\u0077\u0058\u0078\u0059\u0079\u005A\u007A\u00B5\u03BC\u00C0\u00E0\u00C1\u00E1\u00C2\u00E2\u00C3\u00E3\u00C4\u00E4\u00C5\u00E5\u00C6\u00E6\u00C7\u00E7\u00C8\u00E8\u00C9\u00E9\u00CA\u00EA\u00CB\u00EB\u00CC\u00EC\u00CD\u00ED\u00CE\u00EE\u00CF\u00EF\u00D0\u00F0\u00D1\u00F1\u00D2\u00F2\u00D3\u00F3\u00D4\u00F4\u00D5\u00F5\u00D6\u00F6\u00D8\u00F8\u00D9\u00F9\u00DA\u00FA\u00DB\u00FB\u00DC\u00FC\u00DD\u00FD\u00DE\u00FE\u0100\u0101\u0102\u0103\u0104\u0105\u0106\u0107\u0108\u0109\u010A\u010B\u010C\u010D\u010E\u010F\u0110\u0111\u0112\u0113\u0114\u0115\u0116\u0117\u0118\u0119\u011A\u011B\u011C\u011D\u011E\u011F\u0120\u0121\u0122\u0123\u0124\u0125\u0126\u0127\u0128\u0129\u012A\u012B\u012C\u012D\u012E\u012F\u0132\u0133\u0134\u0135\u0136\u0137\u0139\u013A\u013B\u013C\u013D\u013E\u013F\u0140\u0141\u0142\u0143\u0144\u0145\u0146\u0147\u0148\u014A\u014B\u014C\u014D\u014E\u014F\u0150\u0151\u0152\u0153\u0154\u0155\u0156\u0157\u0158\u0159\u015A\u015B\u015C\u015D\u015E\u015F\u0160\u0161\u0162\u0163\u0164\u0165\u0166\u0167\u0168\u0169\u016A\u016B\u016C\u016D\u016E\u016F\u0170\u0171\u0172\u0173\u0174\u0175\u0176\u0177\u0178\u00FF\u0179\u017A\u017B\u017C\u017D\u017E\u017F\u0073\u0181\u0253\u0182\u0183\u0184\u0185\u0186\u0254\u0187\u0188\u0189\u0256\u018A\u0257\u018B\u018C\u018E\u01DD\u018F\u0259\u0190\u025B\u0191\u0192\u0193\u0260\u0194\u0263\u0196\u0269\u0197\u0268\u0198\u0199\u019C\u026F\u019D\u0272\u019F\u0275\u01A0\u01A1\u01A2\u01A3\u01A4\u01A5\u01A6\u0280\u01A7\u01A8\u01A9\u0283\u01AC\u01AD\u01AE\u0288\u01AF\u01B0\u01B1\u028A\u01B2\u02
8B\u01B3\u01B4\u01B5\u01B6\u01B7\u0292\u01B8\u01B9\u01BC\u01BD\u01C4\u01C6\u01C5\u01C6\u01C7\u01C9\u01C8\u01C9\u01CA\u01CC\u01CB\u01CC\u01CD\u01CE\u01CF\u01D0\u01D1\u01D2\u01D3\u01D4\u01D5\u01D6\u01D7\u01D8\u01D9\u01DA\u01DB\u01DC\u01DE\u01DF\u01E0\u01E1\u01E2\u01E3\u01E4\u01E5\u01E6\u01E7\u01E8\u01E9\u01EA\u01EB\u01EC\u01ED\u01EE\u01EF\u01F1\u01F3\u01F2\u01F3\u01F4\u01F5\u01F6\u0195\u01F7\u01BF\u01F8\u01F9\u01FA\u01FB\u01FC\u01FD\u01FE\u01FF\u0200\u0201\u0202\u0203\u0204\u0205\u0206\u0207\u0208\u0209\u020A\u020B\u020C\u020D\u020E\u020F\u0210\u0211\u0212\u0213\u0214\u0215\u0216\u0217\u0218\u0219\u021A\u021B\u021C\u021D\u021E\u021F\u0220\u019E\u0222\u0223\u0224\u0225\u0226\u0227\u0228\u0229\u022A\u022B\u022C\u022D\u022E\u022F\u0230\u0231\u0232\u0233\u023A\u2C65\u023B\u023C\u023D\u019A\u023E\u2C66\u0241\u0242\u0243\u0180\u0244\u0289\u0245\u028C\u0246\u0247\u0248\u0249\u024A\u024B\u024C\u024D\u024E\u024F\u0345\u03B9\u0370\u0371\u0372\u0373\u0376\u0377\u037F\u03F3\u0386\u03AC\u0388\u03AD\u0389\u03AE\u038A\u03AF\u038C\u03CC\u038E\u03CD\u038F\u03CE\u0391\u03B1\u0392\u03B2\u0393\u03B3\u0394\u03B4\u0395\u03B5\u0396\u03B6\u0397\u03B7\u0398\u03B8\u0399\u03B9\u039A\u03BA\u039B\u03BB\u039C\u03BC\u039D\u03BD\u039E\u03BE\u039F\u03BF\u03A0\u03C0\u03A1\u03C1\u03A3\u03C3\u03A4\u03C4\u03A5\u03C5\u03A6\u03C6\u03A7\u03C7\u03A8\u03C8\u03A9\u03C9\u03AA\u03CA\u03AB\u03CB\u03C2\u03C3\u03CF\u03D7\u03D0\u03B2\u03D1\u03B8\u03D5\u03C6\u03D6\u03C0\u03D8\u03D9\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03E3\u03E4\u03E5\u03E6\u03E7\u03E8\u03E9\u03EA\u03EB\u03EC\u03ED\u03EE\u03EF\u03F0\u03BA\u03F1\u03C1\u03F4\u03B8\u03F5\u03B5\u03F7\u03F8\u03F9\u03F2\u03FA\u03FB\u03FD\u037B\u03FE\u037C\u03FF\u037D\u0400\u0450\u0401\u0451\u0402\u0452\u0403\u0453\u0404\u0454\u0405\u0455\u0406\u0456\u0407\u0457\u0408\u0458\u0409\u0459\u040A\u045A\u040B\u045B\u040C\u045C\u040D\u045D\u040E\u045E\u040F\u045F\u0410\u0430\u0411\u0431\u0412\u0432\u0413\u0433\u0414\u0434\u0415\u0435\u0416\u0436\u0417\u0437\u0418
\u0438\u0419\u0439\u041A\u043A\u041B\u043B\u041C\u043C\u041D\u043D\u041E\u043E\u041F\u043F\u0420\u0440\u0421\u0441\u0422\u0442\u0423\u0443\u0424\u0444\u0425\u0445\u0426\u0446\u0427\u0447\u0428\u0448\u0429\u0449\u042A\u044A\u042B\u044B\u042C\u044C\u042D\u044D\u042E\u044E\u042F\u044F\u0460\u0461\u0462\u0463\u0464\u0465\u0466\u0467\u0468\u0469\u046A\u046B\u046C\u046D\u046E\u046F\u0470\u0471\u0472\u0473\u0474\u0475\u0476\u0477\u0478\u0479\u047A\u047B\u047C\u047D\u047E\u047F\u0480\u0481\u048A\u048B\u048C\u048D\u048E\u048F\u0490\u0491\u0492\u0493\u0494\u0495\u0496\u0497\u0498\u0499\u049A\u049B\u049C\u049D\u049E\u049F\u04A0\u04A1\u04A2\u04A3\u04A4\u04A5\u04A6\u04A7\u04A8\u04A9\u04AA\u04AB\u04AC\u04AD\u04AE\u04AF\u04B0\u04B1\u04B2\u04B3\u04B4\u04B5\u04B6\u04B7\u04B8\u04B9\u04BA\u04BB\u04BC\u04BD\u04BE\u04BF\u04C0\u04CF\u04C1\u04C2\u04C3\u04C4\u04C5\u04C6\u04C7\u04C8\u04C9\u04CA\u04CB\u04CC\u04CD\u04CE\u04D0\u04D1\u04D2\u04D3\u04D4\u04D5\u04D6\u04D7\u04D8\u04D9\u04DA\u04DB\u04DC\u04DD\u04DE\u04DF\u04E0\u04E1\u04E2\u04E3\u04E4\u04E5\u04E6\u04E7\u04E8\u04E9\u04EA\u04EB\u04EC\u04ED\u04EE\u04EF\u04F0\u04F1\u04F2\u04F3\u04F4\u04F5\u04F6\u04F7\u04F8\u04F9\u04FA\u04FB\u04FC\u04FD\u04FE\u04FF\u0500\u0501\u0502\u0503\u0504\u0505\u0506\u0507\u0508\u0509\u050A\u050B\u050C\u050D\u050E\u050F\u0510\u0511\u0512\u0513\u0514\u0515\u0516\u0517\u0518\u0519\u051A\u051B\u051C\u051D\u051E\u051F\u0520\u0521\u0522\u0523\u0524\u0525\u0526\u0527\u0528\u0529\u052A\u052B\u052C\u052D\u052E\u052F\u0531\u0561\u0532\u0562\u0533\u0563\u0534\u0564\u0535\u0565\u0536\u0566\u0537\u0567\u0538\u0568\u0539\u0569\u053A\u056A\u053B\u056B\u053C\u056C\u053D\u056D\u053E\u056E\u053F\u056F\u0540\u0570\u0541\u0571\u0542\u0572\u0543\u0573\u0544\u0574\u0545\u0575\u0546\u0576\u0547\u0577\u0548\u0578\u0549\u0579\u054A\u057A\u054B\u057B\u054C\u057C\u054D\u057D\u054E\u057E\u054F\u057F\u0550\u0580\u0551\u0581\u0552\u0582\u0553\u0583\u0554\u0584\u0555\u0585\u0556\u0586\u10A0\u2D00\u10A1\u2D01\u10A2\u2D02\u10A3\u2D03\u10A4\u2D04\u
10A5\u2D05\u10A6\u2D06\u10A7\u2D07\u10A8\u2D08\u10A9\u2D09\u10AA\u2D0A\u10AB\u2D0B\u10AC\u2D0C\u10AD\u2D0D\u10AE\u2D0E\u10AF\u2D0F\u10B0\u2D10\u10B1\u2D11\u10B2\u2D12\u10B3\u2D13\u10B4\u2D14\u10B5\u2D15\u10B6\u2D16\u10B7\u2D17\u10B8\u2D18\u10B9\u2D19\u10BA\u2D1A\u10BB\u2D1B\u10BC\u2D1C\u10BD\u2D1D\u10BE\u2D1E\u10BF\u2D1F\u10C0\u2D20\u10C1\u2D21\u10C2\u2D22\u10C3\u2D23\u10C4\u2D24\u10C5\u2D25\u10C7\u2D27\u10CD\u2D2D\u13F8\u13F0\u13F9\u13F1\u13FA\u13F2\u13FB\u13F3\u13FC\u13F4\u13FD\u13F5\u1E00\u1E01\u1E02\u1E03\u1E04\u1E05\u1E06\u1E07\u1E08\u1E09\u1E0A\u1E0B\u1E0C\u1E0D\u1E0E\u1E0F\u1E10\u1E11\u1E12\u1E13\u1E14\u1E15\u1E16\u1E17\u1E18\u1E19\u1E1A\u1E1B\u1E1C\u1E1D\u1E1E\u1E1F\u1E20\u1E21\u1E22\u1E23\u1E24\u1E25\u1E26\u1E27\u1E28\u1E29\u1E2A\u1E2B\u1E2C\u1E2D\u1E2E\u1E2F\u1E30\u1E31\u1E32\u1E33\u1E34\u1E35\u1E36\u1E37\u1E38\u1E39\u1E3A\u1E3B\u1E3C\u1E3D\u1E3E\u1E3F\u1E40\u1E41\u1E42\u1E43\u1E44\u1E45\u1E46\u1E47\u1E48\u1E49\u1E4A\u1E4B\u1E4C\u1E4D\u1E4E\u1E4F\u1E50\u1E51\u1E52\u1E53\u1E54\u1E55\u1E56\u1E57\u1E58\u1E59\u1E5A\u1E5B\u1E5C\u1E5D\u1E5E\u1E5F\u1E60\u1E61\u1E62\u1E63\u1E64\u1E65\u1E66\u1E67\u1E68\u1E69\u1E6A\u1E6B\u1E6C\u1E6D\u1E6E\u1E6F\u1E70\u1E71\u1E72\u1E73\u1E74\u1E75\u1E76\u1E77\u1E78\u1E79\u1E7A\u1E7B\u1E7C\u1E7D\u1E7E\u1E7F\u1E80\u1E81\u1E82\u1E83\u1E84\u1E85\u1E86\u1E87\u1E88\u1E89\u1E8A\u1E8B\u1E8C\u1E8D\u1E8E\u1E8F\u1E90\u1E91\u1E92\u1E93\u1E94\u1E95\u1E9B\u1E61\u1E9E\u00DF\u1EA0\u1EA1\u1EA2\u1EA3\u1EA4\u1EA5\u1EA6\u1EA7\u1EA8\u1EA9\u1EAA\u1EAB\u1EAC\u1EAD\u1EAE\u1EAF\u1EB0\u1EB1\u1EB2\u1EB3\u1EB4\u1EB5\u1EB6\u1EB7\u1EB8\u1EB9\u1EBA\u1EBB\u1EBC\u1EBD\u1EBE\u1EBF\u1EC0\u1EC1\u1EC2\u1EC3\u1EC4\u1EC5\u1EC6\u1EC7\u1EC8\u1EC9\u1ECA\u1ECB\u1ECC\u1ECD\u1ECE\u1ECF\u1ED0\u1ED1\u1ED2\u1ED3\u1ED4\u1ED5\u1ED6\u1ED7\u1ED8\u1ED9\u1EDA\u1EDB\u1EDC\u1EDD\u1EDE\u1EDF\u1EE0\u1EE1\u1EE2\u1EE3\u1EE4\u1EE5\u1EE6\u1EE7\u1EE8\u1EE9\u1EEA\u1EEB\u1EEC\u1EED\u1EEE\u1EEF\u1EF0\u1EF1\u1EF2\u1EF3\u1EF4\u1EF5\u1EF6\u1EF7\u1EF8\u1EF9\u1EFA\u1EFB\u1EFC\u1EFD\u1EFE\u1EFF\u1F08\u1F
00\u1F09\u1F01\u1F0A\u1F02\u1F0B\u1F03\u1F0C\u1F04\u1F0D\u1F05\u1F0E\u1F06\u1F0F\u1F07\u1F18\u1F10\u1F19\u1F11\u1F1A\u1F12\u1F1B\u1F13\u1F1C\u1F14\u1F1D\u1F15\u1F28\u1F20\u1F29\u1F21\u1F2A\u1F22\u1F2B\u1F23\u1F2C\u1F24\u1F2D\u1F25\u1F2E\u1F26\u1F2F\u1F27\u1F38\u1F30\u1F39\u1F31\u1F3A\u1F32\u1F3B\u1F33\u1F3C\u1F34\u1F3D\u1F35\u1F3E\u1F36\u1F3F\u1F37\u1F48\u1F40\u1F49\u1F41\u1F4A\u1F42\u1F4B\u1F43\u1F4C\u1F44\u1F4D\u1F45\u1F59\u1F51\u1F5B\u1F53\u1F5D\u1F55\u1F5F\u1F57\u1F68\u1F60\u1F69\u1F61\u1F6A\u1F62\u1F6B\u1F63\u1F6C\u1F64\u1F6D\u1F65\u1F6E\u1F66\u1F6F\u1F67\u1F88\u1F80\u1F89\u1F81\u1F8A\u1F82\u1F8B\u1F83\u1F8C\u1F84\u1F8D\u1F85\u1F8E\u1F86\u1F8F\u1F87\u1F98\u1F90\u1F99\u1F91\u1F9A\u1F92\u1F9B\u1F93\u1F9C\u1F94\u1F9D\u1F95\u1F9E\u1F96\u1F9F\u1F97\u1FA8\u1FA0\u1FA9\u1FA1\u1FAA\u1FA2\u1FAB\u1FA3\u1FAC\u1FA4\u1FAD\u1FA5\u1FAE\u1FA6\u1FAF\u1FA7\u1FB8\u1FB0\u1FB9\u1FB1\u1FBA\u1F70\u1FBB\u1F71\u1FBC\u1FB3\u1FBE\u03B9\u1FC8\u1F72\u1FC9\u1F73\u1FCA\u1F74\u1FCB\u1F75\u1FCC\u1FC3\u1FD8\u1FD0\u1FD9\u1FD1\u1FDA\u1F76\u1FDB\u1F77\u1FE8\u1FE0\u1FE9\u1FE1\u1FEA\u1F7A\u1FEB\u1F7B\u1FEC\u1FE5\u1FF8\u1F78\u1FF9\u1F79\u1FFA\u1F7C\u1FFB\u1F7D\u1FFC\u1FF3\u2126\u03C9\u212A\u006B\u212B\u00E5\u2132\u214E\u2160\u2170\u2161\u2171\u2162\u2172\u2163\u2173\u2164\u2174\u2165\u2175\u2166\u2176\u2167\u2177\u2168\u2178\u2169\u2179\u216A\u217A\u216B\u217B\u216C\u217C\u216D\u217D\u216E\u217E\u216F\u217F\u2183\u2184\u24B6\u24D0\u24B7\u24D1\u24B8\u24D2\u24B9\u24D3\u24BA\u24D4\u24BB\u24D5\u24BC\u24D6\u24BD\u24D7\u24BE\u24D8\u24BF\u24D9\u24C0\u24DA\u24C1\u24DB\u24C2\u24DC\u24C3\u24DD\u24C4\u24DE\u24C5\u24DF\u24C6\u24E0\u24C7\u24E1\u24C8\u24E2\u24C9\u24E3\u24CA\u24E4\u24CB\u24E5\u24CC\u24E6\u24CD\u24E7\u24CE\u24E8\u24CF\u24E9\u2C00\u2C30\u2C01\u2C31\u2C02\u2C32\u2C03\u2C33\u2C04\u2C34\u2C05\u2C35\u2C06\u2C36\u2C07\u2C37\u2C08\u2C38\u2C09\u2C39\u2C0A\u2C3A\u2C0B\u2C3B\u2C0C\u2C3C\u2C0D\u2C3D\u2C0E\u2C3E\u2C0F\u2C3F\u2C10\u2C40\u2C11\u2C41\u2C12\u2C42\u2C13\u2C43\u2C14\u2C44\u2C15\u2C45\u2C16\u2C46\u2C17
\u2C47\u2C18\u2C48\u2C19\u2C49\u2C1A\u2C4A\u2C1B\u2C4B\u2C1C\u2C4C\u2C1D\u2C4D\u2C1E\u2C4E\u2C1F\u2C4F\u2C20\u2C50\u2C21\u2C51\u2C22\u2C52\u2C23\u2C53\u2C24\u2C54\u2C25\u2C55\u2C26\u2C56\u2C27\u2C57\u2C28\u2C58\u2C29\u2C59\u2C2A\u2C5A\u2C2B\u2C5B\u2C2C\u2C5C\u2C2D\u2C5D\u2C2E\u2C5E\u2C60\u2C61\u2C62\u026B\u2C63\u1D7D\u2C64\u027D\u2C67\u2C68\u2C69\u2C6A\u2C6B\u2C6C\u2C6D\u0251\u2C6E\u0271\u2C6F\u0250\u2C70\u0252\u2C72\u2C73\u2C75\u2C76\u2C7E\u023F\u2C7F\u0240\u2C80\u2C81\u2C82\u2C83\u2C84\u2C85\u2C86\u2C87\u2C88\u2C89\u2C8A\u2C8B\u2C8C\u2C8D\u2C8E\u2C8F\u2C90\u2C91\u2C92\u2C93\u2C94\u2C95\u2C96\u2C97\u2C98\u2C99\u2C9A\u2C9B\u2C9C\u2C9D\u2C9E\u2C9F\u2CA0\u2CA1\u2CA2\u2CA3\u2CA4\u2CA5\u2CA6\u2CA7\u2CA8\u2CA9\u2CAA\u2CAB\u2CAC\u2CAD\u2CAE\u2CAF\u2CB0\u2CB1\u2CB2\u2CB3\u2CB4\u2CB5\u2CB6\u2CB7\u2CB8\u2CB9\u2CBA\u2CBB\u2CBC\u2CBD\u2CBE\u2CBF\u2CC0\u2CC1\u2CC2\u2CC3\u2CC4\u2CC5\u2CC6\u2CC7\u2CC8\u2CC9\u2CCA\u2CCB\u2CCC\u2CCD\u2CCE\u2CCF\u2CD0\u2CD1\u2CD2\u2CD3\u2CD4\u2CD5\u2CD6\u2CD7\u2CD8\u2CD9\u2CDA\u2CDB\u2CDC\u2CDD\u2CDE\u2CDF\u2CE0\u2CE1\u2CE2\u2CE3\u2CEB\u2CEC\u2CED\u2CEE\u2CF2\u2CF3\uA640\uA641\uA642\uA643\uA644\uA645\uA646\uA647\uA648\uA649\uA64A\uA64B\uA64C\uA64D\uA64E\uA64F\uA650\uA651\uA652\uA653\uA654\uA655\uA656\uA657\uA658\uA659\uA65A\uA65B\uA65C\uA65D\uA65E\uA65F\uA660\uA661\uA662\uA663\uA664\uA665\uA666\uA667\uA668\uA669\uA66A\uA66B\uA66C\uA66D\uA680\uA681\uA682\uA683\uA684\uA685\uA686\uA687\uA688\uA689\uA68A\uA68B\uA68C\uA68D\uA68E\uA68F\uA690\uA691\uA692\uA693\uA694\uA695\uA696\uA697\uA698\uA699\uA69A\uA69B\uA722\uA723\uA724\uA725\uA726\uA727\uA728\uA729\uA72A\uA72B\uA72C\uA72D\uA72E\uA72F\uA732\uA733\uA734\uA735\uA736\uA737\uA738\uA739\uA73A\uA73B\uA73C\uA73D\uA73E\uA73F\uA740\uA741\uA742\uA743\uA744\uA745\uA746\uA747\uA748\uA749\uA74A\uA74B\uA74C\uA74D\uA74E\uA74F\uA750\uA751\uA752\uA753\uA754\uA755\uA756\uA757\uA758\uA759\uA75A\uA75B\uA75C\uA75D\uA75E\uA75F\uA760\uA761\uA762\uA763\uA764\uA765\uA766\uA767\uA768\uA769\uA76A\uA76B\uA76C\uA76D\uA76E\uA76F\u
A779\uA77A\uA77B\uA77C\uA77D\u1D79\uA77E\uA77F\uA780\uA781\uA782\uA783\uA784\uA785\uA786\uA787\uA78B\uA78C\uA78D\u0265\uA790\uA791\uA792\uA793\uA796\uA797\uA798\uA799\uA79A\uA79B\uA79C\uA79D\uA79E\uA79F\uA7A0\uA7A1\uA7A2\uA7A3\uA7A4\uA7A5\uA7A6\uA7A7\uA7A8\uA7A9\uA7AA\u0266\uA7AB\u025C\uA7AC\u0261\uA7AD\u026C\uA7B0\u029E\uA7B1\u0287\uA7B2\u029D\uA7B3\uAB53\uA7B4\uA7B5\uA7B6\uA7B7\uAB70\u13A0\uAB71\u13A1\uAB72\u13A2\uAB73\u13A3\uAB74\u13A4\uAB75\u13A5\uAB76\u13A6\uAB77\u13A7\uAB78\u13A8\uAB79\u13A9\uAB7A\u13AA\uAB7B\u13AB\uAB7C\u13AC\uAB7D\u13AD\uAB7E\u13AE\uAB7F\u13AF\uAB80\u13B0\uAB81\u13B1\uAB82\u13B2\uAB83\u13B3\uAB84\u13B4\uAB85\u13B5\uAB86\u13B6\uAB87\u13B7\uAB88\u13B8\uAB89\u13B9\uAB8A\u13BA\uAB8B\u13BB\uAB8C\u13BC\uAB8D\u13BD\uAB8E\u13BE\uAB8F\u13BF\uAB90\u13C0\uAB91\u13C1\uAB92\u13C2\uAB93\u13C3\uAB94\u13C4\uAB95\u13C5\uAB96\u13C6\uAB97\u13C7\uAB98\u13C8\uAB99\u13C9\uAB9A\u13CA\uAB9B\u13CB\uAB9C\u13CC\uAB9D\u13CD\uAB9E\u13CE\uAB9F\u13CF\uABA0\u13D0\uABA1\u13D1\uABA2\u13D2\uABA3\u13D3\uABA4\u13D4\uABA5\u13D5\uABA6\u13D6\uABA7\u13D7\uABA8\u13D8\uABA9\u13D9\uABAA\u13DA\uABAB\u13DB\uABAC\u13DC\uABAD\u13DD\uABAE\u13DE\uABAF\u13DF\uABB0\u13E0\uABB1\u13E1\uABB2\u13E2\uABB3\u13E3\uABB4\u13E4\uABB5\u13E5\uABB6\u13E6\uABB7\u13E7\uABB8\u13E8\uABB9\u13E9\uABBA\u13EA\uABBB\u13EB\uABBC\u13EC\uABBD\u13ED\uABBE\u13EE\uABBF\u13EF\uFF21\uFF41\uFF22\uFF42\uFF23\uFF43\uFF24\uFF44\uFF25\uFF45\uFF26\uFF46\uFF27\uFF47\uFF28\uFF48\uFF29\uFF49\uFF2A\uFF4A\uFF2B\uFF4B\uFF2C\uFF4C\uFF2D\uFF4D\uFF2E\uFF4E\uFF2F\uFF4F\uFF30\uFF50\uFF31\uFF51\uFF32\uFF52\uFF33\uFF53\uFF34\uFF54\uFF35\uFF55\uFF36\uFF56\uFF37\uFF57\uFF38\uFF58\uFF39\uFF59\uFF3A\uFF5A"; +} // class CaseFoldTable + +} + +/* +// The oneToOneMappings string has been generated with the following F# program, which +// extracts the (non-Turkic) 1-to-1 case folding mappings for chars below 0x10000 from +// http://www.unicode.org/Public/8.0.0/ucd/CaseFolding.txt + +open FParsec.Primitives +open FParsec.CharParsers + +(* +# 
CaseFolding-8.0.0.txt +# Date: 2015-01-13, 18:16:36 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2015 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Case Folding Properties +# +# This file is a supplement to the UnicodeData file. +# It provides a case folding mapping generated from the Unicode Character Database. +# If all characters are mapped according to the full mapping below, then +# case differences (according to UnicodeData.txt and SpecialCasing.txt) +# are eliminated. +# +# The data supports both implementations that require simple case foldings +# (where string lengths don't change), and implementations that allow full case folding +# (where string lengths may grow). Note that where they can be supported, the +# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. +# +# All code points not listed in this file map to themselves. +# +# NOTE: case folding does not preserve normalization formats! +# +# For information on case folding, including how to have case folding +# preserve normalization formats, see Section 3.13 Default Case Algorithms in +# The Unicode Standard. +# +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# <code>; <status>; <mapping>; # <name> +# +# The status field is: +# C: common case folding, common mappings shared by both simple and full mappings. +# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. 
+# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# Note that the Turkic mappings do not maintain canonical equivalence without additional processing. +# See the discussions of case mapping in the Unicode Standard for more information. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) +# +# ================================================================= + +# Property: Case_Folding + +# All code points not explicitly listed for Case_Folding +# have the value C for the status field, and the code point itself for the mapping field. + +# ================================================================= +*) +// continue txt file as string +let datastr = @"0041; C; 0061; # LATIN CAPITAL LETTER A +0042; C; 0062; # LATIN CAPITAL LETTER B +0043; C; 0063; # LATIN CAPITAL LETTER C +0044; C; 0064; # LATIN CAPITAL LETTER D +0045; C; 0065; # LATIN CAPITAL LETTER E +0046; C; 0066; # LATIN CAPITAL LETTER F +0047; C; 0067; # LATIN CAPITAL LETTER G +0048; C; 0068; # LATIN CAPITAL LETTER H +0049; C; 0069; # LATIN CAPITAL LETTER I +0049; T; 0131; # LATIN CAPITAL LETTER I +004A; C; 006A; # LATIN CAPITAL LETTER J +004B; C; 006B; # LATIN CAPITAL LETTER K +004C; C; 006C; # LATIN CAPITAL LETTER L +004D; C; 006D; # LATIN CAPITAL LETTER M +004E; C; 006E; # LATIN CAPITAL LETTER N +004F; C; 006F; # LATIN CAPITAL LETTER O +0050; C; 0070; # LATIN CAPITAL LETTER P +0051; C; 0071; # LATIN CAPITAL LETTER Q +0052; C; 0072; # LATIN CAPITAL LETTER R +0053; C; 0073; # LATIN CAPITAL LETTER S +0054; C; 0074; # LATIN CAPITAL LETTER T +0055; C; 0075; # LATIN CAPITAL LETTER U +0056; C; 0076; # LATIN CAPITAL LETTER V +0057; C; 0077; # LATIN CAPITAL LETTER W +0058; C; 0078; # LATIN CAPITAL LETTER X 
+0059; C; 0079; # LATIN CAPITAL LETTER Y +005A; C; 007A; # LATIN CAPITAL LETTER Z +00B5; C; 03BC; # MICRO SIGN +00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE +00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE +00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE +00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS +00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE +00C6; C; 00E6; # LATIN CAPITAL LETTER AE +00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA +00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE +00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE +00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS +00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE +00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE +00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS +00D0; C; 00F0; # LATIN CAPITAL LETTER ETH +00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE +00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE +00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE +00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE +00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS +00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE +00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE +00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE +00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS +00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE +00DE; C; 00FE; # LATIN CAPITAL LETTER THORN +00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S +0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON +0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE +0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK +0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE +0108; C; 0109; # LATIN CAPITAL LETTER C 
WITH CIRCUMFLEX +010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE +010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON +010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON +0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE +0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON +0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE +0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE +0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK +011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON +011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE +0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE +0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA +0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE +0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE +012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON +012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE +012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0132; C; 0133; # LATIN CAPITAL LIGATURE IJ +0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA +0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE +013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA +013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON +013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE +0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE +0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA +0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON +0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014A; C; 014B; # LATIN CAPITAL LETTER ENG +014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON +014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE +0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE 
ACUTE +0152; C; 0153; # LATIN CAPITAL LIGATURE OE +0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE +0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA +0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON +015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE +015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA +0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON +0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA +0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON +0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE +0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE +016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON +016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE +016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE +0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK +0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS +0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE +017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON +017F; C; 0073; # LATIN SMALL LETTER LONG S +0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK +0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR +0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX +0186; C; 0254; # LATIN CAPITAL LETTER OPEN O +0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK +0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D +018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK +018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR +018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E +018F; C; 0259; # LATIN CAPITAL LETTER SCHWA +0190; C; 025B; # LATIN CAPITAL LETTER OPEN E +0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK +0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK +0194; C; 0263; # LATIN CAPITAL LETTER GAMMA +0196; C; 0269; # LATIN 
CAPITAL LETTER IOTA +0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE +0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK +019C; C; 026F; # LATIN CAPITAL LETTER TURNED M +019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK +019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE +01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN +01A2; C; 01A3; # LATIN CAPITAL LETTER OI +01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK +01A6; C; 0280; # LATIN LETTER YR +01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO +01A9; C; 0283; # LATIN CAPITAL LETTER ESH +01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK +01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN +01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON +01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK +01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK +01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE +01B7; C; 0292; # LATIN CAPITAL LETTER EZH +01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED +01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE +01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON +01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7; C; 01C9; # LATIN CAPITAL LETTER LJ +01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA; C; 01CC; # LATIN CAPITAL LETTER NJ +01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON +01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON +01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON +01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON +01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND 
MACRON +01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON +01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE +01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON +01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON +01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK +01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON +01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON +01F1; C; 01F3; # LATIN CAPITAL LETTER DZ +01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE +01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR +01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN +01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE +01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE +01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW +021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW +021C; C; 021D; # LATIN CAPITAL LETTER YOGH +021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON +0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222; C; 0223; # LATIN CAPITAL LETTER OU +0224; C; 0225; # LATIN CAPITAL LETTER Z 
WITH HOOK +0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE +0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA +022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE +0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON +023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE +023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE +023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR +023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP +0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE +0244; C; 0289; # LATIN CAPITAL LETTER U BAR +0245; C; 028C; # LATIN CAPITAL LETTER TURNED V +0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE +0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE +024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE +024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE +0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI +0370; C; 0371; # GREEK CAPITAL LETTER HETA +0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI +0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F; C; 03F3; # GREEK CAPITAL LETTER YOT +0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS +0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS +0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS +038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS +038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS +038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS +038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS +0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA +0392; C; 03B2; # GREEK CAPITAL LETTER BETA +0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA +0394; C; 03B4; # GREEK CAPITAL 
LETTER DELTA +0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON +0396; C; 03B6; # GREEK CAPITAL LETTER ZETA +0397; C; 03B7; # GREEK CAPITAL LETTER ETA +0398; C; 03B8; # GREEK CAPITAL LETTER THETA +0399; C; 03B9; # GREEK CAPITAL LETTER IOTA +039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA +039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA +039C; C; 03BC; # GREEK CAPITAL LETTER MU +039D; C; 03BD; # GREEK CAPITAL LETTER NU +039E; C; 03BE; # GREEK CAPITAL LETTER XI +039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON +03A0; C; 03C0; # GREEK CAPITAL LETTER PI +03A1; C; 03C1; # GREEK CAPITAL LETTER RHO +03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA +03A4; C; 03C4; # GREEK CAPITAL LETTER TAU +03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON +03A6; C; 03C6; # GREEK CAPITAL LETTER PHI +03A7; C; 03C7; # GREEK CAPITAL LETTER CHI +03A8; C; 03C8; # GREEK CAPITAL LETTER PSI +03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA +03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA +03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL +03D0; C; 03B2; # GREEK BETA SYMBOL +03D1; C; 03B8; # GREEK THETA SYMBOL +03D5; C; 03C6; # GREEK PHI SYMBOL +03D6; C; 03C0; # GREEK PI SYMBOL +03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA +03DA; C; 03DB; # GREEK LETTER STIGMA +03DC; C; 03DD; # GREEK LETTER DIGAMMA +03DE; C; 03DF; # GREEK LETTER KOPPA +03E0; C; 03E1; # GREEK LETTER SAMPI +03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI +03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI +03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI +03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI +03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA +03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA +03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI +03F0; C; 03BA; # GREEK KAPPA SYMBOL +03F1; C; 03C1; # GREEK RHO SYMBOL +03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL +03F5; C; 03B5; # GREEK LUNATE EPSILON 
SYMBOL +03F7; C; 03F8; # GREEK CAPITAL LETTER SHO +03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL +03FA; C; 03FB; # GREEK CAPITAL LETTER SAN +03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL +03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL +03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401; C; 0451; # CYRILLIC CAPITAL LETTER IO +0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE +0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE +0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE +0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0407; C; 0457; # CYRILLIC CAPITAL LETTER YI +0408; C; 0458; # CYRILLIC CAPITAL LETTER JE +0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE +040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE +040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE +040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE +040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE +040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U +040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE +0410; C; 0430; # CYRILLIC CAPITAL LETTER A +0411; C; 0431; # CYRILLIC CAPITAL LETTER BE +0412; C; 0432; # CYRILLIC CAPITAL LETTER VE +0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE +0414; C; 0434; # CYRILLIC CAPITAL LETTER DE +0415; C; 0435; # CYRILLIC CAPITAL LETTER IE +0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE +0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE +0418; C; 0438; # CYRILLIC CAPITAL LETTER I +0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I +041A; C; 043A; # CYRILLIC CAPITAL LETTER KA +041B; C; 043B; # CYRILLIC CAPITAL LETTER EL +041C; C; 043C; # CYRILLIC CAPITAL LETTER EM +041D; C; 043D; # CYRILLIC CAPITAL LETTER EN +041E; C; 043E; # CYRILLIC CAPITAL LETTER O +041F; C; 043F; # CYRILLIC CAPITAL LETTER PE +0420; C; 0440; # CYRILLIC CAPITAL LETTER ER +0421; C; 0441; # CYRILLIC CAPITAL LETTER ES +0422; C; 0442; # CYRILLIC CAPITAL LETTER TE +0423; C; 0443; # 
CYRILLIC CAPITAL LETTER U +0424; C; 0444; # CYRILLIC CAPITAL LETTER EF +0425; C; 0445; # CYRILLIC CAPITAL LETTER HA +0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE +0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE +0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA +0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA +042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN +042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU +042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN +042D; C; 044D; # CYRILLIC CAPITAL LETTER E +042E; C; 044E; # CYRILLIC CAPITAL LETTER YU +042F; C; 044F; # CYRILLIC CAPITAL LETTER YA +0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA +0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT +0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E +0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS +0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS +046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI +0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI +0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA +0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA +0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478; C; 0479; # CYRILLIC CAPITAL LETTER UK +047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA +047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E; C; 047F; # CYRILLIC CAPITAL LETTER OT +0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA +048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK +0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH 
DESCENDER +049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA +04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE +04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U +04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE +04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA +04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA +04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE +04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE +04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA +04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN 
DZE +04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON +04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O +04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON +04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE +0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE +0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE +0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE +0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE +0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE +050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE +050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE +050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE +0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE +0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK +0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA +0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA +0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE +051A; C; 051B; # CYRILLIC CAPITAL LETTER QA +051C; C; 051D; # CYRILLIC CAPITAL LETTER WE +051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA +0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK 
+052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE +052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE +052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB +0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN +0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM +0534; C; 0564; # ARMENIAN CAPITAL LETTER DA +0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH +0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA +0537; C; 0567; # ARMENIAN CAPITAL LETTER EH +0538; C; 0568; # ARMENIAN CAPITAL LETTER ET +0539; C; 0569; # ARMENIAN CAPITAL LETTER TO +053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE +053B; C; 056B; # ARMENIAN CAPITAL LETTER INI +053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN +053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH +053E; C; 056E; # ARMENIAN CAPITAL LETTER CA +053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN +0540; C; 0570; # ARMENIAN CAPITAL LETTER HO +0541; C; 0571; # ARMENIAN CAPITAL LETTER JA +0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD +0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH +0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN +0545; C; 0575; # ARMENIAN CAPITAL LETTER YI +0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW +0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA +0548; C; 0578; # ARMENIAN CAPITAL LETTER VO +0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA +054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH +054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH +054C; C; 057C; # ARMENIAN CAPITAL LETTER RA +054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH +054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW +054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN +0550; C; 0580; # ARMENIAN CAPITAL LETTER REH +0551; C; 0581; # ARMENIAN CAPITAL LETTER CO +0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN +0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR +0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH +0555; C; 0585; # ARMENIAN CAPITAL LETTER OH +0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH +0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN +10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN 
+10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN +10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN +10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON +10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN +10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN +10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN +10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN +10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN +10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN +10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS +10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN +10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR +10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON +10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR +10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR +10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE +10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN +10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR +10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN +10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR +10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR +10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN +10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR +10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN +10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN +10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN +10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL +10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL +10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR +10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN +10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN +10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE +10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE +10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE +10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE +10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR +10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE +10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN +10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN +13F8; C; 13F0; # CHEROKEE SMALL LETTER YE +13F9; C; 13F1; # CHEROKEE SMALL LETTER YI +13FA; C; 13F2; # CHEROKEE SMALL LETTER YO +13FB; C; 13F3; # CHEROKEE SMALL LETTER YU +13FC; C; 13F4; # CHEROKEE SMALL 
LETTER YV +13FD; C; 13F5; # CHEROKEE SMALL LETTER MV +1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW +1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW +1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW +1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW +1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA +1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON +1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW +1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS +1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA +1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE +1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW +1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW +1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW +1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE +1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42; C; 1E43; # LATIN 
CAPITAL LETTER M WITH DOT BELOW +1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW +1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE +1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW +1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW +1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW +1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE +1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW +1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE +1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE +1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS +1E86; C; 
1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW +1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW +1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS +1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE +1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING +1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S +1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S +1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE +1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2; C; 1EC3; # 
LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE +1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE +1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP +1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI +1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA +1F0A; 
C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA +1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA +1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA +1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA +1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI +1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI +1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA +1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA +1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA +1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA +1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI +1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA +1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA +1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA +1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA +1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA +1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI +1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI +1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA +1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA +1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA +1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA +1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA +1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI +1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI +1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA +1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA +1F4B; C; 1F43; # 
GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA +1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA +1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI +1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI +1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI +1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA +1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA +1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA +1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA +1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA +1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI +1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA 
WITH PSILI AND PROSGEGRAMMENI +1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 
+1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA 
AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI +1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY +1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON +1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA +1FBB; C; 1F71; # GREEK CAPITAL LETTER 
ALPHA WITH OXIA +1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE; C; 03B9; # GREEK PROSGEGRAMMENI +1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI +1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA +1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA +1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA +1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA +1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY +1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON +1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA +1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA +1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI +1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI +1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY +1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON +1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA +1FEB; C; 1F7B; # GREEK 
CAPITAL LETTER UPSILON WITH OXIA +1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA +1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI +1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA +1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA +1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA +1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA +1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126; C; 03C9; # OHM SIGN +212A; C; 006B; # KELVIN SIGN +212B; C; 00E5; # ANGSTROM SIGN +2132; C; 214E; # TURNED CAPITAL F +2160; C; 2170; # ROMAN NUMERAL ONE +2161; C; 2171; # ROMAN NUMERAL TWO +2162; C; 2172; # ROMAN NUMERAL THREE +2163; C; 2173; # ROMAN NUMERAL FOUR +2164; C; 2174; # ROMAN NUMERAL FIVE +2165; C; 2175; # ROMAN NUMERAL SIX +2166; C; 2176; # ROMAN NUMERAL SEVEN +2167; C; 2177; # ROMAN NUMERAL EIGHT +2168; C; 2178; # ROMAN NUMERAL NINE +2169; C; 2179; # ROMAN NUMERAL TEN +216A; C; 217A; # ROMAN NUMERAL ELEVEN +216B; C; 217B; # ROMAN NUMERAL TWELVE +216C; C; 217C; # ROMAN NUMERAL FIFTY +216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED +216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED +216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND +2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED +24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A +24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B +24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C +24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D +24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E +24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F +24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G +24BD; C; 24D7; # CIRCLED LATIN CAPITAL 
LETTER H +24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I +24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J +24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K +24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L +24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M +24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N +24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O +24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P +24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q +24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R +24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S +24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T +24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U +24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V +24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W +24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X +24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y +24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z +2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU +2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY +2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE +2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI +2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO +2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU +2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE +2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO +2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA +2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE +2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE +2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I +2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI +2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO +2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE +2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE +2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI +2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU +2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI +2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI +2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO +2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO 
+2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU +2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU +2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU +2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU +2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE +2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA +2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI +2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI +2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA +2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU +2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI +2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI +2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA +2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU +2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS +2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL +2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO +2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS +2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS +2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS +2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA +2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA +2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC +2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A +2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE +2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE +2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL +2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER +2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER +2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA +2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK +2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A +2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA +2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK +2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H +2C7E; C; 023F; # 
LATIN CAPITAL LETTER S WITH SWASH TAIL +2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL +2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA +2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA +2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA +2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA +2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE +2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU +2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA +2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE +2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE +2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA +2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA +2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA +2C98; C; 2C99; # COPTIC CAPITAL LETTER MI +2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI +2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI +2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O +2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI +2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO +2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA +2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU +2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA +2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI +2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI +2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI +2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU +2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI +2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI +2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI +2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE; C; 2CCF; # COPTIC 
CAPITAL LETTER OLD COPTIC HA +2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA +2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA +A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO +A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE +A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA +A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV +A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER +A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU +A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A +A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS +A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN +A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE +A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE +A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL +A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM +A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O +A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O +A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680; 
C; A681; # CYRILLIC CAPITAL LETTER DWE +A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE +A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE +A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE +A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE +A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE +A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE +A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE +A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE +A694; C; A695; # CYRILLIC CAPITAL LETTER HWE +A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE +A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O +A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O +A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726; C; A727; # LATIN CAPITAL LETTER HENG +A728; C; A729; # LATIN CAPITAL LETTER TZ +A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO +A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO +A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732; C; A733; # LATIN CAPITAL LETTER AA +A734; C; A735; # LATIN CAPITAL LETTER AO +A736; C; A737; # LATIN CAPITAL LETTER AU +A738; C; A739; # LATIN CAPITAL LETTER AV +A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C; C; A73D; # LATIN CAPITAL LETTER AY +A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT +A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE +A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746; C; A747; # LATIN CAPITAL LETTER BROKEN L +A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP +A74E; C; A74F; # LATIN CAPITAL LETTER OO +A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH +A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL 
+A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA +A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA +A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760; C; A761; # LATIN CAPITAL LETTER VY +A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z +A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE +A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768; C; A769; # LATIN CAPITAL LETTER VEND +A76A; C; A76B; # LATIN CAPITAL LETTER ET +A76C; C; A76D; # LATIN CAPITAL LETTER IS +A76E; C; A76F; # LATIN CAPITAL LETTER CON +A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D +A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F +A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G +A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G +A780; C; A781; # LATIN CAPITAL LETTER TURNED L +A782; C; A783; # LATIN CAPITAL LETTER INSULAR R +A784; C; A785; # LATIN CAPITAL LETTER INSULAR S +A786; C; A787; # LATIN CAPITAL LETTER INSULAR T +A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO +A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H +A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER +A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR +A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH +A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE +A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE +A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE +A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE +A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK +A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E +A7AC; C; 0261; # LATIN CAPITAL LETTER 
SCRIPT G +A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT +A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K +A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T +A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL +A7B3; C; AB53; # LATIN CAPITAL LETTER CHI +A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA +A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA +AB70; C; 13A0; # CHEROKEE SMALL LETTER A +AB71; C; 13A1; # CHEROKEE SMALL LETTER E +AB72; C; 13A2; # CHEROKEE SMALL LETTER I +AB73; C; 13A3; # CHEROKEE SMALL LETTER O +AB74; C; 13A4; # CHEROKEE SMALL LETTER U +AB75; C; 13A5; # CHEROKEE SMALL LETTER V +AB76; C; 13A6; # CHEROKEE SMALL LETTER GA +AB77; C; 13A7; # CHEROKEE SMALL LETTER KA +AB78; C; 13A8; # CHEROKEE SMALL LETTER GE +AB79; C; 13A9; # CHEROKEE SMALL LETTER GI +AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO +AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU +AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV +AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA +AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE +AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI +AB80; C; 13B0; # CHEROKEE SMALL LETTER HO +AB81; C; 13B1; # CHEROKEE SMALL LETTER HU +AB82; C; 13B2; # CHEROKEE SMALL LETTER HV +AB83; C; 13B3; # CHEROKEE SMALL LETTER LA +AB84; C; 13B4; # CHEROKEE SMALL LETTER LE +AB85; C; 13B5; # CHEROKEE SMALL LETTER LI +AB86; C; 13B6; # CHEROKEE SMALL LETTER LO +AB87; C; 13B7; # CHEROKEE SMALL LETTER LU +AB88; C; 13B8; # CHEROKEE SMALL LETTER LV +AB89; C; 13B9; # CHEROKEE SMALL LETTER MA +AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME +AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI +AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO +AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU +AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA +AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA +AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH +AB91; C; 13C1; # CHEROKEE SMALL LETTER NE +AB92; C; 13C2; # CHEROKEE SMALL LETTER NI +AB93; C; 13C3; # CHEROKEE SMALL LETTER NO +AB94; C; 13C4; # CHEROKEE SMALL LETTER NU +AB95; C; 13C5; # CHEROKEE SMALL LETTER NV +AB96; C; 13C6; # 
CHEROKEE SMALL LETTER QUA +AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE +AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI +AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO +AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU +AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV +AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA +AB9D; C; 13CD; # CHEROKEE SMALL LETTER S +AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE +AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI +ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO +ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU +ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV +ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA +ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA +ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE +ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE +ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI +ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI +ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO +ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU +ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV +ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA +ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA +ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE +ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI +ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO +ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU +ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV +ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA +ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE +ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI +ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO +ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU +ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV +ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA +ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE +ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI +ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO +ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU +ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV +ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA +FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF +FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI +FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL +FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI 
+FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL +FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T +FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST +FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW +FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH +FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI +FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW +FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH +FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A +FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B +FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C +FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D +FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E +FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F +FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G +FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H +FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I +FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J +FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K +FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L +FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M +FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N +FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O +FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P +FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q +FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R +FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S +FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T +FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U +FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V +FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W +FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X +FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y +FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z +10400; C; 10428; # DESERET CAPITAL LETTER LONG I +10401; C; 10429; # DESERET CAPITAL LETTER LONG E +10402; C; 1042A; # DESERET CAPITAL LETTER LONG A +10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH +10404; C; 1042C; # DESERET CAPITAL LETTER 
LONG O +10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO +10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I +10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E +10408; C; 10430; # DESERET CAPITAL LETTER SHORT A +10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH +1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O +1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO +1040C; C; 10434; # DESERET CAPITAL LETTER AY +1040D; C; 10435; # DESERET CAPITAL LETTER OW +1040E; C; 10436; # DESERET CAPITAL LETTER WU +1040F; C; 10437; # DESERET CAPITAL LETTER YEE +10410; C; 10438; # DESERET CAPITAL LETTER H +10411; C; 10439; # DESERET CAPITAL LETTER PEE +10412; C; 1043A; # DESERET CAPITAL LETTER BEE +10413; C; 1043B; # DESERET CAPITAL LETTER TEE +10414; C; 1043C; # DESERET CAPITAL LETTER DEE +10415; C; 1043D; # DESERET CAPITAL LETTER CHEE +10416; C; 1043E; # DESERET CAPITAL LETTER JEE +10417; C; 1043F; # DESERET CAPITAL LETTER KAY +10418; C; 10440; # DESERET CAPITAL LETTER GAY +10419; C; 10441; # DESERET CAPITAL LETTER EF +1041A; C; 10442; # DESERET CAPITAL LETTER VEE +1041B; C; 10443; # DESERET CAPITAL LETTER ETH +1041C; C; 10444; # DESERET CAPITAL LETTER THEE +1041D; C; 10445; # DESERET CAPITAL LETTER ES +1041E; C; 10446; # DESERET CAPITAL LETTER ZEE +1041F; C; 10447; # DESERET CAPITAL LETTER ESH +10420; C; 10448; # DESERET CAPITAL LETTER ZHEE +10421; C; 10449; # DESERET CAPITAL LETTER ER +10422; C; 1044A; # DESERET CAPITAL LETTER EL +10423; C; 1044B; # DESERET CAPITAL LETTER EM +10424; C; 1044C; # DESERET CAPITAL LETTER EN +10425; C; 1044D; # DESERET CAPITAL LETTER ENG +10426; C; 1044E; # DESERET CAPITAL LETTER OI +10427; C; 1044F; # DESERET CAPITAL LETTER EW +10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A +10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA +10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB +10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB +10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC +10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC +10C86; C; 
10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS +10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED +10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND +10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E +10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E +10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE +10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF +10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG +10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY +10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH +10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I +10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II +10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ +10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK +10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK +10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK +10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL +10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY +10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM +10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN +10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY +10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O +10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO +10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE +10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE +10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE +10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP +10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP +10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER +10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER +10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES +10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ +10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET +10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT +10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY +10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH +10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U +10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL 
LETTER UU +10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE +10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE +10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV +10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ +10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS +10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN +10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US +118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA +118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A +118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI +118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU +118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA +118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO +118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II +118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU +118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E +118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O +118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG +118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA +118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO +118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY +118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ +118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC +118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN +118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD +118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE +118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG +118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA +118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT +118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM +118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU +118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU +118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO +118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO +118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR +118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR +118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU +118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII +118BF; C; 118DF; # 
WARANG CITI CAPITAL LETTER VIYO +" + +type CharMapping = + | CommonMapping of int + | SimpleMapping of int + | TurkishMapping of int + | FullMapping1 of int + | FullMapping2 of int*int + | FullMapping3 of int*int*int + +let hex2int c = (int c &&& 15) + (int c >>> 6)*9 // hex char to int + +let pCodePoint = + manyMinMaxSatisfyL 4 5 isHex "codepoint with 4-5 hex digits" + |>> fun s -> + let mutable n = 0 + for i = 0 to s.Length - 1 do + n <- n*16 + hex2int s.[i] + n + +let semi = skipString "; " +let space = skipChar ' ' + +let pCharMapping = + pipe3 pCodePoint (semi >>. anyChar) (semi >>. sepBy pCodePoint space .>> (semi >>. skipRestOfLine true)) + (fun fromChar c toChars -> + match c with + | 'C' -> match toChars with + | [n0] -> (fromChar, CommonMapping n0) + | 'S' -> match toChars with + | [n0] -> (fromChar, SimpleMapping n0) + | 'T' -> match toChars with + | [n0] -> (fromChar, TurkishMapping n0) + | 'F' -> match toChars with + | [n0] -> (fromChar, FullMapping1(n0)) + | [n0; n1] -> (fromChar, FullMapping2(n0, n1)) + | [n0; n1; n2] -> (fromChar, FullMapping3(n0, n1, n2))) + +let pAllMappings = many pCharMapping .>> eof + +let parseMappings() = + match run pAllMappings datastr with + | Success(xs, _,_) -> xs + | Failure(msg,_,_) -> failwith msg + +let getOneToOneMappings() = + parseMappings() + |> List.choose (function (src, CommonMapping(dst)) + | (src, SimpleMapping(dst)) + when src < 0xffff + -> Some (char src, char dst) + | _ -> None) + +let getOneToOneMappingsAsStrings() = + let pairs = getOneToOneMappings() + let sb = new System.Text.StringBuilder() + for c1, c2 in pairs do + let c1s, c2s = (int c1).ToString("X4"), (int c2).ToString("X4") + sb.Append("\u").Append(c1s).Append("\u").Append(c2s) |> ignore + sb.ToString() + + +let writeOneToOneMappingsToFile(path) = + use file = new System.IO.StreamWriter(path, false, System.Text.Encoding.UTF8) + let one2ones = getOneToOneMappingsAsStrings() + file.WriteLine(one2ones) + file.Close() + 
+writeOneToOneMappingsToFile(@"c:\temp\one2onemappings.txt")
+
+*/
diff --git a/src/FParsecCS/CharSet.cs b/src/FParsecCS/CharSet.cs
new file mode 100644
index 0000000..6dab748
--- /dev/null
+++ b/src/FParsecCS/CharSet.cs
@@ -0,0 +1,120 @@
+// Copyright (c) Stephan Tolksdorf 2008-2010
+// License: Simplified BSD License. See accompanying documentation.
+
+using System;
+using System.Diagnostics;
+
+namespace FParsec {
+
+#if !LOW_TRUST
+    unsafe
+#endif
+internal sealed class CharSet { // a set of UTF-16 chars: a bit table for one contiguous window of char values plus a string with the members outside of that window
+    private const int WordSize = 32; // number of bits per BitTable element
+    private const int Log2WordSize = 5; // off >> Log2WordSize is the BitTable index for the bit offset off
+
+    private int Min; // smallest char value in the set (0x10000 for the empty set)
+    private int Max; // largest char value in the set (-1 for the empty set)
+    private int BitTableMin; // the char value that corresponds to bit 0 of BitTable[0]
+    private int[] BitTable; // bit (c - BitTableMin) is set for every member c that lies within the table window
+    private string CharsNotInBitTable; // We use a string here instead of a char[] because the
+                                       // .NET JITs tend to produce better code for loops involving strings.
+
+    public CharSet(string chars) : this(chars, 32) {} // uses a bit table of at most 32 ints (1024 bits)
+    // because of mandatory bounds checking, we wouldn't get any advantage from a fixed size table
+
+    public CharSet(string chars, int maxTableSize) { // maxTableSize is the maximum BitTable length in ints; it is clamped to [4, 0x10000/WordSize] below
+        if (chars.Length == 0) { // empty set: Contains then misses the (empty) table and returns false because CharsNotInBitTable is null
+            BitTableMin = Min = 0x10000;
+            Max = -1;
+            BitTable = new int[0];
+            // charsNotInTable = null;
+            return;
+        }
+        if (maxTableSize < 4) maxTableSize = 4;
+        else if (maxTableSize > 0x10000/WordSize) maxTableSize = 0x10000/WordSize;
+        int maxTableBits = maxTableSize*WordSize;
+
+        char prevChar = chars[0];
+        Min = prevChar;
+        Max = prevChar;
+        BitTableMin = -1; // -1 == the table window has not been fixed yet
+        int bitTableMax = -1;
+        int nCharsNotInTable = 0;
+        for (int i = 1; i < chars.Length; ++i) { // first pass: determine Min/Max, fix the table window and count the chars outside of it
+            char c = chars[i];
+            if (c == prevChar) continue; // filter out repeated chars
+            prevChar = c;
+            int prevMin = Min;
+            if (c < Min) Min = c;
+            int prevMax = Max;
+            if (c > Max) Max = c;
+            if (BitTableMin < 0) {
+                // the first time the table range is exceeded the tableMin is set
+                if (Max - Min >= maxTableBits) {
+                    BitTableMin = prevMin; // stays fixed
+                    bitTableMax = prevMax; // will be updated later
+                    nCharsNotInTable = 1; // the current char c is the first one outside of the window
+                }
+            } else if (c < BitTableMin || c >= BitTableMin + maxTableBits) {
+                ++nCharsNotInTable;
+            } else {
+                bitTableMax = Math.Max(c, bitTableMax);
+            }
+        }
+        if (BitTableMin < 0) { // the window was never exceeded, so the table covers [Min, Max]
+            BitTableMin = Min;
+            bitTableMax = Max;
+        }
+        int tableSize = bitTableMax - BitTableMin + 1 < maxTableBits
+                        ? (bitTableMax - BitTableMin + 1)/WordSize + ((bitTableMax - BitTableMin + 1)%WordSize != 0 ? 1 : 0)
+                        : maxTableSize; // i.e. the number of ints needed for the used bit range, capped at maxTableSize
+        BitTable = new int[tableSize];
+
+        #if LOW_TRUST
+            var notInTable = nCharsNotInTable > 0 ? new char[nCharsNotInTable] : null;
+        #else
+            CharsNotInBitTable = nCharsNotInTable > 0 ? new string('\u0000', nCharsNotInTable) : "";
+            fixed (char* notInTable = CharsNotInBitTable) { // the freshly allocated string is filled in place through this pointer
+        #endif
+                prevChar = chars[0] != 'x' ? 'x' : 'y'; // any value different from chars[0], so that the first char is not skipped as a repetition
+                int n = 0;
+                for (int i = 0; i < chars.Length; ++i) { // second pass: set the table bits and collect the chars outside of the table
+                    char c = chars[i];
+                    if (c == prevChar) continue;
+                    prevChar = c;
+                    int off = c - BitTableMin;
+                    int idx = off >> Log2WordSize;
+                    if (unchecked((uint)idx) < (uint)BitTable.Length) { // the unsigned comparison also rejects a negative idx
+                        BitTable[idx] |= 1 << off; // we don't need to mask off because C#'s operator<< does that for us
+                    } else {
+                        notInTable[n++] = c;
+                    }
+                }
+                Debug.Assert(n == nCharsNotInTable);
+        #if !LOW_TRUST
+            }
+        #else
+            if (nCharsNotInTable > 0) CharsNotInBitTable = new string(notInTable);
+        #endif
+    }
+
+    public bool Contains(char value) { // returns true if value is a member of the set
+        int off = value - BitTableMin;
+        int idx = off >> Log2WordSize;
+        if (unchecked((uint)idx) < (uint)BitTable.Length) { // value lies within the table window
+            return ((BitTable[idx] >> off) & 1) != 0; // we don't need to mask off because C#'s operator>> does that for us
+        }
+        if (CharsNotInBitTable == null) return false;
+        if (value >= Min && value <= Max) { // only values within [Min, Max] are searched for in the overflow string
+            foreach (char c in CharsNotInBitTable) {
+                if (c == value) goto ReturnTrue;
+            }
+        }
+        return false;
+    ReturnTrue:
+        return true;
+    }
+}
+
+}
\ No newline at end of file
diff --git a/src/FParsecCS/CharStream.cs b/src/FParsecCS/CharStream.cs
new file mode 100644
index 0000000..05e163b
--- /dev/null
+++ b/src/FParsecCS/CharStream.cs
@@ -0,0 +1,3925 @@
+// Copyright (c) Stephan Tolksdorf 2007-2012
+// License: 
Simplified BSD License. See accompanying documentation.
+
+#if !LOW_TRUST
+
+using System;
+using System.IO;
+using System.Collections.Generic;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Diagnostics;
+using System.Reflection;
+using System.Runtime.Serialization;
+using System.Runtime.InteropServices;
+using System.Runtime.CompilerServices;
+
+using Microsoft.FSharp.Core;
+
+using FParsec.Cloning;
+
+namespace FParsec {
+
+/// An opaque representation of a CharStream index.
+public unsafe struct CharStreamIndexToken {
+#if DEBUG
+    internal readonly CharStream CharStream;
+    private long Index { get { return GetIndex(CharStream); } }
+#endif
+    internal readonly char* Ptr;
+    private readonly int BlockPlus1; // block number + 1, so that a zero-initialized token yields Block == -1
+    /// Returns -1 if the IndexToken was zero-initialized.
+    internal int Block { get { return unchecked(BlockPlus1 - 1); } }
+
+    internal CharStreamIndexToken(
+                              #if DEBUG
+                                  CharStream charStream,
+                              #endif
+                                  char* ptr,
+                                  int block)
+    {
+    #if DEBUG
+        CharStream = charStream;
+    #endif
+        Ptr = ptr;
+        BlockPlus1 = unchecked(block + 1);
+    }
+
+    [MethodImplAttribute(MethodImplOptions.NoInlining)] // keeps the throw out of line
+    private void ThrowInvalidIndexToken() {
+        throw new InvalidOperationException("The CharStreamIndexToken is invalid.");
+    }
+
+    public long GetIndex(CharStream charStreamFromWhichIndexTokenWasRetrieved) { // returns the stream index for this token; throws InvalidOperationException for a zero-initialized token
+        int block = Block;
+        if (block < 0) ThrowInvalidIndexToken(); // tests for a zero-initialized IndexToken
+    #if DEBUG
+        Debug.Assert(CharStream == charStreamFromWhichIndexTokenWasRetrieved);
+    #endif
+        return charStreamFromWhichIndexTokenWasRetrieved.GetIndex(Ptr, block);
+    }
+}
+
+public struct TwoChars : IEquatable<TwoChars> { // two UTF-16 chars packed into a single uint; the IEquatable type argument is required for the typed Equals(TwoChars) below
+    private uint Chars; // Char0 in the low 16 bits, Char1 in the high 16 bits
+
+    internal TwoChars(uint chars) {
+        Chars = chars;
+    }
+    public TwoChars(char char0, char char1) {
+        Chars = ((uint)char1 << 16) | (uint)char0;
+    }
+
+    public char Char0 { get { return unchecked((char)Chars); } } // the unchecked cast keeps only the low 16 bits
+    public char Char1 { get { return (char)(Chars >> 16); } }
+
+    public override bool Equals(object obj) { return (obj is TwoChars) && Chars == ((TwoChars) obj).Chars; }
+    public bool Equals(TwoChars other) { return Chars == other.Chars; }
+    public override int GetHashCode() { return unchecked((int)Chars); }
+    public static bool operator==(TwoChars left, TwoChars right) { return left.Chars == right.Chars; }
+    public static bool operator!=(TwoChars left, TwoChars right) { return left.Chars != right.Chars; }
+}
+
+/// Provides read‐access to a sequence of UTF‐16 chars.
+public unsafe class CharStream : IDisposable {
+
+    // In order to facilitate efficient backtracking we divide the stream into overlapping
+    // blocks with equal number of chars. The blocks are overlapping, so that
+    // backtracking over short distances at a block boundary doesn't trigger a reread of the
+    // previous block.
+    //
+    //              Block 0
+    //
+    // -----------------|--------  Block 1
+    //                   Overlap
+    //                   --------|--------|--------  Block 2
+    //                                     Overlap
+    //                                     --------|--------|--------
+    //                                                       (...)
+    // a '-' symbolizes a char, a '|' a block boundary.
+    //
+    //
+    // In general there's no fixed relationship between the number of input bytes and the
+    // number of input chars. Worse, the encoding can be stateful, which makes it necessary
+    // to persist the decoder state over block boundaries. 
If we later want to
+    // be able to reread a certain block, we therefore need to keep record of various
+    // bits of information describing the state of the input stream at the beginning of a block:
+
+    private class BlockInfo { // the per-block record described in the comment above
+        /// the byte stream index of the first char in the block after the OverhangCharsAtBlockBegin
+        public long ByteIndex;
+        /// the value of the CharStream's ByteBufferIndex before the block is read
+        public int ByteBufferIndex;
+
+        /// the number of bytes in the stream from ByteIndex to the first char after the OverhangCharsAfterOverlap
+        public int NumberOfBytesInOverlap;
+
+        /// the last char in the overlap with the previous block (used for integrity checking)
+        public char LastCharInOverlap;
+
+        /// chars at the block begin that were already read together with chars of the last block before the overlap
+        public string OverhangCharsAtBlockBegin;
+        /// chars after the overlap with the previous block that were already read together with the overlap chars
+        public string OverhangCharsAfterOverlap;
+
+        // Unfortunately the Decoder API has no explicit methods for managing the state,
+        // which forces us to use the comparatively inefficient serialization API
+        // (via FParsec.Cloning) for this purpose.
+        // The absence of explicit state management or at least a cloning method in the
+        // Decoder interface is almost as puzzling to me as the absence of such methods
+        // in System.Random.
+
+        public CloneImage DecoderImageAtBlockBegin; // decoder state image paired with OverhangCharsAtBlockBegin
+        public CloneImage DecoderImageAfterOverlap; // decoder state image paired with OverhangCharsAfterOverlap
+
+        public BlockInfo(long byteIndex, int byteBufferIndex,
+                         int nBytesInOverlapCount, char lastCharInOverlap,
+                         string overhangCharsAtBlockBegin, CloneImage decoderImageAtBlockBegin,
+                         string overhangCharsAfterOverlap, CloneImage decoderImageAfterOverlap)
+        { // plain field-by-field initialization; note that the parameter order differs from the field declaration order
+            ByteIndex = byteIndex;
+            ByteBufferIndex = byteBufferIndex;
+            NumberOfBytesInOverlap = nBytesInOverlapCount;
+            LastCharInOverlap = lastCharInOverlap;
+            OverhangCharsAtBlockBegin = overhangCharsAtBlockBegin;
+            OverhangCharsAfterOverlap = overhangCharsAfterOverlap;
+            DecoderImageAtBlockBegin = decoderImageAtBlockBegin;
+            DecoderImageAfterOverlap = decoderImageAfterOverlap;
+        }
+    }
+
+    private const int DefaultBlockSize = 3*(1 << 16); // 3*2^16 = 196608 (roughly 200k)
+    private const int DefaultByteBufferLength = (1 << 12); // 4096
+    private static int MinimumByteBufferLength = 128; // must be larger than longest detectable preamble (we can only guess here)
+    private const char EOS = '\uFFFF'; // exposed publicly as EndOfStreamChar
+
+    public const char EndOfStreamChar = EOS;
+
+    /// Points to the current char in Buffer,
+    /// or is null if the end of the stream has been reached.
+    internal char* Ptr;
+    /// Equals Ptr == null ? null : BufferBegin.
+    internal char* PtrBegin;
+    /// Equals Ptr == null ? null : BufferEnd.
+    internal char* PtrEnd;
+
+    /// Begin of the used part of the char buffer. Is constant. Is null if the CharStream is empty.
+    internal char* BufferBegin;
+    /// End of the used part of the char buffer. Varies for a multi-block stream. Is null if the CharStream is empty.
+    internal char* BufferEnd;
+
+    /// The block currently loaded in the buffer.
+    internal int Block;
+
+    /// Any CharStream method or property setter increments this value when it changes the CharStream state.
+    /// Backtracking to an old state also restores the old value of the StateTag.
+    public
+#if SMALL_STATETAG
+       int
+#else
+       long
+#endif
+       StateTag; // the field type is int if SMALL_STATETAG is defined, long otherwise
+
+    internal long IndexOfFirstCharInBlock; // stream index of the char at the begin of the currently loaded block
+
+    internal long _IndexOfFirstChar;
+    /// The index of the first char in the stream.
+    public long IndexOfFirstChar { get { return _IndexOfFirstChar; } }
+
+    internal long _Line;
+    /// The line number for the next char. (The line count starts with 1.)
+    public long Line { get { return _Line; } }
+    public void SetLine_WithoutCheckAndWithoutIncrementingTheStateTag(long line) { // writes _Line only; StateTag is left untouched
+        _Line = line;
+    }
+
+    internal long _LineBegin;
+    /// The stream index of the first char of the line that also contains the next char.
+    public long LineBegin { get { return _LineBegin; } }
+    public void SetLineBegin_WithoutCheckAndWithoutIncrementingTheStateTag(long lineBegin) { // writes _LineBegin only; StateTag is left untouched
+        _LineBegin = lineBegin;
+    }
+
+    /// The UTF‐16 column number of the next char, i.e. Index ‐ LineBegin + 1.
+    public long Column { get { return Index - LineBegin + 1; } }
+
+    internal string _Name;
+    public string Name {
+        get { return _Name; }
+        set { _Name = value; ++StateTag; } // renaming counts as a state change
+    }
+
+    /// The Encoding that is used for decoding the underlying byte stream, or
+    /// System.Text.UnicodeEncoding in case the stream was directly constructed
+    /// from a string or char buffer.
+    public Encoding Encoding { get; private set; }
+
+    // If the CharStream is constructed from a binary stream, we use a managed string as the char
+    // buffer. This allows us to apply regular expressions directly to the input.
+    // In the case of multi-block CharStreams we thus have to mutate the buffer string through pointers.
+    // This is safe as long as we use a newly constructed string and we don't pass a reference
+    // to the internal buffer string to the "outside world". (The one instance where we have to pass
+    // a reference to the buffer string is regex matching. See the docs for Match(regex) for more info.)
+    //
+    // Apart from Match(regex) we access the internal buffer only through a pinned pointer.
+    // This way we avoid the overhead of redundant bounds checking and can support strings, char arrays
+    // and unmanaged char buffers through the same interface.
+    //
+    // Pinning a string or char array makes life more difficult for the GC. However, as long as
+    // the buffer is only short-lived or large enough to be allocated on the large object heap,
+    // there shouldn't be a problem. Furthermore, the buffer strings for CharStreams constructed
+    // from a binary stream are allocated through the StringBuffer interface and hence always live
+    // on the large object heap. Thus, the only scenario to really worry about (and which the
+    // documentation explicitly warns about) is when a large number of small CharStreams
+    // are constructed directly from strings or char arrays and are used for an extended period of time.
+
+    /// The string holding the char buffer, or null if the buffer is not part of a .NET string.
+    internal string BufferString;
+    /// A pointer to the beginning of BufferString, or null if BufferString is null.
+    internal char* BufferStringPointer;
+
+    /// Holds the GCHandle for CharStreams directly constructed from strings or char arrays.
+    private GCHandle BufferHandle;
+    /// Holds the StringBuffer for CharStreams constructed from a binary stream.
+    private StringBuffer StringBuffer;
+
+#if DEBUG
+    internal FSharpRef SubstreamCount = new FSharpRef(0); // NOTE(review): FSharpRef is a generic type in FSharp.Core; its type argument (an integral type, judging by the literal 0) appears to have been lost here - restore it from the upstream FParsec source
+    internal FSharpRef ParentSubstreamCount = null; // NOTE(review): same missing type argument as on the line above
+#endif
+
+    private MultiBlockData BlockData; // null for single-block streams
+    internal bool IsSingleBlockStream { get { return BlockData == null; } }
+
+    /// Contains the data and methods needed in case the input byte stream
+    /// is large enough to span multiple blocks of the CharStream.
+    private partial class MultiBlockData {
+        public CharStream CharStream;
+
+        public long IndexOfLastCharPlus1;
+
+        /// The index of the last block of the stream, or Int32.MaxValue if the end of stream has not yet been detected.
+        public int LastBlock;
+
+        public Stream Stream;
+        // we keep a separate record of the Stream.Position, so that we don't need to require Stream.CanSeek
+        public long StreamPosition;
+        // we use StreamLength to avoid calling Read() again on a non-seekable stream after it returned 0 once (see ticket #23)
+        public long StreamLength;
+        public bool LeaveOpen;
+
+        public int MaxCharCountForOneByte;
+        public Decoder Decoder;
+        public bool DecoderIsSerializable;
+
+        public int BlockSize;
+        public int BlockOverlap;
+        /// BufferBegin + BlockSize - minRegexSpace
+        public char* RegexSpaceThreshold;
+
+        /// The byte stream index of the first unused byte in the ByteBuffer.
+        public long ByteIndex { get { return StreamPosition - (ByteBufferCount - ByteBufferIndex); } }
+
+        public List<BlockInfo> Blocks; // the BlockInfo records of the stream (the element type is required for Blocks.Add(new BlockInfo(...)) in the stream constructor)
+
+        public byte[] ByteBuffer;
+        public int ByteBufferIndex; // index of the first unused byte in ByteBuffer
+        public int ByteBufferCount; // end index of the filled part of ByteBuffer
+    }
+
+    public long IndexOfLastCharPlus1 { get { // for a single-block stream the value is derived from the buffer bounds
+        return BlockData != null ? BlockData.IndexOfLastCharPlus1
+                                 : IndexOfFirstChar + Buffer.PositiveDistance(BufferBegin, BufferEnd);
+    } }
+
+    public int BlockOverlap { get { // 0 for a single-block stream
+        return BlockData == null ? 0 : BlockData.BlockOverlap;
+    } }
+
+    public int MinRegexSpace {
+        get { // 0 for a single-block stream, otherwise the distance from RegexSpaceThreshold to BufferBegin + BlockSize
+            return BlockData == null
+                   ? 0
+                   : (int)Buffer.PositiveDistance(BlockData.RegexSpaceThreshold,
+                                                  BufferBegin + BlockData.BlockSize);
+        }
+        set { // silently ignored for a single-block stream
+            if (BlockData != null) {
+                if (value < 0 || value > BlockData.BlockOverlap) throw new ArgumentOutOfRangeException("value", "The MinRegexSpace value must be non-negative and not greater than the BlockOverlap.");
+                BlockData.RegexSpaceThreshold = BufferBegin + BlockData.BlockSize - value;
+            }
+        }
+    }
+
+    public bool IsBeginOfStream { get { return Ptr == BufferBegin && Block == 0; } }
+    public bool IsEndOfStream { get { return Ptr == null; } }
+
+
+    public long Index { // the stream index of the current char, or IndexOfLastCharPlus1 at the end of the stream
+    #if AGGRESSIVE_INLINING
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    #endif
+        get {
+            if (Ptr != null) {
+                Debug.Assert(BufferBegin <= Ptr && Ptr < BufferEnd);
+                if (sizeof(System.IntPtr) != 8) // the JIT removes the inactive branch
+                    return Buffer.PositiveDistance(PtrBegin, Ptr) + IndexOfFirstCharInBlock;
+                else
+                    return Buffer.PositiveDistance64(PtrBegin, Ptr) + IndexOfFirstCharInBlock;
+            }
+            Debug.Assert(BlockData == null || BlockData.IndexOfLastCharPlus1 != Int64.MaxValue);
+            return IndexOfLastCharPlus1;
+        }
+    }
+
+    internal long GetIndex(char* ptr, int block) { // the stream index for a buffer pointer that was recorded while the given block was loaded
+        if (ptr != null) {
+            if (block == Block) {
+                Debug.Assert(BufferBegin <= ptr && ptr < BufferEnd);
+                if (sizeof(System.IntPtr) != 8)
+                    return Buffer.PositiveDistance(BufferBegin, ptr) + IndexOfFirstCharInBlock;
+                else
+                    return Buffer.PositiveDistance64(BufferBegin, ptr) + IndexOfFirstCharInBlock;
+            } else { // the block is not the currently loaded one, so its first index is computed from the block number
+                Debug.Assert(BlockData != null && BufferBegin <= ptr && ptr < BufferBegin + BlockData.BlockSize);
+                int blockSizeMinusOverlap = BlockData.BlockSize - BlockData.BlockOverlap;
+                long indexOfBlockBegin = IndexOfFirstChar + Math.BigMul(block, blockSizeMinusOverlap);
+                if (sizeof(System.IntPtr) != 8)
+                    return Buffer.PositiveDistance(BufferBegin, ptr) + indexOfBlockBegin;
+                else
+                    return Buffer.PositiveDistance64(BufferBegin, ptr) + indexOfBlockBegin;
+            }
+        }
+        Debug.Assert(BlockData == null || 
BlockData.IndexOfLastCharPlus1 != Int64.MaxValue);
+        return IndexOfLastCharPlus1;
+    }
+
+    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
+    public Position Position { get { // Index is read only once and reused for the column computation
+        long index = Index;
+        return new Position(_Name, index, Line, index - LineBegin + 1);
+    } }
+
+    // we don't have a public constructor that only takes a string to avoid potential confusion with a filepath constructor
+    internal CharStream(string chars) {
+        Debug.Assert(chars != null);
+        BufferString = chars;
+        BufferHandle = GCHandle.Alloc(chars, GCHandleType.Pinned); // freed again in Dispose
+        char* bufferBegin = (char*)BufferHandle.AddrOfPinnedObject();
+        BufferStringPointer = bufferBegin;
+        CharConstructorContinue(bufferBegin, chars.Length);
+    }
+
+    public CharStream(string chars, int index, int length) : this(chars, index, length, 0) {}
+    public CharStream(string chars, int index, int length, long streamIndexOffset) { // streamIndexOffset becomes the stream index of the first char
+        if (chars == null) throw new ArgumentNullException("chars");
+        if (index < 0) throw new ArgumentOutOfRangeException("index", "index is negative.");
+        if (length < 0 || length > chars.Length - index) throw new ArgumentOutOfRangeException("length", "index or length is out of range.");
+        if (streamIndexOffset < 0 || streamIndexOffset >= (1L << 60)) throw new ArgumentOutOfRangeException("streamIndexOffset", "streamIndexOffset must be non-negative and less than 2^60.");
+        IndexOfFirstCharInBlock = streamIndexOffset;
+        _IndexOfFirstChar = streamIndexOffset;
+        _LineBegin = streamIndexOffset;
+
+        BufferString = chars;
+        BufferHandle = GCHandle.Alloc(chars, GCHandleType.Pinned); // allocated only after all argument checks have passed
+        char* pBufferString = (char*)BufferHandle.AddrOfPinnedObject();
+        BufferStringPointer = pBufferString;
+        CharConstructorContinue(pBufferString + index, length);
+    }
+
+    public CharStream(char[] chars, int index, int length) : this(chars, index, length, 0) { }
+    public CharStream(char[] chars, int index, int length, long streamIndexOffset) { // BufferString stays null here because the buffer is not a .NET string
+        if (chars == null) throw new ArgumentNullException("chars");
+        if (index < 0) throw new ArgumentOutOfRangeException("index", "index is negative.");
+        if (length < 0 || length > chars.Length - index) throw new ArgumentOutOfRangeException("length", "index or length is out of range.");
+        if (streamIndexOffset < 0 || streamIndexOffset >= (1L << 60)) throw new ArgumentOutOfRangeException("streamIndexOffset", "streamIndexOffset must be non-negative and less than 2^60.");
+        IndexOfFirstCharInBlock = streamIndexOffset;
+        _IndexOfFirstChar = streamIndexOffset;
+        _LineBegin = streamIndexOffset;
+
+        BufferHandle = GCHandle.Alloc(chars, GCHandleType.Pinned);
+        char* bufferBegin = (char*)BufferHandle.AddrOfPinnedObject() + index;
+        if (bufferBegin < unchecked(bufferBegin + length + 1)) { // a pedantic check ...
+            CharConstructorContinue(bufferBegin, length);
+        } else {
+            // ... for a purely theoretic case
+            BufferHandle.Free(); // the handle is released before throwing so that the array does not stay pinned
+            throw new ArgumentOutOfRangeException("length", "The char array may not be allocated directly below the end of the address space.");
+        }
+
+    }
+
+    public CharStream(char* chars, int length) : this(chars, length, 0) {}
+    public CharStream(char* chars, int length, long streamIndexOffset) { // no GCHandle is allocated for a raw char pointer
+        if (chars == null) throw new ArgumentNullException("chars");
+        if (length < 0) throw new ArgumentOutOfRangeException("length", "length is negative.");
+        if (chars >= unchecked(chars + length + 1)) // chars + length + 1 must not overflow (the + 1 is needed for some methods below)
+            throw new ArgumentOutOfRangeException("length", "length is too large.");
+        if (streamIndexOffset < 0 || streamIndexOffset >= (1L << 60)) throw new ArgumentOutOfRangeException("streamIndexOffset", "streamIndexOffset must be non-negative and less than 2^60.");
+        IndexOfFirstCharInBlock = streamIndexOffset;
+        _IndexOfFirstChar = streamIndexOffset;
+        _LineBegin = streamIndexOffset;
+        CharConstructorContinue(chars, length);
+    }
+
+    private void CharConstructorContinue(char* bufferBegin, int length) {
+        Debug.Assert((bufferBegin != null || length == 0) && length >= 0
+                     && 
bufferBegin < unchecked(bufferBegin + length + 1)); // the + 1 is needed for some methods below
+
+        if (length != 0) { // for an empty input all buffer pointers stay null
+            BufferBegin = bufferBegin;
+            BufferEnd = bufferBegin + length;
+            Ptr = bufferBegin;
+            PtrBegin = bufferBegin;
+            PtrEnd = BufferEnd;
+        }
+        _Line = 1;
+        Encoding = Encoding.Unicode;
+    }
+
+    internal CharStream(string chars, char* pChars, char* begin, int length) { // takes an already obtained pointer into chars; no GCHandle is allocated here
+        Debug.Assert((chars == null ? pChars == null
+                                    : pChars <= begin && length >= 0 && (int)Buffer.PositiveDistance(pChars, begin) <= chars.Length - length)
+                     && (begin == null ? length == 0
+                                       : length >= 0 && begin < unchecked(begin + length + 1)));
+
+        BufferString = chars;
+        BufferStringPointer = pChars;
+        if (length != 0) { // for an empty input all buffer pointers stay null
+            BufferBegin = begin;
+            BufferEnd = begin + length;
+            Ptr = begin;
+            PtrBegin = begin;
+            PtrEnd = BufferEnd;
+        }
+        _Line = 1;
+        Encoding = Encoding.Unicode;
+    }
+
+    public CharStream(string path, Encoding encoding)
+           : this(path, encoding, true,
+                  DefaultBlockSize, DefaultBlockSize/3, DefaultByteBufferLength) { }
+
+    public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
+           : this(path, encoding, detectEncodingFromByteOrderMarks,
+                  DefaultBlockSize, DefaultBlockSize/3, DefaultByteBufferLength) { }
+
+    public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks,
+                      int blockSize, int blockOverlap, int byteBufferLength)
+    {
+        if (encoding == null) throw new ArgumentNullException("encoding");
+        var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan);
+        try {
+            StreamConstructorContinue(stream, false, encoding, detectEncodingFromByteOrderMarks,
+                                      blockSize, blockOverlap, byteBufferLength); // leaveOpen == false
+            _Name = path;
+        } catch { // the file stream is disposed if the initialization fails
+            stream.Dispose();
+            throw;
+        }
+    }
+
+    public CharStream(Stream stream, Encoding encoding)
+           : this(stream,
+                  false, encoding, true,
+                  DefaultBlockSize, DefaultBlockSize/3, DefaultByteBufferLength) { }
+
+    public CharStream(Stream stream, bool leaveOpen, Encoding encoding)
+           : this(stream,
+                  leaveOpen, encoding, true,
+                  DefaultBlockSize, DefaultBlockSize/3, DefaultByteBufferLength) { }
+
+    public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks)
+           : this(stream,
+                  leaveOpen, encoding, detectEncodingFromByteOrderMarks,
+                  DefaultBlockSize, DefaultBlockSize/3, DefaultByteBufferLength) { }
+
+    public CharStream(Stream stream, bool leaveOpen,
+                      Encoding encoding, bool detectEncodingFromByteOrderMarks,
+                      int blockSize, int blockOverlap, int byteBufferLength)
+    {
+        if (stream == null) throw new ArgumentNullException("stream");
+        if (!stream.CanRead) throw new ArgumentException("stream is not readable");
+        if (encoding == null) throw new ArgumentNullException("encoding");
+        StreamConstructorContinue(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks,
+                                  blockSize, blockOverlap, byteBufferLength);
+    }
+
+    /// we modify this flag via reflection in the unit test
+    private static bool DoNotRoundUpBlockSizeToSimplifyTesting = false;
+
+    private void StreamConstructorContinue(Stream stream, bool leaveOpen,
+                                           Encoding encoding, bool detectEncodingFromByteOrderMarks,
+                                           int blockSize, int blockOverlap, int byteBufferLength)
+    {
+        if (byteBufferLength < MinimumByteBufferLength) byteBufferLength = MinimumByteBufferLength;
+
+        int remainingBytesCount = -1; // stays negative while the number of remaining bytes is unknown
+        long streamPosition;
+        long streamLength;
+        if (stream.CanSeek) {
+            streamPosition = stream.Position;
+            streamLength = stream.Length;
+            long remainingBytesCount64 = streamLength - streamPosition;
+            if (remainingBytesCount64 <= Int32.MaxValue) {
+                remainingBytesCount = (int)remainingBytesCount64;
+                if (remainingBytesCount < byteBufferLength) byteBufferLength = remainingBytesCount; // the byte buffer does not need to be larger than the remaining input
+            }
+        } else {
+            streamPosition = 0;
+            streamLength = Int64.MaxValue; // unknown until Read returns 0
+        }
+
+        byte[] byteBuffer = new byte[byteBufferLength];
+        int byteBufferCount = 0;
+        do {
+            int n = stream.Read(byteBuffer, byteBufferCount, 
byteBufferLength - byteBufferCount);
+            if (n == 0) { // Read returned 0, so the number of remaining bytes and the stream length are now known
+                remainingBytesCount = byteBufferCount;
+                Debug.Assert(!stream.CanSeek || streamPosition + byteBufferCount == streamLength);
+                streamLength = streamPosition + byteBufferCount;
+                break;
+            }
+            byteBufferCount += n;
+        } while (byteBufferCount < MinimumByteBufferLength);
+        streamPosition += byteBufferCount;
+
+        int preambleLength = Text.DetectPreamble(byteBuffer, byteBufferCount, ref encoding, detectEncodingFromByteOrderMarks); // encoding is passed by ref and may be replaced
+        remainingBytesCount -= preambleLength;
+
+        _Line = 1;
+        Encoding = encoding;
+
+        // we allow such small block sizes only to simplify testing
+        if (blockSize < 8) blockSize = DefaultBlockSize;
+
+        bool allCharsFitIntoOneBlock = false;
+        if (remainingBytesCount >= 0 && remainingBytesCount/4 <= blockSize) {
+            if (remainingBytesCount != 0) {
+                try {
+                    int maxCharCount = Encoding.GetMaxCharCount(remainingBytesCount); // may throw ArgumentOutOfRangeException
+                    if (blockSize >= maxCharCount) {
+                        allCharsFitIntoOneBlock = true;
+                        blockSize = maxCharCount;
+                    }
+                } catch (ArgumentOutOfRangeException) { } // allCharsFitIntoOneBlock then stays false
+            } else {
+                allCharsFitIntoOneBlock = true;
+                blockSize = 0;
+            }
+        }
+        var buffer = StringBuffer.Create(blockSize);
+        Debug.Assert(buffer.Length >= blockSize && (blockSize > 0 || buffer.StringPointer == null));
+        StringBuffer = buffer;
+        BufferString = buffer.String;
+        BufferStringPointer = buffer.StringPointer;
+        char* bufferBegin = buffer.StringPointer + buffer.Index;
+        try {
+            Decoder decoder = encoding.GetDecoder();
+            if (allCharsFitIntoOneBlock) {
+                int bufferCount = preambleLength == byteBufferCount
+                                  ? 0
+                                  : Text.ReadAllRemainingCharsFromStream(bufferBegin, buffer.Length, byteBuffer, preambleLength, byteBufferCount, stream, streamPosition, decoder, streamPosition == streamLength);
+                if (!leaveOpen) stream.Close();
+                if (bufferCount != 0) { // for an empty input all buffer pointers stay null
+                    BufferBegin = bufferBegin;
+                    Ptr = bufferBegin;
+                    PtrBegin = bufferBegin;
+                    BufferEnd = bufferBegin + bufferCount;
+                    PtrEnd = BufferEnd;
+                }
+                Block = 0;
+            } else {
+                if (!DoNotRoundUpBlockSizeToSimplifyTesting) blockSize = buffer.Length;
+                BufferBegin = bufferBegin;
+                BufferEnd = bufferBegin;
+                var d = new MultiBlockData();
+                BlockData = d;
+                d.CharStream = this;
+                d.Stream = stream;
+                d.StreamPosition = streamPosition;
+                d.StreamLength = streamLength;
+                d.LeaveOpen = leaveOpen;
+                d.Decoder = decoder;
+                d.DecoderIsSerializable = decoder.GetType().IsSerializable;
+                d.ByteBuffer = byteBuffer;
+                d.ByteBufferIndex = preambleLength; // the preamble bytes are skipped
+                d.ByteBufferCount = byteBufferCount;
+                d.MaxCharCountForOneByte = Math.Max(1, Encoding.GetMaxCharCount(1));
+                if (d.MaxCharCountForOneByte > 1024) // an arbitrary limit low enough that a char array with this size can be allocated on the stack
+                    throw new ArgumentException("The CharStream class does not support Encodings with GetMaxCharCount(1) > 1024.");
+                if (blockSize < 3*d.MaxCharCountForOneByte) blockSize = 3*d.MaxCharCountForOneByte;
+                // MaxCharCountForOneByte == the maximum number of overhang chars
+                if(    Math.Min(blockOverlap, blockSize - 2*blockOverlap) < d.MaxCharCountForOneByte
+                    || blockOverlap >= blockSize/2) blockOverlap = blockSize/3; // an unsuitable overlap is replaced by a third of the block size
+                d.BlockSize = blockSize;
+                d.BlockOverlap = blockOverlap;
+                d.RegexSpaceThreshold = bufferBegin + (blockSize - 2*blockOverlap/3);
+                d.IndexOfLastCharPlus1 = Int64.MaxValue; // not yet known
+                Block = -2; // special value recognized by ReadBlock
+                d.LastBlock = Int32.MaxValue; // the end of the stream has not been detected yet
+                d.Blocks = new List<BlockInfo>(); // the element type is required for the Add(new BlockInfo(...)) call below
+                // the first block has no overlap with a previous block
+                d.Blocks.Add(new BlockInfo(preambleLength, preambleLength, 0, EOS, null, null, null, null));
+                d.ReadBlock(0);
+
if (d.LastBlock == 0) { + if (!d.LeaveOpen) d.Stream.Close(); + BlockData = null; + } + } + } catch { + buffer.Dispose(); + throw; + } + } + + public void Dispose() { + #if DEBUG + lock (SubstreamCount) { + if (SubstreamCount.Value != 0) + throw new InvalidOperationException("A CharStream must not be disposed before all of its Substreams have been disposed."); + } + if (ParentSubstreamCount != null) { + lock (ParentSubstreamCount) --ParentSubstreamCount.Value; + } + #endif + if (BufferHandle.IsAllocated) BufferHandle.Free(); + if (StringBuffer != null) StringBuffer.Dispose(); + if (BlockData != null && !BlockData.LeaveOpen) BlockData.Stream.Close(); + Ptr = null; + PtrBegin = null; + PtrEnd = null; + BufferBegin = null; + BufferEnd = null; + } + + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Reliability", "CA2000:Dispose objects before losing scope", Justification="The CharStream is manually disposed.")] + public static T ParseString(string chars, int index, int length, + FSharpFunc,T> parser, + TUserState userState, + string streamName) + { + if (index < 0) throw new ArgumentOutOfRangeException("index", "index is negative."); + if (length < 0 || length > chars.Length - index) throw new ArgumentOutOfRangeException("length", "length is out of range."); + fixed (char* pChars = chars) { + var stream = new CharStream(chars, pChars, pChars + index, length); + stream.UserState = userState; + stream._Name = streamName; + try { + return parser.Invoke(stream); + } finally { + #if DEBUG + stream.Dispose(); + #else + // manually dispose stream + stream.Ptr = null; + stream.PtrBegin = null; + stream.PtrEnd = null; + stream.BufferBegin = null; + stream.BufferEnd = null; + #endif + } + } + } + + private partial class MultiBlockData { + /// Refills the ByteBuffer if no unused byte is remaining. + /// Returns the number of unused bytes in the (refilled) ByteBuffer. 
+        private int FillByteBuffer() {
+            // Bytes between the read cursor and the fill mark have not been consumed yet.
+            int unusedBytes = ByteBufferCount - ByteBufferIndex;
+            return unusedBytes > 0 ? unusedBytes : ClearAndRefillByteBuffer(0);
+        }
+
+        /// Reloads the ByteBuffer from the underlying stream, storing new bytes from the
+        /// given index onwards until the buffer is full or the stream has no more bytes.
+        /// Returns the number of newly read bytes, i.e. the bytes available for consumption.
+        private int ClearAndRefillByteBuffer(int byteBufferIndex) {
+            Debug.Assert(byteBufferIndex >= 0 && byteBufferIndex <= ByteBuffer.Length);
+            // A single Stream.Read call may deliver fewer bytes than requested, so we keep
+            // reading until the buffer is full or the stream is exhausted. Code that skips
+            // a number of input bytes relies on the buffer being completely filled
+            // whenever the stream has enough bytes left.
+            int writeIndex = byteBufferIndex;
+            int freeBytes = ByteBuffer.Length - byteBufferIndex;
+            // Comparing StreamPosition with StreamLength avoids calling Read again
+            // after it already returned 0 at the end of the stream (see ticket #23).
+            while (freeBytes != 0 && StreamPosition != StreamLength) {
+                int bytesRead = Stream.Read(ByteBuffer, writeIndex, freeBytes);
+                if (bytesRead == 0) {
+                    // End of stream: record the actual length so that later calls stop immediately.
+                    Debug.Assert(!Stream.CanSeek || StreamPosition == StreamLength);
+                    StreamLength = StreamPosition;
+                    break;
+                }
+                writeIndex += bytesRead;
+                freeBytes -= bytesRead;
+                StreamPosition += bytesRead;
+            }
+            ByteBufferIndex = byteBufferIndex;
+            ByteBufferCount = writeIndex;
+            return writeIndex - byteBufferIndex;
+        }
+
+        /// Reads up to the given maximum number of chars into the given buffer.
+        /// If more than the maximum number of chars have to be read from the stream in order to
+        /// fill the buffer (due to the way the Decoder API works), the overhang chars are
+        /// returned through the output parameter.
+        /// Returns a pointer to one char after the last char read.
+        private char* ReadCharsFromStream(char* buffer, int maxCount, out string overhangChars) {
+            Debug.Assert(maxCount >= 0);
+            fixed (byte* byteBuffer = ByteBuffer) { // pin the byte array so that Decoder.Convert can be given a raw pointer into it
+                overhangChars = null;
+                try {
+                    while (maxCount >= MaxCharCountForOneByte) {// if maxCount < MaxCharCountForOneByte, Convert could throw
+                        int nBytesInByteBuffer = FillByteBuffer();
+                        bool flush = nBytesInByteBuffer == 0; // no unused byte left even after the refill attempt, so flush the decoder
+                        int bytesUsed, charsUsed; bool completed = false;
+                        Decoder.Convert(byteBuffer + ByteBufferIndex, nBytesInByteBuffer,
+                                        buffer, maxCount, flush,
+                                        out bytesUsed, out charsUsed, out completed);
+                        ByteBufferIndex += bytesUsed; // GetChars consumed bytesUsed bytes from the byte buffer
+                        buffer += charsUsed;
+                        maxCount -= charsUsed;
+                        if (flush && completed) return buffer; // input is exhausted and the decoder reported completion
+                    }
+                    if (maxCount == 0) return buffer;
+                    // Fewer than MaxCharCountForOneByte chars are still wanted: decode into a scratch buffer and copy char by char.
+                    char* cs = stackalloc char[MaxCharCountForOneByte];
+                    for (;;) {
+                        int nBytesInByteBuffer = FillByteBuffer();
+                        bool flush = nBytesInByteBuffer == 0;
+                        int bytesUsed, charsUsed; bool completed;
+                        Decoder.Convert(byteBuffer + ByteBufferIndex, nBytesInByteBuffer,
+                                        cs, MaxCharCountForOneByte, flush,
+                                        out bytesUsed, out charsUsed, out completed);
+                        ByteBufferIndex += bytesUsed;
+                        if (charsUsed > 0) {
+                            int i = 0;
+                            do {
+                                *buffer = cs[i];
+                                ++buffer; ++i;
+                                if (--maxCount == 0) { // the requested number of chars has been delivered
+                                    if (i < charsUsed) overhangChars = new string(cs, i, charsUsed - i); // decoded chars that did not fit are handed back to the caller
+                                    return buffer;
+                                }
+                            } while (i < charsUsed);
+                        }
+                        if (flush && completed) return buffer;
+                    }
+                } catch (DecoderFallbackException e) {
+                    e.Data.Add("Stream.Position", ByteIndex + e.Index); // attach ByteIndex + e.Index to the exception before rethrowing
+                    throw;
+                }
+            }
+        }
+
+        /// Reads a block of chars (which must be different from the current block)
+        /// into the BufferString. If the current CharStream block is block - 1, this method
+        /// seeks the CharStream to the first char after the overlap of the two blocks.
+        /// Otherwise it seeks the CharStream to the first char in the block. It returns the
+        /// CharStream.Ptr value at the new position (which can be null).
+        internal char* ReadBlock(int block) {
+            int prevBlock = CharStream.Block;
+            if (block == prevBlock) throw new InvalidOperationException(); // re-reading the current block is not supported
+            if (!DecoderIsSerializable && block > 0) { // without decoder snapshots, blocks > 0 can only be reached by reading forward
+                if (prevBlock > block)
+                    throw new NotSupportedException("The CharStream does not support seeking backwards over ranges longer than the block overlap because the Encoding's Decoder is not serializable. The decoder has the type: " + Decoder.GetType().FullName);
+                while (prevBlock + 1 < block) ReadBlock(++prevBlock); // read every intermediate block in sequence
+            }
+
+            BlockInfo bi = Blocks[block]; // will throw if block is out of range
+            int blockSizeMinusOverlap = BlockSize - BlockOverlap;
+            long charIndex = Math.BigMul(block, blockSizeMinusOverlap); // char offset of the block start relative to the first char
+            char* bufferBegin = CharStream.BufferBegin;
+            char* begin, buffer; // begin: Ptr value to return; buffer: write cursor into the char buffer
+            int nCharsToRead;
+
+            // fill [0 ... BlockOverlap-1] if block > 0
+            if (prevBlock == block - 1) { // sequential forward read: reuse the overlap chars at the end of the buffer
+                Buffer.Copy((byte*)bufferBegin, (byte*)(bufferBegin + blockSizeMinusOverlap),
+                            BlockOverlap*sizeof(char));
+                Debug.Assert(bufferBegin[BlockOverlap - 1] == bi.LastCharInOverlap);
+                begin = buffer = bufferBegin + BlockOverlap;
+            } else if (prevBlock >= 0) {
+                Stream.Seek(bi.ByteIndex, SeekOrigin.Begin); // will throw if Stream can't seek
+                // now that there was no exception, we can change the state...
+                StreamPosition = bi.ByteIndex;
+                ClearAndRefillByteBuffer(bi.ByteBufferIndex);
+                if (block != 0)
+                    Decoder = (Decoder)bi.DecoderImageAtBlockBegin.CreateClone(); // restore the decoder state captured for this block's begin
+                else
+                    Decoder.Reset();
+                if (prevBlock == block + 1) {
+                    // move the overlap into [BlockSize - BlockOverlap, BlockSize - 1] before it gets overwritten
+                    Buffer.Copy((byte*)(bufferBegin + blockSizeMinusOverlap), (byte*)bufferBegin,
+                                BlockOverlap*sizeof(char));
+                }
+                begin = buffer = bufferBegin;
+                if (block > 0) {
+                    nCharsToRead = BlockOverlap;
+                    if (bi.OverhangCharsAtBlockBegin != null) { // chars recorded as overhang at this block's begin go in first
+                        nCharsToRead -= bi.OverhangCharsAtBlockBegin.Length;
+                        for (int i = 0; i < bi.OverhangCharsAtBlockBegin.Length; ++i)
+                            *(buffer++) = bi.OverhangCharsAtBlockBegin[i];
+                    }
+                    string overhangCharsAfterOverlap;
+                    buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAfterOverlap);
+                    if (   buffer != bufferBegin + BlockOverlap // the re-read overlap must match what was recorded for this block
+                        || ByteIndex != bi.ByteIndex + bi.NumberOfBytesInOverlap
+                        || *(buffer - 1) != bi.LastCharInOverlap
+                        || overhangCharsAfterOverlap != bi.OverhangCharsAfterOverlap)
+                        throw new IOException("CharStream: stream integrity error");
+                }
+            } else { // ReadBlock was called from the constructor
+                if (block != 0) throw new InvalidOperationException();
+                begin = buffer = bufferBegin;
+            }
+
+            // fill [0 ... BlockSize-BlockOverlap-1] if block == 0
+            // and [BlockOverlap ... BlockSize-BlockOverlap-1] otherwise
+            if (block == 0) {
+                nCharsToRead = blockSizeMinusOverlap;
+            } else {
+                nCharsToRead = blockSizeMinusOverlap - BlockOverlap;
+                if (bi.OverhangCharsAfterOverlap != null) {
+                    nCharsToRead -= bi.OverhangCharsAfterOverlap.Length;
+                    for (int i = 0; i < bi.OverhangCharsAfterOverlap.Length; ++i)
+                        *(buffer++) = bi.OverhangCharsAfterOverlap[i];
+                }
+            }
+            string overhangCharsAtNextBlockBegin;
+            buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAtNextBlockBegin);
+            // Remember the byte position at which the next block (i.e. its overlap with this block) begins.
+            long byteIndexAtNextBlockBegin = ByteIndex;
+            int byteBufferIndexAtNextBlockBegin = ByteBufferIndex;
+
+            // fill [BlockSize-BlockOverlap ... BlockSize-1]
+            if (block == Blocks.Count - 1) { // next block hasn't yet been read
+                Cloner cloner = null;
+                CloneImage decoderImageAtNextBlockBegin = null;
+                if (DecoderIsSerializable) { // snapshot the decoder state at the begin of the next block
+                    cloner = Cloner.Create(Decoder.GetType());
+                    decoderImageAtNextBlockBegin = cloner.CaptureImage(Decoder);
+                }
+                nCharsToRead = BlockOverlap;
+                if (overhangCharsAtNextBlockBegin != null) {
+                    nCharsToRead -= overhangCharsAtNextBlockBegin.Length;
+                    for (int i = 0; i < overhangCharsAtNextBlockBegin.Length; ++i)
+                        *(buffer++) = overhangCharsAtNextBlockBegin[i];
+                }
+                string overhangCharsAfterOverlapWithNextBlock;
+                buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAfterOverlapWithNextBlock);
+                if (LastBlock == Int32.MaxValue) { // last block hasn't yet been detected
+                    if (buffer == bufferBegin + BlockSize) { // the buffer was filled completely, so register a BlockInfo for the next block
+                        var decoderImageAfterOverlapWithNextBlock =
+                            !DecoderIsSerializable ? null : cloner.CaptureImage(Decoder);
+                        int nBytesInOverlapWithNextBlock = (int)(ByteIndex - byteIndexAtNextBlockBegin);
+                        Blocks.Add(new BlockInfo(byteIndexAtNextBlockBegin, byteBufferIndexAtNextBlockBegin,
+                                                 nBytesInOverlapWithNextBlock, *(buffer - 1),
+                                                 overhangCharsAtNextBlockBegin, decoderImageAtNextBlockBegin,
+                                                 overhangCharsAfterOverlapWithNextBlock, decoderImageAfterOverlapWithNextBlock));
+                    } else { // we reached the end of the stream
+                        LastBlock = block;
+                        IndexOfLastCharPlus1 = CharStream.IndexOfFirstChar + charIndex + (buffer - bufferBegin);
+                    }
+                } else if (IndexOfLastCharPlus1 != CharStream.IndexOfFirstChar + charIndex + (buffer - bufferBegin)) { // the end position differs from the one recorded earlier
+                    throw new IOException("CharStream: stream integrity error");
+                }
+            } else {
+                BlockInfo nbi = Blocks[block + 1]; // the next block is already known: verify against its recorded data
+                if (buffer != bufferBegin + blockSizeMinusOverlap
+                    || byteIndexAtNextBlockBegin != nbi.ByteIndex
+                    || byteBufferIndexAtNextBlockBegin != nbi.ByteBufferIndex
+                    || overhangCharsAtNextBlockBegin != nbi.OverhangCharsAtBlockBegin)
+                    throw new IOException("CharStream: stream integrity error");
+
+                if (prevBlock != block + 1 || (block == 0 && !DecoderIsSerializable)) { // jumping back to block 0 is supported even if the decoder is not serializable
+                    nCharsToRead = BlockOverlap;
+                    if (overhangCharsAtNextBlockBegin != null) {
+                        nCharsToRead -= overhangCharsAtNextBlockBegin.Length;
+                        for (int i = 0; i < overhangCharsAtNextBlockBegin.Length; ++i)
+                            *(buffer++) = overhangCharsAtNextBlockBegin[i];
+                    }
+                    string overhangCharsAfterOverlapWithNextBlock;
+                    buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAfterOverlapWithNextBlock);
+                    int nBytesInOverlapWithNextBlock = (int)(ByteIndex - byteIndexAtNextBlockBegin);
+                    if (buffer != bufferBegin + BlockSize
+                        || nBytesInOverlapWithNextBlock != nbi.NumberOfBytesInOverlap
+                        || *(buffer - 1) != nbi.LastCharInOverlap
+                        || overhangCharsAfterOverlapWithNextBlock != nbi.OverhangCharsAfterOverlap)
+                        throw new IOException("CharStream: stream integrity error");
+                } else {
+                    Debug.Assert(bufferBegin[BlockSize - 1] == nbi.LastCharInOverlap);
+                    buffer += BlockOverlap; // we already copied the chars at the beginning of this function
+                    int off = nbi.NumberOfBytesInOverlap - (ByteBufferCount - ByteBufferIndex); // overlap bytes that lie beyond the unused bytes in the ByteBuffer
+                    if (off > 0) {
+                        // we wouldn't have gotten here if the Stream didn't support seeking
+                        Stream.Seek(off, SeekOrigin.Current);
+                        StreamPosition += off;
+                        ClearAndRefillByteBuffer(off%ByteBuffer.Length);
+                    } else {
+                        ByteBufferIndex += nbi.NumberOfBytesInOverlap; // the overlap bytes are all buffered, so just step over them
+                    }
+                    Decoder = (Decoder)nbi.DecoderImageAfterOverlap.CreateClone();
+                }
+            }
+            // Publish the new block and buffer bounds to the owning CharStream.
+            CharStream.Block = block;
+            //CharStream.CharIndex = charIndex;
+            CharStream.IndexOfFirstCharInBlock = CharStream.IndexOfFirstChar + charIndex;
+            CharStream.BufferEnd = buffer;
+            if (begin != buffer) {
+                CharStream.Ptr = begin;
+                CharStream.PtrEnd = buffer;
+                CharStream.PtrBegin = CharStream.BufferBegin;
+                return begin;
+            } else { // there is no char at the new position
+                CharStream.Ptr = null;
+                CharStream.PtrEnd = null;
+                CharStream.PtrBegin = null;
+                return null;
+            }
+        }
+    } // class MultiBlockData
+
+
+    /// Returns an iterator pointing to the given index in the stream,
+    /// or to the end of the stream if the indexed position lies beyond the last char in the stream.
+    /// The index is negative or less than the BeginIndex.
+    /// Accessing the char with the given index requires seeking in the underlying byte stream, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.
+    /// An I/O error occurred.
+    /// The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.
+    /// The input stream contains invalid bytes for which the decoder fallback threw this exception.
+    /// Can not allocate enough memory for the internal data structure.
+    /// Method is called after the stream was disposed.
+    public void Seek(long index) {
+        ++StateTag; // incremented up front; undone below if the index turns out to be invalid
+        // The following comparison is safe in case of an overflow since
+        // 0 <= IndexOfFirstCharInBlock < 2^60 + 2^31 * 2^31 and BufferEnd - BufferBegin < 2^31,
+        // where 2^31 is an upper bound for both the number of blocks and the number of chars in a block.
+        long off = unchecked(index - IndexOfFirstCharInBlock);
+        if (0 <= off && off < Buffer.PositiveDistance(BufferBegin, BufferEnd)) { // fast path: the target lies in the currently buffered block
+            Ptr = BufferBegin + (uint)off;
+            PtrBegin = BufferBegin;
+            PtrEnd = BufferEnd;
+            return;
+        }
+        if (index < IndexOfFirstChar) {
+            --StateTag;
+            throw (new ArgumentOutOfRangeException("index", "The index is negative or less than the IndexOfFirstChar."));
+        }
+        if (BlockData == null || index >= BlockData.IndexOfLastCharPlus1) { // at or beyond the end of the stream: null the pointers
+            Ptr = null;
+            PtrBegin = null;
+            PtrEnd = null;
+            return;
+        }
+        // we never get here for streams with only one block
+        index -= IndexOfFirstChar;
+        int blockSizeMinusOverlap = BlockData.BlockSize - BlockData.BlockOverlap;
+        long idx_;
+        long block_ = Math.DivRem(index, blockSizeMinusOverlap, out idx_); // block number and char offset within that block
+        int block = block_ > Int32.MaxValue ? Int32.MaxValue : (int)block_; // clamp the block number to the int range
+        int idx = (int)idx_;
+        Seek(block, idx);
+    }
+    /// Seeks to the char at indexInBlock in the given block, reading blocks as needed; nulls the pointers if that position does not exist.
+    private void Seek(int block, int indexInBlock) {
+        Debug.Assert(block >= 0 && indexInBlock >= 0 && BlockData != null);
+        if (block > Block) {
+            if (indexInBlock < BlockData.BlockOverlap) { // target is in the overlap at the block's begin: address it via the previous block instead
+                --block;
+                indexInBlock += BlockData.BlockSize - BlockData.BlockOverlap;
+            }
+        } else if (block < Block) {
+            int blockSizeMinusOverlap = BlockData.BlockSize - BlockData.BlockOverlap;
+            if (indexInBlock >= blockSizeMinusOverlap) { // target is in the overlap at the block's end: address it via the following block instead
+                ++block;
+                indexInBlock -= blockSizeMinusOverlap;
+            }
+        }
+        if (block == Block) {
+            Debug.Assert(indexInBlock < Buffer.PositiveDistance(BufferBegin, BufferEnd));
+            PtrBegin = BufferBegin;
+            PtrEnd = BufferEnd;
+        } else {
+            int last = BlockData.Blocks.Count - 1; // index of the last block for which a BlockInfo exists
+            if (block >= last) {
+                BlockData.ReadBlock(last);
+                while (Block < block && Block != BlockData.LastBlock) // read forward until the target block or the last block is reached
+                    BlockData.ReadBlock(Block + 1);
+                if (block != Block || indexInBlock >= Buffer.PositiveDistance(PtrBegin, PtrEnd)) { // the requested position lies beyond the last char
+                    Ptr = null;
+                    PtrBegin = null;
+                    PtrEnd = null;
+                    return;
+                }
+            } else {
+                BlockData.ReadBlock(block);
+                Debug.Assert(indexInBlock < Buffer.PositiveDistance(PtrBegin, PtrEnd));
+            }
+        }
+        Ptr = BufferBegin + indexInBlock;
+    }
+    /// Restores a position given as a buffer pointer plus block number; a null pointer stands for the end of the stream.
+    internal void Seek(char* ptr, int block) {
+        if (ptr != null) {
+            if (block != Block) { // the pointer refers to another block: convert it to an index within that block
+                Debug.Assert(BlockData != null && ptr >= BufferBegin && ptr < BufferBegin + BlockData.BlockSize);
+                int indexInBlock = (int)Buffer.PositiveDistance(BufferBegin, ptr);
+                Seek(block, indexInBlock);
+            } else {
+                Debug.Assert(ptr >= BufferBegin && ptr < BufferEnd);
+                Ptr = ptr;
+                PtrBegin = BufferBegin;
+                PtrEnd = BufferEnd;
+            }
+        } else {
+            Ptr = null;
+            PtrBegin = null;
+            PtrEnd = null;
+        }
+    }
+    /// Reads the next block if there is one and otherwise nulls the pointers; does nothing if Ptr is already null.
+    private void SeekToFirstCharAfterLastCharOfCurrentBlock() {
+        if (Ptr != null) {
+            if (BlockData != null && Block != BlockData.LastBlock) BlockData.ReadBlock(Block + 1);
+            else {
+                Ptr = null;
+                PtrBegin = null;
+                PtrEnd = null;
+            }
+        }
+    }
+    /// Returns a token built from the current Ptr and Block (plus this stream in DEBUG builds).
+    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
+    public CharStreamIndexToken IndexToken { get {
+        return new CharStreamIndexToken(
+                   #if DEBUG
+                       this,
+                   #endif
+                       Ptr,
+                       Block
+                   );
+    } }
+    // Kept out of line (NoInlining) so that callers only contain a call instruction for the failure path.
+    [MethodImplAttribute(MethodImplOptions.NoInlining)]
+    private void ThrowInvalidIndexToken() {
+        throw new ArgumentException("The CharStreamIndexToken is invalid.");
+    }
+    /// Seeks the stream to the position stored in the given index token and increments the StateTag.
+    public void Seek(CharStreamIndexToken indexToken) {
+        int block = indexToken.Block;
+        if (block < 0) ThrowInvalidIndexToken(); // tests for zero-initialized IndexTokens
+        #if DEBUG
+        Debug.Assert(this == indexToken.CharStream);
+        #endif
+        if (Ptr != null && indexToken.Ptr != null && block == Block) { // same block and neither position is the end of the stream: only Ptr changes
+            Ptr = indexToken.Ptr;
+            Debug.Assert(Ptr >= BufferBegin && Ptr < BufferEnd);
+        } else {
+            Seek(indexToken.Ptr, block);
+        }
+        ++StateTag;
+    }
+
+    // Below we split many methods into a default method containing the code
+    // for the most frequently used branch and a "...Continue" method containing
+    // the code for the remaining branches. This allows the JIT to produce
+    // faster code for the main branch and in a few cases even to inline it.
+
+    /// Returns the chars between the position stored in the given index token
+    /// and the current position of the stream.
+    public string ReadFrom(CharStreamIndexToken indexOfFirstChar) {
+        int tokenBlock = indexOfFirstChar.Block;
+        // rejects zero-initialized index tokens
+        if (tokenBlock < 0) ThrowInvalidIndexToken();
+        #if DEBUG
+        Debug.Assert(this == indexOfFirstChar.CharStream);
+        #endif
+        return ReadFrom(indexOfFirstChar.Ptr, tokenBlock);
+    }
+
+    // Fast path: the start position lies before Ptr within the current block,
+    // so the string can be copied straight out of the buffer.
+    internal string ReadFrom(char* ptr, int block) {
+        bool startIsBeforePtrInCurrentBlock = ptr != null && ptr < Ptr && block == Block;
+        if (!startIsBeforePtrInCurrentBlock) return ReadFromContinue(ptr, block);
+        Debug.Assert(BufferBegin <= ptr && Ptr < BufferEnd);
+        int charCount = (int)Buffer.PositiveDistance(ptr, Ptr);
+        return new string(ptr, 0, charCount);
+    }
+    // Slow path: compares stream indices, then seeks back to the start position
+    // and reads forward. The StateTag is restored afterwards.
+    private string ReadFromContinue(char* ptr, int block) {
+        ulong startIndex = (ulong)GetIndex(ptr, block);
+        ulong currentIndex = (ulong)Index;
+
+        if (startIndex > currentIndex)
+            throw new ArgumentException("The current position of the stream must not lie before the position corresponding to the given CharStreamIndexToken/CharStreamState.");
+        if (startIndex == currentIndex) return "";
+
+        ulong charCount = currentIndex - startIndex;
+        // In theory a string can hold at most Int32.MaxValue chars; on .NET the real
+        // limit is below 2^30, because no object may be larger than Int32.MaxValue
+        // bytes, even on 64-bit systems. The .NET string constructor signals a
+        // length above the maximum with an OutOfMemoryException, even when enough
+        // memory would be available, so we throw the same exception type.
+        if (charCount > Int32.MaxValue) throw new OutOfMemoryException();
+
+        var savedStateTag = StateTag;
+        Seek(ptr, block);
+        var str = Read((int)charCount);
+        StateTag = savedStateTag;
+        return str;
+    }
+
+    /// Records that a new line begins at the current position:
+    /// increments the line count and the StateTag.
+    public void RegisterNewline() {
+        var lineBeginIndex = Index;
+        Debug.Assert(lineBeginIndex != _LineBegin);
+        ++_Line;
+        _LineBegin = lineBeginIndex;
+        ++StateTag;
+    }
+
+    // Variant for a line begin that is given as a pointer into the current buffer.
+    private void RegisterNewlines(char* lineBegin, uint lineOffset) {
+        Debug.Assert(BufferBegin <= lineBegin && lineBegin <= BufferEnd && lineOffset > 0);
+        _Line += lineOffset;
+        long indexOfLineBegin = Buffer.PositiveDistance(BufferBegin, lineBegin) + IndexOfFirstCharInBlock;
+        Debug.Assert(indexOfLineBegin != _LineBegin);
+        _LineBegin = indexOfLineBegin;
+        ++StateTag;
+    }
+
+    /// Adds lineOffset to the line count and places the begin of the line
+    /// newColumnMinus1 chars before the current position.
+    public void RegisterNewlines(int lineOffset, int newColumnMinus1) {
+        Debug.Assert(lineOffset != 0 && newColumnMinus1 >= 0);
+        _Line += lineOffset;
+        Debug.Assert(_Line > 0);
+        // this value only feeds the assertion below
+        var expectedLineBegin = Index - newColumnMinus1;
+        Debug.Assert(expectedLineBegin != _LineBegin);
+        _LineBegin = Index - newColumnMinus1;
+        ++StateTag;
+    }
+
+    /// Adds lineOffset to the line count and places the begin of the line
+    /// newColumnMinus1 chars before the current position.
+    public void RegisterNewlines(long lineOffset, long newColumnMinus1) {
+        Debug.Assert(lineOffset != 0 && newColumnMinus1 >= 0);
+        _Line += lineOffset;
+        Debug.Assert(_Line > 0);
+        // this value only feeds the assertion below
+        var expectedLineBegin = Index - newColumnMinus1;
+        Debug.Assert(expectedLineBegin != _LineBegin);
+        _LineBegin = Index - newColumnMinus1;
+        ++StateTag;
+    }
+
+    /// Returns the char at the current position, or EOS if Ptr is null.
+    public char Peek() {
+        char* current = Ptr;
+        return current != null ? *current : EOS;
+    }
+
+#if AGGRESSIVE_INLINING
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
+    public void Skip() {
+        // Main branch: the next char is still inside the current buffer.
+        char* next = Ptr + 1;
+        if (next < PtrEnd) {
+            Ptr = next;
+            ++StateTag;
+            return;
+        }
+        SkipContinue();
+    }
+    [MethodImplAttribute(MethodImplOptions.NoInlining)]
+    private void SkipContinue() {
+        SkipContinue(1u);
+    }
+
+#if
AGGRESSIVE_INLINING + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + public char Read() { + char* ptr = Ptr; + char* ptr1 = ptr + 1; + if (ptr1 < PtrEnd) { + char c = *ptr; + Ptr = ptr1; + ++StateTag; + return c; + } + return ReadContinue(); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private char ReadContinue() { + var c = Peek(); + Skip(); + return c; + } + +#if AGGRESSIVE_INLINING + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + public char SkipAndPeek() { + char* ptr = Ptr + 1; + if (ptr < PtrEnd) { + Ptr = ptr; + ++StateTag; + return *ptr; + } + return SkipAndPeekContinue(); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private char SkipAndPeekContinue() { return SkipAndPeekContinue(1u); } + + private static readonly bool IsLittleEndian = BitConverter.IsLittleEndian; // improves inlining and dead code elimination, at least with the .NET JIT + +#if AGGRESSIVE_INLINING + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + public TwoChars Peek2() { + char* ptr = Ptr; + if (ptr + 1 < PtrEnd) { + #if UNALIGNED_READS + if (IsLittleEndian) { + return new TwoChars(*((uint*)ptr)); + } else { + return new TwoChars(ptr[0], ptr[1]); + } + #else + return new TwoChars(ptr[0], ptr[1]); + #endif + } + return Peek2Continue(); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private TwoChars Peek2Continue() { + return new TwoChars(Peek(), Peek(1u)); + } + +#if AGGRESSIVE_INLINING + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + public char Peek(uint utf16Offset) { + if (utf16Offset < Buffer.PositiveDistance(Ptr, PtrEnd)) + return Ptr[utf16Offset]; + return PeekContinue(utf16Offset); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private char PeekContinue(uint utf16Offset) { + if (Ptr == null || BlockData == null || Block == BlockData.LastBlock) return EOS; + char* ptr = Ptr; + int block = Block; + var stateTag = StateTag; + Seek(Index + utf16Offset); + char c = Peek(); + 
Seek(ptr, block); // backtrack + StateTag = stateTag; + return c; + } + +#if AGGRESSIVE_INLINING + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + public void Skip(uint utf16Offset) { + if (utf16Offset < Buffer.PositiveDistance(Ptr, PtrEnd)) { + Ptr += utf16Offset; + ++StateTag; + return; + } + SkipContinue(utf16Offset); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private void SkipContinue(uint utf16Offset) { + if (Ptr == null || utf16Offset == 0) return; + if (BlockData == null || Block == BlockData.LastBlock) { + Ptr = null; + PtrBegin = null; + PtrEnd = null; + ++StateTag; + return; + } + Seek(Index + utf16Offset); + } + +#if AGGRESSIVE_INLINING + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + public char SkipAndPeek(uint utf16Offset) { + if (utf16Offset < Buffer.PositiveDistance(Ptr, PtrEnd)) { + char* ptr = Ptr + utf16Offset; + Ptr = ptr; + ++StateTag; + return *ptr; + } + return SkipAndPeekContinue(utf16Offset); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private char SkipAndPeekContinue(uint utf16Offset) { + SkipContinue(utf16Offset); + return Peek(); + } + + public char Peek(int utf16Offset) { // don't force inlining, because the .NET JIT doesn't optimize after inlining + if (utf16Offset >= 0 + ? utf16Offset < Buffer.PositiveDistance(Ptr, PtrEnd) + : unchecked((uint)-utf16Offset) <= Buffer.PositiveDistance(PtrBegin, Ptr)) + { + return Ptr[utf16Offset]; + } + return PeekContinue(utf16Offset); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private char PeekContinue(int utf16Offset) { + if (utf16Offset >= 0) return PeekContinue((uint)utf16Offset); + var newIndex = Index + utf16Offset; + if (newIndex >= _IndexOfFirstChar) { + char* ptr = Ptr; + int block = Block; + var stateTag = StateTag; + Seek(Index + utf16Offset); + char c = Peek(); + Seek(ptr, block); + StateTag = stateTag; + return c; + } + return EOS; + } + + public void Skip(int utf16Offset) { + if (utf16Offset >= 0 + ? 
utf16Offset < Buffer.PositiveDistance(Ptr, PtrEnd) + : unchecked((uint)-utf16Offset) <= Buffer.PositiveDistance(PtrBegin, Ptr)) + { + Ptr = unchecked(Ptr + utf16Offset); // see https://connect.microsoft.com/VisualStudio/feedback/details/522944 + ++StateTag; + return; + } + SkipContinue(utf16Offset); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private void SkipContinue(int utf16Offset) { + if (utf16Offset >= 0) { + SkipContinue((uint)utf16Offset); + return; + } + Seek(Index + utf16Offset); + } + + public void Skip(long utf16Offset) { + if (utf16Offset >= 0 + ? utf16Offset < Buffer.PositiveDistance(Ptr, PtrEnd) + : unchecked((ulong)-utf16Offset) <= Buffer.PositiveDistance(PtrBegin, Ptr)) + { + Ptr = unchecked(Ptr + utf16Offset); // see https://connect.microsoft.com/VisualStudio/feedback/details/522944 + ++StateTag; + return; + } + SkipContinue(utf16Offset); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private void SkipContinue(long utf16Offset) { + long index = Index; + Seek(utf16Offset > Int64.MaxValue - index ? Int64.MaxValue : index + utf16Offset); + } + + public char SkipAndPeek(int utf16Offset) { + if (utf16Offset >= 0 + ? 
utf16Offset < Buffer.PositiveDistance(Ptr, PtrEnd) + : unchecked((uint)-utf16Offset) <= Buffer.PositiveDistance(PtrBegin, Ptr)) + { + char* ptr = unchecked(Ptr + utf16Offset); // see https://connect.microsoft.com/VisualStudio/feedback/details/522944 + Ptr = ptr; + ++StateTag; + return *ptr; + } + return SkipAndPeekContinue(utf16Offset); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private char SkipAndPeekContinue(int utf16Offset) { + if (utf16Offset >= 0) { + SkipContinue((uint)utf16Offset); + return Peek(); + } + var newIndex = Index + utf16Offset; + if (newIndex >= IndexOfFirstChar) { + Seek(Index + utf16Offset); + return Peek(); + } else { + Seek(_IndexOfFirstChar); + return EOS; + } + } + + public string PeekString(int length) { + if (unchecked((uint)length) <= Buffer.PositiveDistance(Ptr, PtrEnd)) + return new String(Ptr, 0, length); + return PeekStringContinue(length); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private string PeekStringContinue(int length) { + return ReadContinue(length, true); + } + + public string Read(int length) { + char* ptr = Ptr; + if (unchecked((uint)length) < Buffer.PositiveDistance(ptr, PtrEnd)) { + Ptr += length; + ++StateTag; + return new String(ptr, 0, length); + } + return ReadContinue(length); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private string ReadContinue(int length) { + return ReadContinue(length, false); + } + + private string ReadContinue(int length, bool backtrack) { + if (length < 0) throw new ArgumentOutOfRangeException("length", "length is negative."); + if (length == 0 || Ptr == null) return ""; + if (BlockData == null) { + int maxLength = (int)Buffer.PositiveDistance(Ptr, PtrEnd); + if (length > maxLength) + length = maxLength; + } else { + long maxLength = BlockData.IndexOfLastCharPlus1 - Index; + if (length > maxLength) + length = (int)maxLength; + } + string str = new String('\u0000', length); + fixed (char* pStr = str) { + int cc = ReadContinue(pStr, length, 
backtrack); + if (cc == length) return str; + return new String(pStr, 0, cc); + } + } + + public int PeekString(char[] buffer, int bufferIndex, int length) { + return Read(buffer, bufferIndex, length, true); + } + public int Read(char[] buffer, int bufferIndex, int length) { + return Read(buffer, bufferIndex, length, false); + } + private int Read(char[] buffer, int bufferIndex, int length, bool backtrack) { + if (bufferIndex < 0) + throw new ArgumentOutOfRangeException("bufferIndex", "bufferIndex is negative."); + if (length > buffer.Length - bufferIndex) // throws if buffer is null + throw new ArgumentOutOfRangeException("length", "bufferIndex or length is out of range."); + // We must exit early for length == 0, because pining an empty array + // would invoke implementation-defined behaviour. + if (length <= 0) { + if (length == 0) return 0; + throw new ArgumentOutOfRangeException("length", "length is negative."); + } + fixed (char* pBuffer = buffer) + return Read(pBuffer + bufferIndex, length, backtrack); + } + + public int PeekString(char* buffer, int length) { + return Read(buffer, length, true); + } + public int Read(char* buffer, int length) { + return Read(buffer, length, false); + } + private int Read(char* buffer, int length, bool backtrack) { + if (unchecked((uint)length) < Buffer.PositiveDistance(Ptr, PtrEnd)) { + char* ptr = Ptr; + int len = length; + #if UNALIGNED_READS + if ((unchecked((int)buffer) & 2) != 0 && len != 0) { // align buffer pointer + *buffer = *ptr; + ++buffer; ++ptr; --len; + } + len -= 8; + while (len >= 0) { + ((int*)buffer)[0] = ((int*)ptr)[0]; + ((int*)buffer)[1] = ((int*)ptr)[1]; + ((int*)buffer)[2] = ((int*)ptr)[2]; + ((int*)buffer)[3] = ((int*)ptr)[3]; + buffer += 8; ptr += 8; len -= 8; + } + if ((len & 4) != 0) { + ((int*)buffer)[0] = ((int*)ptr)[0]; + ((int*)buffer)[1] = ((int*)ptr)[1]; + buffer += 4; ptr += 4; + } + if ((len & 2) != 0) { + ((int*)buffer)[0] = ((int*)ptr)[0]; + buffer += 2; ptr += 2; + } + #else + len -= 2; 
+ while (len >= 0) { + buffer[0] = ptr[0]; + buffer[1] = ptr[1]; + buffer += 2; ptr += 2; len -= 2; + } + #endif + if ((len & 1) != 0) { + *buffer = *ptr; + ++ptr; + } + if (!backtrack) { + Ptr = ptr; + ++StateTag; + } + return length; + } + return ReadContinue(buffer, length, backtrack); + } + private int ReadContinue(char* buffer, int length, bool backtrack) { + if (length < 0) + throw new ArgumentOutOfRangeException("length", "length is negative."); + + if (length == 0 || Ptr == null) return 0; + + int oldLength = length; + int oldBlock = Block; + char* oldPtr = Ptr; + char* ptr = Ptr; + + do { + int len = Math.Min((int)Buffer.PositiveDistance(Ptr, PtrEnd), length); + Debug.Assert(length > 0 && len > 0); + length -= len; + #if UNALIGNED_READS + if ((unchecked((int)buffer) & 2) != 0) { // align buffer pointer + *buffer = *ptr; + ++buffer; ++ptr; --len; + } + len -= 8; + while (len >= 0) { + ((int*)buffer)[0] = ((int*)ptr)[0]; + ((int*)buffer)[1] = ((int*)ptr)[1]; + ((int*)buffer)[2] = ((int*)ptr)[2]; + ((int*)buffer)[3] = ((int*)ptr)[3]; + buffer += 8; ptr += 8; len -= 8; + } + if ((len & 4) != 0) { + ((int*)buffer)[0] = ((int*)ptr)[0]; + ((int*)buffer)[1] = ((int*)ptr)[1]; + buffer += 4; ptr += 4; + } + if ((len & 2) != 0) { + ((int*)buffer)[0] = ((int*)ptr)[0]; + buffer += 2; ptr += 2; + } + #else + len -= 2; + while (len >= 0) { + buffer[0] = ptr[0]; + buffer[1] = ptr[1]; + buffer += 2; ptr += 2; len -= 2; + } + #endif + if ((len & 1) != 0) { + *buffer = *ptr; + ++buffer; ++ptr; + } + } while (length != 0 + && BlockData != null + && Block != BlockData.LastBlock + && (ptr = BlockData.ReadBlock(Block + 1)) != null); + if (!backtrack) { + ++StateTag; + if (ptr != PtrEnd) Ptr = ptr; + else SeekToFirstCharAfterLastCharOfCurrentBlock(); + } else { + if (Block != oldBlock) Seek(oldPtr, oldBlock); + } + return oldLength - length; + } + + public bool Match(char ch) { + char* ptr = Ptr; + return ptr != null && ch == *ptr; + } + + public bool MatchCaseFolded(char 
caseFoldedChar) { + char* ptr = Ptr; + return ptr != null && caseFoldedChar == CaseFoldTable.FoldedChars[*ptr]; + } + + public bool Skip(char ch) { + char* ptr1 = Ptr + 1; + if (ptr1 < PtrEnd && ch == *Ptr) { + Ptr = ptr1; + ++StateTag; + return true; + } + return SkipContinue(ch); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private bool SkipContinue(char ch) { + if (Match(ch)) { + Skip(); + return true; + } + return false; + } + + public bool SkipCaseFolded(char caseFoldedChar) { + char* ptr1 = Ptr + 1; + if (ptr1 < PtrEnd && caseFoldedChar == CaseFoldTable.FoldedChars[*Ptr]) { + Ptr = ptr1; + ++StateTag; + return true; + } + return SkipCaseFoldedContinue(caseFoldedChar); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private bool SkipCaseFoldedContinue(char caseFoldedChar) { + if (MatchCaseFolded(caseFoldedChar)) { + Skip(); + return true; + } + return false; + } + +#if AGGRESSIVE_INLINING + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + public bool Skip(TwoChars twoChars) { + char* ptr2 = Ptr + 2; + if (ptr2 < PtrEnd) { + #if UNALIGNED_READS + if (IsLittleEndian) { + if (new TwoChars(*((uint*)Ptr)) == twoChars) { + Ptr = ptr2; + ++StateTag; + return true; + } + } else { + if (twoChars.Char0 == Ptr[0] && twoChars.Char1 == Ptr[1]) { + Ptr = ptr2; + ++StateTag; + return true; + } + } + #else + if (twoChars.Char0 == Ptr[0] && twoChars.Char1 == Ptr[1]) { + Ptr = ptr2; + ++StateTag; + return true; + } + #endif + return false; + } + return SkipContinue(twoChars); + } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private bool SkipContinue(TwoChars twoChars) { + char* cs = stackalloc char[2]; + cs[0] = twoChars.Char0; + cs[1] = twoChars.Char1; + return SkipContinue(cs, 2, false); + } + + public bool Match(string chars) { + if (chars.Length <= Buffer.PositiveDistance(Ptr, PtrEnd)) { + for (int i = 0; i < chars.Length; ++i) { + if (Ptr[i] != chars[i]) goto ReturnFalse; + } + return true; + ReturnFalse: + return false; + 
} + return SkipContinue(chars, true); + } + + public bool Skip(string chars) { + if (chars.Length < Buffer.PositiveDistance(Ptr, PtrEnd)) { + for (int i = 0; i < chars.Length; ++i) { + if (Ptr[i] != chars[i]) goto ReturnFalse; + } + Ptr += chars.Length; + ++StateTag; + return true; + ReturnFalse: + return false; + } + return SkipContinue(chars, false); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private bool SkipContinue(string chars, bool backtrackEvenIfCharsMatch) { + fixed (char* pChars = chars) + return SkipContinue(pChars, chars.Length, backtrackEvenIfCharsMatch); + } + + public bool MatchCaseFolded(string caseFoldedChars) { + if (caseFoldedChars.Length <= Buffer.PositiveDistance(Ptr, PtrEnd)) { + for (int i = 0; i < caseFoldedChars.Length; ++i) { + if (CaseFoldTable.FoldedChars[Ptr[i]] != caseFoldedChars[i]) goto ReturnFalse; + } + return true; + ReturnFalse: + return false; + } + return SkipCaseFoldedContinue(caseFoldedChars, true); + } + + public bool SkipCaseFolded(string caseFoldedChars) { + if (caseFoldedChars.Length < Buffer.PositiveDistance(Ptr, PtrEnd)) { + for (int i = 0; i < caseFoldedChars.Length; ++i) { + if (CaseFoldTable.FoldedChars[Ptr[i]] != caseFoldedChars[i]) goto ReturnFalse; + } + Ptr += caseFoldedChars.Length; + ++StateTag; + return true; + ReturnFalse: + return false; + } + return SkipCaseFoldedContinue(caseFoldedChars, false); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private bool SkipCaseFoldedContinue(string caseFoldedChars, bool backtrackEvenIfCharsMatch) { + fixed (char* pCaseFoldedChars = caseFoldedChars) + return SkipCaseFoldedContinue(pCaseFoldedChars, caseFoldedChars.Length, backtrackEvenIfCharsMatch); + } + + public bool Match(char[] chars, int charsIndex, int length) { + return Skip(chars, charsIndex, length, true); + } + public bool Skip(char[] chars, int charsIndex, int length) { + return Skip(chars, charsIndex, length, false); + } + private bool Skip(char[] chars, int charsIndex, int 
length, bool backtrack) { + if (charsIndex < 0) + throw new ArgumentOutOfRangeException("charsIndex", "charsIndex is negative."); + if (length > chars.Length - charsIndex) // throws if chars is null + throw new ArgumentOutOfRangeException("length", "length is out of range."); + // We must exit early for length == 0, because pining an empty array + // would invoke implementation-defined behaviour. + if (length <= 0) { + if (length < 0) throw new ArgumentOutOfRangeException("length", "length is negative."); + if (!backtrack) ++StateTag; + return true; + } + fixed (char* pChars = chars) return Skip(pChars + charsIndex, length, backtrack); + } + + public bool Match(char* chars, int length) { + return Skip(chars, length, true); + } + public bool Skip(char* chars, int length) { + return Skip(chars, length, false); + } + private bool Skip(char* chars, int length, bool backtrackEvenIfCharsMatch) { + if (unchecked((uint)length < Buffer.PositiveDistance(Ptr, PtrEnd))) { + #if UNALIGNED_READS + char* ptr = Ptr; + int len = length - 2; + while (len >= 0) { + if (*((int*)ptr) != *((int*)chars)) goto ReturnFalse; + ptr += 2; chars += 2; len -= 2; + } + if ((len & 1) != 0) { + if (*ptr != *chars) goto ReturnFalse; + ++ptr; + } + #else + char* ptr = Ptr; + int len = length; + while (len != 0) { + if (*ptr != *chars) goto ReturnFalse; + ++ptr; ++chars; --len; + } + #endif + if (!backtrackEvenIfCharsMatch) { + Ptr = ptr; + ++StateTag; + } + return true; + ReturnFalse: + return false; + } + return SkipContinue(chars, length, backtrackEvenIfCharsMatch); + } + + public bool MatchCaseFolded(char* caseFoldedChars, int length) { + return SkipCaseFolded(caseFoldedChars, length, true); + } + public bool SkipCaseFolded(char* caseFoldedChars, int length) { + return SkipCaseFolded(caseFoldedChars, length, false); + } + private bool SkipCaseFolded(char* caseFoldedChars, int length, bool backtrackEvenIfCharsMatch) { + if (unchecked((uint)length < Buffer.PositiveDistance(Ptr, PtrEnd))) { + char* 
ptr = Ptr; + int len = length; + while (len != 0) { + if (CaseFoldTable.FoldedChars[*ptr] != *caseFoldedChars) goto ReturnFalse; + ++ptr; ++caseFoldedChars; --len; + } + if (!backtrackEvenIfCharsMatch) { + Ptr = ptr; + ++StateTag; + } + return true; + ReturnFalse: + return false; + } + return SkipCaseFoldedContinue(caseFoldedChars, length, backtrackEvenIfCharsMatch); + } + + private bool SkipContinue(char* chars, int length, bool backtrackEvenIfCharsMatch) { + if (length <= 0) { + if (length < 0) throw new ArgumentOutOfRangeException("length", "length is negative."); + return true; + } + + if (Ptr == null) return false; + + int oldBlock = Block; + char* oldPtr = Ptr; + char* ptr = Ptr; + + for (;;) { + Debug.Assert(length > 0); + int len = (int)Buffer.PositiveDistance(ptr, PtrEnd); + if (len < length) { + if (BlockData == null || Block == BlockData.LastBlock) goto ReturnFalse; + length -= len; + } else { + len = length; + length = 0; + } + Debug.Assert(len > 0); + #if UNALIGNED_READS + len -= 2; + while (len >= 0) { + if (*((int*)ptr) != *((int*)chars)) goto ReturnFalse; + ptr += 2; chars += 2; len -= 2; + } + if ((len & 1) != 0) { + if (*ptr != *chars) goto ReturnFalse; + ++ptr; ++chars; + } + #else + do { + if (*ptr != *chars) goto ReturnFalse; + ++ptr; ++chars; --len; + } while (len != 0); + #endif + if (length != 0) { + Debug.Assert(BlockData != null && Block != BlockData.LastBlock); + ptr = BlockData.ReadBlock(Block + 1); + } else { + if (backtrackEvenIfCharsMatch) { + if (Block != oldBlock) Seek(oldPtr, oldBlock); + } else { + if (ptr != PtrEnd) Ptr = ptr; + else SeekToFirstCharAfterLastCharOfCurrentBlock(); + ++StateTag; + } + return true; + } + } + ReturnFalse: + if (Block != oldBlock) Seek(oldPtr, oldBlock); + return false; + } + + private bool SkipCaseFoldedContinue(char* caseFoldedChars, int length, bool backtrackEvenIfCharsMatch) { + if (length <= 0) { + if (length == 0) return true; + throw new ArgumentOutOfRangeException("length", "length is 
negative."); + } + + if (Ptr == null) return false; + + int oldBlock = Block; + char* oldPtr = Ptr; + char* ptr = Ptr; + + for (;;) { + Debug.Assert(length > 0); + int len = (int)Buffer.PositiveDistance(ptr, PtrEnd); + if (len < length) { + if (BlockData == null || Block == BlockData.LastBlock) goto ReturnFalse; + length -= len; + } else { + len = length; + length = 0; + } + Debug.Assert(len > 0); + do { + if (CaseFoldTable.FoldedChars[*ptr] != *caseFoldedChars) goto ReturnFalse; + ++ptr; ++caseFoldedChars; --len; + } while (len != 0); + if (length != 0) { + Debug.Assert(BlockData != null && Block != BlockData.LastBlock); + ptr = BlockData.ReadBlock(Block + 1); + } else { + if (backtrackEvenIfCharsMatch) { + if (Block != oldBlock) Seek(oldPtr, oldBlock); + } else { + if (ptr != PtrEnd) Ptr = ptr; + else SeekToFirstCharAfterLastCharOfCurrentBlock(); + ++StateTag; + } + return true; + } + } + ReturnFalse: + if (Block != oldBlock) Seek(oldPtr, oldBlock); + return false; + } + + public Match Match(Regex regex) { + if (BufferString == null) throw new NotSupportedException("CharStream instances constructed from char arrays or char pointers do not support regular expression matching."); + if (Ptr != null) { + if (BlockData != null && Ptr > BlockData.RegexSpaceThreshold && Block != BlockData.LastBlock) { + // BlockOverlap > MinRegexSpace + char c = *Ptr; + char* ptr = Ptr; + BlockData.ReadBlock(Block + 1); + int blockSizeMinusOverlap = BlockData.BlockSize - BlockData.BlockOverlap; + Ptr = ptr - blockSizeMinusOverlap; + PtrBegin = BufferBegin; // might have been set to null by ReadBlock + PtrEnd = BufferEnd; + Debug.Assert(*Ptr == c && BufferBegin <= Ptr && Ptr < BufferEnd); + } + int index = (int)Buffer.PositiveDistance(BufferStringPointer, Ptr); + int length = (int)Buffer.PositiveDistance(Ptr, BufferEnd); + return regex.Match(BufferString, index, length); + } + return regex.Match(""); + } + + public bool SkipWhitespace() { + char* lineBegin = null; + uint lineOffset = 0; 
+ char* ptr = Ptr; + char* end = unchecked(PtrEnd - 1); // - 1 to guarantee the lookahead for '\r', + if (ptr + 1 < PtrEnd) { // PtrEnd might be null + char c = *ptr; + ++ptr; + if (c > ' ') goto ReturnFalse; + if (c == ' ') { + if (*ptr > ' ') { + Ptr = ptr; + ++StateTag; + return true; + } + goto Loop; + } else { + if (c == '\r') { + if (*ptr == '\n') { + ++ptr; + if (ptr > end) goto Newline; + } + } else if (c != '\n') goto CheckTab; + if (*ptr > ' ') { + Ptr = ptr; + RegisterNewline(); + return true; + } + goto Newline; + CheckTab: + if (c != '\t') goto ReturnFalse; + goto Loop; + } + Newline: + lineBegin = ptr; + ++lineOffset; + Loop: + for (;;) { + if (ptr >= end) break; + c = *ptr; + ++ptr; + if (c != ' ') { + if (c != '\t') { + if (c == '\r') { + if (*ptr == '\n') ++ptr; + goto Newline; + } + if (c == '\n') goto Newline; + --ptr; + Ptr = ptr; + if (lineOffset == 0) { + ++StateTag; + return true; + } else { + RegisterNewlines(lineBegin, lineOffset); + return true; + } + } + } + } + } + return SkipWhitespaceContinue(ptr, lineBegin, lineOffset); + ReturnFalse: + return false; + } + private bool SkipWhitespaceContinue(char* ptr, char* lineBegin, uint lineOffset) { + var stateTag = StateTag; + uint index = Buffer.PositiveDistance(Ptr, ptr); + char c; + if (index == 0) { + c = Peek(); + if (c == ' ' || c == '\t') c = SkipAndPeek(); + else if (c != '\r' && c != '\n') return false; + } else { + if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset); + c = SkipAndPeek(index); + } + for (;;) { + if (c == ' ' || c == '\t') c = SkipAndPeek(); + else if (c != '\r' && c != '\n') { + StateTag = stateTag + 1; + return true; + } else { + char c0 = c; + c = SkipAndPeek(); + if (c0 == '\r' && c == '\n') c = SkipAndPeek(); + RegisterNewline(); + } + } + } + + public bool SkipUnicodeWhitespace() { + char* lineBegin = null; + uint lineOffset = 0; + char* end = unchecked(PtrEnd - 1); // - 1 to guarantee the lookahead for '\r' + char* ptr = Ptr; + if (ptr + 1 < PtrEnd) { // 
PtrEnd might be null + char c = *ptr; + ++ptr; + if (c == ' ') goto Loop; + if (!Text.IsWhitespace(c)) return false; + if (c <= '\r') { + if (c == '\r') { + if (*ptr == '\n') ++ptr; + } else if (c != '\n') goto Loop; + } else { + if (c < '\u2028' ? c != '\u0085' : c > '\u2029') goto Loop; + } + Newline: + lineBegin = ptr; + ++lineOffset; + Loop: + for (;;) { + if (ptr >= end) break; + c = *ptr; + ++ptr; + if (c != ' ') { + if (Text.IsWhitespace(c)) { + if (c <= '\r') { + if (c == '\r') { + if (*ptr == '\n') ++ptr; + goto Newline; + } + if (c == '\n') goto Newline; + } else if (c < '\u2028' ? c == '\u0085' : c <= '\u2029') goto Newline; + } else { + --ptr; + Ptr = ptr; + if (lineOffset == 0) { + ++StateTag; + return true; + } else { + RegisterNewlines(lineBegin, lineOffset); + return true; + } + } + } + } + } + return SkipUnicodeWhitespaceContinue(ptr, lineBegin, lineOffset); + } + private bool SkipUnicodeWhitespaceContinue(char* ptr, char* lineBegin, uint lineOffset) { + var stateTag = StateTag; + uint index = Buffer.PositiveDistance(Ptr, ptr); + char c; + if (index == 0) { + c = Peek(); + if (!Text.IsWhitespace(c)) return false; + if (c == ' ' || c == '\t') c = SkipAndPeek(); + } else { + if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset); + c = SkipAndPeek(index); + } + for (;;) { + if (c == ' ') c = SkipAndPeek(); + else { + if (!Text.IsWhitespace(c)) break; + char c0 = c; + c = SkipAndPeek(); + if (c0 <= '\r') { + if (c0 == '\r') { + if (c == '\n') c = SkipAndPeek(); + } else if (c0 != '\n') continue; + } else if (c0 < '\u2028' ? 
c0 != '\u0085' : c0 > '\u2029') continue; + RegisterNewline(); + } + } + StateTag = stateTag + 1; + return true; + } + + public bool SkipNewline() { + var ptr = Ptr; + if (ptr + 2 < PtrEnd) { + char c = *ptr; + ++ptr; + if (c == '\r') { + if (*ptr == '\n') ++ptr; + } else if (c != '\n') return false; + Ptr = ptr; + RegisterNewline(); + return true; + } else { + var stateTag = StateTag; + char c = Peek(); + if (c == '\r') { + c = SkipAndPeek(); + if (c == '\n') Skip(); + } else { + if (c != '\n') return false; + Skip(); + } + RegisterNewline(); + StateTag = stateTag + 1; + return true; + } + } + + public bool SkipUnicodeNewline() { + var ptr = Ptr; + if (ptr + 2 < PtrEnd) { + char c = *ptr; + ++ptr; + if (c <= '\r') { + if (c == '\r') { + if (*ptr == '\n') ++ptr; + } else if (c != '\n') goto ReturnFalse; + } else if (c >= '\u2028' ? c > '\u2029' : c != '\u0085') goto ReturnFalse; + Ptr = ptr; + RegisterNewline(); + return true; + } else { + char c = Peek(); + uint n = 1; + if (c <= '\r') { + if (c == '\r') { + if (Peek(1u) == '\n') n = 2; + } else if (c != '\n') goto ReturnFalse; + } else if (c >= '\u2028' ? 
c > '\u2029' : c != '\u0085') goto ReturnFalse; + Skip(n); + var stateTag = StateTag; + RegisterNewline(); + StateTag = stateTag; + return true; + } + ReturnFalse: + return false; + } + + public int SkipNewlineThenWhitespace(int powerOf2TabStopDistance, bool allowFormFeed) { + int tabStopDistanceMinus1 = unchecked(powerOf2TabStopDistance - 1); + if (powerOf2TabStopDistance <= 0 || (powerOf2TabStopDistance & tabStopDistanceMinus1) != 0) + throw new ArgumentOutOfRangeException("powerOf2TabStopDistance", "powerOf2TabStopDistance must be a positive power of 2."); + + char* lineBegin = null; + uint lineOffset = 0; + int ind = -1; + char* end = unchecked(PtrEnd - 1); // - 1 to guarantee the lookahead for '\r' + char* ptr = Ptr; + if (ptr + 1 < PtrEnd) { // PtrEnd might be null + char c = *ptr; + ++ptr; + if (c == '\r') { + if (*ptr == '\n') ++ptr; + } else if (c != '\n') { + return -1; + } + Newline: + lineBegin = ptr; + ++lineOffset; + ind = 0; + for (;;) { + if (ptr >= end) break; + c = *ptr; + ++ptr; + if (c == ' ') { + ind = unchecked(ind + 1); + if (ind >= 0) continue; + // indentation has overflown, so put back ' ' and return + ind = unchecked(ind - 1); + } else if (c <= '\r') { + if (c == '\r') { + if (*ptr == '\n') ++ptr; + goto Newline; + } + if (c == '\n') goto Newline; + if (c == '\t') { + // ind = ind + tabStopDistance - ind%tabStopDistance + int d = tabStopDistanceMinus1 + 1 - (ind & tabStopDistanceMinus1); + ind = unchecked(ind + d); + if (ind >= 0) continue; + // indentation has overflown, so put back '\t' and return + ind = unchecked(ind - d); + } else if (c == '\f' && allowFormFeed) { + ind = 0; + continue; + } + } + --ptr; + Ptr = ptr; + RegisterNewlines(lineBegin, lineOffset); + return ind; + } + // end of block + } + return SkipNewlineWhitespaceContinue(ptr, lineBegin, lineOffset, ind, tabStopDistanceMinus1, allowFormFeed); + } + private int SkipNewlineWhitespaceContinue(char* ptr, char* lineBegin, uint lineOffset, int ind_, + int 
tabStopDistanceMinus1, bool allowFormFeed) + { + var stateTag = StateTag; + uint index = Buffer.PositiveDistance(Ptr, ptr); + char c; + if (index == 0) { + c = Peek(); + if (!(c == '\r' || c == '\n')) return -1; + } else { + RegisterNewlines(lineBegin, lineOffset); + c = SkipAndPeek(index); + } + int ind = ind_; + for (;;) { + if (c == ' ') { + ind = unchecked(ind + 1); + if (ind >= 0) c = SkipAndPeek(); + else { + // indentation has overflown, so put back ' ' and return + ind = unchecked(ind - 1); + break; + } + } else if (c == '\r' || c == '\n') { + ind = 0; + char c0 = c; + c = SkipAndPeek(); + if (c0 == '\r' && c == '\n') c = SkipAndPeek(); + RegisterNewline(); + } else if (c == '\t') { + // ind = ind + tabStopDistance - ind%tabStopDistance + int d = tabStopDistanceMinus1 + 1 - (ind & tabStopDistanceMinus1); + ind = unchecked(ind + d); + if (ind >= 0) c = SkipAndPeek(); + else { + // indentation has overflown, so put back '\t' and return + ind = unchecked(ind - d); + break; + } + } else if (c == '\f' && allowFormFeed) { + ind = 0; + c = SkipAndPeek(); + } else break; + } + StateTag = stateTag + 1; + return ind; + } + + public void SkipRestOfLine(bool skipNewline) { + char* ptr = Ptr; + char* end = unchecked(PtrEnd - 2); // - 2, so that we can do (*) without further checking + if (ptr + 2 < PtrEnd) { // PtrEnd might be null + for (;;) { + char c = *ptr; + if (c > '\r') { + if (++ptr == end) break; + } else if (c != '\r' && c != '\n') { + if (++ptr == end) break; + } else { + if (!skipNewline) { + if (ptr != Ptr) { + Ptr = ptr; + ++StateTag; + } + return; + } else { + ++ptr; + if (c == '\r' && *ptr == '\n') ++ptr; + Ptr = ptr; // (*) + RegisterNewline(); + return; + } + } + } + } + SkipRestOfLineContinue(ptr, skipNewline); + } + private void SkipRestOfLineContinue(char* ptr, bool skipNewline) { + var stateTag = StateTag; + uint index = Buffer.PositiveDistance(Ptr, ptr); + char c; + if (index == 0) { + c = Peek(); + if (c == EOS || (!skipNewline && (c == '\r' || c 
== '\n'))) return; + } else { + c = SkipAndPeek(index); + } + while (c != EOS) { + if (c == '\r' || c == '\n') { + if (skipNewline) SkipNewline(); + break; + } + c = SkipAndPeek(); + } + StateTag = stateTag + 1; + return; + } + + public string ReadRestOfLine(bool skipNewline) { + char* ptr = Ptr; + char* end = unchecked(PtrEnd - 2); // - 2, so that we can do (*) without further checking + if (ptr + 2 < PtrEnd) { // PtrEnd might be null + for (;;) { + char c = *ptr; + if (c > '\r') { + if (++ptr == end) break; + } else if (c != '\r' && c != '\n') { + if (++ptr == end) break; + } else { + char* ptr0 = Ptr; + if (!skipNewline) { + if (ptr != ptr0) { + Ptr = ptr; + ++StateTag; + return new string(ptr0, 0, (int)Buffer.PositiveDistance(ptr0, ptr)); + } else { + return ""; + } + } else { + var skippedString = ptr == ptr0 ? "" : new string(ptr0, 0, (int)Buffer.PositiveDistance(ptr0, ptr)); + ++ptr; + if (c == '\r' && *ptr == '\n') ++ptr; + Ptr = ptr; // (*) + RegisterNewline(); + return skippedString; + } + } + } + } + return ReadRestOfLineContinue(ptr, skipNewline); + } + private string ReadRestOfLineContinue(char* ptr, bool skipNewline) { + var stateTag = StateTag; + var indexToken = IndexToken; + uint index = Buffer.PositiveDistance(Ptr, ptr); + char c; + if (index == 0) { + c = Peek(); + if (c == EOS || (!skipNewline && (c == '\r' || c == '\n'))) return ""; + } else { + c = SkipAndPeek(index); + } + while (c != EOS) { + if (c == '\r' || c == '\n') { + var skippedString = ReadFrom(indexToken); + if (skipNewline) SkipNewline(); + StateTag = stateTag + 1; + return skippedString; + } + c = SkipAndPeek(); + } + StateTag = stateTag + 1; + return ReadFrom(indexToken); + } + + public char ReadCharOrNewline() { + var ptr = Ptr; + if (ptr + 2 < PtrEnd) { + char c = *ptr; + ++ptr; + if (c != '\r') { + if (c != '\n') { + Ptr = ptr; + ++StateTag; + return c; + } + } else if (*ptr == '\n') ++ptr; + Ptr = ptr; + RegisterNewline(); + return '\n'; + } else { + char c0 = Peek(); + if 
(c0 != EOS) { + char c = SkipAndPeek(); + var stateTag = StateTag; + if (c0 != '\r') { + if (c0 != '\n') return c0; + } else if (c == '\n') Skip(); + RegisterNewline(); + StateTag = stateTag; + return '\n'; + } + return EOS; + } + } + + public int SkipCharsOrNewlines(int maxCount) { + if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative."); + char* lineBegin = null; + uint lineOffset = 0; + int nCRLF = 0; + char* ptr = Ptr; + if (ptr != null) { + char* bufferEnd1 = PtrEnd - 1; // - 1 to guarantee the lookahead for '\r' + char* end2 = unchecked(ptr + maxCount); + char* end = end2 >= ptr && end2 <= bufferEnd1 ? end2 : bufferEnd1; + if (ptr < end) { + for (;;) { + char c = *ptr; + ++ptr; + if (c > '\r') { + if (ptr == end) break; + } else { + if (c == '\r') { + if (*ptr == '\n') { + ++ptr; + ++nCRLF; + if (end < bufferEnd1) ++end; + } + } else if (c != '\n') goto CheckBound; + lineBegin = ptr; + ++lineOffset; + CheckBound: + if (ptr >= end) break; + } + } + if (end < bufferEnd1) { + int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF; + Ptr = ptr; + if (lineOffset == 0) { + ++StateTag; + return count; + } else { + RegisterNewlines(lineBegin, lineOffset); + return count; + } + } + } + } + return SkipCharsOrNewlinesContinue(ptr, lineBegin, lineOffset, nCRLF, maxCount); + } + private int SkipCharsOrNewlinesContinue( + char* ptr, char* lineBegin, uint lineOffset, int nCRLF, + int maxCount) + { + var stateTag = StateTag; + uint index = Buffer.PositiveDistance(Ptr, ptr); + char c; + int count; + if (index == 0) { + if (maxCount == 0 || (c = Peek()) == EOS) return 0; + count = 0; + } else { + if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset); + c = SkipAndPeek(index); + count = (int)index - nCRLF; + } + for (;;) { + if (c == EOS || count == maxCount) break; + ++count; + char c0 = c; + c = SkipAndPeek(); + if (c0 <= '\r') { + if (c0 == '\r') { + if (c == '\n') c = SkipAndPeek(); + } else if (c0 != '\n') continue; + 
RegisterNewline(); + } + } + StateTag = unchecked(stateTag + 1); + return count; + } + + public string ReadCharsOrNewlines(int maxCount, bool normalizeNewlines) { + if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative."); + char* lineBegin = null; + uint lineOffset = 0; + int nCRLF = 0; + int nCR = 0; + char* ptr = Ptr; + if (ptr != null) { + char* PtrEnd1 = PtrEnd - 1; // - 1 to guarantee the lookahead for '\r' + char* end2 = unchecked(ptr + maxCount); + char* end = end2 >= ptr && end2 <= PtrEnd1 ? end2 : PtrEnd1; + if (ptr < end) { + for (;;) { + char c = *ptr; + ++ptr; + if (c > '\r') { + if (ptr == end) break; + } else { + if (c == '\r') { + if (*ptr == '\n') { + ++ptr; + ++nCRLF; + if (end < PtrEnd1) ++end; + } else { + ++nCR; + } + } else if (c != '\n') goto CheckBound; + lineBegin = ptr; + ++lineOffset; + CheckBound: + if (ptr >= end) break; + } + } + if (end < PtrEnd1) { + char* ptr0 = Ptr; + Ptr = ptr; + int length = (int)Buffer.PositiveDistance(ptr0, ptr); + if (lineOffset == 0) { + ++StateTag; + return new string(ptr0, 0, length); + } + RegisterNewlines(lineBegin, lineOffset); + return !normalizeNewlines || (nCR | nCRLF) == 0 + ? 
new string(ptr0, 0, length) + : Text.CopyWithNormalizedNewlines(ptr0, length, nCRLF, nCR); + } + } + } + return ReadCharsOrNewlinesContinue(ptr, lineBegin, lineOffset, nCRLF, nCR, maxCount, normalizeNewlines); + } + private string ReadCharsOrNewlinesContinue( + char* ptr, char* lineBegin, uint lineOffset, int nCRLF, int nCR, + int maxCount, bool normalizeNewlines) + { + var stateTag = StateTag; + var indexToken = IndexToken; + uint index = Buffer.PositiveDistance(Ptr, ptr); + char c; + int count; + if (index == 0) { + if (maxCount == 0 || (c = Peek()) == EOS) return ""; + count = 0; + } else { + if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset); + c = SkipAndPeek(index); + count = (int)index - nCRLF; + } + for (;;) { + if (c == EOS || count == maxCount) break; + ++count; + char c0 = c; + c = SkipAndPeek(); + if (c0 <= '\r') { + if (c0 == '\r') { + if (c == '\n') { + ++nCRLF; + c = SkipAndPeek(); + } else { + ++nCR; + } + } else if (c0 != '\n') continue; + RegisterNewline(); + } + } + StateTag = unchecked(stateTag + 1); + string str = ReadFrom(indexToken); + if ((nCR | nCRLF) == 0 || !normalizeNewlines) return str; + fixed (char* pStr = str) + return Text.CopyWithNormalizedNewlines(pStr, str.Length, nCRLF, nCR); + } + + public int SkipCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc predicate) { + return SkipCharsOrNewlinesWhile(predicate, predicate); + } + public int SkipCharsOrNewlinesWhile(FSharpFunc predicateForFirstChar, + FSharpFunc predicate) + { + char* lineBegin = null; + uint lineOffset = 0; + int nCRLF = 0; + char* ptr = Ptr; + char* end = unchecked(PtrEnd - 1); // - 1 to guarantee the lookahead for '\r' + if (ptr + 1 < PtrEnd) { // PtrEnd might be null + char c = *ptr; + ++ptr; + if (c > '\r') { + if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty; + } else if (c == '\r') { + if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty; + if (*ptr == '\n') { + ++ptr; + ++nCRLF; + } + lineBegin = ptr; + ++lineOffset; + } else { + if 
(!predicateForFirstChar.Invoke(c)) goto ReturnEmpty; + if (c == '\n') { + lineBegin = ptr; + lineOffset = 1; + } + } + for (;;) { + if (ptr >= end) goto EndOfBlock; + c = *ptr; + ++ptr; + if (c > '\r') { + if (!predicate.Invoke(c)) break; + } else if (c == '\r') { + if (!predicate.Invoke('\n')) break; + if (*ptr == '\n') { + ++ptr; + ++nCRLF; + } + lineBegin = ptr; + ++lineOffset; + } else { + if (!predicate.Invoke(c)) break; + if (c == '\n') { + lineBegin = ptr; + ++lineOffset; + } + } + } + --ptr; + int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF; + Ptr = ptr; + if (lineOffset == 0) { + ++StateTag; + return count; + } + RegisterNewlines(lineBegin, lineOffset); + return count; + ReturnEmpty: + return 0; + } + EndOfBlock: + return SkipCharsOrNewlinesWhileContinue(ptr, lineBegin, lineOffset, nCRLF, predicateForFirstChar, predicate); + } + private int SkipCharsOrNewlinesWhileContinue( + char* ptr, char* lineBegin, uint lineOffset, int nCRLF, + FSharpFunc predicateForFirstChar, FSharpFunc predicate) + { + var stateTag = StateTag; + uint index = Buffer.PositiveDistance(Ptr, ptr); + char c; + int count; + if (index == 0) { + c = Peek(); + char cc = c == '\r' ? 
'\n' : c; + if (c == EOS || !predicateForFirstChar.Invoke(cc)) return 0; + count = 1; + char c0 = c; + c = SkipAndPeek(); + if (cc == '\n') { + if (c0 == '\r' && c == '\n') c = SkipAndPeek(); + RegisterNewline(); + } + } else { + if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset); + c = SkipAndPeek(index); + count = (int)index - nCRLF; + } + for (;;) { + if (c == EOS) break; + if (c != '\r' && c != '\n') { + if (!predicate.Invoke(c)) break; + count = unchecked(count + 1); + if (count >= 0) c = SkipAndPeek(); + else { // overflow + count = unchecked(count - 1); + break; + } + } else { + if (!predicate.Invoke('\n')) break; + count = unchecked(count + 1); + if (count >= 0) { + char c0 = c; + c = SkipAndPeek(); + if (c0 == '\r' && c == '\n') c = SkipAndPeek(); + RegisterNewline(); + } else { + count = unchecked(count - 1); + break; + } + } + } + StateTag = unchecked(stateTag + 1); + return count; + } + + public string ReadCharsOrNewlinesWhile(FSharpFunc predicate, bool normalizeNewlines) { + return ReadCharsOrNewlinesWhile(predicate, predicate, normalizeNewlines); + } + public string ReadCharsOrNewlinesWhile( + FSharpFunc predicateForFirstChar, FSharpFunc predicate, + bool normalizeNewlines) + { + char* lineBegin = null; + uint lineOffset = 0; + int nCRLF = 0; + int nCR = 0; + char* ptr = Ptr; + char* end = unchecked(PtrEnd - 1); // - 1 to guarantee the lookahead for '\r' + if (ptr + 1 < PtrEnd) { // PtrEnd might be null + char c = *ptr; + ++ptr; + if (c > '\r') { + if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty; + } else if (c == '\r') { + if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty; + if (*ptr == '\n') { + ++ptr; + ++nCRLF; + } else { + ++nCR; + } + lineBegin = ptr; + ++lineOffset; + } else { + if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty; + if (c == '\n') { + lineBegin = ptr; + lineOffset = 1; + } + } + for (;;) { + if (ptr >= end) goto EndOfBlock; + c = *ptr; + ++ptr; + if (c > '\r') { + if (!predicate.Invoke(c)) break; + } 
else if (c == '\r') { + if (!predicate.Invoke('\n')) break; + if (*ptr == '\n') { + ++ptr; + ++nCRLF; + } else { + ++nCR; + } + lineBegin = ptr; + ++lineOffset; + } else { + if (!predicate.Invoke(c)) break; + if (c == '\n') { + lineBegin = ptr; + ++lineOffset; + } + } + } + --ptr; + char* ptr0 = Ptr; + Ptr = ptr; + int length = (int)Buffer.PositiveDistance(ptr0, ptr); + if (lineOffset == 0) { + ++StateTag; + return new string(ptr0, 0, length); + } + RegisterNewlines(lineBegin, lineOffset); + return !normalizeNewlines || (nCR | nCRLF) == 0 + ? new string(ptr0, 0, length) + : Text.CopyWithNormalizedNewlines(ptr0, length, nCRLF, nCR); + ReturnEmpty: + return ""; + } + EndOfBlock: + return ReadCharsOrNewlinesWhileContinue(ptr, lineBegin, lineOffset, nCRLF, nCR, predicateForFirstChar, predicate, normalizeNewlines); + } + private string ReadCharsOrNewlinesWhileContinue( + char* ptr, char* lineBegin, uint lineOffset, int nCRLF, int nCR, + FSharpFunc predicateForFirstChar, FSharpFunc predicate, + bool normalizeNewlines) + { + var stateTag = StateTag; + var indexToken = IndexToken; + uint index = Buffer.PositiveDistance(Ptr, ptr); + char c; + int count; + if (index == 0) { + c = Peek(); + char cc = c == '\r' ? 
'\n' : c; + if (c == EOS || !predicateForFirstChar.Invoke(cc)) return ""; + count = 1; + char c0 = c; + c = SkipAndPeek(); + if (cc == '\n') { + if (c0 == '\r') { + if (c == '\n') { + ++nCRLF; + c = SkipAndPeek(); + } else { + ++nCR; + } + } + RegisterNewline(); + } + } else { + if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset); + c = SkipAndPeek(index); + count = (int)index - nCRLF; + } + for (;;) { + if (c == EOS) break; + if (c != '\r' && c != '\n') { + if (!predicate.Invoke(c)) break; + count = unchecked(count + 1); + if (count < 0) break; + c = SkipAndPeek(); + } else { + if (!predicate.Invoke('\n')) break; + count = unchecked(count + 1); + if (count < 0) break; + char c0 = c; + c = SkipAndPeek(); + if (c0 == '\r') { + if (c == '\n') { + ++nCRLF; + c = SkipAndPeek(); + } else { + ++nCR; + } + } + RegisterNewline(); + } + } + StateTag = unchecked(stateTag + 1); + string str = ReadFrom(indexToken); + if ((nCR | nCRLF) == 0 || !normalizeNewlines) return str; + fixed (char* pStr = str) + return Text.CopyWithNormalizedNewlines(pStr, str.Length, nCRLF, nCR); + } + + public int SkipCharsOrNewlinesWhile(FSharpFunc predicate, int minCount, int maxCount) { + return SkipCharsOrNewlinesWhile(predicate, predicate, minCount, maxCount); + } + public int SkipCharsOrNewlinesWhile( + FSharpFunc predicateForFirstChar, FSharpFunc predicate, + int minCount, int maxCount) + { + if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative."); + char* lineBegin = null; + uint lineOffset = 0; + int nCRLF = 0; + char* ptr = Ptr; + if (ptr != null) { + char* bufferEnd1 = unchecked(PtrEnd - 1); // - 1 to guarantee the lookahead for '\r' + char* end2 = unchecked(ptr + maxCount); + char* end = end2 >= ptr && end2 <= bufferEnd1 ? 
/// <summary>
/// Continuation of SkipCharsOrNewlinesWhile(predicateForFirstChar, predicate, minCount, maxCount)
/// that advances one char at a time through Peek/SkipAndPeek instead of walking a raw pointer.
/// A "\r\n" pair, a lone '\r' and a lone '\n' each count as ONE char and are always
/// presented to the predicates as '\n'.
/// </summary>
/// <param name="ptr">Position reached by the caller. The chars between Ptr and ptr are
/// skipped here without being tested again.</param>
/// <param name="lineBegin">Passed to RegisterNewlines together with lineOffset when lineOffset != 0.</param>
/// <param name="lineOffset">Number of newlines the caller saw but has not registered yet.</param>
/// <param name="nCRLF">Number of "\r\n" pairs between Ptr and ptr; subtracted so that each pair counts once.</param>
/// <param name="predicateForFirstChar">Tested against the first char only (used only when ptr == Ptr).</param>
/// <param name="predicate">Tested against every following char.</param>
/// <param name="minCount">Minimum number of chars that must be accepted for the skip to take effect.</param>
/// <param name="maxCount">Maximum number of chars to accept.</param>
/// <returns>The number of accepted chars, or 0 after restoring the saved stream state
/// when fewer than minCount chars were accepted.</returns>
private int SkipCharsOrNewlinesWhileContinue(
    char* ptr, char* lineBegin, uint lineOffset, int nCRLF,
    FSharpFunc<char,bool> predicateForFirstChar, FSharpFunc<char,bool> predicate,
    int minCount, int maxCount)
{
    // Snapshot everything that the backtracking path below has to restore.
    var ptr0 = Ptr;
    var block0 = Block;
    var tag0 = StateTag;
    var line0 = _Line;
    var lineBegin0 = _LineBegin;
    uint index = Buffer.PositiveDistance(Ptr, ptr);
    char c;
    int count;
    if (index == 0) {
        // Nothing was accepted by the caller, so the first-char predicate still has to run.
        c = Peek();
        if (c == EOS || maxCount == 0) goto ReturnEmpty;
        if (c != '\r' && c != '\n') {
            if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
            count = 1;
            c = SkipAndPeek();
        } else {
            if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty;
            count = 1;
            char c0 = c;
            c = SkipAndPeek();
            // swallow the '\n' of a "\r\n" pair so the pair counts as one newline
            if (c0 == '\r' && c == '\n') c = SkipAndPeek();
            RegisterNewline();
        }
    } else {
        // Flush the newlines the caller has seen, then jump over the accepted prefix.
        if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
        c = SkipAndPeek(index);
        count = (int)index - nCRLF;
    }
    for (;;) {
        if (c == EOS || count == maxCount) break;
        if (c != '\r' && c != '\n') {
            if (!predicate.Invoke(c)) break;
            ++count;
            c = SkipAndPeek();
        } else {
            if (!predicate.Invoke('\n')) break;
            ++count;
            char c0 = c;
            c = SkipAndPeek();
            if (c0 == '\r' && c == '\n') c = SkipAndPeek();
            RegisterNewline();
        }
    }
    if (count >= minCount) {
        // Exactly one tag increment for the whole operation, regardless of what
        // the helper calls above did to StateTag.
        StateTag = unchecked(tag0 + 1);
        return count;
    }
ReturnEmpty:
    // backtrack
    Seek(ptr0, block0);
    _Line = line0;
    _LineBegin = lineBegin0;
    StateTag = tag0;
    return 0;
}
end2 : bufferEnd1; + if (ptr < end) { + char c = *ptr; + ++ptr; + if (c > '\r') { + if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty; + } else if (c == '\r') { + if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty; + if (*ptr == '\n') { + ++ptr; + ++nCRLF; + if (end < bufferEnd1) ++end; + } else { + ++nCR; + } + lineBegin = ptr; + lineOffset = 1; + } else { + if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty; + if (c == '\n') { + lineBegin = ptr; + lineOffset = 1; + } + } + for (;;) { + if (ptr < end) { + c = *ptr; + ++ptr; + if (c > '\r') { + if (!predicate.Invoke(c)) break; + } else if (c == '\r') { + if (!predicate.Invoke('\n')) break; + if (*ptr == '\n') { + ++ptr; + ++nCRLF; + if (end < bufferEnd1) ++end; + } else { + ++nCR; + } + lineBegin = ptr; + ++lineOffset; + } else { + if (!predicate.Invoke(c)) break; + if (c == '\n') { + lineBegin = ptr; + ++lineOffset; + } + } + } else { + if (end >= bufferEnd1) goto EndOfBlock; + goto ReturnStringInBlock; + } + } + --ptr; + ReturnStringInBlock: + { + char* ptr0 = Ptr; + int length = (int)Buffer.PositiveDistance(ptr0, ptr); + if (length - nCRLF >= minCount) { + Ptr = ptr; + if (lineOffset == 0) { + ++StateTag; + return new string(ptr0, 0, length); + } + RegisterNewlines(lineBegin, lineOffset); + return !normalizeNewlines || (nCR | nCRLF) == 0 + ? 
new string(ptr0, 0, length) + : Text.CopyWithNormalizedNewlines(ptr0, length, nCRLF, nCR); + } + } + ReturnEmpty: + return ""; + } + } + EndOfBlock: + return ReadCharsOrNewlinesWhileContinue(ptr, lineBegin, lineOffset, nCRLF, nCR, predicateForFirstChar, predicate, minCount, maxCount, normalizeNewlines); + } + private string ReadCharsOrNewlinesWhileContinue( + char* ptr, char* lineBegin, uint lineOffset, int nCRLF, int nCR, + FSharpFunc predicateForFirstChar, FSharpFunc predicate, + int minCount, int maxCount, bool normalizeNewlines) + { + var ptr0 = Ptr; + var block0 = Block; + var tag0 = StateTag; + var line0 = _Line; + var lineBegin0 = _LineBegin; + + uint index = Buffer.PositiveDistance(Ptr, ptr); + char c; + int count; + if (index == 0) { + c = Peek(); + if (c == EOS || maxCount == 0) goto ReturnEmpty; + if (c != '\r' && c != '\n') { + count = 1; + if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty; + c = SkipAndPeek(); + } else { + if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty; + count = 1; + char c0 = c; + c = SkipAndPeek(); + if (c0 == '\r') { + if (c == '\n') { + ++nCRLF; + c = SkipAndPeek(); + } else { + ++nCR; + } + } + RegisterNewline(); + } + } else { + if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset); + c = SkipAndPeek(index); + count = (int)index - nCRLF; + } + for (;;) { + if (c == EOS || count == maxCount) break; + if (c != '\r' && c != '\n') { + if (!predicate.Invoke(c)) break; + ++count; + c = SkipAndPeek(); + } else { + if (!predicate.Invoke('\n')) break; + ++count; + char c0 = c; + c = SkipAndPeek(); + if (c0 == '\r') { + if (c == '\n') { + ++nCRLF; + c = SkipAndPeek(); + } else { + ++nCR; + } + } + RegisterNewline(); + } + } + if (count >= minCount) { + StateTag = unchecked(tag0 + 1); + string str = ReadFrom(ptr0, block0); + if ((nCR | nCRLF) == 0 || !normalizeNewlines) return str; + fixed (char* pStr = str) + return Text.CopyWithNormalizedNewlines(pStr, str.Length, nCRLF, nCR); + } + ReturnEmpty: + // backtrack + 
/// <summary>
/// Compares str1[i] with str2[i] for every i in [3, length). The first three chars
/// are not looked at; a length of 3 or less therefore yields true.
/// </summary>
private static bool Rest3OfStringEquals(char* str1, char* str2, int length) {
    int offset = 3;
    while (offset < length) {
        if (str1[offset] != str2[offset]) return false;
        ++offset;
    }
    return true;
}

/// <summary>
/// Same comparison as Rest3OfStringEquals, except that each char of str1 is mapped
/// through CaseFoldTable.FoldedChars before it is compared with cfStr2[i].
/// cfStr2 is used as given (no lookup is applied to it here).
/// </summary>
private static bool Rest3OfStringEqualsCaseFolded(char* str1, char* cfStr2, int length) {
    // fetched up front, before the length is examined
    char* foldTable = CaseFoldTable.FoldedChars;
    int offset = 3;
    while (offset < length) {
        char folded = foldTable[str1[offset]];
        if (folded != cfStr2[offset]) return false;
        ++offset;
    }
    return true;
}
/// <summary>
/// Continuation of SkipCharsOrNewlinesUntilString(str, maxCount, out foundString):
/// keeps skipping chars one at a time until the stream content at the current
/// position matches pStr, the end of the stream is reached, or maxCount chars
/// have been skipped. "\r\n", '\r' and '\n' each count as one skipped char.
/// </summary>
/// <param name="ptr">Position reached by the caller; the chars between Ptr and ptr are skipped first.</param>
/// <param name="lineBegin">Passed to RegisterNewlines together with lineOffset when lineOffset != 0.</param>
/// <param name="lineOffset">Number of newlines the caller saw but has not registered yet.</param>
/// <param name="nCRLF">Number of "\r\n" pairs between Ptr and ptr.</param>
/// <param name="pStr">Pointer to the chars of the string to search for.</param>
/// <param name="strLength">Length of the string behind pStr.</param>
/// <param name="maxCount">Maximum number of chars to skip.</param>
/// <param name="foundString">Set to true only if the scan stopped because the string matched.</param>
/// <returns>The number of skipped chars.</returns>
private int SkipCharsOrNewlinesUntilStringContinue(
    char* ptr, char* lineBegin, uint lineOffset, int nCRLF,
    char* pStr, int strLength, int maxCount, out bool foundString)
{
    var tagOnEntry = StateTag;
    foundString = false;
    if (lineOffset != 0) {
        RegisterNewlines(lineBegin, lineOffset);
    }
    uint alreadyScanned = Buffer.PositiveDistance(Ptr, ptr);
    char current = SkipAndPeek((uint)alreadyScanned);
    int count = (int)alreadyScanned - nCRLF;
    while (true) {
        // cheap first-char test before the full comparison
        if (current == pStr[0] && Match(pStr, strLength)) {
            foundString = true;
            break;
        }
        if (current == EOS || count == maxCount) break;
        ++count;
        char skipped = current;
        current = SkipAndPeek();
        if (skipped > '\r') continue;          // cannot be a newline char
        if (skipped == '\r') {
            // a following '\n' belongs to the same newline
            if (current == '\n') current = SkipAndPeek();
        } else if (skipped != '\n') {
            continue;                          // other char <= '\r', not a newline
        }
        RegisterNewline();
    }
    // the tag moves by exactly one if anything was skipped, otherwise it is left as it was
    StateTag = count != 0 ? unchecked(tagOnEntry + 1) : tagOnEntry;
    return count;
}
new string(ptr0, 0, length) + : Text.CopyWithNormalizedNewlines(ptr0, length, nCRLF, nCR); + return length - nCRLF; + } + } else { + skippedCharsIfStringFoundOtherwiseNull = ""; + return 0; + } + } + c = *ptr; + if (ptr == end) break; + ++ptr; + if (c > '\r' || c == '\t') continue; + } + if (c == '\r') { + if (*ptr == '\n') { + ++ptr; + lineBegin = ptr; + ++lineOffset; + ++nCRLF; + if (end < end1) ++end; + else if (ptr > end) break; + continue; + } else { + ++nCR; + } + } else if (c != '\n') continue; + lineBegin = ptr; + ++lineOffset; + } // for + if (ptr < end1) { + skippedCharsIfStringFoundOtherwiseNull = null; + int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF; + Ptr = ptr; + if (lineOffset == 0) { + if (count != 0) ++StateTag; + return count; + } else { + RegisterNewlines(lineBegin, lineOffset); + return count; + } + } + } + } + return SkipCharsOrNewlinesUntilStringContinue(ptr, lineBegin, lineOffset, nCRLF, nCR, pStr, strLength, maxCount, normalizeNewlines, out skippedCharsIfStringFoundOtherwiseNull); + } + } + private int SkipCharsOrNewlinesUntilStringContinue( + char* ptr, char* lineBegin, uint lineOffset, int nCRLF, int nCR, + char* pStr, int strLength, int maxCount, bool normalizeNewlines, out string skippedCharsIfStringFoundOtherwiseNull) + { + var stateTag = StateTag; + var indexToken = IndexToken; + if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset); + uint index = Buffer.PositiveDistance(Ptr, ptr); + int count = (int)index - nCRLF; + char c = SkipAndPeek(index); + for (;;) { + if (c != pStr[0] || !Match(pStr, strLength)) { + if (c == EOS || count == maxCount) break; + ++count; + char c0 = c; + c = SkipAndPeek(); + if (c0 <= '\r') { + if (c0 == '\r') { + if (c == '\n') { + c = SkipAndPeek(); + ++nCRLF; + } else { + ++nCR; + } + } else if (c0 != '\n') continue; + RegisterNewline(); + } + } else { // found string + if (count != 0) { + StateTag = unchecked(stateTag + 1); + var s = ReadFrom(indexToken); + if (!normalizeNewlines || (nCR 
| nCRLF) == 0) { + skippedCharsIfStringFoundOtherwiseNull = s; + return count; + } else { + fixed (char* ps = s) + skippedCharsIfStringFoundOtherwiseNull = Text.CopyWithNormalizedNewlines(ps, s.Length, nCRLF, nCR); + return count; + } + } else { + StateTag = stateTag; + skippedCharsIfStringFoundOtherwiseNull = ""; + return 0; + } + } + } + StateTag = count == 0 ? stateTag : unchecked(stateTag + 1); + skippedCharsIfStringFoundOtherwiseNull = null; + return count; + } + + public int SkipCharsOrNewlinesUntilCaseFoldedString( + string caseFoldedString, int maxCount, + out bool foundString) + { + int strLength = caseFoldedString.Length; // throws if str is null + if (strLength == 0) throw new ArgumentException("The string argument is empty."); + if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative."); + char* lineBegin = null; + fixed (char* pStr = caseFoldedString) { + uint lineOffset = 0; + int nCRLF = 0; + char* ptr = Ptr; + if (ptr != null) { + char* bufferEnd = PtrEnd; + char* end1 = unchecked(bufferEnd - strLength); + if (end1 >= ptr && end1 < bufferEnd) { + char* end2 = unchecked(ptr + maxCount); + char* end = end2 < ptr || end1 <= end2 ? 
// Continuation of SkipCharsOrNewlinesUntilCaseFoldedString(caseFoldedString, maxCount, out foundString).
// Skips chars one at a time until MatchCaseFolded reports a match for pStr at the
// current position, the end of the stream is reached, or maxCount chars have been
// skipped. Every char read from the stream is mapped through CaseFoldTable.FoldedChars
// before it is compared. "\r\n", '\r' and '\n' each count as one skipped char.
//
//   ptr                   position reached by the caller; chars between Ptr and ptr are skipped first
//   lineBegin, lineOffset newlines seen by the caller, registered here when lineOffset != 0
//   nCRLF                 number of "\r\n" pairs between Ptr and ptr (each pair counts once)
//   pStr, strLength       the string to search for; compared against folded stream chars
//   maxCount              maximum number of chars to skip
//   foundString           true only if the scan stopped because of a match
//
// Returns the number of skipped chars.
private int SkipCharsOrNewlinesUntilCaseFoldedStringContinue(
    char* ptr, char* lineBegin, uint lineOffset, int nCRLF,
    char* pStr, int strLength, int maxCount, out bool foundString)
{
    var stateTag = StateTag; // remembered so the tag can be set explicitly at the end
    foundString = false;
    if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
    uint index = Buffer.PositiveDistance(Ptr, ptr);
    char* cftable = CaseFoldTable.FoldedChars;
    char c = cftable[SkipAndPeek((uint)index)];
    int count = (int)index - nCRLF;
    for (;;) {
        // first-char comparison guards the full MatchCaseFolded call
        if (c != pStr[0] || !MatchCaseFolded(pStr, strLength)) {
            // NOTE(review): c is a folded char here, so this test presumably relies on
            // the fold table leaving EOS unchanged; confirm against CaseFoldTable.
            if (c == EOS || count == maxCount) break;
            ++count;
            char c0 = c;
            c = cftable[SkipAndPeek()];
            if (c0 <= '\r') {
                if (c0 == '\r') {
                    if (c == '\n') {
                        // the '\n' of a "\r\n" pair is part of the same newline
                        c = cftable[SkipAndPeek()];
                        ++nCRLF; // nCRLF is not read again in this method after this point
                    }
                } else if (c0 != '\n') continue; // other char <= '\r', not a newline
                RegisterNewline();
            }
        } else {
            foundString = true;
            break;
        }
    }
    // exactly one tag increment if anything was skipped, none otherwise
    StateTag = count == 0 ? stateTag : unchecked(stateTag + 1);
    return count;
}
end1 : end2; + char* cftable = CaseFoldTable.FoldedChars; + for (;;) { + char c = cftable[*ptr]; + if (c != pStr[0]) { + if (ptr == end) break; + ++ptr; + if (c > '\r' || c == '\t') continue; + } else { + Debug.Assert(ptr + strLength <= PtrEnd); + if (strLength == 1 || (cftable[ptr[1]] == pStr[1] && + (strLength == 2 || (cftable[ptr[2]] == pStr[2] && + (strLength == 3 || Rest3OfStringEqualsCaseFolded(ptr, pStr, strLength)))))) + { + char* ptr0 = Ptr; + if (ptr != ptr0) { + Ptr = ptr; + int length = (int)Buffer.PositiveDistance(ptr0, ptr); + if (lineOffset == 0) { + if (length != 0) ++StateTag; + skippedCharsIfStringFoundOtherwiseNull = new string(ptr0, 0, length); + return length; + } else { + RegisterNewlines(lineBegin, lineOffset); + skippedCharsIfStringFoundOtherwiseNull = !normalizeNewlines || (nCR | nCRLF) == 0 + ? new string(ptr0, 0, length) + : Text.CopyWithNormalizedNewlines(ptr0, length, nCRLF, nCR); + return length - nCRLF; + } + } else { + skippedCharsIfStringFoundOtherwiseNull = ""; + return 0; + } + } + c = *ptr; // we don't need to casefold here + if (ptr == end) break; + ++ptr; + if (c > '\r' || c == '\t') continue; + } + if (c == '\r') { + if (*ptr == '\n') { + ++ptr; + lineBegin = ptr; + ++lineOffset; + ++nCRLF; + if (end < end1) ++end; + else if (ptr > end) break; + continue; + } else { + ++nCR; + } + } else if (c != '\n') continue; + lineBegin = ptr; + ++lineOffset; + } // for + if (ptr < end1) { + skippedCharsIfStringFoundOtherwiseNull = null; + int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF; + Ptr = ptr; + if (lineOffset == 0) { + if (count != 0) ++StateTag; + return count; + } else { + RegisterNewlines(lineBegin, lineOffset); + return count; + } + } + } + } + return SkipCharsOrNewlinesUntilCaseFoldedStringContinue(ptr, lineBegin, lineOffset, nCRLF, nCR, pStr, strLength, maxCount, normalizeNewlines, out skippedCharsIfStringFoundOtherwiseNull); + } + } + private int SkipCharsOrNewlinesUntilCaseFoldedStringContinue( + char* ptr, char* 
lineBegin, uint lineOffset, int nCRLF, int nCR, + char* pStr, int strLength, int maxCount, bool normalizeNewlines, out string skippedCharsIfStringFoundOtherwiseNull) + { + var stateTag = StateTag; + var indexToken = IndexToken; + if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset); + uint index = Buffer.PositiveDistance(Ptr, ptr); + int count = (int)index - nCRLF; + char* cftable = CaseFoldTable.FoldedChars; + char c = cftable[SkipAndPeek(index)]; + for (;;) { + if (c != pStr[0] || !MatchCaseFolded(pStr, strLength)) { + if (c == EOS || count == maxCount) break; + ++count; + char c0 = c; + c = cftable[SkipAndPeek()]; + if (c0 <= '\r') { + if (c0 == '\r') { + if (c == '\n') { + c = cftable[SkipAndPeek()]; + ++nCRLF; + } else { + ++nCR; + } + } else if (c0 != '\n') continue; + RegisterNewline(); + } + } else { // found string + if (count != 0) { + StateTag = unchecked(stateTag + 1); + var s = ReadFrom(indexToken); + if ((nCR | nCRLF) == 0 || !normalizeNewlines) { + skippedCharsIfStringFoundOtherwiseNull = s; + return count; + } else { + fixed (char* ps = s) + skippedCharsIfStringFoundOtherwiseNull = Text.CopyWithNormalizedNewlines(ps, s.Length, nCRLF, nCR); + return count; + } + } else { + StateTag = stateTag; + skippedCharsIfStringFoundOtherwiseNull = ""; + return 0; + } + } + } + StateTag = count == 0 ? 
/// <summary>
/// An immutable snapshot of the fields of a CharStream&lt;TUserState&gt; that
/// BacktrackTo restores: position (Ptr and Block), state tag, line, line begin,
/// user state and name. A zero-initialized (default) value is invalid and is
/// recognized by Line &lt;= 0.
/// </summary>
/// <typeparam name="TUserState">Type of the user state stored in the stream.</typeparam>
public unsafe struct CharStreamState<TUserState> {
#if DEBUG
    // Only kept in DEBUG builds, so that misuse with a foreign stream can be asserted.
    internal readonly CharStream<TUserState> CharStream;
    private long Index { get { return GetIndex(CharStream); } }
#endif
    internal readonly char* Ptr;
    internal readonly int   Block;
#if SMALL_STATETAG
    public readonly int Tag;
#else
    public readonly long Tag;
#endif
    public readonly long Line;
    public readonly long LineBegin;
    public readonly TUserState UserState;
    public readonly string Name;

    // Public (though undocumented) as long as the .NET JIT doesn't
    // always inline CharStream.State
    /// <summary>Captures the current state of <paramref name="charStream"/>.</summary>
    public CharStreamState(CharStream<TUserState> charStream) {
    #if DEBUG
        CharStream = charStream;
    #endif
        Ptr       = charStream.Ptr;
        Block     = charStream.Block;
        Tag       = charStream.StateTag;
        Line      = charStream._Line;
        LineBegin = charStream._LineBegin;
        UserState = charStream._UserState;
        Name      = charStream._Name;
    }

    /// <summary>An index token for the captured position.</summary>
    /// <exception cref="InvalidOperationException">The state is zero-initialized.</exception>
    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
    public CharStreamIndexToken IndexToken { get {
        if (Line <= 0) // tests for a zero-initialized state
            throw new InvalidOperationException("The CharStreamState is invalid.");

        return new CharStreamIndexToken(
                   #if DEBUG
                       CharStream,
                   #endif
                       Ptr,
                       Block);
    } }

    // On .NET calling an instance method of a generic struct can be more
    // expensive than calling an instance method of a generic class
    // (when the type parameter value is not statically known at the call
    // site and isn't a value type that makes the .NET JIT specialize
    // the code).
    //
    // Moving the actual implementations of the following methods into
    // the CharStream class allows the .NET JIT to inline them,
    // so that we effectively replace struct method calls with cheaper
    // class method calls.

    /// <summary>Forwards to the stream's GetIndex(ref state) helper for this state.</summary>
    public long GetIndex(CharStream<TUserState> charStreamFromWhichStateWasRetrieved) {
        return charStreamFromWhichStateWasRetrieved.GetIndex(ref this);
    }

    /// <summary>Forwards to the stream's GetPosition(ref state) helper for this state.</summary>
    public Position GetPosition(CharStream<TUserState> charStreamFromWhichStateWasRetrieved) {
        return charStreamFromWhichStateWasRetrieved.GetPosition(ref this);
    }
}
Encoding encoding) + : base(stream, leaveOpen, encoding) { } + + public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks) + : base(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks) { } + + public CharStream(Stream stream, bool leaveOpen, + Encoding encoding, bool detectEncodingFromByteOrderMarks, + int blockSize, int blockOverlap, int byteBufferLength) + : base(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks, + blockSize, blockOverlap, byteBufferLength) {} + + internal TUserState _UserState; + public TUserState UserState { + get { return _UserState; } + set { _UserState = value; ++StateTag; } + } + + [DebuggerBrowsable(DebuggerBrowsableState.Never)] + public CharStreamState State { get { + return new CharStreamState(this); + } } + + + // GetIndex and GetPosition are helper methods for CharStreamState + + internal long GetIndex(ref CharStreamState state) { + if (state.Line <= 0) // tests for a zero-initialized state + throw new InvalidOperationException("The CharStreamState is invalid."); + #if DEBUG + Debug.Assert(this == state.CharStream); + #endif + return GetIndex(state.Ptr, state.Block); + } + + internal Position GetPosition(ref CharStreamState state) { + if (state.Line <= 0) // tests for a zero-initialized state + throw new InvalidOperationException("The CharStreamState is invalid."); + #if DEBUG + Debug.Assert(this == state.CharStream); + #endif + long index = GetIndex(state.Ptr, state.Block); + return new Position(state.Name, index, state.Line, index - state.LineBegin + 1); + } + + // Passing a large struct by value is suboptimal, so for optimization purposes + // we define internal overloads that take ref arguments. Unfortunately, C#/F# + // doesn't have const-refs, so we can't make these overloads public (at least, + // not without risking heart attacks within certain user demographics of this library). 
+ // An alternative would be to move the following methods into the CharStreamState class, + // but IMHO the resulting API would feel less intuitive and be somewhat less discoverable. + + public void BacktrackTo(CharStreamState state) { + BacktrackTo(ref state); + } + internal void BacktrackTo(ref CharStreamState state) { + if (state.Line <= 0) // tests for zero-initialized states + throw new ArgumentException("The CharStreamState is invalid."); + #if DEBUG + Debug.Assert(this == state.CharStream); + #endif + Seek(state.Ptr, state.Block); + StateTag = state.Tag; + _Line = state.Line; + _LineBegin = state.LineBegin; + _UserState = state.UserState; + _Name = state.Name; + } + + public string ReadFrom(CharStreamState stateWhereStringBegins, bool normalizeNewlines) { + return ReadFrom(ref stateWhereStringBegins, normalizeNewlines); + } + internal string ReadFrom(ref CharStreamState stateWhereStringBegins, bool normalizeNewlines) { + if (stateWhereStringBegins.Line <= 0) // tests for zero-initialized states + throw new ArgumentException("The CharStreamState is invalid."); + #if DEBUG + Debug.Assert(this == stateWhereStringBegins.CharStream); + #endif + string str = ReadFrom(stateWhereStringBegins.Ptr, stateWhereStringBegins.Block); + if (!normalizeNewlines || _Line == stateWhereStringBegins.Line) return str; + return Text.NormalizeNewlines(str); + } + + public CharStream CreateSubstream(CharStreamState stateWhereSubstreamBegins) { + return CreateSubstream(ref stateWhereSubstreamBegins); + } + internal CharStream CreateSubstream(ref CharStreamState stateWhereSubstreamBegins) { + if (stateWhereSubstreamBegins.Line <= 0) // tests for zero-initialized states + throw new ArgumentException("The CharStreamState is invalid."); + #if DEBUG + Debug.Assert(this == stateWhereSubstreamBegins.CharStream); + #endif + CharStream subStream; + if (IsSingleBlockStream) { + // the CharStream has only one block, so it's safe to + // construct a new CharStream from a pointer into the original
buffer + char* ptr0 = stateWhereSubstreamBegins.Ptr; + if (ptr0 == null) ptr0 = BufferEnd; + char* end = Ptr; + if (end == null) end = BufferEnd; + if (end < ptr0) throw new ArgumentException("The current position of the stream must not lie before the position corresponding to the given CharStreamState."); + int length = (int)Buffer.PositiveDistance(ptr0, end); + subStream = new CharStream(BufferString, BufferStringPointer, ptr0, length); + var indexOfFirstChar = Buffer.PositiveDistance(BufferBegin, ptr0) + _IndexOfFirstChar; + subStream.IndexOfFirstCharInBlock = indexOfFirstChar; + subStream._IndexOfFirstChar = indexOfFirstChar; + } else if (Block == stateWhereSubstreamBegins.Block && Ptr != null && stateWhereSubstreamBegins.Ptr != null) { + char* ptr0 = stateWhereSubstreamBegins.Ptr; + char* end = Ptr; + if (end < ptr0) throw new ArgumentException("The current position of the stream must not lie before the position corresponding to the given CharStreamState."); + int length = (int)Buffer.PositiveDistance(ptr0, end); + string subString = new String(ptr0, 0, length); + subStream = new CharStream(subString); + var indexOfFirstChar = Buffer.PositiveDistance(BufferBegin, ptr0) + _IndexOfFirstChar; + subStream.IndexOfFirstCharInBlock = indexOfFirstChar; + subStream._IndexOfFirstChar = indexOfFirstChar; + } else { + var subString = ReadFrom(ref stateWhereSubstreamBegins, false); + subStream = new CharStream(subString); + var indexOfFirstChar = GetIndex(stateWhereSubstreamBegins.Ptr, stateWhereSubstreamBegins.Block); + subStream.IndexOfFirstCharInBlock = indexOfFirstChar; + subStream._IndexOfFirstChar = indexOfFirstChar; + } + subStream.StateTag = stateWhereSubstreamBegins.Tag; + subStream._Line = stateWhereSubstreamBegins.Line; + subStream._LineBegin = stateWhereSubstreamBegins.LineBegin; + subStream._Name = stateWhereSubstreamBegins.Name; + #if DEBUG + ++SubstreamCount.Value; + subStream.ParentSubstreamCount = SubstreamCount; + #endif + return subStream; + } +} + +} + 
/// <summary>
/// Two UTF-16 chars packed into a single 32-bit value: Char0 occupies the low
/// 16 bits, Char1 the high 16 bits. Equality and the hash code are defined
/// on the packed value.
/// </summary>
public struct TwoChars : IEquatable<TwoChars> {
    // packed representation: (Char1 << 16) | Char0
    private readonly uint Chars;

    /// <summary>Wraps an already packed value.</summary>
    internal TwoChars(uint chars) {
        Chars = chars;
    }

    /// <summary>Packs <paramref name="char0"/> into the low and <paramref name="char1"/> into the high half.</summary>
    public TwoChars(char char0, char char1) {
        Chars = ((uint)char1 << 16) | (uint)char0;
    }

    /// <summary>The char stored in the low 16 bits.</summary>
    public char Char0 { get { return unchecked((char)Chars); } } // unchecked: the cast deliberately drops the high half
    /// <summary>The char stored in the high 16 bits.</summary>
    public char Char1 { get { return (char)(Chars >> 16); } }

    /// <summary>True if <paramref name="obj"/> is a TwoChars with the same packed value.</summary>
    public override bool Equals(object obj) { return (obj is TwoChars) && Chars == ((TwoChars) obj).Chars; }
    /// <summary>True if both values hold the same two chars in the same order.</summary>
    public bool Equals(TwoChars other) { return Chars == other.Chars; }
    /// <summary>The packed value reinterpreted as a signed 32-bit integer.</summary>
    public override int GetHashCode() { return unchecked((int)Chars); }
    public static bool operator==(TwoChars left, TwoChars right) { return left.Chars == right.Chars; }
    public static bool operator!=(TwoChars left, TwoChars right) { return left.Chars != right.Chars; }
}
+    public
+#if SMALL_STATETAG
+           uint
+#else
+           ulong
+#endif
+                 StateTag;
+
+    /// Offset that converts an index into String to a stream index,
+    /// i.e. IndexOfFirstChar - IndexBegin.
+    internal long StringToStreamIndexOffset;
+
+    /// The stream index of the first char of the stream.
+    public long IndexOfFirstChar {
+        get { return StringToStreamIndexOffset + (uint)IndexBegin; }
+    }
+    /// The stream index of the last char of the stream, plus 1.
+    public long IndexOfLastCharPlus1 {
+        get { return StringToStreamIndexOffset + (uint)IndexEnd; }
+    }
+
+    /// The stream index of the current position. At the end of the stream
+    /// this equals IndexOfLastCharPlus1.
+    public long Index { get {
+        // Same computation as GetIndex(Idx), written out in place.
+        int current = Idx;
+        if (current < 0) {
+            Debug.Assert(current == Int32.MinValue);
+            return StringToStreamIndexOffset + (uint)IndexEnd;
+        }
+        Debug.Assert(current >= IndexBegin && current < IndexEnd);
+        return StringToStreamIndexOffset + (uint)current;
+    } }
+
+    /// Converts a string index (or the end-of-stream marker Int32.MinValue)
+    /// into the corresponding stream index.
+    internal long GetIndex(int idx) {
+        if (idx < 0) {
+            Debug.Assert(idx == Int32.MinValue);
+            return StringToStreamIndexOffset + (uint)IndexEnd;
+        }
+        Debug.Assert(idx >= IndexBegin && idx < IndexEnd);
+        return StringToStreamIndexOffset + (uint)idx;
+    }
+
+    /// Indicates whether the Iterator points to the beginning of the CharStream.
+    /// If the CharStream is empty, this property is always true.
+    public bool IsBeginOfStream { get {
+        if (Idx == IndexBegin) return true;
+        // At the end of the stream Idx is negative; that position is also
+        // the beginning only if the stream contains no chars.
+        return Idx < 0 && IndexBegin == IndexEnd;
+    } }
+
+    /// Indicates whether the Iterator points to the end of the CharStream,
+    /// i.e. whether it points to one char beyond the last char in the CharStream.
+    public bool IsEndOfStream {
+        get { return Idx < 0; }
+    }
+
+    // Current line number; the constructors initialize it to 1.
+    internal long _Line;
+    public long Line {
+        get { return _Line; }
+    }
+    /// Overwrites the line number. Performs no validation and leaves the StateTag untouched.
+    public void SetLine_WithoutCheckAndWithoutIncrementingTheStateTag(long line) {
+        _Line = line;
+    }
+
+    // Stream index of the first char of the current line.
+    internal long _LineBegin;
+    public long LineBegin {
+        get { return _LineBegin; }
+    }
+    /// Overwrites the line begin index. Performs no validation and leaves the StateTag untouched.
+    public void SetLineBegin_WithoutCheckAndWithoutIncrementingTheStateTag(long lineBegin) {
+        _LineBegin = lineBegin;
+    }
+
+    /// The UTF‐16 column number of the next char, i.e. Index ‐ LineBegin + 1.
+    public long Column { get { return Index - LineBegin + 1; } }
+
+    internal string _Name;
+    /// The name of the stream. Setting the name increments the StateTag.
+    public string Name {
+        get { return _Name; }
+        set { _Name = value; ++StateTag; }
+    }
+
+    /// The encoding of the stream content: Encoding.Unicode for streams
+    /// constructed from a string, otherwise the encoding that was used
+    /// for decoding the bytes.
+    public Encoding Encoding { get; private set; }
+
+    /// The current position as a Position value (name, index, line, column).
+    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
+    public Position Position { get {
+        long index = Index;
+        return new Position(_Name, index, Line, index - LineBegin + 1);
+    } }
+
+    /// Constructs a stream over the complete string.
+    /// The argument is only checked by an assertion.
+    internal CharStream(string chars) {
+        Debug.Assert(chars != null);
+        String = chars;
+        Encoding = Encoding.Unicode;
+        _Line = 1;
+        var length = chars.Length;
+        if (length != 0) {
+            // Idx = 0
+            IndexEnd = length;
+        } else {
+            Idx = Int32.MinValue;
+            // IndexEnd = 0
+        }
+    }
+
+    /// Constructs a stream over chars[index .. index + length - 1]
+    /// whose first char has the stream index 0.
+    public CharStream(string chars, int index, int length) : this(chars, index, length, 0) {}
+
+    /// Constructs a stream over chars[index .. index + length - 1]
+    /// whose first char has the stream index streamBeginIndex.
+    /// Throws an ArgumentNullException if chars is null and an
+    /// ArgumentOutOfRangeException if index, length or streamBeginIndex is out of range.
+    public CharStream(string chars, int index, int length, long streamBeginIndex) {
+        if (chars == null) throw new ArgumentNullException("chars");
+        if (index < 0) throw new ArgumentOutOfRangeException("index", "index is negative.");
+        if (streamBeginIndex < 0 || streamBeginIndex >= (1L << 60)) throw new ArgumentOutOfRangeException("streamBeginIndex", "streamBeginIndex must be non-negative and less than 2^60.");
+        // unchecked: an overflowing sum wraps around and is rejected by the
+        // "indexEnd < index" test below, which also rejects negative lengths.
+        int indexEnd = unchecked(index + length);
+        if (indexEnd < index || indexEnd > chars.Length) throw new ArgumentOutOfRangeException("length", "index or length is out of range.");
+        String = chars;
+        Encoding = Encoding.Unicode;
+        _Line = 1;
+        Idx = length == 0 ? Int32.MinValue : index;
+        IndexBegin = index;
+        IndexEnd = indexEnd;
+        _LineBegin = streamBeginIndex;
+        StringToStreamIndexOffset = streamBeginIndex - index;
+    }
+
+#if !PCL
+    /// Constructs a stream from the content of the file at the given path.
+    /// Byte order marks are honoured and the default byte buffer length is used.
+    public CharStream(string path, Encoding encoding)
+           : this(path, encoding, true, DefaultByteBufferLength) { }
+
+    /// Constructs a stream from the content of the file at the given path,
+    /// using the default byte buffer length.
+    public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
+           : this(path, encoding, detectEncodingFromByteOrderMarks, DefaultByteBufferLength) { }
+
+    /// Constructs a stream from the content of the file at the given path.
+    /// The file is read completely and closed again before the constructor
+    /// returns, whether it succeeds or throws. The stream name is set to the path.
+    public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int byteBufferLength) {
+        if (encoding == null) throw new ArgumentNullException("encoding");
+        Stream stream = new FileStream(path, FileMode.Open, FileAccess.Read,
+                                       FileShare.Read, 4096, FileOptions.SequentialScan);
+        try {
+            // leaveOpen == false: on success the helper disposes the file stream.
+            StreamConstructorContinue(stream, false, encoding, detectEncodingFromByteOrderMarks, byteBufferLength);
+            _Name = path;
+        } catch {
+            stream.Dispose();
+            throw;
+        }
+    }
+#endif
+
+    /// Constructs a stream from the bytes of the given Stream. The Stream is
+    /// disposed after it has been read; byte order marks are honoured.
+    public CharStream(Stream stream, Encoding encoding)
+           : this(stream, false, encoding, true, DefaultByteBufferLength) { }
+
+    /// Constructs a stream from the bytes of the given Stream; byte order marks are honoured.
+    public CharStream(Stream stream, bool leaveOpen, Encoding encoding)
+           : this(stream, leaveOpen, encoding, true, DefaultByteBufferLength) { }
+
+    /// Constructs a stream from the bytes of the given Stream, using the default byte buffer length.
+    public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks)
+           : this(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks, DefaultByteBufferLength) { }
+
+    /// Constructs a stream from the bytes of the given Stream.
+    /// The Stream is read to its end and decoded into an in-memory string.
+    /// Unless leaveOpen is true, the Stream is disposed once it has been read.
+    /// Throws an ArgumentNullException if stream or encoding is null and an
+    /// ArgumentException if the stream is not readable.
+    public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks, int byteBufferLength) {
+        if (stream == null) throw new ArgumentNullException("stream");
+        if (!stream.CanRead) throw new ArgumentException("stream is not readable");
+        if (encoding == null) throw new ArgumentNullException("encoding");
+        StreamConstructorContinue(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks, byteBufferLength);
+    }
+
+    // Shared tail of the Stream and file constructors: reads the stream to
+    // its end, skips a detected preamble, decodes the remaining bytes into
+    // String and initializes the index fields.
+    // If this method returns normally and leaveOpen is false, the stream has
+    // been disposed. If it throws, the stream is left to the caller.
+    private void StreamConstructorContinue(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks, int byteBufferLength) {
+        // the ByteBuffer must be larger than the longest detectable preamble
+        if (byteBufferLength < MinimumByteBufferLength) byteBufferLength = MinimumByteBufferLength;
+
+        // -1 (or, after the preamble has been subtracted, any negative value)
+        // means that the number of remaining bytes is not known.
+        int remainingBytesCount = -1;
+        long streamPosition;
+        if (stream.CanSeek) {
+            streamPosition = stream.Position;
+            long remainingBytesCount64 = stream.Length - streamPosition;
+            if (remainingBytesCount64 <= Int32.MaxValue) {
+                remainingBytesCount = (int)remainingBytesCount64;
+                // no need for a buffer that is larger than the rest of the stream
+                if (remainingBytesCount < byteBufferLength) byteBufferLength = remainingBytesCount;
+            }
+        } else {
+            streamPosition = 0;
+        }
+
+        // byteBufferLength should be larger than the longest detectable preamble
+        byte[] byteBuffer = new byte[byteBufferLength];
+        int byteBufferCount = 0;
+        bool flush = false;
+        // Read until enough bytes for the preamble detection are buffered
+        // or the stream is exhausted.
+        do {
+            int n = stream.Read(byteBuffer, byteBufferCount, byteBuffer.Length - byteBufferCount);
+            if (n == 0) {
+                // end of stream: now the exact number of bytes is known
+                remainingBytesCount = byteBufferCount;
+                flush = true;
+                break;
+            }
+            byteBufferCount += n;
+        } while (byteBufferCount < MinimumByteBufferLength);
+        streamPosition += byteBufferCount;
+
+        int preambleLength = Text.DetectPreamble(byteBuffer, byteBufferCount, ref encoding, detectEncodingFromByteOrderMarks);
+        remainingBytesCount -= preambleLength;
+        Encoding = encoding;
+        _Line = 1;
+        if (remainingBytesCount != 0) {
+            int charBufferLength = encoding.GetMaxCharCount(byteBufferLength); // might throw
+            char[] charBuffer = new char[charBufferLength];
+            int stringBufferCapacity = 2*charBufferLength;
+            if (remainingBytesCount > 0) {
+                try {
+                    stringBufferCapacity = encoding.GetMaxCharCount(remainingBytesCount); // might throw
+                } catch (ArgumentOutOfRangeException) { } // keep the default capacity
+            }
+            var sb = new StringBuilder(stringBufferCapacity);
+            var decoder = encoding.GetDecoder();
+            Debug.Assert(preambleLength < byteBufferCount);
+            int byteBufferIndex = preambleLength;
+            for (;;) {
+                try {
+                    int charBufferCount = decoder.GetChars(byteBuffer, byteBufferIndex, byteBufferCount - byteBufferIndex, charBuffer, 0, flush);
+                    sb.Append(charBuffer, 0, charBufferCount);
+                } catch (DecoderFallbackException e) {
+                    // record where in the stream the invalid bytes were found
+                    e.Data.Add("Stream.Position", streamPosition - (byteBufferCount - byteBufferIndex) + e.Index);
+                    throw;
+                }
+                if (flush) break;
+                byteBufferIndex = 0;
+                byteBufferCount = stream.Read(byteBuffer, 0, byteBuffer.Length);
+                streamPosition += byteBufferCount;
+                // a final pass with 0 bytes flushes the state of the decoder
+                flush = byteBufferCount == 0;
+            }
+            String = sb.ToString();
+        } else {
+            String = "";
+        }
+        // The whole content is buffered in String now. Dispose the stream for
+        // empty input too; otherwise an empty (or preamble-only) stream or
+        // file would stay open although leaveOpen is false.
+        if (!leaveOpen) stream.Dispose();
+        if (String.Length != 0) {
+            // Idx = 0
+            IndexEnd = String.Length;
+        } else {
+            Idx = Int32.MinValue;
+            // IndexEnd = 0
+        }
+    }
+
+    /// The low trust version of the CharStream class implements the IDisposable
+    /// interface only for API compatibility. The Dispose method does not need to be called on
+    /// low trust CharStream instances, because the instances hold no resources that need to be disposed.
+    public void Dispose() {}
+
+    /// Runs the parser function on a new stream over chars[index .. index + length - 1]
+    /// with the given user state and stream name, and returns the result of the function.
+    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Reliability", "CA2000:Dispose objects before losing scope", Justification="The CharStream is manually disposed.")]
+    public static T ParseString<T,TUserState>(string chars, int index, int length,
+                                               FSharpFunc<CharStream<TUserState>,T> parser,
+                                               TUserState userState,
+                                               string streamName)
+    {
+        var stream = new CharStream<TUserState>(chars, index, length);
+        stream.UserState = userState;
+        stream._Name = streamName;
+        return parser.Invoke(stream);
+    }
+
+    /// Moves the stream to the char with the given stream index and increments the StateTag.
+    /// An index at or beyond the end of the stream moves the stream to the end.
+    /// Throws an ArgumentOutOfRangeException (without changing the state)
+    /// if the index is less than IndexOfFirstChar.
+    public void Seek(long index) {
+        long idx = unchecked(index - StringToStreamIndexOffset);
+        if (idx >= IndexBegin && idx < IndexEnd) {
+            Idx = (int)idx;
+            ++StateTag;
+            return;
+        }
+        if (index < IndexOfFirstChar)
+            throw (new ArgumentOutOfRangeException("index", "The index is negative or less than the IndexOfFirstChar."));
+        ++StateTag;
+        Idx = Int32.MinValue;
+    }
+
+    /// An opaque token for the current position of the stream.
+    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
+    public CharStreamIndexToken IndexToken { get {
+        return new CharStreamIndexToken(
+                       #if DEBUG
+                           this,
+                       #endif
+                           Idx
+                   );
+    } }
+
[MethodImplAttribute(MethodImplOptions.NoInlining)] + private void ThrowInvalidIndexToken() { + throw new ArgumentException("The CharStreamIndexToken is invalid."); + } + + public void Seek(CharStreamIndexToken indexToken) { + int idx = indexToken.Idx; + if (idx == -1) ThrowInvalidIndexToken(); // tests for zero-initialized IndexTokens + #if DEBUG + Debug.Assert(this == indexToken.CharStream); + #endif + Idx = idx; + Debug.Assert((Idx >= IndexBegin && Idx < IndexEnd) || Idx == Int32.MinValue); + ++StateTag; + } + + public string ReadFrom(CharStreamIndexToken indexToken) { + int idx = indexToken.Idx; + if (idx == -1) ThrowInvalidIndexToken(); // tests for zero-initialized IndexTokens + #if DEBUG + Debug.Assert(this == indexToken.CharStream); + #endif + return ReadFrom(idx); + } + + internal string ReadFrom(int idx0) { + if (idx0 >= 0) { + Debug.Assert(idx0 >= IndexBegin && idx0 < IndexEnd); + if (idx0 <= Idx) + return String.Substring(idx0, Idx - idx0); + if (Idx < 0) + return String.Substring(idx0, IndexEnd - idx0); + } else { + Debug.Assert(idx0 == Int32.MinValue); + if (Idx < 0) return ""; + } + throw new ArgumentException("The current position of the stream must not lie before the position corresponding to the given CharStreamIndexToken/CharStreamState."); + } + + public void RegisterNewline() { + ++_Line; + var index = Index; + Debug.Assert(index != _LineBegin); + _LineBegin = index; + ++StateTag; + } + + private void RegisterNewLineBegin(int stringLineBegin, int lineOffset) { + Debug.Assert(lineOffset > 0 + && ((Idx >= stringLineBegin && Idx < IndexEnd) || Idx == Int32.MinValue) + && stringLineBegin >= IndexBegin && stringLineBegin <= IndexEnd); + _Line += lineOffset; + long newLineBegin = (uint)stringLineBegin + StringToStreamIndexOffset; + Debug.Assert(newLineBegin > _LineBegin); + _LineBegin = newLineBegin; + ++StateTag; + } + + public void RegisterNewlines(int lineOffset, int newColumnMinus1) { + _Line += lineOffset; + Debug.Assert(_Line > 0 && 
newColumnMinus1 >= 0); + var newLineBegin = Index - newColumnMinus1; + Debug.Assert(lineOffset != 0 && newLineBegin != _LineBegin); + _LineBegin = Index - newColumnMinus1; + ++StateTag; + } + + public void RegisterNewlines(long lineOffset, long newColumnMinus1) { + _Line += lineOffset; + Debug.Assert(_Line > 0 && newColumnMinus1 >= 0); + var newLineBegin = Index - newColumnMinus1; + Debug.Assert(lineOffset != 0 && newLineBegin != _LineBegin); + _LineBegin = Index - newColumnMinus1; + ++StateTag; + } + + + public char Peek() { + int idx = Idx; + if (idx >= 0) return String[idx]; + return EOS; + } + + public void Skip() { + int idx = Idx + 1; + if (unchecked((uint)idx) < (uint)IndexEnd) { + Idx = idx; + ++StateTag; + } else if (idx == IndexEnd) { + Idx = Int32.MinValue; + ++StateTag; + } + } + + public char Read() { + int idx = Idx; + if (idx >= 0) { + char c = String[idx]; + ++idx; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + ++StateTag; + return c; + } + return EOS; + } + + public char SkipAndPeek() { + int idx = Idx + 1; + if (unchecked((uint)idx) < (uint)IndexEnd) { + Idx = idx; + ++StateTag; + return String[idx]; + } else if (idx == IndexEnd) { + Idx = Int32.MinValue; + ++StateTag; + } + return EOS; + } + + public TwoChars Peek2() { + int idx = Idx + 1; + if (unchecked((uint)idx) < (uint)IndexEnd) + return new TwoChars(String[idx - 1], String[idx]); + else if (idx == IndexEnd) + return new TwoChars(String[idx - 1], EOS); + else + return new TwoChars(EOS, EOS); + } + + public char Peek(uint utf16Offset) { + int n = unchecked((int)utf16Offset); + if (n >= 0) { // utf16Offset <= Int32.MaxValue + int idx = unchecked(Idx + n); + if (unchecked((uint)idx) < (uint)IndexEnd) + return String[idx]; + } + return EOS; + } + + public void Skip(uint utf16Offset) { + ++StateTag; + int n = unchecked((int)utf16Offset); + if (n >= 0) { // utf16Offset <= Int32.MaxValue + int idx = unchecked(Idx + n); + if (unchecked((uint)idx) < (uint)IndexEnd) { + Idx = idx; + 
return; + } + } + Idx = Int32.MinValue; + return; + } + + public char SkipAndPeek(uint utf16Offset) { + ++StateTag; + int n = unchecked((int)utf16Offset); + if (n >= 0) { // utf16Offset <= Int32.MaxValue + int idx = unchecked(Idx + n); + if (unchecked((uint)idx) < (uint)IndexEnd) { + Idx = idx; + return String[idx]; + } + } + Idx = Int32.MinValue; + return EOS; + } + + public char Peek(int utf16Offset) { + int idx = unchecked(Idx + utf16Offset); + if (utf16Offset < 0) goto Negative; + if (unchecked((uint)idx) >= (uint)IndexEnd) goto EndOfStream; + ReturnChar: + return String[idx]; + Negative: + if (Idx >= 0) { + if (idx >= IndexBegin) goto ReturnChar; + } else { + idx = IndexEnd + utf16Offset; + if (idx >= IndexBegin) goto ReturnChar; + } + EndOfStream: + return EOS; + } + + public void Skip(int utf16Offset) { + ++StateTag; + int idx = unchecked(Idx + utf16Offset); + if (utf16Offset < 0) goto Negative; + if (unchecked((uint)idx) >= (uint)IndexEnd) goto EndOfStream; + Return: + Idx = idx; + return; + Negative: + if (Idx >= 0) { + if (idx >= IndexBegin) goto Return; + } else { + idx = IndexEnd + utf16Offset; + if (idx >= IndexBegin) goto Return; + } + --StateTag; + throw new ArgumentOutOfRangeException("utf16Offset", "Index + utf16Offset is negative or less than the index of the first char in the CharStream."); + EndOfStream: + idx = Int32.MinValue; + goto Return; + } + + public void Skip(long utf16Offset) { + if (unchecked((int)utf16Offset) == utf16Offset) { + Skip((int)utf16Offset); + } else { + if (utf16Offset < 0) throw new ArgumentOutOfRangeException("utf16Offset", "Index + utf16Offset is negative or less than the index of the first char in the CharStream."); + ++StateTag; + Idx = Int32.MinValue; + } + } + + public char SkipAndPeek(int utf16Offset) { + ++StateTag; + int idx = unchecked(Idx + utf16Offset); + if (utf16Offset < 0) goto Negative; + if (unchecked((uint)idx) >= (uint)IndexEnd) goto EndOfStream; + ReturnChar: + Idx = idx; + return String[idx]; + 
Negative: + if (Idx >= 0) { + if (idx >= IndexBegin) goto ReturnChar; + } else { + idx = IndexEnd + utf16Offset; + if (idx >= IndexBegin) goto ReturnChar; + if (IndexBegin == IndexEnd) goto EndOfStream; + } + Idx = IndexBegin; + return EOS; + EndOfStream: + Idx = Int32.MinValue; + return EOS; + } + + public string PeekString(int length) { + if (length < 0) throw new ArgumentOutOfRangeException("length", "length is negative."); + int idx = Idx; + if (unchecked((uint)idx) + (uint)length <= (uint)IndexEnd) + return String.Substring(idx, length); + else + return idx < 0 ? "" : String.Substring(idx, IndexEnd - idx); + } + + public string Read(int length) { + if (length < 0) throw new ArgumentOutOfRangeException("length", "length is negative."); + ++StateTag; + var idx = Idx; + int newIdx = unchecked(idx + length); + if (unchecked((uint)newIdx) < (uint)IndexEnd) { + Idx = newIdx; + return String.Substring(idx, length); + } else { + Idx = Int32.MinValue; + return idx < 0 ? "" : String.Substring(idx, IndexEnd - idx); + } + } + + public int PeekString(char[] buffer, int bufferIndex, int length) { + return Read(buffer, bufferIndex, length, true); + } + public int Read(char[] buffer, int bufferIndex, int length) { + return Read(buffer, bufferIndex, length, false); + } + private int Read(char[] buffer, int bufferIndex, int length, bool backtrack) { + if (bufferIndex < 0) + throw new ArgumentOutOfRangeException("bufferIndex", "bufferIndex is negative."); + if (length < 0 || bufferIndex > buffer.Length - length) + throw new ArgumentOutOfRangeException("length", "bufferIndex or length is out of range."); + if (unchecked((uint)Idx) + (uint)length < (uint)IndexEnd) { + for (int i = 0; i < length; ++i) + buffer[bufferIndex + i] = String[Idx + i]; + if (!backtrack) { + Idx += length; + ++StateTag; + } + return length; + } else if (Idx >= 0) { + int n = IndexEnd - Idx; + for (int i = 0; i < n; ++i) + buffer[bufferIndex + i] = String[Idx + i]; + if (!backtrack) { + Idx = 
Int32.MinValue; + ++StateTag; + } + return n; + } else { + return 0; + } + } + + public bool Match(char ch) { + return Idx >= 0 && String[Idx] == ch; + } + + public bool MatchCaseFolded(char caseFoldedChar) { + return Idx >= 0 && CaseFoldTable.FoldedChars[String[Idx]] == caseFoldedChar; + } + + public bool Skip(char ch) { + int idx = Idx; + if (idx >= 0 && String[idx] == ch) { + ++idx; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + ++StateTag; + return true; + } + return false; + } + + public bool SkipCaseFolded(char caseFoldedChar) { + int idx = Idx; + if (idx >= 0 && CaseFoldTable.FoldedChars[String[idx]] == caseFoldedChar) { + ++idx; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + ++StateTag; + return true; + } + return false; + } + + public bool Skip(TwoChars twoChars) { + int idx2 = unchecked(Idx + 2); + if (unchecked((uint)idx2) < (uint)IndexEnd) { + if (String[Idx] == twoChars.Char0 && String[Idx + 1] == twoChars.Char1) { + ++StateTag; + Idx = idx2; + return true; + } + } else if (idx2 == IndexEnd && String[Idx] == twoChars.Char0 && String[Idx + 1] == twoChars.Char1) { + ++StateTag; + Idx = Int32.MinValue; + return true; + } + return false; + } + + public bool Match(string chars) { + if (unchecked((uint)Idx) + (uint)chars.Length <= (uint)IndexEnd) { + for (int i = 0; i < chars.Length; ++i) + if (chars[i] != String[Idx + i]) goto ReturnFalse; + return true; + } + return chars.Length == 0; + ReturnFalse: + return false; + } + + public bool Skip(string chars) { + int newIdx = unchecked(Idx + chars.Length); + if (unchecked((uint)newIdx) <= (uint)IndexEnd) { + for (int i = 0; i < chars.Length; ++i) + if (chars[i] != String[Idx + i]) goto ReturnFalse; + if (newIdx == IndexEnd) newIdx = Int32.MinValue; + Idx = newIdx; + ++StateTag; + return true; + } + return chars.Length == 0; + ReturnFalse: + return false; + } + + public bool MatchCaseFolded(string caseFoldedChars) { + if (unchecked((uint)Idx) + (uint)caseFoldedChars.Length <= 
(uint)IndexEnd) { + for (int i = 0; i < caseFoldedChars.Length; ++i) + if (caseFoldedChars[i] != CaseFoldTable.FoldedChars[String[Idx + i]]) goto ReturnFalse; + return true; + } + return caseFoldedChars.Length == 0; + ReturnFalse: + return false; + } + + public bool SkipCaseFolded(string caseFoldedChars) { + int newIdx = unchecked(Idx + caseFoldedChars.Length); + if (unchecked((uint)newIdx) <= (uint)IndexEnd) { + for (int i = 0; i < caseFoldedChars.Length; ++i) + if (caseFoldedChars[i] != CaseFoldTable.FoldedChars[String[Idx + i]]) goto ReturnFalse; + if (newIdx == IndexEnd) newIdx = Int32.MinValue; + Idx = newIdx; + ++StateTag; + return true; + } + return caseFoldedChars.Length == 0; + ReturnFalse: + return false; + } + + public bool Match(char[] chars, int charsIndex, int length) { + return Skip(chars, charsIndex, length, true); + } + public bool Skip(char[] chars, int charsIndex, int length) { + return Skip(chars, charsIndex, length, false); + } + private bool Skip(char[] chars, int charsIndex, int length, bool backtrackEvenIfCharsMatch) { + if (charsIndex < 0) + throw new ArgumentOutOfRangeException("charsIndex", "charsIndex is negative."); + if (length < 0 || charsIndex > chars.Length - length) + throw new ArgumentOutOfRangeException("length", "charsIndex or length is out of range."); + int newIdx = unchecked(Idx + length); + if (unchecked((uint)newIdx) <= (uint)IndexEnd) { + for (int i = 0; i < length; ++i) + if (chars[charsIndex + i] != String[Idx + i]) goto ReturnFalse; + if (!backtrackEvenIfCharsMatch) { + if (newIdx == IndexEnd) newIdx = Int32.MinValue; + Idx = newIdx; + ++StateTag; + return true; + } + return true; + } + return length == 0; + ReturnFalse: + return false; + } + + public Match Match(Regex regex) { + if (Idx >= 0) return regex.Match(String, Idx, IndexEnd - Idx); + return regex.Match(""); + } + + public bool SkipWhitespace() { + int lineBegin = 0; + int lineOffset = 0; + int idx = Idx; + int end = IndexEnd; + if (idx >= 0) { + char c = 
String[idx]; + ++idx; + if (c > ' ') goto ReturnFalse; + if (c == ' ') { + if (idx != end && String[idx] > ' ') { + Idx = idx; + ++StateTag; + return true; + } + goto Loop; + } else { + if (c == '\r') { + if (idx != end && String[idx] == '\n') ++idx; + } else if (c != '\n') goto CheckTab; + if (idx != end && String[idx] > ' ') { + Idx = idx; + RegisterNewline(); + return true; + } + goto Newline; + CheckTab: + if (c != '\t') goto ReturnFalse; + goto Loop; + } + Newline: + lineBegin = idx; + ++lineOffset; + Loop: + for (;;) { + if (idx != end) { + c = String[idx]; + ++idx; + if (c != ' ') { + if (c != '\t') { + if (c == '\r') { + if (idx != end && String[idx] == '\n') ++idx; + goto Newline; + } + if (c == '\n') goto Newline; + // end of whitespace + --idx; + break; + } + } + } else { // end of stream, + idx = Int32.MinValue; + break; + } + } + Idx = idx; + if (lineOffset == 0) { + ++StateTag; + return true; + } else { + RegisterNewLineBegin(lineBegin, lineOffset); + return true; + } + } + ReturnFalse: + return false; + } + + public bool SkipUnicodeWhitespace() { + int lineBegin = 0; + int lineOffset = 0; + int idx = Idx; + int end = IndexEnd; + if (idx >= 0) { + char c = String[idx]; + ++idx; + if (c == ' ') goto Loop; + if (!Text.IsWhitespace(c)) goto ReturnFalse; + if (c <= '\r') { + if (c == '\r') { + if (idx != end && String[idx] == '\n') ++idx; + } else if (c != '\n') goto Loop; + } else if (c < '\u2028' ? c != '\u0085' : c > '\u2029') goto Loop; + Newline: + lineBegin = idx; + ++lineOffset; + Loop: + for (;;) { + if (idx != end) { + c = String[idx]; + ++idx; + if (c != ' ') { + if (Text.IsWhitespace(c)) { + if (c <= '\r') { + if (c == '\r') { + if (idx != end && String[idx] == '\n') ++idx; + goto Newline; + } + if (c == '\n') goto Newline; + } else if (c < '\u2028' ? 
c == '\u0085' : c <= '\u2029') goto Newline; + } else { // end of whitespace + --idx; + break; + } + } + } else { // end of stream + idx = Int32.MinValue; + break; + } + } + Idx = idx; + if (lineOffset == 0) { + ++StateTag; + return true; + } else { + RegisterNewLineBegin(lineBegin, lineOffset); + return true; + } + } + ReturnFalse: + return false; + } + + public bool SkipNewline() { + int idx = Idx; + if (idx >= 0) { + char c = String[idx]; + ++idx; + if (c == '\r') { + if (idx != IndexEnd && String[idx] == '\n') ++idx; + } else if (c != '\n') goto ReturnFalse; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + RegisterNewline(); + return true; + } +ReturnFalse: + return false; + } + + public bool SkipUnicodeNewline() { + int idx = Idx; + if (idx >= 0) { + char c = String[idx]; + ++idx; + if (c < '\u0085') { + if (c == '\r') { + if (idx != IndexEnd && String[idx] == '\n') ++idx; + } else if (c != '\n') goto ReturnFalse; + } else if (c >= '\u2028' ? c > '\u2029' : c != '\u0085') goto ReturnFalse; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + RegisterNewline(); + return true; + } + ReturnFalse: + return false; + } + + public int SkipNewlineThenWhitespace(int powerOf2TabStopDistance, bool allowFormFeed) { + int tabStopDistanceMinus1 = unchecked(powerOf2TabStopDistance - 1); + if (powerOf2TabStopDistance <= 0 || (powerOf2TabStopDistance & tabStopDistanceMinus1) != 0) + throw new ArgumentOutOfRangeException("powerOf2TabStopDistance", "powerOf2TabStopDistance must be a positive power of 2."); + + int lineBegin = 0; + int lineOffset = 0; + int idx = Idx; + int indexEnd = IndexEnd; + char c = '\u0000'; + if (idx >= 0) c = String[idx]; + ++idx; + if (c == '\r') { + if (idx != indexEnd && String[idx] == '\n') ++idx; + } else if (c != '\n') { + return -1; + } + Newline: + lineBegin = idx; + ++lineOffset; + int ind = 0; + for (;;) { + if (idx != indexEnd) { + c = String[idx]; + ++idx; + if (c == ' ') { + ind = unchecked(ind + 1); + if (ind >= 0) 
continue; + // indentation has overflown, so put back ' ' and return + ind = unchecked(ind - 1); + } else if (c <= '\r') { + if (c == '\r') { + if (idx != indexEnd && String[idx] == '\n') ++idx; + goto Newline; + } + if (c == '\n') goto Newline; + if (c == '\t') { + // ind = ind + tabStopDistance - ind%tabStopDistance + int d = tabStopDistanceMinus1 - (ind & tabStopDistanceMinus1); + ind = unchecked(ind + d + 1); + if (ind >= 0) continue; + // indentation has overflown, so put back '\t' and return + ind = unchecked(ind - d - 1); + } else if (c == '\f' && allowFormFeed) { + ind = 0; + continue; + } + } + // end of indentation + --idx; + break; + } else { + // end of stream; + idx = Int32.MinValue; + break; + } + } + Idx = idx; + RegisterNewLineBegin(lineBegin, lineOffset); + return ind; + } + + public void SkipRestOfLine(bool skipNewline) { + int idx = Idx; + int indexEnd = IndexEnd; + if (idx >= 0) { + for (;;) { + char c = String[idx]; + if (c > '\r') { + if (++idx == indexEnd) break; + } else if (c != '\r' && c != '\n') { + if (++idx == indexEnd) break; + } else { + if (!skipNewline) { + if (idx != Idx) { + Idx = idx; + ++StateTag; + } + return; + } else { + ++idx; + if (c == '\r' && idx != indexEnd && String[idx] == '\n') ++idx; + if (idx == indexEnd) idx = Int32.MinValue; + Idx = idx; + RegisterNewline(); + return; + } + } + } + // idx == indexEnd + { + Idx = Int32.MinValue; + ++StateTag; + } + } + } + + public string ReadRestOfLine(bool skipNewline) { + int idx = Idx; + int indexEnd = IndexEnd; + if (idx >= 0) { + for (;;) { + char c = String[idx]; + if (c > '\r') { + if (++idx == indexEnd) break; + } else if (c != '\r' && c != '\n') { + if (++idx == indexEnd) break; + } else { + int idx0 = Idx; + if (!skipNewline) { + if (idx != idx0) { + Idx = idx; + ++StateTag; + return String.Substring(idx0, idx - idx0); + } else { + return ""; + } + } else { + var skippedString = idx == idx0 ? 
"" : String.Substring(idx0, idx - idx0); + ++idx; + if (c == '\r' && idx != indexEnd && String[idx] == '\n') ++idx; + if (idx == indexEnd) idx = Int32.MinValue; + Idx = idx; + RegisterNewline(); + return skippedString; + } + } + } + // idx == indexEnd + { + int idx0 = Idx; + Idx = Int32.MinValue; + ++StateTag; + return String.Substring(idx0, indexEnd - idx0); + } + } + return ""; + } + + public char ReadCharOrNewline() { + int idx = Idx; + if (idx >= 0) { + char c = String[idx]; + ++idx; + if (c != '\r') { + if (c != '\n') { + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + ++StateTag; + return c; + } + } else if (idx != IndexEnd && String[idx] == '\n') ++idx; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + RegisterNewline(); + return '\n'; + } + return EOS; + } + + public int SkipCharsOrNewlines(int maxCharsOrNewlines) { + if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); + int lineBegin = 0; + int lineOffset = 0; + int nCRLF = 0; + int idx = Idx; + if (idx >= 0 && maxCharsOrNewlines > 0) { + uint end2 = (uint)idx + (uint)maxCharsOrNewlines; + int end = end2 > (uint)IndexEnd ? 
IndexEnd : (int)end2; + for (;;) { + if (idx >= end) break; + char c = String[idx]; + ++idx; + if (c <= '\r') { + if (c == '\r') { + if (idx != IndexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + if (end != IndexEnd) ++end; + } + } else if (c != '\n') continue; + lineBegin = idx; + ++lineOffset; + } + } + int count = idx - Idx - nCRLF; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + if (lineOffset == 0) + ++StateTag; + else + RegisterNewLineBegin(lineBegin, lineOffset); + return count; + } + return 0; + } + + public string ReadCharsOrNewlines(int maxCharsOrNewlines, bool normalizeNewlinesInReturnString) { + if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); + int lineBegin = 0; + int lineOffset = 0; + int nCRLF = 0; + int nCR = 0; + int idx = Idx; + int indexEnd = IndexEnd; + if (idx >= 0 && maxCharsOrNewlines > 0) { + uint end2 = (uint)idx + (uint)maxCharsOrNewlines; + int end = end2 > (uint)indexEnd ? indexEnd : (int)end2; + for (;;) { + if (idx >= end) break; + char c = String[idx]; + ++idx; + if (c <= '\r') { + if (c == '\r') { + if (idx != indexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + if (end != indexEnd) ++end; + } else { + ++nCR; + } + } else if (c != '\n') continue; + lineBegin = idx; + ++lineOffset; + } + } + int idx0 = Idx; + int length = idx - idx0; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + if (lineOffset == 0) { + ++StateTag; + return String.Substring(idx0, length); + } else { + RegisterNewLineBegin(lineBegin, lineOffset); + return !normalizeNewlinesInReturnString || (nCR | nCRLF) == 0 + ? 
String.Substring(idx0, length) + : Text.CopyWithNormalizedNewlines(String, idx0, length, nCRLF, nCR); + } + } + return ""; + } + + public int SkipCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f) { + return SkipCharsOrNewlinesWhile(f, f); + } + public int SkipCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f1, Microsoft.FSharp.Core.FSharpFunc f) { + int lineOffset = 0; + int nCRLF = 0; + int lineBegin = 0; + int idx = Idx; + int end = IndexEnd; + if (idx >= 0) { + char c = String[idx]; + ++idx; + if (c > '\r') { + if (!f1.Invoke(c)) goto ReturnEmpty; + } else if (c == '\r') { + if (!f1.Invoke('\n')) goto ReturnEmpty; + if (idx != end && String[idx] == '\n') { + ++idx; + ++nCRLF; + } + lineBegin = idx; + ++lineOffset; + } else { + if (!f1.Invoke(c)) goto ReturnEmpty; + if (c == '\n') { + lineBegin = idx; + ++lineOffset; + } + } + for (;;) { + if (idx == end) goto ReturnCount; + c = String[idx]; + ++idx; + if (c > '\r') { + if (!f.Invoke(c)) break; + } else if (c == '\r') { + if (!f.Invoke('\n')) break; + if (idx != end && String[idx] == '\n') { + ++idx; + ++nCRLF; + } + lineBegin = idx; + ++lineOffset; + } else { + if (!f.Invoke(c)) break; + if (c == '\n') { + lineBegin = idx; + ++lineOffset; + } + } + } + --idx; + ReturnCount: + int count = idx - Idx - nCRLF; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + if (lineOffset == 0) + ++StateTag; + else + RegisterNewLineBegin(lineBegin, lineOffset); + return count; + } + ReturnEmpty: + return 0; + } + + public string ReadCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f, bool normalizeNewlines) { + return ReadCharsOrNewlinesWhile(f, f, normalizeNewlines); + } + public string ReadCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f1, Microsoft.FSharp.Core.FSharpFunc f, bool normalizeNewlinesInReturnString) { + int lineOffset = 0; + int nCR = 0; + int nCRLF = 0; + int lineBegin = 0; + int idx = Idx; + int indexEnd = IndexEnd; + if (idx >= 0) { + char c = String[idx]; + ++idx; + if (c > 
'\r') { + if (!f1.Invoke(c)) goto ReturnEmpty; + } else if (c == '\r') { + if (!f1.Invoke('\n')) goto ReturnEmpty; + if (idx != indexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + } else { + ++nCR; + } + lineBegin = idx; + ++lineOffset; + } else { + if (!f1.Invoke(c)) goto ReturnEmpty; + if (c == '\n') { + lineBegin = idx; + ++lineOffset; + } + } + for (;;) { + if (idx == indexEnd) goto ReturnString; + c = String[idx]; + ++idx; + if (c > '\r') { + if (!f.Invoke(c)) break; + } else if (c == '\r') { + if (!f.Invoke('\n')) break; + if (idx != indexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + } else { + ++nCR; + } + lineBegin = idx; + ++lineOffset; + } else { + if (!f.Invoke(c)) break; + if (c == '\n') { + lineBegin = idx; + ++lineOffset; + } + } + } + --idx; + ReturnString: + int idx0 = Idx; + int length = idx - idx0; + if (idx == indexEnd) idx = Int32.MinValue; + Idx = idx; + if (lineOffset == 0) { + ++StateTag; + return String.Substring(idx0, length); + } else { + RegisterNewLineBegin(lineBegin, lineOffset); + return !normalizeNewlinesInReturnString || (nCR | nCRLF) == 0 + ? String.Substring(idx0, length) + : Text.CopyWithNormalizedNewlines(String, idx0, length, nCRLF, nCR); + } + } + ReturnEmpty: + return ""; + } + + public int SkipCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f, int minCharsOrNewlines, int maxCharsOrNewlines) { + return SkipCharsOrNewlinesWhile(f, f, minCharsOrNewlines, maxCharsOrNewlines); + } + public int SkipCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f1, Microsoft.FSharp.Core.FSharpFunc f, int minCharsOrNewlines, int maxCharsOrNewlines) { + if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); + int lineBegin = 0; + int lineOffset = 0; + int nCRLF = 0; + int idx = Idx; + int indexEnd = IndexEnd; + if (idx >= 0 && maxCharsOrNewlines > 0) { + uint end2 = (uint)idx + (uint)maxCharsOrNewlines; + int end = end2 > (uint)indexEnd ? 
indexEnd : (int)end2; + char c = String[idx]; + ++idx; + if (c > '\r') { + if (!f1.Invoke(c)) goto ReturnEmpty; + } else if (c == '\r') { + if (!f1.Invoke('\n')) goto ReturnEmpty; + if (idx != indexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + if (end != indexEnd) ++end; + } + lineBegin = idx; + ++lineOffset; + } else { + if (!f1.Invoke(c)) goto ReturnEmpty; + if (c == '\n') { + lineBegin = idx; + ++lineOffset; + } + } + for (;;) { + if (idx >= end) goto ReturnCount; + c = String[idx]; + ++idx; + if (c > '\r') { + if (!f.Invoke(c)) break; + } else if (c == '\r') { + if (!f.Invoke('\n')) break; + if (idx != indexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + if (end != indexEnd) ++end; + } + lineBegin = idx; + ++lineOffset; + } else { + if (!f.Invoke(c)) break; + if (c == '\n') { + lineBegin = idx; + ++lineOffset; + } + } + } + --idx; + ReturnCount: + int count = idx - Idx - nCRLF; + if (count >= minCharsOrNewlines) { + if (idx == indexEnd) idx = Int32.MinValue; + Idx = idx; + if (lineOffset == 0) + ++StateTag; + else + RegisterNewLineBegin(lineBegin, lineOffset); + return count; + } + } + ReturnEmpty: + return 0; + } + + public string ReadCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f, int minCharsOrNewlines, int maxCharsOrNewlines, bool normalizeNewlinesInReturnString) { + return ReadCharsOrNewlinesWhile(f, f, minCharsOrNewlines, maxCharsOrNewlines, normalizeNewlinesInReturnString); + } + public string ReadCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f1, Microsoft.FSharp.Core.FSharpFunc f, int minCharsOrNewlines, int maxCharsOrNewlines, bool normalizeNewlinesInReturnString) { + if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); + int lineBegin = 0; + int lineOffset = 0; + int nCRLF = 0; + int nCR = 0; + int idx = Idx; + int indexEnd = IndexEnd; + if (idx >= 0 && maxCharsOrNewlines > 0) { + uint end2 = (uint)idx + (uint)maxCharsOrNewlines; + int end = end2 > 
(uint)indexEnd ? indexEnd : (int)end2; + char c = String[idx]; + ++idx; + if (c > '\r') { + if (!f1.Invoke(c)) goto ReturnEmpty; + } else if (c == '\r') { + if (!f1.Invoke('\n')) goto ReturnEmpty; + if (idx != indexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + if (end != indexEnd) ++end; + } else { + ++nCR; + } + lineBegin = idx; + ++lineOffset; + } else { + if (!f1.Invoke(c)) goto ReturnEmpty; + if (c == '\n') { + lineBegin = idx; + ++lineOffset; + } + } + for (;;) { + if (idx >= end) goto ReturnString; + c = String[idx]; + ++idx; + if (c > '\r') { + if (!f.Invoke(c)) break; + } else if (c == '\r') { + if (!f.Invoke('\n')) break; + if (idx != indexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + if (end != indexEnd) ++end; + } else { + ++nCR; + } + lineBegin = idx; + ++lineOffset; + } else { + if (!f.Invoke(c)) break; + if (c == '\n') { + lineBegin = idx; + ++lineOffset; + } + } + } + --idx; + ReturnString: + int idx0 = Idx; + int length = idx - idx0; + if (length - nCRLF >= minCharsOrNewlines) { + if (idx == indexEnd) idx = Int32.MinValue; + Idx = idx; + if (lineOffset == 0) { + ++StateTag; + return String.Substring(idx0, length); + } else { + RegisterNewLineBegin(lineBegin, lineOffset); + return !normalizeNewlinesInReturnString || (nCR | nCRLF) == 0 + ? 
String.Substring(idx0, length) + : Text.CopyWithNormalizedNewlines(String, idx0, length, nCRLF, nCR); + } + } + } + ReturnEmpty: + return ""; + } + + private static bool RestOfStringEquals(string str1, int str1Index, string str2) { + for (int i1 = str1Index + 1, i2 = 1; i2 < str2.Length; ++i1, ++i2) { + if (str1[i1] != str2[i2]) goto ReturnFalse; + } + return true; + ReturnFalse: + return false; + } + + private static bool RestOfStringEqualsCI(string str1, int str1Index, string cfStr2) { + char[] cftable = CaseFoldTable.FoldedChars; + for (int i1 = str1Index + 1, i2 = 1; i2 < cfStr2.Length; ++i1, ++i2) { + if (cftable[str1[i1]] != cfStr2[i2]) goto ReturnFalse; + } + return true; + ReturnFalse: + return false; + } + + public int SkipCharsOrNewlinesUntilString(string str, int maxCharsOrNewlines, out bool foundString) { + if (str.Length == 0) throw new ArgumentException("The string argument is empty."); + if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); + // The .NET 64-bit JIT emits inefficient code in the loop if we declare first as as char variable. + int first = str[0]; + int lineBegin = 0; + int lineOffset = 0; + int nCRLF = 0; + int idx = Idx; + int indexEnd = IndexEnd; + int end1 = indexEnd - str.Length; + if (idx >= 0) { + uint end2 = (uint)idx + (uint)maxCharsOrNewlines; + int end = end2 > (uint)indexEnd ? 
indexEnd : (int)end2; + for (;;) { + if (idx < end) { + char c = String[idx]; + if (c != first) { + ++idx; + if (c > '\r' || c == '\t') continue; + } else { + if (idx <= end1 && RestOfStringEquals(String, idx, str)) { + foundString = true; + break; + } + ++idx; + if (c > '\r') continue; + } + if (c == '\r') { + if (idx != indexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + if (end != indexEnd) ++end; + } + } else if (c != '\n') continue; + lineBegin = idx; + ++lineOffset; + } else { + foundString = idx <= end1 && String[idx] == first && RestOfStringEquals(String, idx, str); + break; + } + } + if (idx != Idx) { + int count = idx - Idx - nCRLF; + if (idx == indexEnd) idx = Int32.MinValue; + Idx = idx; + if (lineOffset == 0) + ++StateTag; + else + RegisterNewLineBegin(lineBegin, lineOffset); + return count; + } + } else { + foundString = false; + } + return 0; + } + + public int SkipCharsOrNewlinesUntilString(string str, int maxCharsOrNewlines, bool normalizeNewlinesInOutString, out string skippedCharsIfStringFoundOtherwiseNull) { + if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); + if (str.Length == 0) throw new ArgumentException("The string argument is empty."); + // The .NET 64-bit JIT emits inefficient code in the loop if we declare first as as char variable. + int first = str[0]; + int lineBegin = 0; + int lineOffset = 0; + int nCRLF = 0; + int nCR = 0; + int idx = Idx; + int end1 = IndexEnd - str.Length; + if (idx >= 0) { + uint end2 = (uint)idx + (uint)maxCharsOrNewlines; + int end = end2 > (uint)IndexEnd ? 
IndexEnd : (int)end2; + for (;;) { + if (idx < end) { + char c = String[idx]; + if (c != first) { + ++idx; + if (c > '\r' || c == '\t') continue; + } else { + if (idx <= end1 && RestOfStringEquals(String, idx, str)) break; + ++idx; + if (c > '\r') continue; + } + if (c == '\r') { + if (idx != IndexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + if (end != IndexEnd) ++end; + } else { + ++nCR; + } + } else if (c != '\n') continue; + lineBegin = idx; + ++lineOffset; + } else { + if (idx <= end1 && String[idx] == first && RestOfStringEquals(String, idx, str)) break; + // string not found + skippedCharsIfStringFoundOtherwiseNull = null; + if (idx != Idx) { + int count = idx - Idx - nCRLF; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + if (lineOffset == 0) + ++StateTag; + else + RegisterNewLineBegin(lineBegin, lineOffset); + return count; + } + return 0; + } + } + // found string + int idx0 = Idx; + int length = idx - idx0; + if (length != 0) { + Idx = idx; + if (lineOffset == 0) { + ++StateTag; + skippedCharsIfStringFoundOtherwiseNull = String.Substring(idx0, length); + return length; + } else { + RegisterNewLineBegin(lineBegin, lineOffset); + skippedCharsIfStringFoundOtherwiseNull = + !normalizeNewlinesInOutString || (nCR | nCRLF) == 0 + ? String.Substring(idx0, length) + : Text.CopyWithNormalizedNewlines(String, idx0, length, nCRLF, nCR); + return length - nCRLF; + } + } else { + skippedCharsIfStringFoundOtherwiseNull = ""; + } + } else { + skippedCharsIfStringFoundOtherwiseNull = null; + } + return 0; + } + + public int SkipCharsOrNewlinesUntilCaseFoldedString(string caseFoldedString, int maxCharsOrNewlines, out bool foundString) { + if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); + if (caseFoldedString.Length == 0) throw new ArgumentException("The string argument is empty."); + // The .NET 64-bit JIT emits inefficient code in the loop if we declare first as as char variable. 
+ int first = caseFoldedString[0]; + int lineBegin = 0; + int lineOffset = 0; + int nCRLF = 0; + int idx = Idx; + int end1 = IndexEnd - caseFoldedString.Length; + char[] cftable = CaseFoldTable.FoldedChars; + if (idx >= 0) { + uint end2 = (uint)idx + (uint)maxCharsOrNewlines; + int end = end2 > (uint)IndexEnd ? IndexEnd : (int)end2; + for (;;) { + if (idx < end) { + char c = cftable[String[idx]]; + if (c != first) { + ++idx; + if (c > '\r' || c == '\t') continue; + } else { + if (idx <= end1 && RestOfStringEqualsCI(String, idx, caseFoldedString)) { + foundString = true; + break; + } + ++idx; + if (c > '\r') continue; + } + if (c == '\r') { + if (idx != IndexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + if (end != IndexEnd) ++end; + } + } else if (c != '\n') continue; + lineBegin = idx; + ++lineOffset; + } else { + foundString = idx <= end1 && cftable[String[idx]] == first && RestOfStringEqualsCI(String, idx, caseFoldedString); + break; + } + } + if (idx != Idx) { + int count = idx - Idx - nCRLF; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + if (lineOffset == 0) + ++StateTag; + else + RegisterNewLineBegin(lineBegin, lineOffset); + return count; + } + } else { + foundString = false; + } + return 0; + } + + public int SkipCharsOrNewlinesUntilCaseFoldedString(string caseFoldedString, int maxCharsOrNewlines, bool normalizeNewlinesInOutString, out string skippedCharsIfStringFoundOtherwiseNull) { + if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); + if (caseFoldedString.Length == 0) throw new ArgumentException("The string argument is empty."); + // The .NET 64-bit JIT emits inefficient code in the loop if we declare first as as char variable. 
+ int first = caseFoldedString[0]; + int lineBegin = 0; + int lineOffset = 0; + int nCRLF = 0; + int nCR = 0; + int idx = Idx; + int end1 = IndexEnd - caseFoldedString.Length; + char[] cftable = CaseFoldTable.FoldedChars; + if (idx >= 0) { + uint end2 = (uint)idx + (uint)maxCharsOrNewlines; + int end = end2 > (uint)IndexEnd ? IndexEnd : (int)end2; + for (;;) { + if (idx < end) { + char c = cftable[String[idx]]; + if (c != first) { + ++idx; + if (c > '\r' || c == '\t') continue; + } else { + if (idx <= end1 && RestOfStringEqualsCI(String, idx, caseFoldedString)) break; + ++idx; + if (c > '\r') continue; + } + if (c == '\r') { + if (idx != IndexEnd && String[idx] == '\n') { + ++idx; + ++nCRLF; + if (end != IndexEnd) ++end; + } else { + ++nCR; + } + } else if (c != '\n') continue; + lineBegin = idx; + ++lineOffset; + } else { + if (idx <= end1 && cftable[String[idx]] == first && RestOfStringEqualsCI(String, idx, caseFoldedString)) break; + // string not found + skippedCharsIfStringFoundOtherwiseNull = null; + if (idx != Idx) { + int count = idx - Idx - nCRLF; + if (idx == IndexEnd) idx = Int32.MinValue; + Idx = idx; + if (lineOffset == 0) + ++StateTag; + else + RegisterNewLineBegin(lineBegin, lineOffset); + return count; + } + return 0; + } + } + // found string + int idx0 = Idx; + int length = idx - idx0; + if (length != 0) { + Idx = idx; + if (lineOffset == 0) { + ++StateTag; + skippedCharsIfStringFoundOtherwiseNull = String.Substring(idx0, length); + return length; + } else { + RegisterNewLineBegin(lineBegin, lineOffset); + skippedCharsIfStringFoundOtherwiseNull = + !normalizeNewlinesInOutString || (nCR | nCRLF) == 0 + ? 
String.Substring(idx0, length) + : Text.CopyWithNormalizedNewlines(String, idx0, length, nCRLF, nCR); + return length - nCRLF; + } + } else { + skippedCharsIfStringFoundOtherwiseNull = ""; + } + } else { + skippedCharsIfStringFoundOtherwiseNull = null; + } + return 0; + } +} // class CharStream + + +public struct CharStreamState { +#if DEBUG + internal readonly CharStream CharStream; + private long Index { get { return GetIndex(CharStream); } } +#endif + internal readonly int Idx; +#if SMALL_STATETAG + public readonly uint Tag; +#else + public readonly ulong Tag; +#endif + public readonly long Line; + public readonly long LineBegin; + public readonly TUserState UserState; + public readonly string Name; + + public CharStreamState(CharStream charStream) { + #if DEBUG + CharStream = charStream; + #endif + Idx = charStream.Idx; + Tag = charStream.StateTag; + Line = charStream._Line; + LineBegin = charStream._LineBegin; + UserState = charStream._UserState; + Name = charStream._Name; + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private void ThrowInvalidState() { + throw new InvalidOperationException("The CharStreamState is invalid."); + } + + public CharStreamIndexToken IndexToken { get { + if (Line <= 0) ThrowInvalidState(); // tests for a zero-initialized state + + return new CharStreamIndexToken( + #if DEBUG + CharStream, + #endif + Idx); + } } + + public long GetIndex(CharStream charStreamFromWhichStateWasRetrieved) { + if (Line <= 0) ThrowInvalidState(); // tests for a zero-initialized state + #if DEBUG + Debug.Assert(CharStream == charStreamFromWhichStateWasRetrieved); + #endif + return charStreamFromWhichStateWasRetrieved.GetIndex(Idx); + } + + public Position GetPosition(CharStream charStreamFromWhichStateWasRetrieved) { + if (Line <= 0) ThrowInvalidState(); // tests for a zero-initialized state + #if DEBUG + Debug.Assert(CharStream == charStreamFromWhichStateWasRetrieved); + #endif + long index = 
charStreamFromWhichStateWasRetrieved.GetIndex(Idx); + return new Position(Name, index, Line, index - LineBegin + 1); + } +} + + +/// Provides read‐access to a sequence of UTF‐16 chars. +public sealed class CharStream : CharStream { + internal CharStream(string chars) : base(chars) {} + + public CharStream(string chars, int index, int length) : base(chars, index, length) {} + + public CharStream(string chars, int index, int length, long streamBeginIndex) + : base(chars, index, length, streamBeginIndex) {} + +#if !PCL + public CharStream(string path, Encoding encoding) : base(path, encoding) {} + + public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks) + : base(path, encoding, detectEncodingFromByteOrderMarks) {} + + public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int byteBufferLength) + : base(path, encoding, detectEncodingFromByteOrderMarks, byteBufferLength) {} +#endif + + public CharStream(Stream stream, Encoding encoding) : base(stream, encoding) {} + + public CharStream(Stream stream, bool leaveOpen, Encoding encoding) + : base(stream, leaveOpen, encoding) {} + + public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks) + : base(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks) {} + + public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks, int byteBufferLength) + : base(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks, byteBufferLength) {} + + + internal TUserState _UserState; + public TUserState UserState { + get { return _UserState; } + set { _UserState = value; ++StateTag; } + } + + [DebuggerBrowsable(DebuggerBrowsableState.Never)] + public CharStreamState State { get { + return new CharStreamState(this); + } } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + private void ThrowInvalidState() { + throw new ArgumentException("The CharStreamState is 
invalid."); + } + + public void BacktrackTo(CharStreamState state) { + BacktrackTo(ref state); + } + public void BacktrackTo(ref CharStreamState state) { + if (state.Line <= 0) ThrowInvalidState(); // tests for zero-initialized states + #if DEBUG + Debug.Assert(this == state.CharStream); + #endif + Idx = state.Idx; + Debug.Assert((Idx >= IndexBegin && Idx < IndexEnd) || Idx == Int32.MinValue); + StateTag = state.Tag; + _Line = state.Line; + _LineBegin = state.LineBegin; + _UserState = state.UserState; + _Name = state.Name; + } + + public string ReadFrom(CharStreamState stateWhereStringBegins, bool normalizeNewlines) { + return ReadFrom(ref stateWhereStringBegins, normalizeNewlines); + } + public string ReadFrom(ref CharStreamState state, bool normalizeNewlines) { + if (state.Line <= 0) ThrowInvalidState(); // tests for zero-initialized states + #if DEBUG + Debug.Assert(this == state.CharStream); + #endif + var str = ReadFrom(state.Idx); + if (!normalizeNewlines || state.Line == _Line) return str; + return Text.NormalizeNewlines(str); + } + + public CharStream CreateSubstream(CharStreamState stateWhereSubstreamBegins) { + return CreateSubstream(ref stateWhereSubstreamBegins); + } + public CharStream CreateSubstream(ref CharStreamState stateWhereSubstreamBegins) { + if (stateWhereSubstreamBegins.Line <= 0) ThrowInvalidState(); // tests for zero-initialized states + #if DEBUG + Debug.Assert(this == stateWhereSubstreamBegins.CharStream); + #endif + int idx0 = stateWhereSubstreamBegins.Idx; + if (unchecked((uint)idx0 > (uint)Idx)) + throw new ArgumentException("The current position of the stream must not lie before the position corresponding to the given CharStreamState."); + var subStream = new CharStream(String); + subStream._Name = stateWhereSubstreamBegins.Name; + subStream.Idx = idx0 == Idx ? Int32.MinValue : idx0; + subStream.IndexBegin = idx0 < 0 ? IndexEnd : idx0; + subStream.IndexEnd = Idx < 0 ? 
IndexEnd : Idx; + subStream.StringToStreamIndexOffset = StringToStreamIndexOffset; + subStream._Line = stateWhereSubstreamBegins.Line; + subStream._LineBegin = stateWhereSubstreamBegins.LineBegin; + return subStream; + } +} + +} + +#endif diff --git a/src/FParsecCS/Cloning.cs b/src/FParsecCS/Cloning.cs new file mode 100644 index 0000000..f0a0717 --- /dev/null +++ b/src/FParsecCS/Cloning.cs @@ -0,0 +1,1981 @@ +// Copyright (c) Stephan Tolksdorf 2010-2011 +// License: Simplified BSD License. See accompanying documentation. + +using System.Reflection; +using System.Reflection.Emit; + +#if !LOW_TRUST + +using System; +using System.Collections.Generic; +using System.Reflection; +using System.Runtime.Serialization; +using System.Diagnostics; +using System.Reflection.Emit; + +namespace FParsec.Cloning +{ + + // The classes in this namespace provide a cloning service based on the serialization API. + + // Capturing the state of an object and/or cloning it with this API is often at least an + // order of magnitude faster than doing the same with the BinaryFormatter and a MemoryStream + // (ignoring some initial setup costs and the JITing time). + + // Some implementation details: + // + // The serialization API of the BCL (as supported by the BinaryFormatter) spans several + // interfaces, classes and attributes under the System.Runtime.Serialization namespace. + // Unfortunately the publicly available API documentation provided by Microsoft does not + // sufficiently cover certain important details of the serialization API behaviour. + // + // For example, the documentation only vaguely discusses whether serialization events + // like OnDeserialized are invoked in a certain order on multiple objects in a graph. + // It seems that many API users intuitively expect a certain ordering, at least in + // simple cases, but it is also clear that an ordering can not be guaranteed in all + // cases (e.g. in the case of a cyclic object graph). 
+ // + // The .NET BinaryFormatter seems to attempt to invoke the deserialization events + // on dependent objects first, but its behaviour is inconsistent and in some situations + // arguably buggy. The following bug report discusses these issues in more detail: + // https://connect.microsoft.com/VisualStudio/feedback/details/549277 + // + // For the sake of compatibility with the .NET BinaryFormatter we try to mimic the + // basic principles of its behaviour. However, we certainly do not try to copy every + // bug or inconsistency. + // + // In order to be on the safe side we sort the serialized object graph topologically + // if it contains objects implementing an OnDeserialized handler or the ISerializable + // or IObjectReference interfaces. Since the object graph can contain cycles, we + // first identify strongly connected components using a variant of Tarjan's algorithm. + // Any OnDeserializing handler, deserialization constructor, OnDeserialized handler or + // IObjectReference.GetRealObject method (in that order) is then invoked in the + // topological order starting with the most dependent objects. Objects in a strongly + // connected component (with more than 1 object) are processed in the reverse order + // in which the objects in the component were discovered during a depth-first search + // of the serialized object graph. In a first pass the OnDeserializing handlers and + // deserialization constructors of the objects in the component are invoked. Any + // OnDeserialized handlers are then invoked in a second pass. + // OnSerializing handlers are invoked immediately before an object is serialized. + // OnSerialized handlers are invoked in an undefined order at the end of the + // serialization job (not immediately after an object's subgraph has been serialized).
+ // + // We only allow an object implementing IObjectReference in a cycle of the serialized + // object graph under the following conditions (which are more restrictive than + // what the .NET BinaryFormatter enforces): + // - There may only be 1 object implementing IObjectReference in a cycle. + // - The type implementing IObjectReference must not be a value type. + // - All objects containing references to the IObjectReference object must have reference types. + // - The type implementing IObjectReference must not have any OnDeserialized handler. + // - There must not be any other object in the cycle implementing ISerializable. + // + // Similar to the .NET BinaryFormatter we delay all IDeserializationCallbacks until + // the end of the deserialization of the complete object graph (not just the relevant + // subgraph). As explained in the referenced Connect bug report this behaviour has some + // severe consequences for the usefulness of IDeserializationCallbacks and the + // composability of the whole serialization API. However, for compatibility we really + // have to stick to Microsoft's design, even if in our case it would actually + // be simpler to invoke the callbacks in topological order as soon as an object's + // subgraph (and its strongly connected component) is completely deserialized. + // + // If the serialized object graph contains unboxed value type instances, any event + // handlers are invoked on boxed copies as follows: + // OnSerializing and OnSerialized handlers are not called on the original value type + // instance (which can be a field or an array element), but on a boxed copy of the + // instance. Thus, if the handler mutates the instance, the changes do not show up in the + // object graph that was serialized, though changes made by OnSerializing (but not + // OnSerialized) will show up in the deserialized object graph. 
This behaviour + // simplifies the implementation and is in accordance with the behaviour of the + // .NET BinaryFormatter. + // OnDeserializing and OnDeserialized handlers are invoked on a boxed value type instance + // too, but this time any changes show up in the deserialized object graph, because + // the boxed instance is copied into the deserialized object graph after the + // OnDeserialized event (at least if the instance is not part of an object cycle). + // This deviates from the BinaryFormatter behaviour in that + // the BinaryFormatter seems to copy the boxed instance into the deserialized object + // graph before the OnDeserialized event. However, since mutating the instance + // in an OnDeserialized handler has no effect when using the BinaryFormatter, + // hopefully no one causes an incompatibility with this implementation by actually trying + // to mutate the instance. (Note that mutable structs with serialization event handlers + // are extremely rare anyway). + // An IDeserializationCallback.OnDeserialization handler is invoked on the boxed instance + // after it has been copied into the deserialized object graph (and then is not copied + // again), so any changes won't show up in the deserialized unboxed value type instance + // (this holds for both the .NET BinaryFormatter and this implementation). + + + /// Contains the serialized state of on object. + public abstract class CloneImage + { + /// Deserializes the object state into a new object. + public abstract object CreateClone(); + + internal CloneImage() { } + } + + public abstract class Cloner + { + // public interface + public readonly Type Type; + + /// Returns a cloner for the given run-time type. + /// The run-time type of the objects to clone. The type must be serializable. + public static Cloner Create(Type type) + { + lock (Cache) return CreateWithoutLock(type); + } + + /// Copies the given object using the serialization API. + /// The object to clone. 
instance.GetType() must equal the Type the Cloner was created for. + public object Clone(object instance) + { + return CaptureImage(instance, false).CreateClone(); + } + + /// Returns an image of the given object instance. + /// The object to capture an image of. + public CloneImage CaptureImage(object instance) + { + return CaptureImage(instance, true); + } + + // internal/protected interface + + private readonly CloneEventHandlers EventHandlers; + + private Cloner(Type type, CloneEventHandlers eventHandlers) { Type = type; EventHandlers = eventHandlers; } + + internal abstract State CaptureShallowStateAndEnqueueNestedState(object value, CaptureContext captureContext); + + internal sealed class CaptureContext + { + public readonly bool IsReturnedToUser; + + public CaptureContext(bool stateIsReturnedToUser) + { + IsReturnedToUser = stateIsReturnedToUser; + } + + // currently uses a static queue, but could easily be rewritten to use an instance queue + public int GetObjectIndex(object instance, Cloner cloner) + { + Debug.Assert(instance.GetType() == cloner.Type); + int objectIndex; + if (!ObjectIndices.TryGetValue(instance, out objectIndex)) + { + objectIndex = ObjectIndices.Count; + ObjectIndices.Add(instance, objectIndex); + var item = new WorkItem { Cloner = cloner, Instance = instance }; + WorkQueue.Enqueue(item); + } + return objectIndex; + } + } + + // internal interface + + internal abstract class State + { + /// May be null. + public readonly CloneEventHandlers EventHandlers; + + /// Indices of nested objects in the object graph. May be null. + public readonly int[] ObjectIndices; + + /// May be null. 
+ public int[] StronglyConnectedComponent; + + public abstract Type Type { get; } + public abstract object CreateUninitializedObject(); + public abstract void WriteToUninitializedObject(object instance, object[] objectGraph); + + public State(CloneEventHandlers eventHandlers, int[] objectIndices) + { + EventHandlers = eventHandlers; + ObjectIndices = objectIndices; + } + + private State() { } + public static readonly State Dummy = new DummyState(); + private sealed class DummyState : State + { + public override Type Type + { + get + { + throw new NotImplementedException(); + } + } + + public override object CreateUninitializedObject() + { + throw new NotImplementedException(); + } + + public override void WriteToUninitializedObject(object instance, object[] objectGraph) + { + throw new NotImplementedException(); + } + } + } + + private static readonly StreamingContext StreamingContext = new StreamingContext(StreamingContextStates.Clone); + private static readonly FormatterConverter FormatterConverter = new FormatterConverter(); + + private static readonly Func CloneMemberwise = CreateMemberwiseCloneDelegate(); + private static Func CreateMemberwiseCloneDelegate() + { + var dynamicMethod = new DynamicMethod("InvokeMemberwiseClone", typeof(object), new Type[] { typeof(object) }, true); + var ilg = dynamicMethod.GetILGenerator(); + ilg.Emit(OpCodes.Ldarg_0); + var method = typeof(object).GetMethod("MemberwiseClone", BindingFlags.NonPublic | BindingFlags.Instance); + ilg.EmitCall(OpCodes.Call, method, null); // non-virtual call + ilg.Emit(OpCodes.Ret); + return (Func)dynamicMethod.CreateDelegate(typeof(Func)); + } + + // private data and methods + + // Cache serves as the synchronization root for the Create and CaptureImage methods + private static readonly Dictionary Cache = new Dictionary(); + + private static Cloner CreateWithoutLock(Type type) + { + Cloner cloner; + if (Cache.TryGetValue(type, out cloner)) return cloner; + + if (!type.IsSerializable) + throw new 
SerializationException("The type '" + type.ToString() + "' is not marked as serializable."); + + if (!type.IsArray) + { + var eventHandlers = CloneEventHandlers.Create(type); + if (eventHandlers != null && (eventHandlers.Events & CloneEvents.ISerializable) != 0) + { + cloner = new CustomSerializationCloner(type, eventHandlers); + } + else + { + bool typeIsBlittable; + var fields = GetSerializedFields(type, out typeIsBlittable); + if (typeIsBlittable && (eventHandlers == null || (eventHandlers.Events & CloneEvents.OnDeserializing) == 0)) + cloner = new BlittableCloner(type, eventHandlers, fields); + else + cloner = new NativeSerializationCloner(type, eventHandlers, fields); + } + } + else + { // array + var elementType = type.GetElementType(); + if (elementType.IsPrimitive || elementType == typeof(string)) + { + cloner = new BlittableCloner(type, null, new FieldInfo[0]); + } + else + { + var elementCloner = CreateWithoutLock(elementType); + if (elementType.IsValueType && elementCloner is BlittableCloner) + cloner = new BlittableCloner(type, null, new FieldInfo[0]); + else if (type.GetArrayRank() == 1) + cloner = new Rank1ArrayCloner(type, elementCloner); + else + cloner = new RankNArrayCloner(type, elementCloner); + } + } + + Cache.Add(type, cloner); + return cloner; + } + + // for optimization purposes CaptureImage uses some static queues + + private sealed class PhyiscalEqualityObjectComparer : System.Collections.Generic.EqualityComparer + { + public override bool Equals(object x, object y) { return x == y; } + public override int GetHashCode(object obj) + { + return System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(obj); + } + } + + private static readonly Dictionary ObjectIndices = new Dictionary(new PhyiscalEqualityObjectComparer()); + private static readonly List States = new List(); + private static readonly Queue WorkQueue = new Queue(); + private static readonly List OnSerializedList = new List(); + private static readonly List ObjectReferenceList 
= new List(); + + private struct WorkItem + { + public Cloner Cloner; + public object Instance; + + public WorkItem(Cloner cloner, object instance) + { + Cloner = cloner; + Instance = instance; + } + } + + private struct OnSerializedListItem + { + public CloneEventHandlers EventHandlers; + public object Instance; + + public OnSerializedListItem(CloneEventHandlers cloneEventHandlers, object instance) + { + EventHandlers = cloneEventHandlers; + Instance = instance; + } + } + + private static bool Contains(int[] arrayOrNull, int element) + { + if (arrayOrNull != null) + { + foreach (var e in arrayOrNull) + if (e == element) return true; + } + return false; + } + + private CloneImage CaptureImage(object instance, bool imageIsReturnedToUser) + { + if (instance.GetType() != Type) + throw new ArgumentException("The object instance does not have the run-time type the Cloner was created for."); + lock (Cache) + { + try + { + bool needSort = false; + + // reserve 0-index spot + ObjectIndices.Add(State.Dummy, 0); + States.Add(null); + + var captureInfo = new CaptureContext(imageIsReturnedToUser); + + ObjectIndices.Add(instance, 1); + WorkQueue.Enqueue(new WorkItem(this, instance)); + int deserializationCallbackCount = 0; + do + { + var item = WorkQueue.Dequeue(); + var cloner = item.Cloner; + if (cloner.EventHandlers == null) + { + States.Add(item.Cloner.CaptureShallowStateAndEnqueueNestedState(item.Instance, captureInfo)); + } + else if (cloner.EventHandlers.Events == CloneEvents.ISerializable) + { + States.Add(item.Cloner.CaptureShallowStateAndEnqueueNestedState(item.Instance, captureInfo)); + needSort = true; + } + else + { + var eventHandlers = cloner.EventHandlers; + if ((eventHandlers.Events & CloneEvents.OnSerializing) != 0) + eventHandlers.InvokeOnSerializing(item.Instance, StreamingContext); + if ((eventHandlers.Events & CloneEvents.OnSerialized) != 0) + OnSerializedList.Add(new OnSerializedListItem(eventHandlers, item.Instance)); + var state = 
item.Cloner.CaptureShallowStateAndEnqueueNestedState(item.Instance, captureInfo); + States.Add(state); + eventHandlers = state.EventHandlers; // may be different from cloner.EventHandlers (for CustomSerializationState) + if ((eventHandlers.Events & (CloneEvents.ISerializable + | CloneEvents.OnDeserialized + | CloneEvents.IObjectReference)) != 0) // + { + needSort = true; + if ((eventHandlers.Events & CloneEvents.IObjectReference) != 0) + ObjectReferenceList.Add(States.Count - 1); + } + // unfortunately the BinaryFormatter doesn't guarantee any order for IDeserializationCallbacks + if ((eventHandlers.Events & (CloneEvents.IDeserializationCallback)) != 0) + ++deserializationCallbackCount; + } + } while (WorkQueue.Count != 0); + var states = States.ToArray(); + + if (OnSerializedList.Count != 0) + { + foreach (var item in OnSerializedList) + item.EventHandlers.InvokeOnSerialized(item.Instance, StreamingContext); + } + + if (!needSort) + return new SimpleImage(states, deserializationCallbackCount); + + int[] order = ComputeTopologicalOrder(states); + if (ObjectReferenceList.Count != 0) + { + foreach (var index1 in ObjectReferenceList) + { + var state1 = states[index1]; + var scc = state1.StronglyConnectedComponent; + if (scc == null) continue; + var type1 = state1.Type; + if (type1.IsValueType) + throw new SerializationException("The serialized object graph contains a cycle that includes a value type object (type: " + type1.FullName + ") implementing IObjectReference."); + if ((state1.EventHandlers.Events & CloneEvents.OnDeserialized) != 0) + throw new SerializationException("The serialized object graph contains a cycle that includes an object (type: " + type1.FullName + ") implementing IObjectReference and also exposing an OnDeserialized handler."); + foreach (var index2 in scc) + { + if (index2 == index1) continue; + var state2 = states[index2]; + var type2 = state2.Type; + if (state2.EventHandlers != null && (state2.EventHandlers.Events & (CloneEvents.ISerializable 
| CloneEvents.IObjectReference)) != 0) + { + var msg = String.Format("The serialized object graph contains a cycle that includes an object (type: {0}) implementing IObjectReference and another object (type: {1}) implementing ISerializable and/or IObjectReference .", type1.FullName, type2.FullName); + throw new SerializationException(msg); + } + if (type2.IsValueType && Contains(state2.ObjectIndices, index1)) + { + var msg = String.Format("The serialized object graph contains a cycle that includes a value type object (type: {0}) referencing an IObjectReference object (type: {1}) in the same cycle.", type2.FullName, type1.FullName); + throw new SerializationException(msg); + } + } + } + } + return new OrderedImage(states, order, deserializationCallbackCount); + } + finally + { + States.Clear(); + ObjectIndices.Clear(); + if (WorkQueue.Count != 0) WorkQueue.Clear(); + if (OnSerializedList.Count != 0) OnSerializedList.Clear(); + if (ObjectReferenceList.Count != 0) ObjectReferenceList.Clear(); + } + } + } + + private sealed class BlittableCloner : Cloner + { + internal readonly FieldInfo[] SerializedFields; + + public BlittableCloner(Type type, CloneEventHandlers eventHandlers, FieldInfo[] serializedFields) : base(type, eventHandlers) + { + Debug.Assert(serializedFields != null); + SerializedFields = serializedFields; + } + + internal override State CaptureShallowStateAndEnqueueNestedState(object instance, CaptureContext captureContext) + { + Debug.Assert(Type == instance.GetType()); + if (captureContext.IsReturnedToUser) + { + return new BlittableState(EventHandlers, CloneMemberwise(instance)); + } + else + { + return new BlittableState(EventHandlers, instance); + } + } + } + + private sealed class BlittableState : State + { + private object Value; + + public BlittableState(CloneEventHandlers eventHandlers, object value) : base(eventHandlers, null) + { + Value = value; + } + + public override Type Type { get { return Value.GetType(); } } + + public override object 
CreateUninitializedObject() + { + return Cloner.CloneMemberwise(Value); + } + + public override void WriteToUninitializedObject(object instance, object[] objectGraph) { } + } + + private sealed class Rank1ArrayCloner : Cloner + { + Cloner PreviousElementCloner; + + public Rank1ArrayCloner(Type type, Cloner elementCloner) : base(type, null) + { + PreviousElementCloner = elementCloner; + } + + internal override State CaptureShallowStateAndEnqueueNestedState(object instance, CaptureContext captureContext) + { + Debug.Assert(Type == instance.GetType()); + var array = (Array)instance; + var lowerBound = array.GetLowerBound(0); + var length = array.Length; // should throw an exception if length > Int32.MaxValue + if (length == 0) return new BlittableState(null, instance); + var throwExceptionOnOverflow = checked(lowerBound + length); + var objectIndices = new int[length]; + var cloner = PreviousElementCloner; + var previousType = cloner.Type; + for (int i = 0; i < length; ++i) + { + var value = array.GetValue(lowerBound + i); + if (value != null) + { + var type = value.GetType(); + if (type != previousType) + { + cloner = CreateWithoutLock(type); + previousType = type; + } + objectIndices[i] = captureContext.GetObjectIndex(value, cloner); + } + } + PreviousElementCloner = cloner; + return new Rank1ArrayState(Type.GetElementType(), lowerBound, objectIndices); + } + } + + private sealed class Rank1ArrayState : State + { + private readonly Type ElementType; + private readonly int LowerBound; + + public Rank1ArrayState(Type elementType, int lowerBound, int[] objectIndices) : base(null, objectIndices) + { + Debug.Assert(objectIndices != null); + ElementType = elementType; + LowerBound = lowerBound; + } + + public override Type Type { get { return ElementType.MakeArrayType(); } } + + public override object CreateUninitializedObject() + { + if (LowerBound == 0) + return Array.CreateInstance(ElementType, ObjectIndices.Length); + else + return Array.CreateInstance(ElementType, 
new int[] { ObjectIndices.Length }, new int[] { LowerBound }); + } + + public override void WriteToUninitializedObject(object instance, object[] objectGraph) + { + var array = (Array)instance; + var objectIndices = ObjectIndices; + for (int i = 0; i < objectIndices.Length; ++i) + { + var objectIndex = objectIndices[i]; + if (objectIndex == 0) continue; + array.SetValue(objectGraph[objectIndex], LowerBound + i); + } + } + } + + private sealed class RankNArrayCloner : Cloner + { + Cloner PreviousElementCloner; + + public RankNArrayCloner(Type type, Cloner elementCloner) : base(type, null) + { + PreviousElementCloner = elementCloner; + } + + internal override State CaptureShallowStateAndEnqueueNestedState(object instance, CaptureContext captureContext) + { + Debug.Assert(Type == instance.GetType()); + var array = (Array)instance; + var rank = array.Rank; + var lowerBounds = new int[rank]; + var lengths = new int[rank]; + var ends = new int[rank]; + var numberOfElements = 1; + for (int d = 0; d < rank; ++d) + { + var lowerBound = array.GetLowerBound(d); + lowerBounds[d] = lowerBound; + var length = array.GetLength(d); + lengths[d] = length; + ends[d] = checked(lowerBound + length); + numberOfElements = checked(numberOfElements * length); + } + var objectIndices = new int[numberOfElements]; + var cloner = PreviousElementCloner; + var previousType = cloner.Type; + var indices = (int[])lowerBounds.Clone(); + for (int i = 0; i < numberOfElements; ++i) + { + var value = array.GetValue(indices); + if (value != null) + { + var type = value.GetType(); + if (type != previousType) + { + cloner = CreateWithoutLock(type); + previousType = type; + } + objectIndices[i] = captureContext.GetObjectIndex(value, cloner); + } + // increment multi-dimensional index + var d = rank - 1; + do + { + if (++indices[d] < ends[d]) break; + indices[d] = lowerBounds[d]; + } while (--d >= 0); + } + PreviousElementCloner = cloner; + return new RankNArrayState(Type.GetElementType(), lengths, 
lowerBounds, ends, objectIndices); + } + } + + private sealed class RankNArrayState : State + { + private readonly Type ElementType; + private readonly int[] Lengths; + private readonly int[] LowerBounds; + private readonly int[] Ends; + + public RankNArrayState(Type elementType, int[] lengths, int[] lowerBounds, int[] ends, int[] objectIndices) + : base(null, objectIndices) + { + Debug.Assert(lengths != null && lengths.Length == lowerBounds.Length && lengths.Length == ends.Length && objectIndices != null); + ElementType = elementType; + Lengths = lengths; + LowerBounds = lowerBounds; + Ends = ends; + } + + public override Type Type { get { return ElementType.MakeArrayType(Lengths.Length); } } + + public override object CreateUninitializedObject() + { + return Array.CreateInstance(ElementType, Lengths, LowerBounds); + } + + public override void WriteToUninitializedObject(object instance, object[] objectGraph) + { + var array = (Array)instance; + var indices = (int[])LowerBounds.Clone(); + foreach (var objectIndex in ObjectIndices) + { + if (objectIndex != 0) + array.SetValue(objectGraph[objectIndex], indices); + // increment multi-dimensional index + var d = LowerBounds.Length - 1; + do + { + if (++indices[d] < Ends[d]) break; + indices[d] = LowerBounds[d]; + } while (--d >= 0); + } + } + } + + private sealed class NativeSerializationCloner : Cloner + { + internal readonly FieldInfo[] SerializedFields; + private readonly Cloner[] Cloners; + + private Func FieldValuesGetter; // lazily initialized + internal Action FieldValuesSetter; // lazily initialized + + public NativeSerializationCloner(Type type, CloneEventHandlers eventHandlers, FieldInfo[] serializedFields) : base(type, eventHandlers) + { + SerializedFields = serializedFields; + Cloners = new Cloner[serializedFields.Length]; + } + + internal override State CaptureShallowStateAndEnqueueNestedState(object instance, CaptureContext captureContext) + { + Debug.Assert(Type == instance.GetType()); + if 
(SerializedFields.Length == 0) + return new NativeSerializationState(this); + var getter = FieldValuesGetter; + if (getter == null) + FieldValuesGetter = getter = CreateFieldValuesGetter(Type, SerializedFields); + var values = getter(instance); // GetFieldValues(instance, SerializedFields); + int[] objectIndices = new int[values.Length]; + for (int i = 0; i < values.Length; ++i) + { + var value = values[i]; + if (value == null) continue; + var type = value.GetType(); + if (type.IsPrimitive || type == typeof(string)) continue; + values[i] = null; + var cloner = Cloners[i]; + if (cloner == null || type != cloner.Type) + { + cloner = CreateWithoutLock(type); + Cloners[i] = cloner; + } + objectIndices[i] = captureContext.GetObjectIndex(value, cloner); + } + return new NativeSerializationState(this, values, objectIndices); + } + } + + private sealed class NativeSerializationState : State + { + private readonly NativeSerializationCloner Cloner; + private readonly object[] Values; // maybe null if object has no fields + + public NativeSerializationState(NativeSerializationCloner cloner) + : base(cloner.EventHandlers, null) + { + Cloner = cloner; + } + + public NativeSerializationState(NativeSerializationCloner cloner, object[] values, int[] objectIndices) + : base(cloner.EventHandlers, objectIndices) + { + Debug.Assert(cloner != null && values.Length != 0 && values.Length == objectIndices.Length); + Cloner = cloner; + Values = values; + } + + public override Type Type { get { return Cloner.Type; } } + + public override object CreateUninitializedObject() + { + return FormatterServices.GetUninitializedObject(Cloner.Type); + } + + public override void WriteToUninitializedObject(object instance, object[] objectGraph) + { + if (ObjectIndices == null) return; + var setter = Cloner.FieldValuesSetter; + if (setter == null) + Cloner.FieldValuesSetter = setter = CreateFieldValuesSetter(Cloner.Type, Cloner.SerializedFields); + setter(instance, Values, ObjectIndices, objectGraph); + 
} + } + + // NativeSerializationProxyState is used by CustomSerializationCloner to store the state of + // proxy objects which don't implement ISerializable. + private sealed class NativeSerializationProxyState : State + { + private readonly Type Type_; + private readonly FieldInfo[] Fields; + private readonly object[] Values; + + public NativeSerializationProxyState(Type type, CloneEventHandlers eventHandlers) + : base(eventHandlers, null) + { + Type_ = type; + } + + public NativeSerializationProxyState(Type type, CloneEventHandlers eventHandlers, FieldInfo[] fields, object[] values, int[] objectIndices) + : base(eventHandlers, objectIndices) + { + Debug.Assert(fields.Length == values.Length && values.Length == objectIndices.Length); + Type_ = type; + Fields = fields; + Values = values; + } + + public override Type Type { get { return Type_; } } + + public override object CreateUninitializedObject() + { + return FormatterServices.GetUninitializedObject(Type_); + } + + public override void WriteToUninitializedObject(object instance, object[] objectGraph) + { + if (ObjectIndices == null) return; + // We can't use a NativeSerializationCloner.FieldValuesSetter here + // because some primitive values might have a type different from the type of the field + // they are assigned to. FieldInfo.SetValue does some automatic conversions in those + // cases that the FieldValuesSetter doesn't (e.g. integer type widening). 
+ for (int i = 0; i < ObjectIndices.Length; ++i) + { + var objectIndex = ObjectIndices[i]; + if (objectIndex == 0) + { + var value = Values[i]; + if (value != null) Fields[i].SetValue(instance, value); + } + else + { + Fields[i].SetValue(instance, objectGraph[objectIndex]); + } + } + } + } + + private struct CustomSerializationMemberInfo + { + public string Name; + public Type Type; + public object Value; + } + + private sealed class CustomSerializationCloner : Cloner + { + internal readonly ConstructorInfo Constructor; + internal Action ConstructorCaller; // lazily initalized + private Cloner PreviousProxyCloner; + private Cloner[] Cloners; + + private static Type[] SerializableConstructorArgumentTypes = new Type[] { typeof(SerializationInfo), typeof(StreamingContext) }; + + public CustomSerializationCloner(Type type, + CloneEventHandlers eventHandlers) + : base(type, eventHandlers) + { + Constructor = type.GetConstructor(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance, null, SerializableConstructorArgumentTypes, null); + PreviousProxyCloner = this; + } + + internal override State CaptureShallowStateAndEnqueueNestedState(object instance, CaptureContext captureContext) + { + Debug.Assert(Type == instance.GetType()); + var info = new SerializationInfo(Type, FormatterConverter); + ((ISerializable)instance).GetObjectData(info, StreamingContext); + var n = info.MemberCount; + var members = new CustomSerializationMemberInfo[n]; + var objectIndices = new int[n]; + if (Cloners == null || Cloners.Length != n) Cloners = new Cloner[n]; + var iter = info.GetEnumerator(); + for (int i = 0; iter.MoveNext(); ++i) + { + var entry = iter.Current; + members[i].Name = entry.Name; + members[i].Type = entry.ObjectType; + var value = entry.Value; + if (value == null) continue; + Type type = value.GetType(); + if (type.IsPrimitive || type == typeof(string)) + { + members[i].Value = value; + continue; + } + var cloner = Cloners[i]; + if (cloner == null || type != 
cloner.Type) + { + cloner = CreateWithoutLock(type); + Cloners[i] = cloner; + } + objectIndices[i] = captureContext.GetObjectIndex(value, cloner); + } + + Type proxyType; + + if (!info.IsFullTypeNameSetExplicit && !info.IsAssemblyNameSetExplicit) + { + proxyType = info.ObjectType; + } + else + { + try + { + var assembly = Assembly.Load(info.AssemblyName); + proxyType = assembly.GetType(info.FullTypeName, true); + } + catch (Exception e) + { + var msg = "Can not load the type '" + info.FullTypeName + "' in the assembly '" + info.AssemblyName + "'."; + throw new SerializationException(msg, e); + } + } + + if (proxyType == Type) + { + if (Constructor == null) throw new SerializationException("The ISerializable type '" + Type.ToString() + "' does not define a proper deserialization constructor."); + return new CustomSerializationState(this, members, objectIndices); + } + + Cloner proxyCloner; + if (proxyType == PreviousProxyCloner.Type) + { + proxyCloner = PreviousProxyCloner; + } + else + { + proxyCloner = CreateWithoutLock(proxyType); + PreviousProxyCloner = proxyCloner; + } + + if (proxyType.IsArray) + { + // On .NET a NullReferenceException is thrown on deserialization of an array type proxy. 
+ throw new SerializationException("The type '" + Type.ToString() + "' uses an array type ('" + proxyType.ToString() + "') as its serialization proxy type."); + } + + CustomSerializationCloner csc = proxyCloner as CustomSerializationCloner; + if (csc != null) + { + if (csc.Constructor == null) throw new SerializationException("The ISerializable type '" + csc.Type.ToString() + "' does not define a proper deserialization constructor."); + return new CustomSerializationState(csc, members, objectIndices); + } + + if (n == 0) return new NativeSerializationProxyState(proxyType, proxyCloner.EventHandlers); + + FieldInfo[] proxyFields; + { + var nsc = proxyCloner as NativeSerializationCloner; + if (nsc != null) + { + proxyFields = nsc.SerializedFields; + } + else + { + var bc = proxyCloner as BlittableCloner; + Debug.Assert(bc != null); + proxyFields = bc.SerializedFields; + } + } + + // The BinaryFormatter on .NET simply assigns the values in the SerializationInfo + // to the field with the same name (of the most derived class) in the proxy object. + // There are no checks whether all fields are assigned values or whether the target has + // multiple fields with the same name. The types are only checked once the values are + // assigned to the proxy object fields. Integer types are automatically widened and + // types are cast to base or interface types if necessary. 
+ + var proxyValues = new object[proxyFields.Length]; + var proxyObjectIndices = new int[proxyFields.Length]; + for (int i = 0; i < n; ++i) + { + var name = members[i].Name; + for (int j = 0; j < proxyFields.Length; ++j) + { + if (name == proxyFields[j].Name) + { + proxyValues[j] = members[i].Value; + proxyObjectIndices[j] = objectIndices[i]; + break; + } + } + } + return new NativeSerializationProxyState(proxyType, proxyCloner.EventHandlers, proxyFields, proxyValues, proxyObjectIndices); + } + } + + private sealed class CustomSerializationState : State + { + private readonly CustomSerializationCloner Cloner; + private readonly CustomSerializationMemberInfo[] Members; + + public CustomSerializationState(CustomSerializationCloner cloner, + CustomSerializationMemberInfo[] members, + int[] objectIndices) + : base(cloner.EventHandlers, objectIndices) + { + Cloner = cloner; + Members = members; + } + + public override Type Type { get { return Cloner.Type; } } + + public override object CreateUninitializedObject() + { + return FormatterServices.GetUninitializedObject(Cloner.Type); + } + + public override void WriteToUninitializedObject(object instance, object[] objectGraph) + { + var info = new SerializationInfo(Cloner.Type, FormatterConverter); + for (int i = 0; i < Members.Length; ++i) + { + var member = Members[i]; + var index = ObjectIndices[i]; + var value = index == 0 ? 
member.Value : objectGraph[index]; + info.AddValue(member.Name, value, member.Type); + } + var constructorCaller = Cloner.ConstructorCaller; + if (constructorCaller == null) + Cloner.ConstructorCaller = constructorCaller = CreateISerializableConstructorCaller(Cloner.Constructor); + constructorCaller(instance, info, StreamingContext); + } + } + + private sealed class SimpleImage : CloneImage + { + private readonly Cloner.State[] States; + private readonly int DeserializationCallbackCount; + + internal SimpleImage(Cloner.State[] states, int deserializationCallbackCount) + { + Debug.Assert(states.Length > 1 && states[0] == null); + States = states; + DeserializationCallbackCount = deserializationCallbackCount; + } + + public override object CreateClone() + { + int callbackIndicesIndex = DeserializationCallbackCount; + int[] callbackIndices = + DeserializationCallbackCount == 0 ? null : new int[DeserializationCallbackCount]; + var objects = new object[States.Length]; + // States[0] is null + for (int i = 1; i < States.Length; ++i) + objects[i] = States[i].CreateUninitializedObject(); + for (int index = States.Length - 1; index != 0; --index) + { + var state = States[index]; + var instance = objects[index]; + var eventHandlers = state.EventHandlers; + if (eventHandlers == null) + { + state.WriteToUninitializedObject(objects[index], objects); + } + else + { + var events = eventHandlers.Events; + Debug.Assert((events & (CloneEvents.ISerializable + | CloneEvents.OnDeserialized + | CloneEvents.IObjectReference)) == 0); + if ((events & CloneEvents.OnDeserializing) != 0) + eventHandlers.InvokeOnDeserializing(instance, Cloner.StreamingContext); + if ((events & CloneEvents.IDeserializationCallback) != 0) + callbackIndices[--callbackIndicesIndex] = index; + state.WriteToUninitializedObject(instance, objects); + } + } + if (callbackIndices != null) + { + Debug.Assert(callbackIndicesIndex == 0); + foreach (var index in callbackIndices) + 
((IDeserializationCallback)objects[index]).OnDeserialization(null); + } + return objects[1]; + } + } + + private sealed class OrderedImage : CloneImage + { + private readonly Cloner.State[] States; + private readonly int[] Order; + private readonly int DeserializationCallbackCount; + + internal OrderedImage(Cloner.State[] states, int[] order, int deserializationCallbackCount) + { + Debug.Assert(states.Length > 1 && states.Length == order.Length && states[0] == null); + States = states; + Order = order; + DeserializationCallbackCount = deserializationCallbackCount; + } + + public static object GetRealObject(object instance) + { + var or = (IObjectReference)instance; + instance = or.GetRealObject(Cloner.StreamingContext); + if (instance != or) + { + or = instance as IObjectReference; + int i = 0; + while (or != null) + { + if (++i == 100) throw new SerializationException("An object's implementation of the IObjectReference interface returned too many nested references to other objects that implement IObjectReference."); + instance = or.GetRealObject(Cloner.StreamingContext); + if (instance == or) break; + or = instance as IObjectReference; + } + if (instance == null) throw new SerializationException("An object's IObjectReference.GetRealObject implementation returned null."); + } + return instance; + } + + public override object CreateClone() + { + int callbackIndicesIndex = DeserializationCallbackCount; + object[] callbackObjects = + DeserializationCallbackCount == 0 ? 
null : new object[DeserializationCallbackCount]; + var objects = new object[States.Length]; + for (int i = 1; i < States.Length; ++i) + objects[i] = States[i].CreateUninitializedObject(); + var delayedOnDeserializedEvents = new List(); + int objectReferenceIndex = 0; + object objectReference = null; + int[] lastScc = null; + for (int i = Order.Length - 1; i != 0; --i) + { + var index = Order[i]; + var state = States[index]; + var scc = state.StronglyConnectedComponent; + if (scc != lastScc) + { + lastScc = scc; + if (objectReference != null) + { + ReplaceObjectReferenceInSCCWithRealObject(objectReference, objectReferenceIndex, objects); + objectReferenceIndex = 0; + objectReference = null; + } + if (delayedOnDeserializedEvents.Count != 0) + InvokeDelayedOnDeserializedEvents(delayedOnDeserializedEvents, objects); // also clears delayedOnDeserializedEvents + if (scc != null) + { + foreach (var idx in scc) + { + var handlers = States[idx].EventHandlers; + if (handlers != null && (handlers.Events & CloneEvents.IObjectReference) != 0) + { + objectReferenceIndex = idx; + objectReference = objects[idx]; + objects[idx] = null; // set to null until we call ReplaceObjectReferenceInSCCWithRealObject + } + } + } + } + var instance = objects[index]; + var eventHandlers = state.EventHandlers; + if (eventHandlers == null) + { + state.WriteToUninitializedObject(instance, objects); + } + else + { + var events = eventHandlers.Events; + if (instance != null) + { + if ((events & CloneEvents.OnDeserializing) != 0) + eventHandlers.InvokeOnDeserializing(instance, Cloner.StreamingContext); + state.WriteToUninitializedObject(instance, objects); + if ((events & CloneEvents.OnDeserialized) != 0) + { + if (scc == null) eventHandlers.InvokeOnDeserialized(instance, Cloner.StreamingContext); + else delayedOnDeserializedEvents.Add(index); + } + if ((events & CloneEvents.IObjectReference) != 0) + { + Debug.Assert(state.StronglyConnectedComponent == null); + objects[index] = 
GetRealObject(instance); + } + } + else + { + Debug.Assert(index == objectReferenceIndex); + } + // It's a pity we have to process the IDeserializationCallback separately + // from OnDeserialized events to stay compatible with the .NET BinaryFormatter. + if ((events & CloneEvents.IDeserializationCallback) != 0) + callbackObjects[--callbackIndicesIndex] = instance ?? objectReference; + } + } + if (objectReference != null) + ReplaceObjectReferenceInSCCWithRealObject(objectReference, objectReferenceIndex, objects); + if (delayedOnDeserializedEvents.Count != 0) + InvokeDelayedOnDeserializedEvents(delayedOnDeserializedEvents, objects); + if (callbackObjects != null) + { + Debug.Assert(callbackIndicesIndex == 0); + // We call the callback in in the reverse topological order at the end of + // deserialization, which is similar to what the BinaryFormatter does, unfortunately. + foreach (var obj in callbackObjects) + ((IDeserializationCallback)obj).OnDeserialization(null); + } + return objects[1]; + } + + private void InvokeDelayedOnDeserializedEvents(List indices, object[] objects) + { + foreach (var index in indices) + { + var handlers = States[index].EventHandlers; + handlers.InvokeOnDeserialized(objects[index], Cloner.StreamingContext); + } + indices.Clear(); + } + + private void ReplaceObjectReferenceInSCCWithRealObject(object objectReference, int objectReferenceIndex, object[] objects) + { + var state = States[objectReferenceIndex]; + var eventHandlers = state.EventHandlers; + var events = eventHandlers.Events; + if ((events & CloneEvents.OnDeserializing) != 0) + eventHandlers.InvokeOnDeserializing(objectReference, Cloner.StreamingContext); + state.WriteToUninitializedObject(objectReference, objects); + Debug.Assert((events & CloneEvents.OnDeserialized) == 0); + objects[objectReferenceIndex] = GetRealObject(objectReference); + // set all references to real object + foreach (var index2 in state.StronglyConnectedComponent) + { + if (index2 == objectReferenceIndex) 
continue; + var state2 = States[index2]; + Debug.Assert(state2.EventHandlers == null || (state2.EventHandlers.Events & (CloneEvents.ISerializable | CloneEvents.IObjectReference)) == 0); + if (Cloner.Contains(state2.ObjectIndices, objectReferenceIndex)) + { + Debug.Assert(!state2.Type.IsValueType); + state2.WriteToUninitializedObject(objects[index2], objects); // overwrite all fields + } + } + } + } + + /// Returns the public and non-public fields of the type (and its base types), + /// except fields with the NonSerialized attribute. In the returned array fields from + /// derived types come before fields from base types. + internal static FieldInfo[] GetSerializedFields(Type type, out bool typeIsBlittable) + { + Debug.Assert(type.IsSerializable && !type.IsInterface); + // We need the fields of the most derived type first, but GetFields returns the + // field in an undefined order, so we have to climb the type hierarchy. + var fields = type.GetFields(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.DeclaredOnly); + bool isBlittable = true; + int nonSerialized = 0; + foreach (var f in fields) + { + if (f.IsNotSerialized) ++nonSerialized; + var ft = f.FieldType; + if (!ft.IsPrimitive && ft != typeof(string)) + { + if (!ft.IsValueType) isBlittable = false; + else + { + bool fIsBlittable; + GetSerializedFields(ft, out fIsBlittable); + isBlittable &= fIsBlittable; + } + } + } + int numberOfBases = 0; + var bt = type.BaseType; + while (bt != null && bt != typeof(object)) + { + if (!bt.IsSerializable) + throw new SerializationException(BaseTypeNotSerializableMessage(bt, type)); + ++numberOfBases; + bt = bt.BaseType; + } + if (numberOfBases == 0) + { + if (nonSerialized == 0) + { + typeIsBlittable = isBlittable; + return fields; + } + else + { + typeIsBlittable = false; + var serializedFields = new FieldInfo[fields.Length - nonSerialized]; + int i = 0; + foreach (var f in fields) if (!f.IsNotSerialized) serializedFields[i++] = f; + return 
serializedFields;
+                }
+            }
+            else
+            {
+                // (Tail of GetSerializedFields, whose beginning lies above this span;
+                // reproduced unchanged.)
+                var baseFieldArrays = new FieldInfo[numberOfBases][];
+                bt = type.BaseType;
+                for (int i = 0; i < numberOfBases; ++i, bt = bt.BaseType)
+                {
+                    var baseFields = bt.GetFields(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.DeclaredOnly);
+                    foreach (var bf in baseFields)
+                    {
+                        if (bf.IsNotSerialized) ++nonSerialized;
+                        var bft = bf.FieldType;
+                        if (!bft.IsPrimitive && bft != typeof(string))
+                        {
+                            if (!bft.IsValueType) isBlittable = false;
+                            else
+                            {
+                                bool bfIsBlittable;
+                                GetSerializedFields(bft, out bfIsBlittable);
+                                isBlittable &= bfIsBlittable;
+                            }
+                        }
+                    }
+                    baseFieldArrays[i] = baseFields;
+                }
+
+                typeIsBlittable = nonSerialized == 0 & isBlittable;
+
+                var numberOfSerializedFields = fields.Length - nonSerialized;
+                foreach (var baseFields in baseFieldArrays) numberOfSerializedFields += baseFields.Length;
+
+                if (nonSerialized == 0 && numberOfSerializedFields == fields.Length) return fields;
+
+                var combinedFields = new FieldInfo[numberOfSerializedFields];
+                if (nonSerialized == 0)
+                {
+                    int i = 0;
+                    foreach (var f in fields) combinedFields[i++] = f;
+                    foreach (var baseFields in baseFieldArrays)
+                        foreach (var bf in baseFields) combinedFields[i++] = bf;
+                }
+                else
+                {
+                    int i = 0;
+                    foreach (var f in fields)
+                        if (!f.IsNotSerialized) combinedFields[i++] = f;
+                    foreach (var baseFields in baseFieldArrays)
+                        foreach (var bf in baseFields)
+                            if (!bf.IsNotSerialized) combinedFields[i++] = bf;
+                }
+                return combinedFields;
+            }
+        }
+
+        /// <summary>Builds the exception message used when a serializable type
+        /// derives from a base type that is not serializable.</summary>
+        /// <param name="baseType">The non-serializable base type.</param>
+        /// <param name="childType">The serializable type deriving from it.</param>
+        internal static string BaseTypeNotSerializableMessage(Type baseType, Type childType)
+        {
+            return "The serializable type '" + childType.ToString() + "' has a base type '" + baseType.ToString() + "' that is not serializable.";
+        }
+
+        /*
+        private static object[] GetFieldValues(object instance, FieldInfo[] fields) {
+            var values = new object[fields.Length];
+            for (int i = 0; i < fields.Length; ++i) {
+                var f = fields[i];
+                values[i] = f.GetValue(instance);
+            }
+            return values;
+        }
+        */
+
+        /// <summary>Emits a dynamic method that reads the given fields of an
+        /// instance of <paramref name="type"/> into a new object array (one slot
+        /// per field, value-type fields boxed) and returns it as a delegate.
+        /// The commented-out GetFieldValues above is the reflection equivalent.</summary>
+        /// <param name="type">The type declaring the instance passed to the delegate.</param>
+        /// <param name="fields">The fields to read; must be non-empty.</param>
+        /// <returns>A delegate taking the (boxed) instance and returning the field values.</returns>
+        /// <exception cref="ArgumentException">The fields array is empty.</exception>
+        internal static Func<object, object[]> CreateFieldValuesGetter(Type type, FieldInfo[] fields)
+        {
+            if (fields.Length == 0) throw new ArgumentException("The fields array must be non-empty.");
+
+            var dynamicMethod = new DynamicMethod("FieldValuesGetter",
+                                                  MethodAttributes.Public | MethodAttributes.Static,
+                                                  CallingConventions.Standard,
+                                                  typeof(object[]), new Type[] { typeof(object), typeof(object) },
+                                                  type, true);
+            var ilg = dynamicMethod.GetILGenerator();
+            var isValueType = type.IsValueType;
+
+            // arg 0: dummy argument (makes delegate invocation faster)
+            // arg 1: (boxed) object instance
+
+            ilg.DeclareLocal(typeof(object[])); // local 0: the returned values array
+            ilg.DeclareLocal(typeof(object));   // local 1: temporary object value
+
+            // create the values array
+            ilg.Emit(OpCodes.Ldc_I4, fields.Length);
+            ilg.Emit(OpCodes.Newarr, typeof(object));
+            ilg.Emit(OpCodes.Stloc_0);
+
+            // cast/unbox the object instance
+            ilg.Emit(OpCodes.Ldarg_1);
+            if (!isValueType)
+                ilg.Emit(OpCodes.Castclass, type);
+            else
+                ilg.Emit(OpCodes.Unbox, type);
+
+            // The unbox IL construction doesn't return a normal managed pointer
+            // but a "controlled-mutability" managed pointer. Since there's no way
+            // to declare a controlled-mutability managed pointer local and one
+            // can't convert such a pointer into a normal managed pointer, we can't
+            // store away the pointer for later field accesses. Instead we use
+            // OpCodes.Dup to keep the pointer around. Alternatively we could copy
+            // the value type instance onto the stack, but that can be costly for
+            // large value types.
+
+            for (int i = 0; i < fields.Length; ++i)
+            {
+                // keep a copy of the instance reference for every field but the last
+                if (i + 1 != fields.Length) ilg.Emit(OpCodes.Dup);
+
+                var field = fields[i];
+                ilg.Emit(OpCodes.Ldfld, field);
+                if (field.FieldType.IsValueType)
+                    ilg.Emit(OpCodes.Box, field.FieldType);
+                ilg.Emit(OpCodes.Stloc_1);
+
+                // store object into result array
+                ilg.Emit(OpCodes.Ldloc_0);
+                ilg.Emit(OpCodes.Ldc_I4, i);
+                ilg.Emit(OpCodes.Ldloc_1);
+                ilg.Emit(OpCodes.Stelem_Ref);
+            }
+
+            ilg.Emit(OpCodes.Ldloc_0);
+            ilg.Emit(OpCodes.Ret);
+
+            // Binding the delegate to a null target consumes the dummy arg 0, so the
+            // delegate type has one parameter fewer than the dynamic method.
+            return (Func<object, object[]>)dynamicMethod.CreateDelegate(typeof(Func<object, object[]>), null);
+        }
+
+        /*
+        private static void SetFieldValues(FieldInfo[] fields, object instance, object[] values, int[] objectIndices, object[] objectGraph) {
+            for (int i = 0; i < objectIndices.Length; ++i) {
+                var objectIndex = ObjectIndices[i];
+                if (objectIndex == 0)
+                    fields[i].SetValue(instance, values[i]);
+                else
+                    fields[i].SetValue(instance, objectGraph[objectIndex]);
+            }
+        }
+        */
+
+        /// <summary>Emits a dynamic method that assigns the given fields of an
+        /// instance of <paramref name="type"/>. For field i the value is taken from
+        /// objectGraph[objectIndices[i]] if objectIndices[i] is non-zero and from
+        /// values[i] otherwise. The commented-out SetFieldValues above is the
+        /// reflection equivalent.</summary>
+        /// <param name="type">The type declaring the instance passed to the delegate.</param>
+        /// <param name="fields">The fields to assign; must be non-empty.</param>
+        /// <returns>A delegate taking (instance, values, objectIndices, objectGraph).</returns>
+        /// <exception cref="ArgumentException">The fields array is empty.</exception>
+        internal static Action<object, object[], int[], object[]> CreateFieldValuesSetter(Type type, FieldInfo[] fields)
+        {
+            if (fields.Length == 0) throw new ArgumentException("The fields array must be non-empty.");
+
+            // It is important that we use the 8 argument DynamicMethod constructor
+            // to associate the method with the type, so that the method is allowed
+            // to set readonly (initonly) fields.
+            var dynamicMethod = new DynamicMethod("FieldValuesSetter",
+                                                  MethodAttributes.Public | MethodAttributes.Static, CallingConventions.Standard,
+                                                  null, new Type[]{typeof(object), typeof(object), typeof(object[]), typeof(int[]),
+                                                                   typeof(object[])},
+                                                  type, true);
+            var ilg = dynamicMethod.GetILGenerator();
+            var isValueType = type.IsValueType;
+
+            // arg0: dummy argument (makes delegate invocation faster)
+            // arg1: (boxed) object instance
+            // arg2: values array
+            // arg3: objectIndices array
+            // arg4: objectGraph array
+
+            // local 0: object index
+            ilg.DeclareLocal(typeof(int));
+
+            ilg.Emit(OpCodes.Ldarg_1);
+            if (!isValueType)
+                ilg.Emit(OpCodes.Castclass, type);
+            else
+                ilg.Emit(OpCodes.Unbox, type); // returns a controlled-mutability pointer
+                                               // which we can't store in a local...
+            for (int i = 0; i < fields.Length; ++i)
+            {
+                if (i + 1 != fields.Length) ilg.Emit(OpCodes.Dup); // ... so we use OpCodes.Dup to keep it around
+
+                var field = fields[i];
+
+                // is field value an object in the object graph array?
+                ilg.Emit(OpCodes.Ldarg_3);
+                ilg.Emit(OpCodes.Ldc_I4, i);
+                ilg.Emit(OpCodes.Ldelem, typeof(int));
+                ilg.Emit(OpCodes.Stloc_0);
+                ilg.Emit(OpCodes.Ldloc_0);
+                var label1 = ilg.DefineLabel();
+                ilg.Emit(OpCodes.Brtrue, label1);
+
+                // load boxed value
+                ilg.Emit(OpCodes.Ldarg_2);
+                ilg.Emit(OpCodes.Ldc_I4, i);
+                ilg.Emit(OpCodes.Ldelem, typeof(object));
+                var label2 = ilg.DefineLabel();
+                ilg.Emit(OpCodes.Br, label2);
+
+                // load object graph array
+                ilg.MarkLabel(label1);
+                ilg.Emit(OpCodes.Ldarg, 4);
+                ilg.Emit(OpCodes.Ldloc_0);
+                ilg.Emit(OpCodes.Ldelem, typeof(object));
+
+                ilg.MarkLabel(label2);
+                // store value into field
+                if (field.FieldType != typeof(object))
+                    ilg.Emit(OpCodes.Unbox_Any, field.FieldType);
+                ilg.Emit(OpCodes.Stfld, field);
+            }
+
+            ilg.Emit(OpCodes.Ret);
+
+            // The null target binds the dummy arg0 (see CreateDelegate call below).
+            return (Action<object, object[], int[], object[]>)dynamicMethod.CreateDelegate(typeof(Action<object, object[], int[], object[]>), null);
+        }
+
+        /// <summary>Emits a dynamic method that invokes the given
+        /// (SerializationInfo, StreamingContext) constructor on an already existing
+        /// instance of the constructor's declaring type.</summary>
+        /// <param name="constructor">The constructor to call.</param>
+        /// <returns>A delegate taking (instance, info, context).</returns>
+        internal static Action<object, SerializationInfo, StreamingContext> CreateISerializableConstructorCaller(ConstructorInfo constructor)
+        {
+            var type = constructor.DeclaringType;
+            var dynamicMethod = new DynamicMethod("SerializableConstructorCaller",
+                                                  MethodAttributes.Public | MethodAttributes.Static, CallingConventions.Standard,
+                                                  null, new Type[] { typeof(object), typeof(object), typeof(SerializationInfo), typeof(StreamingContext) },
+                                                  type, true);
+            var ilg = dynamicMethod.GetILGenerator();
+            var isValueType = type.IsValueType;
+            // arg0 is the dummy argument bound by CreateDelegate; arg1 is the instance
+            ilg.Emit(OpCodes.Ldarg_1);
+            if (!isValueType)
+                ilg.Emit(OpCodes.Castclass, type);
+            else
+                ilg.Emit(OpCodes.Unbox, type);
+            ilg.Emit(OpCodes.Ldarg_2);
+            ilg.Emit(OpCodes.Ldarg_3);
+            ilg.Emit(OpCodes.Call, constructor);
+            ilg.Emit(OpCodes.Ret);
+            return (Action<object, SerializationInfo, StreamingContext>)dynamicMethod.CreateDelegate(typeof(Action<object, SerializationInfo, StreamingContext>), null);
+        }
+
+        // The following is a non-recursive implementation of David J.
+        // Pearce's improved version of Tarjan's algorithm for finding the strongly
+        // connected components of a directed graph, see
+        // http://homepages.ecs.vuw.ac.nz/~djp/files/P05.pdf
+        // The straightforward recursive version is obviously more elegant, but the
+        // non-recursive one has the principal advantage of not ending in a stack overflow
+        // for large components.
+        // (We test this version against the simpler one in CloningTests.fs, of course)
+        // Due to the non-recursive implementation we can also exploit that part of
+        // what would otherwise be the call stack can be shared with the stack used
+        // for holding elements of identified components (see the last paragraph of
+        // section 2 in the referenced paper).
+
+        // For optimization purposes we use a static stack, which makes
+        // FindStronglyConnectedComponents and ComputeTopologicalOrder not thread-safe.
+
+        private static int[] TopoIndices = new int[8];
+
+        /// Doubles the capacity of TopoIndices, keeping the existing elements
+        /// at the same indices.
+        private static void GrowTopoIndices()
+        {
+            Array.Resize(ref TopoIndices, 2 * TopoIndices.Length);
+        }
+
+        /// Doubles the capacity of TopoIndices when it is used as two stacks
+        /// growing towards each other: elements below splitIndex stay at the
+        /// front, elements from splitIndex on are moved to the end of the new
+        /// array. Returns the index at which the moved tail now starts.
+        private static int GrowTopoIndices(int splitIndex)
+        {
+            Debug.Assert(splitIndex >= 0 && splitIndex <= TopoIndices.Length);
+            int oldLength = TopoIndices.Length;
+            int newLength = 2 * oldLength;
+            var grown = new int[newLength];
+            // front part keeps its position
+            Array.Copy(TopoIndices, grown, splitIndex);
+            int tailLength = oldLength - splitIndex;
+            int newSplitIndex = newLength - tailLength;
+            if (tailLength != 0)
+                Array.Copy(TopoIndices, splitIndex, grown, newSplitIndex, tailLength);
+            TopoIndices = grown;
+            return newSplitIndex;
+        }
+
+        private static int[] TopoSubIndices = new int[8];
+
+        /// Doubles the capacity of TopoSubIndices, keeping the existing elements
+        /// at the same indices.
+        private static void GrowTopoSubIndices()
+        {
+            Array.Resize(ref TopoSubIndices, 2 * TopoSubIndices.Length);
+        }
+
+        /// Fills the StronglyConnectedComponent fields of the
+        /// states passed in the array. Returns an array mapping each state to an
+        /// integer component identifier.
+        ///
+        /// The object states to traverse. The object with array index
+        /// 0 is ignored. All other objects are assumed to be reachable from the object
+        /// with array index 1.
+        internal static int[] FindStronglyConnectedComponents(State[] states)
+        {
+            Debug.Assert(states.Length > 1);
+            int[] components = new int[states.Length];
+            // The path stack and the component stack are both stored in TopoIndices.
+            // The path stack starts at the beginning of TopoIndices, while
+            // the component stack starts at the end and progresses in reverse direction.
+            int pathStackCount = 0; // number of elements in the path stack
+            int componentStackIndex = TopoIndices.Length; // index of element last inserted into component stack
+            int counter = 1; // in the paper this variable is called "index"
+            int reverseCounter = states.Length - 1; // in the paper this variable is called "C"
+            bool root = true;
+            int objectIndex = 1; // states[1] is state for the root object, states[0] is null
+            int subIndex = 0;
+            var subObjectIndices = states[objectIndex].ObjectIndices;
+            components[1] = counter;
+            ++counter;
+            if (subObjectIndices != null)
+            {
+                for (; ; )
+                {
+                    // visit the not yet processed sub-objects of the current object
+                    while (subIndex < subObjectIndices.Length)
+                    {
+                        var subObjectIndex = subObjectIndices[subIndex];
+                        ++subIndex;
+                        if (subObjectIndex == 0) continue;
+                        var subObjectComponent = components[subObjectIndex];
+                        if (subObjectComponent == 0)
+                        {
+                            // sub-object has not been visited yet
+                            var subSubObjectIndices = states[subObjectIndex].ObjectIndices;
+                            if (subSubObjectIndices == null)
+                            {
+                                // no outgoing references: assign its final id right away
+                                components[subObjectIndex] = reverseCounter;
+                                --reverseCounter;
+                            }
+                            else
+                            {
+                                // "recurse": push the current position onto the path stack.
+                                // The sign of the saved sub-index encodes the root flag.
+                                subObjectIndices = subSubObjectIndices;
+                                components[subObjectIndex] = counter;
+                                ++counter;
+                                TopoIndices[pathStackCount] = objectIndex;
+                                TopoSubIndices[pathStackCount] = root ? subIndex : -subIndex;
+                                root = true;
+                                objectIndex = subObjectIndex;
+                                subIndex = 0;
+                                ++pathStackCount;
+                                if (pathStackCount == componentStackIndex)
+                                    componentStackIndex = GrowTopoIndices(componentStackIndex);
+                                if (pathStackCount == TopoSubIndices.Length)
+                                    GrowTopoSubIndices();
+                                continue;
+                            }
+                        }
+                        else if (subObjectComponent < components[objectIndex])
+                        {
+                            components[objectIndex] = subObjectComponent;
+                            root = false;
+                        }
+                    }
+                    if (root)
+                    {
+                        if (componentStackIndex < TopoIndices.Length)
+                        {
+                            int component = components[objectIndex];
+                            if (components[TopoIndices[componentStackIndex]] >= component)
+                            {
+                                // pop all members of this component off the component stack
+                                int next = componentStackIndex + 1;
+                                while (next < TopoIndices.Length && components[TopoIndices[next]] >= component) ++next;
+                                int d = next - componentStackIndex;
+                                var scc = new int[d + 1];
+                                for (int i = 0; i < d; ++i)
+                                {
+                                    int idx = TopoIndices[componentStackIndex + i];
+                                    scc[1 + i] = idx;
+                                    states[idx].StronglyConnectedComponent = scc;
+                                    components[idx] = reverseCounter;
+                                    --counter;
+                                }
+                                scc[0] = objectIndex; // the component's root comes first
+                                states[objectIndex].StronglyConnectedComponent = scc;
+                                componentStackIndex = next;
+                            }
+                        }
+                        components[objectIndex] = reverseCounter;
+                        --counter;
+                        --reverseCounter;
+                        if (pathStackCount == 0) break;
+                    }
+                    else
+                    {
+                        TopoIndices[--componentStackIndex] = objectIndex;
+                        // we never need to grow the TopoIndices array here
+                        // because we immediately decrement pathStackCount next
+                    }
+                    // "return" to the parent object saved on the path stack
+                    --pathStackCount;
+                    int subObjectComponent_ = components[objectIndex];
+                    objectIndex = TopoIndices[pathStackCount];
+                    subIndex = TopoSubIndices[pathStackCount];
+                    if (subIndex > 0)
+                    {
+                        root = true;
+                    }
+                    else
+                    {
+                        subIndex = -subIndex;
+                        root = false;
+                    }
+                    subObjectIndices = states[objectIndex].ObjectIndices;
+                    if (subObjectComponent_ < components[objectIndex])
+                    {
+                        components[objectIndex] = subObjectComponent_;
+                        root = false;
+                    }
+                }
+            }
+            return components;
+        }
+
+        private static int[] SccIndexStack = new int[8];
+        /// Doubles the capacity of SccIndexStack, keeping the existing elements.
+        private static void GrowSccIndexStack()
+        {
+            var newStack = new int[2 * SccIndexStack.Length];
+            SccIndexStack.CopyTo(newStack, 0);
+            SccIndexStack = newStack;
+        }
+
+        /// Returns an array with the topologically sorted indices of the states.
+        /// In the returned array the indices of states belonging to the same strongly
+        /// connected component are adjacent (but the order within a strongly connected
+        /// component is undefined).
+        ///
+        /// The object states to traverse. The object with array index
+        /// 0 is ignored. All other objects are assumed to be reachable from the object
+        /// with array index 1.
+        internal static int[] ComputeTopologicalOrder(State[] states)
+        {
+            Debug.Assert(states.Length > 1);
+
+            // Fill the State.StronglyConnectedComponent fields.
+            // (We don't need the returned array, so we can recycle it for our purposes.)
+            int[] orderedObjectIndices = FindStronglyConnectedComponents(states);
+            Array.Clear(orderedObjectIndices, 0, orderedObjectIndices.Length);
+            int nextPosition = orderedObjectIndices.Length - 1;
+
+            // NOTE(review): these assignments shrink the shared static stacks to two
+            // elements on every call, discarding any capacity grown earlier. This looks
+            // like a leftover for exercising the Grow* paths; confirm before removing.
+            TopoIndices = new int[2];
+            TopoSubIndices = new int[2];
+            SccIndexStack = new int[2];
+
+            // We traverse the graph non-recursively in depth-first order.
+
+            int topoStackCount = 0;
+            int sccIndexStackCount = 0;
+
+            // one bit per state index, set once the state has been visited
+            int[] visitedBits = new int[(checked(states.Length + 31)) / 32];
+
+            int objectIndex;
+            int[] subObjectIndices;
+            {
+                var state = states[1];
+                if (state.StronglyConnectedComponent == null)
+                {
+                    objectIndex = 1;
+                    subObjectIndices = state.ObjectIndices;
+                    if (subObjectIndices == null)
+                    {
+                        // the root references no other object
+                        orderedObjectIndices[1] = 1;
+                        return orderedObjectIndices;
+                    }
+                    visitedBits[0] = 1 << 1;
+                }
+                else
+                {
+                    // mark the whole component as visited and start with its first member
+                    foreach (var sccIndex in state.StronglyConnectedComponent)
+                        visitedBits[sccIndex / 32] |= 1 << (sccIndex % 32);
+                    objectIndex = state.StronglyConnectedComponent[0];
+                    subObjectIndices = states[objectIndex].ObjectIndices;
+                    SccIndexStack[0] = 1;
+                    sccIndexStackCount = 1;
+                }
+            }
+            int subIndex = subObjectIndices.Length - 1;
+
+            for (; ; )
+            {
+                // First we iterate over the sub objects...
+                Debug.Assert(subObjectIndices != null);
+
+                // (The states array was constructed in breadth-first order, while we construct
+                // the topological order using depth-first search. With a bit of luck we can
+                // keep the resulting orderedObjectIndices close to a simple increasing sequence
+                // by iterating over the sub-objects in the depth-first search in reverse order.)
+                while (subIndex >= 0)
+                {
+                    var subObjectIndex = subObjectIndices[subIndex];
+                    --subIndex;
+                    if (subObjectIndex == 0) continue;
+                    int w = subObjectIndex / 32, b = subObjectIndex % 32;
+                    if (((visitedBits[w] >> b) & 1) == 0)
+                    {
+                        var subState = states[subObjectIndex];
+                        var subSubObjectIndices = subState.ObjectIndices;
+                        if (subState.StronglyConnectedComponent == null)
+                        {
+                            visitedBits[w] |= 1 << b;
+                            if (subSubObjectIndices == null)
+                            {
+                                orderedObjectIndices[nextPosition] = subObjectIndex;
+                                --nextPosition;
+                                continue;
+                            }
+                            subObjectIndices = subSubObjectIndices;
+                        }
+                        else
+                        {
+                            foreach (var sccIndex in subState.StronglyConnectedComponent)
+                                visitedBits[sccIndex / 32] |= 1 << (sccIndex % 32);
+                            subObjectIndex = subState.StronglyConnectedComponent[0];
+                            subObjectIndices = states[subObjectIndex].ObjectIndices;
+                            SccIndexStack[sccIndexStackCount] = 1;
+                            if (++sccIndexStackCount == SccIndexStack.Length) GrowSccIndexStack();
+                        }
+                        // descend into the sub-object, remembering where to resume
+                        TopoIndices[topoStackCount] = objectIndex;
+                        TopoSubIndices[topoStackCount] = subIndex;
+                        ++topoStackCount;
+                        if (topoStackCount == TopoIndices.Length) GrowTopoIndices();
+                        if (topoStackCount == TopoSubIndices.Length) GrowTopoSubIndices();
+                        objectIndex = subObjectIndex;
+                        subIndex = subObjectIndices.Length - 1;
+                        continue;
+                    }
+                }
+
+                // ... then we iterate over other object in the same strongly connected component.
+                var scc = states[objectIndex].StronglyConnectedComponent;
+                if (scc == null)
+                {
+                    orderedObjectIndices[nextPosition] = objectIndex;
+                    --nextPosition;
+                }
+                else
+                {
+                    Debug.Assert(sccIndexStackCount > 0);
+                    int sccIndex = SccIndexStack[sccIndexStackCount - 1];
+                    if (sccIndex < scc.Length)
+                    {
+                        objectIndex = scc[sccIndex];
+                        subObjectIndices = states[objectIndex].ObjectIndices;
+                        subIndex = subObjectIndices.Length - 1;
+                        SccIndexStack[sccIndexStackCount - 1] = ++sccIndex;
+                        continue;
+                    }
+                    // all members processed: emit the component as one adjacent run
+                    --sccIndexStackCount;
+                    for (int i = scc.Length - 1; i >= 0; --i)
+                    {
+                        sccIndex = scc[i];
+                        orderedObjectIndices[nextPosition] = sccIndex;
+                        --nextPosition;
+                    }
+                }
+                if (topoStackCount == 0) break;
+                --topoStackCount;
+                objectIndex = TopoIndices[topoStackCount];
+                subIndex = TopoSubIndices[topoStackCount];
+                subObjectIndices = states[objectIndex].ObjectIndices;
+            }
+            return orderedObjectIndices;
+        }
+    }
+
+    /// <summary>Flags describing which serialization callbacks and interfaces
+    /// a type takes part in.</summary>
+    [Flags]
+    internal enum CloneEvents
+    {
+        None = 0,
+        OnSerializing = 1,
+        OnSerialized = 2,
+        OnDeserializing = 4,
+        OnDeserialized = 8,
+        ISerializable = 16,
+        IDeserializationCallback = 32,
+        IObjectReference = 64
+    }
+
+    /// <summary>Holds, for one type, the combined handlers for the
+    /// OnSerializing/OnSerialized/OnDeserializing/OnDeserialized attributes found on
+    /// the type and its base types, together with the matching
+    /// <see cref="CloneEvents"/> flags.</summary>
+    internal sealed class CloneEventHandlers
+    {
+        public readonly CloneEvents Events;
+
+        private delegate void Handler(object instance, StreamingContext context);
+
+        // Each handler is null unless the corresponding flag is set in Events.
+        private readonly Handler OnSerializingHandler;
+        private readonly Handler OnSerializedHandler;
+        private readonly Handler OnDeserializingHandler;
+        private readonly Handler OnDeserializedHandler;
+
+        private CloneEventHandlers(CloneEvents events,
+                                   Handler onSerializingHandler,
+                                   Handler onSerializedHandler,
+                                   Handler onDeserializingHandler,
+                                   Handler onDeserializedHandler)
+        {
+            Events = events;
+            OnSerializingHandler = onSerializingHandler;
+            OnSerializedHandler = onSerializedHandler;
+            OnDeserializingHandler = onDeserializingHandler;
+            OnDeserializedHandler = onDeserializedHandler;
+        }
+
+        // The Invoke* methods dereference the handler without a null check, so they
+        // must only be called when the matching flag is set in Events.
+
+        public void InvokeOnSerializing(object instance, StreamingContext context)
+        {
+            OnSerializingHandler.Invoke(instance, context);
+        }
+
+        public void InvokeOnSerialized(object instance, StreamingContext context)
+        {
+            OnSerializedHandler.Invoke(instance, context);
+        }
+
+        public void InvokeOnDeserializing(object instance, StreamingContext context)
+        {
+            OnDeserializingHandler.Invoke(instance, context);
+        }
+
+        public void InvokeOnDeserialized(object instance, StreamingContext context)
+        {
+            OnDeserializedHandler.Invoke(instance, context);
+        }
+
+        // Shared instances for the two most common handler-less combinations.
+        private static readonly CloneEventHandlers ISerializableOnly = new CloneEventHandlers(CloneEvents.ISerializable, null, null, null, null);
+        private static readonly CloneEventHandlers ISerializableAndObjectReferenceOnly = new CloneEventHandlers(CloneEvents.ISerializable | CloneEvents.IObjectReference, null, null, null, null);
+
+        /// <summary>Wraps a strongly typed callback in a Handler that casts the
+        /// boxed instance to <typeparamref name="T"/> before calling it.</summary>
+        private static Handler WithBoxedArgument<T>(Action<T, StreamingContext> handler)
+        {
+            return (object obj, StreamingContext context) => handler((T)obj, context);
+        }
+        private static readonly MethodInfo WithBoxedArgumentMethodInfo = typeof(CloneEventHandlers).GetMethod("WithBoxedArgument", BindingFlags.Static | BindingFlags.NonPublic);
+
+        /// <summary>Creates a Handler that calls the instance method
+        /// <paramref name="mi"/> declared on <paramref name="type"/>.</summary>
+        private static Handler CreateHandler(Type type, MethodInfo mi)
+        {
+            // open instance delegate: the instance becomes the first argument
+            var delegateType = typeof(Action<,>).MakeGenericType(type, typeof(StreamingContext));
+            var d = Delegate.CreateDelegate(delegateType, null, mi);
+            return (Handler)WithBoxedArgumentMethodInfo.MakeGenericMethod(type).Invoke(null, new object[] { d });
+        }
+
+        private static readonly Type typeofObject = typeof(object);
+        private static readonly Type typeofISerializable = typeof(ISerializable);
+        private static readonly Type typeofIObjectReference = typeof(IObjectReference);
+        private static readonly Type typeofIDeserializationCallback = typeof(IDeserializationCallback);
+        private static readonly Type typeofOnSerializingAttribute = typeof(OnSerializingAttribute);
+        private static readonly Type typeofOnSerializedAttribute = typeof(OnSerializedAttribute);
+        private static readonly Type typeofOnDeserializingAttribute = typeof(OnDeserializingAttribute);
+        private static readonly Type typeofOnDeserializedAttribute = typeof(OnDeserializedAttribute);
+
+        /// <summary>Returns the event handlers for <paramref name="type"/>, or null
+        /// if the type is object or neither implements one of the serialization
+        /// interfaces nor declares (or inherits) an attributed callback.</summary>
+        /// <exception cref="SerializationException">A base type other than object
+        /// is not serializable.</exception>
+        public static CloneEventHandlers Create(Type type)
+        {
+            Debug.Assert(type != null);
+            if (type == typeofObject) return null;
+            var events = CloneEvents.None;
+            if (typeofISerializable.IsAssignableFrom(type)) events |= CloneEvents.ISerializable;
+            if (typeofIObjectReference.IsAssignableFrom(type)) events |= CloneEvents.IObjectReference;
+            if (typeofIDeserializationCallback.IsAssignableFrom(type)) events |= CloneEvents.IDeserializationCallback;
+            var bt = type;
+            for (; ; )
+            {
+                var methods = bt.GetMethods(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.DeclaredOnly);
+                for (int i = 0; i < methods.Length; ++i)
+                {
+                    var mi = methods[i];
+                    // first attributed method found: switch to the slower path that
+                    // actually builds the delegates, resuming at this method
+                    if (mi.IsDefined(typeofOnSerializingAttribute, false)
+                        || mi.IsDefined(typeofOnSerializedAttribute, false)
+                        || mi.IsDefined(typeofOnDeserializingAttribute, false)
+                        || mi.IsDefined(typeofOnDeserializedAttribute, false)) return CreateContinue(type, events, bt, methods, i);
+                }
+                bt = bt.BaseType;
+                if (bt == null || bt == typeofObject) break;
+                if (!bt.IsSerializable) throw new SerializationException(Cloner.BaseTypeNotSerializableMessage(bt, type));
+            }
+            if (events == 0) return null;
+            if (events == CloneEvents.ISerializable) return ISerializableOnly;
+            if (events == (CloneEvents.ISerializable | CloneEvents.IObjectReference)) return ISerializableAndObjectReferenceOnly;
+            return new CloneEventHandlers(events, null, null, null, null);
+        }
+
+        /// <summary>Continuation of <see cref="Create"/>: collects the attributed
+        /// callbacks starting at methods[i] of <paramref name="baseType"/> and
+        /// continuing up the inheritance chain.</summary>
+        private static CloneEventHandlers CreateContinue(Type type, CloneEvents events, Type baseType, MethodInfo[] methods, int i)
+        {
+            Delegate onSerializingHandler = null, onSerializedHandlers = null, onDeserializingHandlers = null, onDeserializedHandlers = null;
+            var bt = baseType;
+            for (; ; )
+            {
+                for (; i < methods.Length; ++i)
+                {
+                    var mi = methods[i];
+                    if (mi.IsDefined(typeofOnSerializingAttribute, false))
+                    {
+                        var d = CreateHandler(bt, mi);
+                        onSerializingHandler = onSerializingHandler == null ? d : Delegate.Combine(d, onSerializingHandler); // call base handler first
+                    }
+                    if (mi.IsDefined(typeofOnSerializedAttribute, false))
+                    {
+                        var d = CreateHandler(bt, mi);
+                        onSerializedHandlers = onSerializedHandlers == null ? d : Delegate.Combine(d, onSerializedHandlers);
+                    }
+                    if (mi.IsDefined(typeofOnDeserializingAttribute, false))
+                    {
+                        var d = CreateHandler(bt, mi);
+                        onDeserializingHandlers = onDeserializingHandlers == null ? d : Delegate.Combine(d, onDeserializingHandlers);
+                    }
+                    if (mi.IsDefined(typeofOnDeserializedAttribute, false))
+                    {
+                        var d = CreateHandler(bt, mi);
+                        onDeserializedHandlers = onDeserializedHandlers == null ? d : Delegate.Combine(d, onDeserializedHandlers);
+                    }
+                }
+                bt = bt.BaseType;
+                if (bt == null || bt == typeofObject) break;
+                if (!bt.IsSerializable) throw new SerializationException(Cloner.BaseTypeNotSerializableMessage(bt, type));
+                methods = bt.GetMethods(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.DeclaredOnly);
+                i = 0;
+            }
+            Handler onSerializing = null, onSerialized = null, onDeserializing = null, onDeserialized = null;
+            if (onSerializingHandler != null)
+            {
+                events |= CloneEvents.OnSerializing;
+                onSerializing = (Handler)onSerializingHandler;
+            }
+            if (onSerializedHandlers != null)
+            {
+                events |= CloneEvents.OnSerialized;
+                onSerialized = (Handler)onSerializedHandlers;
+            }
+            if (onDeserializingHandlers != null)
+            {
+                events |= CloneEvents.OnDeserializing;
+                onDeserializing = (Handler)onDeserializingHandlers;
+            }
+            if (onDeserializedHandlers != null)
+            {
+                events |= CloneEvents.OnDeserialized;
+                onDeserialized = (Handler)onDeserializedHandlers;
+            }
+            return new CloneEventHandlers(events, onSerializing, onSerialized, onDeserializing, onDeserialized);
+        }
+    }
+
+}
+
+#endif
diff --git a/src/FParsecCS/ErrorMessage.cs 
b/src/FParsecCS/ErrorMessage.cs
new file mode 100644
index 0000000..503a083
--- /dev/null
+++ b/src/FParsecCS/ErrorMessage.cs
@@ -0,0 +1,274 @@
+// Copyright (c) Stephan Tolksdorf 2010
+// License: Simplified BSD License. See accompanying documentation.
+
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+
+using Microsoft.FSharp.Core;
+
+namespace FParsec {
+
+/// <summary>Discriminates the kinds of <see cref="ErrorMessage"/>. The order is
+/// significant: all values up to and including Message identify messages that
+/// are fully described by their string, and the comparer sorts by this value first.</summary>
+public enum ErrorMessageType {
+    Expected,
+    ExpectedString,
+    ExpectedCaseInsensitiveString,
+    Unexpected,
+    UnexpectedString,
+    UnexpectedCaseInsensitiveString,
+    Message,
+    NestedError,
+    CompoundError,
+    Other
+}
+
+/// <summary>Base class of all parser error messages. Each nested subclass
+/// corresponds to one <see cref="ErrorMessageType"/> value.</summary>
+[DebuggerDisplay("{GetDebuggerDisplay(),nq}")]
+public class ErrorMessage : IEquatable<ErrorMessage> {
+    public readonly ErrorMessageType Type;
+
+    // Shared string payload; the subclasses expose it under a type-specific name.
+    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
+    internal string String;
+
+    internal ErrorMessage(ErrorMessageType messageType) {
+        Type = messageType;
+    }
+
+    public class Expected : ErrorMessage {
+        public string Label { get { return String; } }
+        public Expected(string labelForExpectedInput) : base(ErrorMessageType.Expected) {
+            String = labelForExpectedInput;
+        }
+    }
+
+    public class ExpectedString : ErrorMessage {
+        public new string String { get { return base.String; } }
+        public ExpectedString(string expectedString) : base(ErrorMessageType.ExpectedString) {
+            base.String = expectedString;
+        }
+    }
+
+    public class ExpectedCaseInsensitiveString : ErrorMessage {
+        public string CaseInsensitiveString { get { return String; } }
+        public ExpectedCaseInsensitiveString(string expectedCaseInsensitiveString) : base(ErrorMessageType.ExpectedCaseInsensitiveString) {
+            String = expectedCaseInsensitiveString;
+        }
+    }
+
+    public class Unexpected : ErrorMessage {
+        public string Label { get { return String; } }
+        public Unexpected(string labelForUnexpectedInput) : base(ErrorMessageType.Unexpected) {
+            String = labelForUnexpectedInput;
+        }
+    }
+
+    public class UnexpectedString : ErrorMessage {
+        public new string String { get { return base.String; } }
+        public UnexpectedString(string unexpectedString) : base(ErrorMessageType.UnexpectedString) {
+            base.String = unexpectedString;
+        }
+    }
+
+    public class UnexpectedCaseInsensitiveString : ErrorMessage {
+        public string CaseInsensitiveString { get { return String; } }
+        public UnexpectedCaseInsensitiveString(string unexpectedCaseInsensitiveString) : base(ErrorMessageType.UnexpectedCaseInsensitiveString) {
+            String = unexpectedCaseInsensitiveString;
+        }
+    }
+
+    public class Message : ErrorMessage {
+        public new string String { get { return base.String; } }
+        public Message(string message) : base(ErrorMessageType.Message) {
+            base.String = message;
+        }
+    }
+
+    public class NestedError : ErrorMessage {
+        public Position Position { get; private set; }
+        public object UserState { get; private set; }
+        public ErrorMessageList Messages { get; private set; }
+
+        public NestedError(Position position, object userState, ErrorMessageList messages) : base(ErrorMessageType.NestedError) {
+            Position = position;
+            UserState = userState;
+            Messages = messages;
+        }
+    }
+
+    public class CompoundError : ErrorMessage {
+        public string LabelOfCompound { get { return String; } }
+
+        public Position NestedErrorPosition { get; private set; }
+        public object NestedErrorUserState { get; private set; }
+        public ErrorMessageList NestedErrorMessages { get; private set; }
+
+        public CompoundError(string labelOfCompound,
+                             Position nestedErrorPosition,
+                             object nestedErrorUserState,
+                             ErrorMessageList nestedErrorMessages) : base(ErrorMessageType.CompoundError)
+        {
+            String = labelOfCompound;
+            NestedErrorPosition = nestedErrorPosition;
+            NestedErrorUserState = nestedErrorUserState;
+            NestedErrorMessages = nestedErrorMessages;
+        }
+    }
+
+    public class Other : ErrorMessage {
+        public object Data { get; private set; }
+        public Other(object data) : base(ErrorMessageType.Other) {
+            Data = data;
+        }
+    }
+
+    public override bool Equals(object obj) { return Equals(obj as ErrorMessage); }
+
+    /// <summary>Two messages are equal if they are the same instance, or have the
+    /// same type and equal payload (the string for the simple types, see
+    /// EqualsHelper for the others).</summary>
+    public bool Equals(ErrorMessage other) {
+        // the casts to object force reference comparisons instead of operator==
+        return (object)this == (object)other
+               || (   (object)other != null
+                   && Type == other.Type
+                   && (Type > ErrorMessageType.Message
+                       ? EqualsHelper(other)
+                       : String == other.String));
+    }
+
+    public static bool operator==(ErrorMessage left, ErrorMessage right) {
+        return (object)left == (object)right
+               || (   (object)left != null
+                   && (object)right != null
+                   && left.Type == right.Type
+                   && (left.Type > ErrorMessageType.Message
+                       ? left.EqualsHelper(right)
+                       : left.String == right.String));
+    }
+    public static bool operator!=(ErrorMessage left, ErrorMessage right) { return !(left == right); }
+
+    // Payload comparison for NestedError, CompoundError and Other.
+    // Note that Other.Data is compared by reference (object ==).
+    private bool EqualsHelper(ErrorMessage other) {
+        Debug.Assert(Type == other.Type
+                     && Type > ErrorMessageType.Message);
+        if (Type == ErrorMessageType.NestedError) {
+            var ne1 = (NestedError)this;
+            var ne2 = (NestedError)other;
+            return ne1.Position == ne2.Position
+                   && ne1.Messages == ne2.Messages
+                   && LanguagePrimitives.GenericEqualityERComparer.Equals(ne1.UserState, ne2.UserState);
+        } else if (Type == ErrorMessageType.CompoundError) {
+            if (String != other.String) return false;
+            var ce1 = (CompoundError)this;
+            var ce2 = (CompoundError)other;
+            return ce1.NestedErrorPosition == ce2.NestedErrorPosition
+                   && ce1.NestedErrorMessages == ce2.NestedErrorMessages
+                   && LanguagePrimitives.GenericEqualityERComparer.Equals(ce1.NestedErrorUserState, ce2.NestedErrorUserState);
+        } else { // ErrorMessageType == ErrorMessageType.Other
+            Debug.Assert(Type == ErrorMessageType.Other);
+            return ((Other)this).Data == ((Other)other).Data;
+        }
+    }
+
+    public override int GetHashCode() {
+        return (int)Type ^ (String == null ? 0 : String.GetHashCode());
+    }
+
+    /// <summary>Orders messages by type, then by string (ordinal) or, for nested
+    /// and compound errors, by position and the sorted nested messages. Null sorts
+    /// first; all Other messages compare as equal.</summary>
+    private class ErrorMessageComparer : Comparer<ErrorMessage> {
+        public override int Compare(ErrorMessage x, ErrorMessage y) {
+            if (x == null || y == null) {
+                return x == null && y == null ? 0 : (x == null ? -1 : 1);
+            }
+            int d = (int)x.Type - (int)y.Type;
+            if (d != 0) return d;
+            var type = x.Type;
+            if (type <= ErrorMessageType.Message) {
+                Debug.Assert(type >= 0);
+                return String.CompareOrdinal(x.String, y.String);
+            } else if (type == ErrorMessageType.NestedError) {
+                var ne1 = (NestedError)x;
+                var ne2 = (NestedError)y;
+                var c = Position.Compare(ne1.Position, ne2.Position);
+                if (c != 0) return c;
+                var msgs1 = ErrorMessageList.ToSortedArray(ne1.Messages);
+                var msgs2 = ErrorMessageList.ToSortedArray(ne2.Messages);
+                int n = Math.Min(msgs1.Length, msgs2.Length);
+                for (int i = 0; i < n; ++i) {
+                    c = Compare(msgs1[i], msgs2[i]);
+                    if (c != 0) return c;
+                }
+                return msgs1.Length - msgs2.Length;
+            } else if (type == ErrorMessageType.CompoundError) {
+                var c = String.CompareOrdinal(x.String, y.String);
+                if (c != 0) return c;
+                var ce1 = (CompoundError)x;
+                var ce2 = (CompoundError)y;
+                c = Position.Compare(ce1.NestedErrorPosition, ce2.NestedErrorPosition);
+                if (c != 0) return c;
+                var msgs1 = ErrorMessageList.ToSortedArray(ce1.NestedErrorMessages);
+                var msgs2 = ErrorMessageList.ToSortedArray(ce2.NestedErrorMessages);
+                int n = Math.Min(msgs1.Length, msgs2.Length);
+                for (int i = 0; i < n; ++i) {
+                    c = Compare(msgs1[i], msgs2[i]);
+                    if (c != 0) return c;
+                }
+                return msgs1.Length - msgs2.Length;
+            } else {
+                Debug.Assert(type == ErrorMessageType.Other);
+                return 0;
+            }
+        }
+    }
+
+    internal static Comparer<ErrorMessage> Comparer = new ErrorMessageComparer();
+    internal static ErrorMessage[] EmptyArray = new ErrorMessage[0];
+
+    /// <summary>Returns the text shown for this message in the debugger
+    /// (referenced by the DebuggerDisplay attribute on the class).</summary>
+    /// <exception cref="InvalidOperationException">Type is not a defined
+    /// ErrorMessageType value.</exception>
+    internal string GetDebuggerDisplay() {
+        switch (Type) {
+        case ErrorMessageType.Expected:
+            return String == null
+                   ? "Expected(null)"
+                   : Text.DoubleQuote("Expected(", String, ")");
+        case ErrorMessageType.ExpectedString:
+            return String == null
+                   ? "ExpectedString(null)"
+                   : Text.DoubleQuote("ExpectedString(", String, ")");
+        case ErrorMessageType.ExpectedCaseInsensitiveString:
+            return String == null
+                   ? "ExpectedCaseInsensitiveString(null)"
+                   : Text.DoubleQuote("ExpectedCaseInsensitiveString(", String, ")");
+        case ErrorMessageType.Unexpected:
+            return String == null
+                   ? "Unexpected(null)"
+                   : Text.DoubleQuote("Unexpected(", String, ")");
+        case ErrorMessageType.UnexpectedString:
+            return String == null
+                   ? "UnexpectedString(null)"
+                   : Text.DoubleQuote("UnexpectedString(", String, ")");
+        case ErrorMessageType.UnexpectedCaseInsensitiveString:
+            return String == null
+                   ? "UnexpectedCaseInsensitiveString(null)"
+                   : Text.DoubleQuote("UnexpectedCaseInsensitiveString(", String, ")");
+        case ErrorMessageType.Message:
+            return String == null
+                   ? "Message(null)"
+                   : Text.DoubleQuote("Message(", String, ")");
+        case ErrorMessageType.NestedError: {
+            var ne = (NestedError)this;
+            var pos = ne.Position == null ? "null" : ne.Position.ToString();
+            var msgs = ErrorMessageList.GetDebuggerDisplay(ne.Messages);
+            return "NestedError(" + pos + ", ..., " + msgs + ")";
+        }
+        case ErrorMessageType.CompoundError: {
+            var ce = (CompoundError)this;
+            var label = ce.String == null ? "null" : Text.Escape(ce.String, "", "\"", "\"", "", '"');
+            // show a missing position as "null", like the NestedError case does
+            var pos = ce.NestedErrorPosition == null ? "null" : ce.NestedErrorPosition.ToString();
+            var msgs = ErrorMessageList.GetDebuggerDisplay(ce.NestedErrorMessages);
+            return "CompoundError(" + label + ", " + pos + ", ..., " + msgs + ")";
+        }
+        case ErrorMessageType.Other: {
+            var oe = (Other)this;
+            // display the payload itself; ToString() is not overridden on the
+            // message, so calling it there would only print the type name
+            return oe.Data == null ? "Other(null)" : "Other(" + oe.Data.ToString() + ")";
+        }
+        default:
+            throw new InvalidOperationException();
+        }
+    }
+}
+
+
+}
\ No newline at end of file
diff --git a/src/FParsecCS/ErrorMessageList.cs b/src/FParsecCS/ErrorMessageList.cs
new file mode 100644
index 0000000..69ea566
--- /dev/null
+++ b/src/FParsecCS/ErrorMessageList.cs
@@ -0,0 +1,111 @@
+// Copyright (c) Stephan Tolksdorf 2010
+// License: Simplified BSD License. See accompanying documentation.
+
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace FParsec {
+
+/// An immutable singly linked list of error messages (Head plus optional Tail).
+/// Equality, the == / != operators and GetHashCode treat a list as a *set* of
+/// messages: order and duplicates are ignored, and so are messages whose Type is
+/// at most ErrorMessageType.Message and whose String is null or empty (see ToHashSet).
+/// A null reference is used as the empty list by the static helpers.
+[DebuggerDisplay("{ErrorMessageList.GetDebuggerDisplay(this),nq}"),
+ DebuggerTypeProxy(typeof(ErrorMessageList.DebugView))]
+public sealed class ErrorMessageList : IEquatable<ErrorMessageList> {
+    public readonly ErrorMessage Head;
+    public readonly ErrorMessageList Tail;
+
+    /// Creates a list with the given head and tail (tail may be null).
+    public ErrorMessageList(ErrorMessage head, ErrorMessageList tail) {
+        // reading a member makes a null head fail immediately with a NullReferenceException
+        var throwNullReferenceExceptionIfHeadIsNull = head.Type;
+        Head = head;
+        Tail = tail;
+    }
+
+    /// Creates a single-element list; Tail stays null.
+    public ErrorMessageList(ErrorMessage message) {
+        var throwNullReferenceExceptionIfMessageIsNull = message.Type;
+        Head = message;
+    }
+
+    /// Creates a two-element list (message1 followed by message2).
+    public ErrorMessageList(ErrorMessage message1, ErrorMessage message2) {
+        var throwNullReferenceExceptionIfMessage1IsNull = message1.Type;
+        Head = message1;
+        // the single-message constructor performs the null check for message2
+        Tail = new ErrorMessageList(message2);
+    }
+
+    /// Returns a list containing the messages of both lists.
+    /// If list1 is null, list2 is returned unchanged; otherwise every element of
+    /// list2 is prepended to list1 (so list2's elements appear in reverse order).
+    public static ErrorMessageList Merge(ErrorMessageList list1, ErrorMessageList list2) {
+        if ((object)list1 == null) return list2;
+        return MergeContinue(list1, list2);
+    }
+    private static ErrorMessageList MergeContinue(ErrorMessageList list1, ErrorMessageList list2) {
+        // (object) casts force reference comparison instead of the overloaded == below
+        while ((object)list2 != null) {
+            list1 = new ErrorMessageList(list2.Head, list1);
+            list2 = list2.Tail;
+        }
+        return list1;
+    }
+
+    /// Collects the messages of the list (null = empty list) into a set.
+    /// Messages with Type at most ErrorMessageType.Message and a null or empty
+    /// String are skipped.
+    public static HashSet<ErrorMessage> ToHashSet(ErrorMessageList messages) {
+        var msgs = messages;
+        var set = new HashSet<ErrorMessage>();
+        for (; (object)msgs != null; msgs = msgs.Tail) {
+            var msg = msgs.Head;
+            Debug.Assert(msg.Type >= 0);
+            if (msg.Type <= ErrorMessageType.Message && string.IsNullOrEmpty(msg.String)) continue;
+            set.Add(msg);
+        }
+        return set;
+    }
+
+    /// Returns the distinct messages of the list (as selected by ToHashSet),
+    /// sorted with ErrorMessage.Comparer.
+    public static ErrorMessage[] ToSortedArray(ErrorMessageList messages) {
+        var set = ToHashSet(messages);
+        var array = new ErrorMessage[set.Count];
+        set.CopyTo(array);
+        Array.Sort(array, ErrorMessage.Comparer);
+        return array;
+    }
+
+    public override bool Equals(object obj) { return Equals(obj as ErrorMessageList); }
+
+    /// Set-based equality: true if both lists yield the same ToHashSet contents.
+    public bool Equals(ErrorMessageList other) {
+        return (object)this == (object)other
+               || (   (object)other != null
+                   && ToHashSet(this).SetEquals(ToHashSet(other)));
+    }
+
+    /// Same set-based equality as Equals; two null references compare equal,
+    /// a null reference never equals a non-null list.
+    public static bool operator==(ErrorMessageList left, ErrorMessageList right) {
+        return (object)left == (object)right
+               || (   (object)left != null
+                   && (object)right != null
+                   && ToHashSet(left).SetEquals(ToHashSet(right)));
+    }
+    public static bool operator!=(ErrorMessageList left, ErrorMessageList right) { return !(left == right); }
+
+    /// XOR of the hash codes of the set elements, hence independent of message
+    /// order and duplicates (consistent with Equals).
+    public override int GetHashCode() {
+        var set = ToHashSet(this);
+        var h = 0;
+        foreach (var msg in set)
+            h ^= msg.GetHashCode();
+        return h;
+    }
+
+    /// Debugger summary: the first (at most) three sorted messages in "[a; b; c]"
+    /// form, followed by "; ..." when there are more.
+    internal static string GetDebuggerDisplay(ErrorMessageList list) {
+        var es = ErrorMessageList.ToSortedArray(list);
+        switch (es.Length) {
+        case 0: return "[]";
+        case 1: return "[" + es[0].GetDebuggerDisplay() + "]";
+        case 2: return "[" + es[0].GetDebuggerDisplay() + "; " + es[1].GetDebuggerDisplay() + "]";
+        case 3: return "[" + es[0].GetDebuggerDisplay() + "; " + es[1].GetDebuggerDisplay() + "; " + es[2].GetDebuggerDisplay() + "]";
+        default: return "[" + es[0].GetDebuggerDisplay() + "; " + es[1].GetDebuggerDisplay() + "; " + es[2].GetDebuggerDisplay() + "; ...]";
+        }
+    }
+
+    /// Debugger type proxy that exposes the list as a sorted array of messages.
+    internal class DebugView {
+        //[DebuggerBrowsable(DebuggerBrowsableState.Never)]
+        private readonly ErrorMessageList List;
+
+        public DebugView(ErrorMessageList list) { List = list; }
+
+        [DebuggerBrowsable(DebuggerBrowsableState.RootHidden)]
+        public ErrorMessage[] Items { get { return ErrorMessageList.ToSortedArray(List); } }
+    }
+}
+
+}
\ No newline at end of file
diff --git a/src/FParsecCS/Errors.cs b/src/FParsecCS/Errors.cs
new file mode 100644
index 0000000..36c6f54
--- /dev/null
+++ b/src/FParsecCS/Errors.cs
@@ -0,0 +1,120 @@
+// Copyright (c) Stephan Tolksdorf 2010-2011
+// License: Simplified BSD License. See accompanying documentation.
+
+using System;
+
+namespace FParsec {
+
+/// Pre-built single-message ErrorMessageList instances and small factory helpers
+/// for error lists. All message texts come from the Strings class.
+internal static class Errors {
+    static private ErrorMessageList Expected(string str) {
+        return new ErrorMessageList(new ErrorMessage.Expected(str));
+    }
+
+    static private ErrorMessageList Unexpected(string str) {
+        return new ErrorMessageList(new ErrorMessage.Unexpected(str));
+    }
+
+    static private ErrorMessageList Message(string str) {
+        return new ErrorMessageList(new ErrorMessage.Message(str));
+    }
+
+    public static readonly ErrorMessageList ExpectedEndOfInput = Expected(Strings.EndOfInput);
+    public static readonly ErrorMessageList UnexpectedEndOfInput = Unexpected(Strings.EndOfInput);
+
+    public static readonly ErrorMessageList ExpectedAnyChar = Expected(Strings.AnyChar);
+    public static readonly ErrorMessageList ExpectedWhitespace = Expected(Strings.Whitespace);
+    public static readonly ErrorMessageList ExpectedAsciiUppercaseLetter = Expected(Strings.AsciiUppercaseLetter);
+    public static readonly ErrorMessageList ExpectedAsciiLowercaseLetter = Expected(Strings.AsciiLowercaseLetter);
+    public static readonly ErrorMessageList ExpectedAsciiLetter = Expected(Strings.AsciiLetter);
+    public static readonly ErrorMessageList ExpectedUppercaseLetter = Expected(Strings.UppercaseLetter);
+    public static readonly ErrorMessageList ExpectedLowercaseLetter = Expected(Strings.LowercaseLetter);
+    public static readonly ErrorMessageList ExpectedLetter = Expected(Strings.Letter);
+    public static readonly ErrorMessageList ExpectedBinaryDigit = Expected(Strings.BinaryDigit);
+    public static readonly ErrorMessageList ExpectedOctalDigit = Expected(Strings.OctalDigit);
+    public static readonly ErrorMessageList ExpectedDecimalDigit = Expected(Strings.DecimalDigit);
+    public static readonly ErrorMessageList ExpectedHexadecimalDigit = Expected(Strings.HexadecimalDigit);
+
+    public static readonly ErrorMessageList ExpectedNewline = Expected(Strings.Newline);
+    public static readonly ErrorMessageList UnexpectedNewline = Unexpected(Strings.Newline);
+
+    public static readonly ErrorMessageList ExpectedTab = Expected(Strings.Tab);
+
+    public static readonly ErrorMessageList ExpectedFloatingPointNumber = Expected(Strings.FloatingPointNumber);
+
+    public static readonly ErrorMessageList ExpectedInt64 = Expected(Strings.Int64);
+    public static readonly ErrorMessageList ExpectedInt32 = Expected(Strings.Int32);
+    public static readonly ErrorMessageList ExpectedInt16 = Expected(Strings.Int16);
+    public static readonly ErrorMessageList ExpectedInt8 = Expected(Strings.Int8);
+    public static readonly ErrorMessageList ExpectedUInt64 = Expected(Strings.UInt64);
+    public static readonly ErrorMessageList ExpectedUInt32 = Expected(Strings.UInt32);
+    public static readonly ErrorMessageList ExpectedUInt16 = Expected(Strings.UInt16);
+    public static readonly ErrorMessageList ExpectedUInt8 = Expected(Strings.UInt8);
+
+    public static readonly ErrorMessageList ExpectedPrefixOperator = Expected(Strings.PrefixOperator);
+    public static readonly ErrorMessageList ExpectedInfixOperator = Expected(Strings.InfixOperator);
+    public static readonly ErrorMessageList ExpectedPostfixOperator = Expected(Strings.PostfixOperator);
+    // Must stay below the two fields it merges: static field initializers run in textual order.
+    public static readonly ErrorMessageList ExpectedInfixOrPostfixOperator = ErrorMessageList.Merge(ExpectedInfixOperator, ExpectedPostfixOperator);
+
+    public static readonly ErrorMessageList NumberOutsideOfDoubleRange = Message(Strings.NumberOutsideOfDoubleRange);
+    public static readonly ErrorMessageList NumberOutsideOfInt64Range = Message(Strings.NumberOutsideOfInt64Range);
+    public static readonly ErrorMessageList NumberOutsideOfInt32Range = Message(Strings.NumberOutsideOfInt32Range);
+    public static readonly ErrorMessageList NumberOutsideOfInt16Range = Message(Strings.NumberOutsideOfInt16Range);
+    public static readonly ErrorMessageList NumberOutsideOfInt8Range = Message(Strings.NumberOutsideOfInt8Range);
+    public static readonly ErrorMessageList NumberOutsideOfUInt64Range = Message(Strings.NumberOutsideOfUInt64Range);
+    public static readonly ErrorMessageList NumberOutsideOfUInt32Range = Message(Strings.NumberOutsideOfUInt32Range);
+    public static readonly ErrorMessageList NumberOutsideOfUInt16Range = Message(Strings.NumberOutsideOfUInt16Range);
+    public static readonly ErrorMessageList NumberOutsideOfUInt8Range = Message(Strings.NumberOutsideOfUInt8Range);
+
+
+    public static ErrorMessageList ExpectedAnyCharIn(string chars) {
+        return Expected(Strings.AnyCharIn(chars));
+    }
+
+    public static ErrorMessageList ExpectedAnyCharNotIn(string chars) {
+        return Expected(Strings.AnyCharNotIn(chars));
+    }
+
+    public static ErrorMessageList ExpectedStringMatchingRegex(string regexPattern) {
+        return Expected(Strings.StringMatchingRegex(regexPattern));
+    }
+
+    public static ErrorMessageList ExpectedAnySequenceOfNChars(int n) {
+        return Expected(Strings.ExpectedAnySequenceOfNChars(n));
+    }
+
+    public static ErrorMessageList CouldNotFindString(string str) {
+        return Message(Strings.CouldNotFindString(str));
+    }
+
+    public static ErrorMessageList CouldNotFindCaseInsensitiveString(string str) {
+        return Message(Strings.CouldNotFindCaseInsensitiveString(str));
+    }
+
+    // NOTE(review): the generic parameter lists on the three operator helpers below were
+    // missing from this copy of the file and have been restored; the parameter names are
+    // assumed - confirm against the Operator class in OperatorPrecedenceParser.cs.
+    public static ErrorMessageList OperatorsConflict<TTerm, TAfterString, TUserState>(Position position1, Operator<TTerm, TAfterString, TUserState> operator1,
+                                                                                      Position position2, Operator<TTerm, TAfterString, TUserState> operator2)
+    {
+        return Message(Strings.OperatorsConflict(position1, operator1, position2, operator2));
+    }
+
+    /// Two-message list: "expected prefix operator" plus "unexpected <op>".
+    public static ErrorMessageList UnexpectedNonPrefixOperator<TTerm, TAfterString, TUserState>(Operator<TTerm, TAfterString, TUserState> op) {
+        return new ErrorMessageList(
+                   ExpectedPrefixOperator.Head,
+                   new ErrorMessage.Unexpected(Strings.OperatorToString(op)));
+    }
+
+    /// Two-message list: the expected right-hand string of the ternary operator plus
+    /// an explanatory message built from both positions.
+    public static ErrorMessageList MissingTernary2ndString<TTerm, TAfterString, TUserState>(Position position1, Position position2, Operator<TTerm, TAfterString, TUserState> op) {
+        return new ErrorMessageList(
+                   new ErrorMessage.ExpectedString(op.TernaryRightString),
+                   new ErrorMessage.Message(Strings.OperatorStringIsRightPartOfTernaryOperator(position1, position2, op)));
+    }
+}
+
+namespace Internal { // the internal namespace contains internal types that must be public for inlining reasons
+    public static class ParserCombinatorInInfiniteLoopHelper {
+        /// Builds (does not throw) the exception reported when a looping combinator is
+        /// applied to a parser that succeeds without consuming input or changing state.
+        public static Exception CreateException(string combinatorName, CharStream stream) {
+            return new InvalidOperationException(stream.Position.ToString() + ": The combinator '" + combinatorName + "' was applied to a parser that succeeds without consuming input and without changing the parser state in any other way. (If no exception had been raised, the combinator likely would have entered an infinite loop.)");
+        }
+    }
+}
+
+}
diff --git a/src/FParsecCS/FParsecCS.csproj b/src/FParsecCS/FParsecCS.csproj
new file mode 100644
index 0000000..3ca753e
--- /dev/null
+++ b/src/FParsecCS/FParsecCS.csproj
@@ -0,0 +1,15 @@
+
+
+    netstandard2.0
+
+    true
+    true
+
+
+
+
+
+
+
+
diff --git a/src/FParsecCS/FastGenericEqualityERComparer.cs b/src/FParsecCS/FastGenericEqualityERComparer.cs
new file mode 100644
index 0000000..01b9803
--- /dev/null
+++ b/src/FParsecCS/FastGenericEqualityERComparer.cs
@@ -0,0 +1,86 @@
+// Copyright (c) Stephan Tolksdorf 2010
+// License: Simplified BSD License. See accompanying documentation.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+
+#if PCL || NETSTANDARD1_6
+using System.Reflection;
+#endif
+
+using Microsoft.FSharp.Core;
+
+namespace FParsec {
+
+internal static class FastGenericEqualityERComparer<T> {
+    // if T is a reference type, accessing the field requires a hash table lookup
+    public static EqualityComparer<T> Instance = FastGenericEqualityERComparer.Create<T>();
+
+    /// For reference types it's faster to call Instance.Equals directly
+    /// (due to limitations of the inliner of the .NET JIT.)
+    public static bool Equals(T left, T right) {
+        return Instance.Equals(left, right);
+    }
+}
+
+internal static class FastGenericEqualityERComparer {
+    /// Selects the comparer for T: the array comparer for array types, a
+    /// struct/class structural comparer for types implementing IStructuralEquatable
+    /// (instantiated via reflection), and EqualityComparer<T>.Default otherwise.
+    public static EqualityComparer<T> Create<T>() {
+        var t = typeof(T);
+        if (t.IsArray) return new ArrayStructuralEqualityERComparer<T>();
+    #if PCL || NETSTANDARD1_6
+        var ti = t.GetTypeInfo();
+        var ise = typeof(IStructuralEquatable).GetTypeInfo();
+    #else
+        var ti = t;
+        var ise = typeof(IStructuralEquatable);
+    #endif
+        if (ise.IsAssignableFrom(ti)) {
+            var gct = ti.IsValueType ? typeof(StructStructuralEqualityERComparer<>)
+                                     : typeof(ClassStructuralEqualityERComparer<>);
+            var ct = gct.MakeGenericType(t);
+        #if LOW_TRUST || NETSTANDARD1_6
+            return (EqualityComparer<T>)Activator.CreateInstance(ct);
+        #else
+            return (EqualityComparer<T>)System.Runtime.Serialization.FormatterServices.GetUninitializedObject(ct);
+        #endif
+        }
+        return EqualityComparer<T>.Default;
+    }
+
+    private class ClassStructuralEqualityERComparer<T> : EqualityComparer<T> where T : class, IStructuralEquatable {
+        public override bool Equals(T x, T y) {
+            return (object)x == (object)y
+                   || ((object)x != null && x.Equals(y, LanguagePrimitives.GenericEqualityERComparer));
+        }
+
+        public override int GetHashCode(T obj) {
+            if ((object)obj == null) throw new ArgumentNullException("obj");
+            return obj.GetHashCode(LanguagePrimitives.GenericEqualityERComparer);
+        }
+    }
+
+    private class StructStructuralEqualityERComparer<T> : EqualityComparer<T> where T : struct, IStructuralEquatable {
+        public override bool Equals(T x, T y) {
+            return x.Equals(y, LanguagePrimitives.GenericEqualityERComparer);
+        }
+
+        public override int GetHashCode(T obj) {
+            return obj.GetHashCode(LanguagePrimitives.GenericEqualityERComparer);
+        }
+    }
+
+    /// Forwards all work to F#'s GenericEqualityERComparer.
+    private class ArrayStructuralEqualityERComparer<T> : EqualityComparer<T> {
+        public override bool Equals(T x, T y) {
+            return (object)x == (object)y || LanguagePrimitives.GenericEqualityERComparer.Equals(x, y);
+        }
+
+        public override int GetHashCode(T obj) {
+            if ((object)obj == null) throw new ArgumentNullException("obj");
+            return LanguagePrimitives.GenericEqualityERComparer.GetHashCode(obj);
+        }
+    }
+}
+
+}
\ No newline at end of file
diff --git a/src/FParsecCS/HexFloat.cs b/src/FParsecCS/HexFloat.cs
new file mode 100644
index 0000000..b758bfd
--- /dev/null
+++ b/src/FParsecCS/HexFloat.cs
@@ -0,0 +1,596 @@
+// Copyright (c) Stephan Tolksdorf 2008-2013
+// License: Simplified BSD License. See accompanying documentation.
+
+using System;
+
+namespace FParsec {
+
+public static class HexFloat {
+
+// see http://www.quanttec.com/fparsec/reference/charparsers.html#members.floatToHexString
+// for more information on the supported hexadecimal floating-point format
+
+#pragma warning disable 0429 // unreachable expression code
+#pragma warning disable 0162 // unreachable code
+
+// The non-LOW_TRUST code in this class relies on the endianness of floating-point
+// numbers in memory being the same as the normal platform endianness,
+// i.e. on *((uint*)(&s)) and *((ulong*)(&d)) returning the correct IEEE-754 bit
+// representation of the single and double precision numbers s and d.
+// I'm not aware of any .NET/Mono platform where this is not the case.
+// In the unlikely event anyone ever runs this code on a platform where
+// this is not the case the unit tests will detect the problem.
+
+    // Lookup table indexed by ASCII code (0-127): (hex digit value + 1) for
+    // '0'-'9', 'A'-'F' and 'a'-'f', and 0 for every other character.
+    private static readonly byte[] asciiHexValuePlus1s = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0, 0, 0, 0,
+        0, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+
+#if !LOW_TRUST
+    private unsafe struct _24CharsBuffer {
+        public fixed char chars[24];
+    }
+#endif
+
+// Formats a double as a hexadecimal floating-point string.
+// Returns "0x0.0p0" / "-0x0.0p0" for zeros, "Infinity" / "-Infinity" / "NaN" for
+// non-finite values, and otherwise "[-]0x1.<hex fraction>p[-]<decimal exponent>"
+// (leading digit 0 and exponent -1022 for subnormals). Trailing zero nibbles of the
+// fraction are dropped, but at least one fraction digit is always written.
+#if !LOW_TRUST
+    unsafe
+#endif
+public static string DoubleToHexString(double x) {
+    const int expBits = 11; // bits for biased exponent
+    const int maxBits = 53; // significant bits (including implicit bit)
+#if LOW_TRUST
+    const int maxChars = 24; // "-0x1.fffffffffffffp-1022"
+#else
+    _24CharsBuffer buffer;
+#endif
+    const int maxBiasedExp = (1 << expBits) - 1;
+    const int maxExp = 1 << (expBits - 1); // max n for which 0.5*2^n is a double
+    const int bias = maxExp - 1;
+
+    const int maxFractNibbles = (maxBits - 1 + 3)/4;
+    const ulong mask = (1UL << (maxBits - 1)) - 1; // mask for lower (maxBits - 1) bits
+
+#if LOW_TRUST
+    ulong xn = unchecked((ulong)BitConverter.DoubleToInt64Bits(x));
+#else
+    ulong xn = *((ulong*)(&x)); // reinterpret double as ulong
+#endif
+    int sign = (int)(xn >> (maxBits - 1 + expBits));
+    int e = (int)((xn >> (maxBits - 1)) & maxBiasedExp); // the biased exponent
+    ulong s = xn & mask; // the significand (without the implicit bit)
+    if (e < maxBiasedExp) {
+        if (e == 0 && s == 0) return sign == 0 ? "0x0.0p0" : "-0x0.0p0";
+    #if LOW_TRUST
+        char[] str = new char[maxChars];
+    #else
+        char* str = buffer.chars;
+    #endif
+        int i = 0;
+        if (sign != 0) str[i++] = '-';
+        str[i++] = '0'; str[i++] = 'x';
+        str[i++] = e > 0 ? '1' : '0'; // implicit leading bit: 1 for normal, 0 for subnormal numbers
+        str[i++] = '.';
+        if ((maxBits - 1)%4 > 0) { // normalize fraction to multiple of 4 bits
+            s <<= 4 - (maxBits - 1)%4;
+        }
+        int lastNonNull = i; // index of the last non-zero fraction digit (or of the first fraction digit)
+        for (int j = 0; j < maxFractNibbles; ++j) {
+            int h = unchecked((int) (s >> ((maxFractNibbles - 1 - j) << 2))) & 0xf;
+            if (h != 0) lastNonNull = i;
+            str[i++] = "0123456789abcdef"[h];
+        }
+        i = lastNonNull + 1; // drop trailing zero digits
+        str[i++] = 'p';
+        if (e >= bias) e -= bias;
+        else {
+            str[i++] = '-';
+            e = e > 0 ? -(e - bias) : bias - 1;
+        }
+        // e holds absolute unbiased exponent
+        int li = e < 10 ? 1 : (e < 100 ? 2 : (e < 1000 ? 3 : 4)); // number of decimal digits of e
+        i += li;
+        do { // write the decimal digits of e from right to left
+            int r = e%10; e = e/10;
+            str[--i] = (char) (48 + r);
+        } while (e > 0);
+        i += li;
+        return new String(str, 0, i);
+    } else {
+        if (s == 0) return sign == 0 ? "Infinity" : "-Infinity";
+        else return "NaN";
+    }
+}
+
+#if !LOW_TRUST
+    private unsafe struct _16CharsBuffer {
+        public fixed char chars[16];
+    }
+#endif
+
+// Single precision counterpart of DoubleToHexString; same output format
+// (subnormals are written with leading digit 0 and exponent -126).
+#if !LOW_TRUST
+    unsafe
+#endif
+public static string SingleToHexString(float x) {
+    const int expBits = 8; // bits for biased exponent
+    const int maxBits = 24; // significant bits (including implicit bit)
+#if LOW_TRUST
+    const int maxChars = 16; // "-0x1.fffffep-126"
+#else
+    _16CharsBuffer buffer;
+#endif
+    const int maxBiasedExp = (1 << expBits) - 1;
+    const int maxExp = 1 << (expBits - 1); // max n for which 0.5*2^n is a single
+    const int bias = maxExp - 1;
+
+    const int maxFractNibbles = (maxBits - 1 + 3)/4;
+    const uint mask = (1U << (maxBits - 1)) - 1; // mask for lower (maxBits - 1) bits
+
+#if LOW_TRUST
+    uint xn = BitConverter.ToUInt32(BitConverter.GetBytes(x), 0);
+#else
+    uint xn = *((uint*)(&x)); // reinterpret float as uint
+#endif
+    int sign = (int)(xn >> (maxBits - 1 + expBits));
+    int e = (int)((xn >> (maxBits - 1)) & maxBiasedExp); // the biased exponent
+    uint s = xn & mask; // the significand (without the implicit bit)
+    if (e < maxBiasedExp) {
+        if (e == 0 && s == 0) return sign == 0 ? "0x0.0p0" : "-0x0.0p0";
+    #if LOW_TRUST
+        char[] str = new char[maxChars];
+    #else
+        char* str = buffer.chars;
+    #endif
+        int i = 0;
+        if (sign != 0) str[i++] = '-';
+        str[i++] = '0'; str[i++] = 'x';
+        str[i++] = e > 0 ? '1' : '0'; // implicit leading bit: 1 for normal, 0 for subnormal numbers
+        str[i++] = '.';
+        int lastNonNull = i; // index of the last non-zero fraction digit (or of the first fraction digit)
+        if ((maxBits - 1)%4 > 0) { // normalize fraction to multiple of 4 bits
+            s <<= 4 - (maxBits - 1)%4;
+        }
+        for (int j = 0; j < maxFractNibbles; ++j) {
+            int h = (int)(s >> ((maxFractNibbles - 1 - j) << 2)) & 0xf;
+            if (h != 0) lastNonNull = i;
+            str[i++] = "0123456789abcdef"[h];
+        }
+        i = lastNonNull + 1; // drop trailing zero digits
+        str[i++] = 'p';
+        if (e >= bias) e -= bias;
+        else {
+            str[i++] = '-';
+            e = e > 0 ? -(e - bias) : bias - 1;
+        }
+        // e holds absolute unbiased exponent
+        int li = e < 10 ? 1 : (e < 100 ? 2 : 3); // number of decimal digits of e
+        i += li;
+        do { // write the decimal digits of e from right to left
+            int r = e%10; e = e/10;
+            str[--i] = (char)(48 + r);
+        } while (e > 0);
+        i += li;
+        return new String(str, 0, i);
+    } else {
+        if (s == 0) return sign == 0 ? "Infinity" : "-Infinity";
+        else return "NaN";
+    }
+}
+
+#pragma warning restore 0429
+#pragma warning restore 0162
+
+// Parses a hexadecimal floating-point string into a double.
+// Accepted input: optional '+'/'-' sign, optional "0x"/"0X" prefix, hex digits with
+// at most one '.', and an optional 'p'/'P' exponent (decimal, optionally signed).
+// "inf", "infinity" and "nan" (any letter case, optionally signed) are accepted too.
+// Surplus significand bits are rounded to nearest, ties to even; values too small
+// to represent yield a signed zero.
+// Throws ArgumentNullException (str is null), FormatException (invalid or too long
+// input) or OverflowException (magnitude too large for a double).
+#if !LOW_TRUST
+    unsafe
+#endif
+public static double DoubleFromHexString(string str) {
+    const int expBits = 11; // bits for exponent
+    const int maxBits = 53; // significant bits (including implicit bit)
+
+    const int maxExp = 1 << (expBits - 1); // max n for which 0.5*2^n is a double
+    const int minExp = -maxExp + 3; // min n for which 0.5*2^n is a normal double
+    const int minSExp = minExp - (maxBits - 1); // min n for which 0.5*2^n is a subnormal double
+
+    const int maxBits2 = maxBits + 2;
+    const ulong mask = (1UL << (maxBits - 1)) - 1; // mask for lower (maxBits - 1) bits
+
+    if (str == null) throw new ArgumentNullException("str");
+    int n = str.Length;
+    if (n == 0) goto InvalidFormat;
+
+    // n*4 <= Int32.MaxValue protects against an nBits overflow,
+    // the additional -minSExp + 10 margin is needed for parsing the exponent
+    if (n > (int.MaxValue + minSExp - 10)/4)
+        throw new System.FormatException("The given hexadecimal string representation of a double precision floating-point number is too long.");
+
+    int sign = 0; // 0 == positive, 1 == negative
+    ulong xn = 0; // integer significand with up to maxBits + 2 bits, where the (maxBits + 2)th bit
+                  // (the least significant bit) is the logical OR of the (maxBits + 2)th and all following input bits
+    int nBits = -1; // number of bits in xn, not counting leading zeros
+    int exp = 0; // the base-2 exponent
+#if LOW_TRUST
+    var s = str;
+#else
+    fixed (char* s = str) {
+#endif
+    int i = 0;
+    // sign
+    if (s[0] == '+') i = 1;
+    else if (s[0] == '-') {
+        i = 1;
+        sign = 1;
+    }
+    // "0x" prefix
+    if (i + 1 < n && (s[i + 1] == 'x' || s[i + 1] == 'X')) {
+        if (s[i] != '0') goto InvalidFormat;
+        i += 2;
+    }
+    bool pastDot = false;
+    for (;;) {
+        if (i == n) {
+            if (!pastDot) exp = nBits;
+            if (nBits >= 0) break;
+            else goto InvalidFormat;
+        }
+        char c = s[i++];
+        int h;
+        if (c < 128 && (h = asciiHexValuePlus1s[c]) != 0) {
+            --h; // the table stores value + 1
+            if (nBits <= 0 ) {
+                // first digit or only zeros so far
+                xn |= (uint)h;
+                nBits = 0;
+                while (h > 0) {
+                    ++nBits;
+                    h >>= 1;
+                }
+                if (pastDot) exp -= 4 - nBits;
+            } else if (nBits <= maxBits2 - 4) {
+                // the whole nibble still fits
+                xn <<= 4;
+                xn |= (uint)h;
+                nBits += 4;
+            } else if (nBits < maxBits2) {
+                // only part of the nibble fits, the rest is folded into the lowest bit
+                int nRemBits = maxBits2 - nBits;
+                int nSurplusBits = 4 - nRemBits;
+                int surplusBits = h & (0xf >> nRemBits);
+                // The .NET JIT is not able to emit branch-free code for
+                // surplusBits = surplusBits != 0 ? 1 : 0;
+                // So we use this version instead:
+                surplusBits = (0xfffe >> surplusBits) & 1; // = surplusBits != 0 ? 1 : 0
+                xn <<= nRemBits;
+                xn |= (uint)((h >> nSurplusBits) | surplusBits);
+                nBits += 4;
+            } else {
+                // xn is full, further digits only contribute to the lowest bit
+                xn |= (uint)((0xfffe >> h) & 1); // (0xfffe >> h) & 1 == h != 0 ? 1 : 0
+                nBits += 4;
+            }
+        } else if (c == '.') {
+            if (pastDot) goto InvalidFormat;
+            pastDot = true;
+            exp = nBits >= 0 ? nBits : 0; // exponent for integer part of float
+        } else if ((c | ' ') == 'p' && nBits >= 0) { // (c | ' ') maps ASCII letters to lower case
+            if (!pastDot) exp = nBits;
+            int eSign = 1;
+            if (i < n && (s[i] == '-' || s[i] == '+')) {
+                if (s[i] == '-') eSign = -1;
+                ++i;
+            }
+            if (i == n) goto InvalidFormat;
+            int e = 0;
+            do {
+                c = s[i++];
+                if (((uint)c - (uint)'0') <= 9) {
+                    if (e <= (int.MaxValue - 9)/10) e = e*10 + (c - '0');
+                    else e = int.MaxValue - 8; // saturate instead of overflowing
+                } else goto InvalidFormat;
+            } while (i < n);
+            e*= eSign;
+            // either e is exact or |e| >= int.MaxValue - 8
+            // |exp| <= n*4 <= int.MaxValue + minSExp - 10
+            //
+            // Case 1: e and exp have the same sign
+            //   Case 1.a: e is exact && |exp + e| <= int.MaxValue ==> |exp + e| is exact
+            //   Case 1.b: |e| >= int.MaxValue - 8 || |exp + e| > int.MaxValue ==> |exp + e| >= int.MaxValue - 8
+            // Case 2: e and exp have opposite signs
+            //   Case 2.a: e is exact ==> |exp + e| is exact
+            //   Case 2.b: |e| >= int.MaxValue - 8
+            //     ==> Case e > 0:
+            //           exp + e >= -(int.MaxValue + minSExp - 10) + (int.MaxValue - 8) = -minSExp + 2 > maxExp
+            //         Case e < 0:
+            //           exp + e <= (int.MaxValue + minSExp - 10) - (int.MaxValue - 8) = minSExp - 2
+            //
+            // hence, |exp + e| is exact || exp + e > maxExp || exp + e < minSExp - 1
+            try {
+                exp = checked (exp + e);
+            } catch (System.OverflowException) {
+                exp = e < 0 ? int.MinValue : int.MaxValue;
+            }
+            break;
+        } else {
+            --i;
+            // "inf", "infinity" and "nan" are only accepted when no digit was read before
+            if (nBits == -1 && i + 3 <= n) {
+                if (   ((s[i    ] | ' ') == 'i')
+                    && ((s[i + 1] | ' ') == 'n')
+                    && ((s[i + 2] | ' ') == 'f')
+                    && (i + 3 == n
+                        || (i + 8 == n && ((s[i + 3] | ' ') == 'i')
+                                       && ((s[i + 4] | ' ') == 'n')
+                                       && ((s[i + 5] | ' ') == 'i')
+                                       && ((s[i + 6] | ' ') == 't')
+                                       && ((s[i + 7] | ' ') == 'y'))))
+                {
+                    return sign == 0 ? Double.PositiveInfinity : Double.NegativeInfinity;
+                } else if (i + 3 == n && ((s[i] | ' ') == 'n')
+                                      && ((s[i + 1] | ' ') == 'a')
+                                      && ((s[i + 2] | ' ') == 'n'))
+                {
+                    return Double.NaN;
+                }
+            }
+            goto InvalidFormat;
+        }
+    } // for
+#if !LOW_TRUST
+    } // fixed
+#endif
+    if (nBits == 0) return sign == 0 ? 0.0 : -0.0;
+    if (exp <= maxExp) {
+        if (exp >= minExp && nBits <= maxBits) {
+            // not subnormal and no rounding is required
+            if (nBits < maxBits) xn <<= maxBits - nBits; // normalize significand to maxBits
+            xn &= mask; // keep only the lower (maxBits - 1) bits, the most significant bit is encoded in exp
+        } else {
+            if (nBits < maxBits2) xn <<= maxBits2 - nBits; // normalize significand to (maxBits + 2) bits
+            int isSubnormal = 0;
+            if (exp < minExp) {
+                if (exp < minSExp - 1) return sign == 0 ? 0.0 : -0.0; // underflow (minSExp - 1 could still be rounded to minSExp)
+                isSubnormal = 1;
+                do { // shift right while keeping the lowest bit "sticky"
+                    xn = (xn >> 1) | (xn & 1);
+                } while (++exp < minExp);
+                if (xn <= 2) return sign == 0 ? 0.0 : -0.0; // underflow
+            }
+            int r = unchecked((int)xn) & 0x7; // (lsb, bit below lsb, logical OR of all bits below the bit below lsb)
+            xn >>= 2; // truncate to maxBits
+            if (r >= 6 || r == 3) { // round up: more than half, or exactly half with odd lsb
+                xn++;
+                xn &= mask;
+                if (xn == 0) { // rounded to a power of two
+                    exp += 1;
+                    if (exp > maxExp) goto Overflow;
+                }
+            } else {
+                xn &= mask;
+            }
+            exp -= isSubnormal;
+        }
+        exp -= minExp - 1; // add bias
+        xn = (((ulong)sign) << ((maxBits - 1) + expBits)) | (((ulong)exp) << (maxBits - 1)) | xn;
+    #if LOW_TRUST
+        return BitConverter.Int64BitsToDouble(unchecked((long)xn));
+    #else
+        return *((double*)(&xn));
+    #endif
+    }
+
+Overflow:
+    string msg = n < 32 ? "The given string (\"" + str + "\") represents a value either too large or too small for a double precision floating-point number."
+                        : "The given string represents a value either too large or too small for a double precision floating-point number.";
+    throw new System.OverflowException(msg);
+
+InvalidFormat:
+    string errmsg = n < 32 ? "The given hexadecimal string representation of a double precision floating-point number (\"" + str + "\") is invalid."
+                           : "The given hexadecimal string representation of a double precision floating-point number is invalid.";
+    throw new System.FormatException(errmsg);
+}
+
+// Single precision counterpart of DoubleFromHexString: same accepted syntax,
+// rounding (to nearest, ties to even) and exceptions, with a 24-bit significand
+// held in an int.
+#if !LOW_TRUST
+    unsafe
+#endif
+public static float SingleFromHexString(string str) {
+    const int expBits = 8; // bits for exponent
+    const int maxBits = 24; // significant bits (including implicit bit)
+
+    const int maxExp = 1 << (expBits - 1); // max n for which 0.5*2^n is a single
+    const int minExp = -maxExp + 3; // min n for which 0.5*2^n is a normal single
+    const int minSExp = minExp - (maxBits - 1); // min n for which 0.5*2^n is a subnormal single
+
+    const int maxBits2 = maxBits + 2;
+    const int mask = (1 << (maxBits - 1)) - 1; // mask for lower (maxBits - 1) bits
+
+    if (str == null) throw new ArgumentNullException("str");
+    int n = str.Length;
+    if (n == 0) goto InvalidFormat;
+
+    // n*4 <= Int32.MaxValue protects against an nBits overflow,
+    // the additional -minSExp + 10 margin is needed for parsing the exponent
+    if (n > (int.MaxValue + minSExp - 10)/4)
+        throw new System.FormatException("The given hexadecimal string representation of a single precision floating-point number is too long.");
+
+    int sign = 0; // 0 == positive, 1 == negative
+    int xn = 0; // integer significand with up to maxBits + 2 bits, where the (maxBits + 2)th bit
+                // (the least significant bit) is the logical OR of the (maxBits + 2)th and all following input bits
+    int nBits = -1; // number of bits in xn, not counting leading zeros
+    int exp = 0; // the base-2 exponent
+#if LOW_TRUST
+    var s = str;
+#else
+    fixed (char* s = str) {
+#endif
+    int i = 0;
+    // sign
+    if (s[0] == '+') i = 1;
+    else if (s[0] == '-') {
+        i = 1;
+        sign = 1;
+    }
+    // "0x" prefix
+    if (i + 1 < n && (s[i + 1] == 'x' || s[i + 1] == 'X')) {
+        if (s[i] != '0') goto InvalidFormat;
+        i += 2;
+    }
+    bool pastDot = false;
+    for (;;) {
+        if (i == n) {
+            if (!pastDot) exp = nBits;
+            if (nBits >= 0) break;
+            else goto InvalidFormat;
+        }
+        char c = s[i++];
+        int h;
+        if (c < 128 && (h = asciiHexValuePlus1s[c]) != 0) {
+            --h; // the table stores value + 1
+            if (nBits <= 0 ) {
+                // first digit or only zeros so far
+                xn |= h;
+                nBits = 0;
+                while (h > 0) {
+                    ++nBits;
+                    h >>= 1;
+                }
+                if (pastDot) exp -= 4 - nBits;
+            } else if (nBits <= maxBits2 - 4) {
+                // the whole nibble still fits
+                xn <<= 4;
+                xn |= h;
+                nBits += 4;
+            } else if (nBits < maxBits2) {
+                // only part of the nibble fits, the rest is folded into the lowest bit
+                int nRemBits = maxBits2 - nBits;
+                int nSurplusBits = 4 - nRemBits;
+                int surplusBits = h & (0xf >> nRemBits);
+                // The .NET JIT is not able to emit branch-free code for
+                // surplusBits = surplusBits != 0 ? 1 : 0;
+                // So we use this version instead:
+                surplusBits = (0xfffe >> surplusBits) & 1; // == surplusBits != 0 ? 1 : 0
+                xn <<= nRemBits;
+                xn |= (h >> nSurplusBits) | surplusBits;
+                nBits += 4;
+            } else {
+                // xn is full, further digits only contribute to the lowest bit
+                xn |= (0xfffe >> h) & 1; // (0xfffe >> h) & 1 == h != 0 ? 1 : 0
+                nBits += 4;
+            }
+        } else if (c == '.') {
+            if (pastDot) goto InvalidFormat;
+            pastDot = true;
+            exp = nBits >= 0 ? nBits : 0; // exponent for integer part of float
+        } else if ((c | ' ') == 'p' && nBits >= 0) { // (c | ' ') maps ASCII letters to lower case
+            if (!pastDot) exp = nBits;
+            int eSign = 1;
+            if (i < n && (s[i] == '-' || s[i] == '+')) {
+                if (s[i] == '-') eSign = -1;
+                ++i;
+            }
+            if (i == n) goto InvalidFormat;
+            int e = 0;
+            do {
+                c = s[i++];
+                if (((uint)c - (uint)'0') <= 9) {
+                    if (e <= (int.MaxValue - 9)/10) e = e*10 + (c - '0');
+                    else e = int.MaxValue - 8; // saturate instead of overflowing
+                } else goto InvalidFormat;
+            } while (i < n);
+            e*= eSign;
+            // either e is exact or |e| >= int.MaxValue - 8
+            // |exp| <= n*4 <= int.MaxValue + minSExp - 10
+            //
+            // Case 1: e and exp have the same sign
+            //   Case 1.a: e is exact && |exp + e| <= int.MaxValue ==> |exp + e| is exact
+            //   Case 1.b: |e| >= int.MaxValue - 8 || |exp + e| > int.MaxValue ==> |exp + e| >= int.MaxValue - 8
+            // Case 2: e and exp have opposite signs
+            //   Case 2.a: e is exact ==> |exp + e| is exact
+            //   Case 2.b: |e| >= int.MaxValue - 8
+            //     ==> Case e > 0:
+            //           exp + e >= -(int.MaxValue + minSExp - 10) + (int.MaxValue - 8) = -minSExp + 2 > maxExp
+            //         Case e < 0:
+            //           exp + e <= (int.MaxValue + minSExp - 10) - (int.MaxValue - 8) = minSExp - 2
+            //
+            // hence, |exp + e| is exact || exp + e > maxExp || exp + e < minSExp - 1
+            try {
+                exp = checked (exp + e);
+            } catch (System.OverflowException) {
+                exp = e < 0 ? int.MinValue : int.MaxValue;
+            }
+            break;
+        } else {
+            --i;
+            // "inf", "infinity" and "nan" are only accepted when no digit was read before
+            if (nBits == -1 && i + 3 <= n) {
+                if (   ((s[i    ] | ' ') == 'i')
+                    && ((s[i + 1] | ' ') == 'n')
+                    && ((s[i + 2] | ' ') == 'f')
+                    && (i + 3 == n
+                        || (i + 8 == n && ((s[i + 3] | ' ') == 'i')
+                                       && ((s[i + 4] | ' ') == 'n')
+                                       && ((s[i + 5] | ' ') == 'i')
+                                       && ((s[i + 6] | ' ') == 't')
+                                       && ((s[i + 7] | ' ') == 'y'))))
+                {
+                    return sign == 0 ? Single.PositiveInfinity : Single.NegativeInfinity;
+                } else if (i + 3 == n && ((s[i] | ' ') == 'n')
+                                      && ((s[i + 1] | ' ') == 'a')
+                                      && ((s[i + 2] | ' ') == 'n'))
+                {
+                    return Single.NaN;
+                }
+            }
+            goto InvalidFormat;
+        }
+    } // for
+#if !LOW_TRUST
+    } // fixed
+#endif
+    if (nBits == 0) return sign == 0 ? 0.0f : -0.0f;
+    if (exp <= maxExp) {
+        if (exp >= minExp && nBits <= maxBits) {
+            // not subnormal and no rounding is required
+            if (nBits < maxBits) xn <<= maxBits - nBits; // normalize significand to maxBits
+            xn &= mask; // keep only the lower (maxBits - 1) bits, the most significant bit is encoded in exp
+        } else {
+            if (nBits < maxBits2) xn <<= maxBits2 - nBits; // normalize significand to (maxBits + 2) bits
+            int isSubnormal = 0;
+            if (exp < minExp) {
+                if (exp < minSExp - 1) return sign == 0 ? 0.0f : -0.0f; // underflow (minSExp - 1 could still be rounded to minSExp)
+                isSubnormal = 1;
+                do { // shift right while keeping the lowest bit "sticky"
+                    xn = (xn >> 1) | (xn & 1);
+                } while (++exp < minExp);
+                if (xn <= 2) return sign == 0 ? 0.0f : -0.0f; // underflow
+            }
+            int r = xn & 0x7; // (lsb, bit below lsb, logical OR of all bits below the bit below lsb)
+            xn >>= 2; // truncate to maxBits
+            if (r >= 6 || r == 3) { // round up: more than half, or exactly half with odd lsb
+                xn++;
+                xn &= mask;
+                if (xn == 0) { // rounded to a power of two
+                    exp += 1;
+                    if (exp > maxExp) goto Overflow;
+                }
+            } else {
+                xn &= mask;
+            }
+            exp -= isSubnormal;
+        }
+        exp -= minExp - 1; // add bias
+        xn = (sign << ((maxBits - 1) + expBits)) | (exp << (maxBits - 1)) | xn;
+    #if LOW_TRUST
+        return BitConverter.ToSingle(BitConverter.GetBytes(xn), 0);
+    #else
+        return *((float*)(&xn));
+    #endif
+    }
+
+Overflow:
+    string msg = n < 32 ? "The given string (\"" + str + "\") represents a value either too large or too small for a single precision floating-point number."
+                        : "The given string represents a value either too large or too small for a single precision floating-point number.";
+    throw new System.OverflowException(msg);
+
+InvalidFormat:
+    string errmsg = n < 32 ? "The given hexadecimal string representation of a single precision floating-point number (\"" + str + "\") is invalid."
+                           : "The given hexadecimal string representation of a single precision floating-point number is invalid.";
+    throw new System.FormatException(errmsg);
+}
+
+} // class HexFloat
+
+}
diff --git a/src/FParsecCS/IdentifierValidator.cs b/src/FParsecCS/IdentifierValidator.cs
new file mode 100644
index 0000000..a6347e1
--- /dev/null
+++ b/src/FParsecCS/IdentifierValidator.cs
@@ -0,0 +1,709 @@
+// Copyright (c) Stephan Tolksdorf 2010-2012
+// License: Simplified BSD License. See accompanying documentation.
+
+using System;
+using System.Text;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+
+using Microsoft.FSharp.Core;
+
+namespace FParsec {
+
+#if !LOW_TRUST
+    unsafe
+#endif
+public sealed class IdentifierValidator {
+
+    internal enum IdentifierCharFlags : byte {
+        None = 0,
+
+        Continue = 1,
+        NonContinue = 2,
+        //Start = NonContinue | Continue,
+
+        // the following two values are used by the FParsec identifier parser, not this class
+        PreCheckContinue = 4,
+        PreCheckNonContinue = 8,
+    }
+
+#if !PCL
+    public NormalizationForm NormalizationForm { get; set; }
+    public bool NormalizeBeforeValidation { get; set; }
+#endif
+    public bool AllowJoinControlCharsAsIdContinueChars { get; set; }
+    private readonly IdentifierCharFlags[] AsciiCharOptions;
+
+    private void CheckAscii(char asciiChar) {
+        if (asciiChar == 0 || asciiChar >= 128)
+            throw new ArgumentOutOfRangeException("asciiChar", "The identifier char settings can only be read or set for non-zero ASCII chars, i.e.
chars in the range '\u0001'-'\u007f'."); + } + + public void SetIsAsciiNoIdChar(char asciiChar) { CheckAscii(asciiChar); AsciiCharOptions[asciiChar] = 0; } + public void SetIsAsciiIdStartChar(char asciiChar) { CheckAscii(asciiChar); AsciiCharOptions[asciiChar] = IdentifierCharFlags.NonContinue | IdentifierCharFlags.Continue; } + public void SetIsAsciiIdNonStartChar(char asciiChar) { CheckAscii(asciiChar); AsciiCharOptions[asciiChar] = IdentifierCharFlags.Continue; } + + public IdentifierValidator() { + var ascii = new IdentifierCharFlags[128]; + var start = IdentifierCharFlags.NonContinue | IdentifierCharFlags.Continue; + // defaults as defined by XID_START/XID_CONTINUE + for (int c = 'A'; c <= 'Z'; ++c) ascii[c] = start; + for (int c = 'a'; c <= 'z'; ++c) ascii[c] = start; + for (int c = '0'; c <= '9'; ++c) ascii[c] = IdentifierCharFlags.Continue; + ascii['_'] = IdentifierCharFlags.Continue; + AsciiCharOptions = ascii; + } + + internal IdentifierValidator(IdentifierCharFlags[] asciiCharOptions) { + Debug.Assert(asciiCharOptions.Length == 128); + AsciiCharOptions = asciiCharOptions; + } + +#if PCL + #pragma warning disable 164, 219 // label not referenced, variable not used +#endif + + /// Returns the normalized string, or null in case an invalid identifier + /// character is found. If an invalid character is found, the string index of the + /// invalid character is assigned to the out parameter, otherwise -1. + public string ValidateAndNormalize(string str, out int errorPosition) { + // Pinning str and asciiOptions to avoid redundant bounds checks would actually + // slow down the code for small to medium size identifiers because of the + // (unnecessarily) high overhead associated with C#'s fixed statement. One + // issue is that the .NET C# compiler emits null and 0-length checks even + // though the C# standard leaves the respective behaviour undefined and + // one hence can't rely on them. 
Another, more severe issue is that the + // C# compiler puts the whole code inside the scope of the fixed statement + // into a try-finally block, even if the whole function has no exception + // handlers. The try-finally block in turn inhibits certain optimizations + // by the JIT, in particular it seems to prevent the 32-bit .NET JIT from + // compiling gotos into straightforward jumps. + var asciiOptions = AsciiCharOptions; + bool isSecondRound = false; // set to true before validation is retried on the normalized string + bool isOnlyAscii = true; // cleared as soon as a char >= 128 is encountered + int i = 1; // index of the next char to read; an error is reported at position i - 1 + int length = str.Length; // throws if str is null + if (length == 0) goto ReturnWithError; // check could be avoided for null-terminated buffer + + // Even if NormalizeBeforeValidation is set we first try to validate the + // identifier without normalization. If we don't get an error, we normalize + // after validation. If we get an error, we normalize and try + // to validate the identifier a second time. This doesn't change results + // because XID identifiers are "closed under normalization". 
+ + IdStart: + char c = str[0]; + if (c < 128) { + if ((asciiOptions[c] & IdentifierCharFlags.NonContinue) == 0) goto Error; + } else { + isOnlyAscii = false; + if (!Text.IsSurrogate(c)) { + if (!IsXIdStartOrSurrogate(c)) goto Error; + } else { + if (i == length) goto Error; // check could be avoided for null-terminated buffer + char c1 = str[1]; + if (c > 0xDBFF || !Text.IsLowSurrogate(c1)) goto ReturnWithError; + int cp = (c - 0xD800)*0x400 + c1 - 0xDC00; // codepoint minus 0x10000 + if (!IsXIdStartSmp(cp)) goto Error; + ++i; + } + } + if (i < length) { + if (!AllowJoinControlCharsAsIdContinueChars) { + for (;;) { + c = str[i]; + ++i; + if (c < 128) { + if ((asciiOptions[c] & IdentifierCharFlags.Continue) == 0) goto Error; + if (i == length) break; + } else { + isOnlyAscii = false; + if (!Text.IsSurrogate(c)) { + if (!IsXIdContinueOrSurrogate(c)) goto Error; + if (i == length) break; + } else { + if (i == length) goto Error; // check could be avoided for null-terminated buffer + char c1 = str[i]; + if (c > 0xDBFF || !Text.IsLowSurrogate(c1)) goto ReturnWithError; + int cp = (c - 0xD800)*0x400 + c1 - 0xDC00; // codepoint minus 0x10000 + if (!IsXIdContinueSmp(cp)) goto Error; + if (++i >= length) break; + } + } + } + } else { // duplicates the code from the previous case, the only difference being the (*) line + for (;;) { + c = str[i]; + ++i; + if (c < 128) { + if ((asciiOptions[c] & IdentifierCharFlags.Continue) == 0) goto Error; + if (i == length) break; + } else { + isOnlyAscii = false; + if (!Text.IsSurrogate(c)) { + if (!IsXIdContinueOrJoinControlOrSurrogate(c)) goto Error; // (*) + if (i == length) break; + } else { + if (i == length) goto Error; // check could be avoided for null-terminated buffer + char c1 = str[i]; + if (c > 0xDBFF || !Text.IsLowSurrogate(c1)) goto ReturnWithError; + int cp = (c - 0xD800)*0x400 + c1 - 0xDC00; // codepoint minus 0x10000 + if (!IsXIdContinueSmp(cp)) goto Error; + if (++i >= length) break; + } + } + } + } + } + errorPosition 
= -1; +#if PCL + return str; // The PCL API subset does not support Unicode normalization. + Error: +#else + if (NormalizationForm == 0 || (isOnlyAscii | isSecondRound)) return str; + return str.Normalize(NormalizationForm); + Error: + if (NormalizeBeforeValidation && NormalizationForm != 0 && !(isOnlyAscii | isSecondRound)) { + string nstr; + try { nstr = str.Normalize(NormalizationForm); } // throws for invalid unicode characters + catch (ArgumentException) { nstr = str; } + if ((object)nstr != (object)str) { + str = nstr; + length = nstr.Length; + isSecondRound = true; + i = 1; + goto IdStart; + } + } +#endif + ReturnWithError: + errorPosition = i - 1; + return null; + } + +#if PCL + #pragma warning restore 164, 219 +#endif + + private class IsIdStartCharOrSurrogateFSharpFunc : FSharpFunc<char, bool> { // char -> bool: ASCII chars are tested against the NonContinue flag, all other chars via IsXIdStartOrSurrogate + private IdentifierCharFlags[] AsciiCharOptions; + public IsIdStartCharOrSurrogateFSharpFunc(IdentifierCharFlags[] asciiCharOptions) { AsciiCharOptions = asciiCharOptions; } + + public override bool Invoke(char ch) { + if (ch < 128) return (AsciiCharOptions[ch] & IdentifierCharFlags.NonContinue) != 0; + return IsXIdStartOrSurrogate(ch); + } + } + + private class IsIdContinueCharOrSurrogateFSharpFunc : FSharpFunc<char, bool> { // char -> bool: ASCII chars are tested against the Continue flag, all other chars via IsXIdContinueOrSurrogate + private IdentifierCharFlags[] AsciiCharOptions; + public IsIdContinueCharOrSurrogateFSharpFunc(IdentifierCharFlags[] asciiCharOptions) { AsciiCharOptions = asciiCharOptions; } + + public override bool Invoke(char ch) { + if (ch < 128) return (AsciiCharOptions[ch] & IdentifierCharFlags.Continue) != 0; + return IsXIdContinueOrSurrogate(ch); + } + } + + private class IsIdContinueCharOrJoinControlOrSurrogateFSharpFunc : FSharpFunc<char, bool> { // char -> bool: ASCII chars are tested against the Continue flag, all other chars via IsXIdContinueOrJoinControlOrSurrogate + private IdentifierCharFlags[] AsciiCharOptions; + public IsIdContinueCharOrJoinControlOrSurrogateFSharpFunc(IdentifierCharFlags[] asciiCharOptions) { AsciiCharOptions = asciiCharOptions; } + + public override bool Invoke(char ch) { + if (ch < 128) return (AsciiCharOptions[ch] & IdentifierCharFlags.Continue) != 0; + return 
IsXIdContinueOrJoinControlOrSurrogate(ch); + } + } + + private FSharpFunc<char, bool> isIdStartOrSurrogateFunc; + public FSharpFunc<char, bool> IsIdStartOrSurrogateFunc { get { // created on first access, then cached in the backing field + return isIdStartOrSurrogateFunc + ?? (isIdStartOrSurrogateFunc = new IsIdStartCharOrSurrogateFSharpFunc(AsciiCharOptions)); + } } + + private FSharpFunc<char, bool> isIdContinueOrSurrogateFunc; + public FSharpFunc<char, bool> IsIdContinueOrSurrogateFunc { get { // created on first access, then cached in the backing field + return isIdContinueOrSurrogateFunc + ?? (isIdContinueOrSurrogateFunc = new IsIdContinueCharOrSurrogateFSharpFunc(AsciiCharOptions)); + } } + + private FSharpFunc<char, bool> isIdContinueOrJoinControlOrSurrogateFunc; + public FSharpFunc<char, bool> IsIdContinueOrJoinControlOrSurrogateFunc { get { // created on first access, then cached in the backing field + return isIdContinueOrJoinControlOrSurrogateFunc + ?? (isIdContinueOrJoinControlOrSurrogateFunc = new IsIdContinueCharOrJoinControlOrSurrogateFSharpFunc(AsciiCharOptions)); + } } + + // The XID_START/XID_CONTINUE property data is stored in two multiple-stage lookup tables: + // the BMP codepoints (0 - 0xFFFF) are stored in a two-stage table and the SMP codepoints (0x10000 - 0x10FFFF) + // are stored in a three-stage table. + // + // Each two-stage table consists of one integer index array and one bit array. + // Each three-stage table consists of two integer index arrays and one bit array. + // + // The first stage array is divided into multiple parts: one for XID_START, one for XID_CONTINUE + // and -- only for the BMP table -- one in which in addition to the XID_CONTINUE chars the two + // JOIN_CONTROL chars "zero-width non-joiner" (ZWNJ, '\u200C') and "zero-width joiner" + // (ZWJ, '\u200D') are marked. + // All codepoints in the BMP reserved for surrogates are marked as XID_START and XID_CONTINUE. + // + // The bits in the last stage array are stored in 32-bit words, where each 32-bit word + // is stored in the platform byte order. 
+ // + // To determine whether a codepoint has a property in a three-stage table, + // three indices are computed: + // idx1 = the (log_2 table1Length) most significant bits of the codepoint + // idx2 = table1(START|CONTINUE|CONTINUE_OR_JOIN_CONTROL)[idx1]*table2BlockLength + // + the following (log_2 table2BlockLength) bits of the codepoint + // idx3 = table2[idx2]*table3BlockLength + the least significant (log_2 table3BlockLength) bits of the codepoint + // If the bit in table3 at the bit index idx3 is set, the codepoint has the property, otherwise not. + + public static bool IsXIdStartOrSurrogate(char bmpCodePoint) { // should get inlined + return (IsXIdStartOrSurrogate_(bmpCodePoint) & 1u) != 0; // bit 0 of the shifted word is the flag for this codepoint + } + private static uint IsXIdStartOrSurrogate_(char bmpCodePoint) { + uint cp = (uint)bmpCodePoint; + uint idx1 = cp >> XIdBmpTable2Log2BitBlockLength; // first-stage index: the bits of cp above the low XIdBmpTable2Log2BitBlockLength bits + const uint f2 = 1u << (XIdBmpTable2Log2BitBlockLength - 5); // number of 32-bit words per second-stage bit block + const uint m2 = f2 - 1; // mask selecting the word within a bit block + uint idx2 = XIdStartBmpTable1[idx1]*f2 + ((cp >> 5) & m2); // word index into XIdBmpTable2 + return XIdBmpTable2[idx2] >> (int)(cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too + } + + public static bool IsXIdContinueOrSurrogate(char bmpCodePoint) { // should get inlined + return (IsXIdContinueOrSurrogate_(bmpCodePoint) & 1u) != 0u; // bit 0 of the shifted word is the flag for this codepoint + } + private static uint IsXIdContinueOrSurrogate_(char bmpCodePoint) { // same two-stage lookup, starting from XIdContinueBmpTable1 + uint cp = (uint)bmpCodePoint; + uint idx1 = cp >> XIdBmpTable2Log2BitBlockLength; + const uint f2 = 1u << (XIdBmpTable2Log2BitBlockLength - 5); + const uint m2 = f2 - 1; + uint idx2 = XIdContinueBmpTable1[idx1]*f2 + ((cp >> 5) & m2); + return XIdBmpTable2[idx2] >> (int)(cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too + } + + public static bool IsXIdContinueOrJoinControlOrSurrogate(char bmpCodePoint) { // should get inlined + return (IsXIdContinueOrJoinControlOrSurrogate_(bmpCodePoint) & 1u) != 0u; + } + private static uint IsXIdContinueOrJoinControlOrSurrogate_(char bmpCodePoint) { + uint cp 
= (uint)bmpCodePoint; + uint idx1 = cp >> XIdBmpTable2Log2BitBlockLength; + const uint f2 = 1u << (XIdBmpTable2Log2BitBlockLength - 5); + const uint m2 = f2 - 1; + uint idx2 = XIdContinueOrJoinerBmpTable1[idx1]*f2 + ((cp >> 5) & m2); + return XIdBmpTable2[idx2] >> (int)(cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too + } + + public static bool IsXIdStartSmp(int smpCodePointMinus0x10000) { // should get inlined + return (IsXIdStartSmp_(smpCodePointMinus0x10000) & 1u) != 0; + } + private static uint IsXIdStartSmp_(int smpCodePointMinus0x10000) { + uint cp = unchecked((uint)smpCodePointMinus0x10000); + uint idx1 = cp >> (XIdSmpTable2Log2BlockLength + XIdSmpTable3Log2BlockLength); + const uint f2 = 1u << XIdSmpTable2Log2BlockLength, + f3 = 1u << (XIdSmpTable3Log2BlockLength - 5); + const uint m2 = f2 - 1, m3 = f3 - 1; + #if !LOW_TRUST + if ((idx1 & (0xffffffffu << XIdSmpTable1Log2Length)) != 0) throw new IndexOutOfRangeException(); + #endif + uint idx2 = XIdStartSmpTable1[idx1]*f2 + ((cp >> XIdSmpTable3Log2BlockLength) & m2); + uint idx3 = XIdSmpTable2[idx2]*f3 + ((cp >> 5) & m3); + return XIdSmpTable3[idx3] >> (int)(cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too + } + + public static bool IsXIdContinueSmp(int smpCodePointMinus0x10000) { // should get inlined + return (IsXIdContinueSmp_(smpCodePointMinus0x10000) & 1u) != 0; + } + private static uint IsXIdContinueSmp_(int smpCodePointMinus0x10000) { + uint cp = unchecked((uint)smpCodePointMinus0x10000); + uint idx1 = cp >> (XIdSmpTable2Log2BlockLength + XIdSmpTable3Log2BlockLength); + const uint f2 = 1u << XIdSmpTable2Log2BlockLength, + f3 = 1u << (XIdSmpTable3Log2BlockLength - 5); + const uint m2 = f2 - 1, m3 = f3 - 1; + #if !LOW_TRUST + if ((idx1 & (0xffffffffu << XIdSmpTable1Log2Length)) != 0) throw new IndexOutOfRangeException(); + #endif + uint idx2 = XIdContinueSmpTable1[idx1]*f2 + ((cp >> XIdSmpTable3Log2BlockLength) & m2); + uint idx3 = 
XIdSmpTable2[idx2]*f3 + ((cp >> 5) & m3); + return XIdSmpTable3[idx3] >> (int)(cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too + } + + // tables for Unicode 8.0.0 + + private const int XIdStartBmpTable1Offset = 0; + private const int XIdContinueBmpTable1Offset = 256; + private const int XIdContinueOrJoinerBmpTable1Offset = 512; + private const int XIdBmpTable1Size = 256; + private const int XIdBmpTable1Log2Length = 8; + private const int XIdBmpTable2Offset = 768; + private const int XIdBmpTable2Size = 2816; + private const int XIdBmpTable2Log2BitBlockLength = 8; + + private const int XIdStartSmpTable1Offset = 3584; + private const int XIdContinueSmpTable1Offset = 3840; + private const int XIdSmpTable1Size = 256; + private const int XIdSmpTable1Log2Length = 8; + private const int XIdSmpTable2Offset = 4096; + private const int XIdSmpTable2Size = 704; + private const int XIdSmpTable2Log2BlockLength = 5; + private const int XIdSmpTable3Offset = 4800; + private const int XIdSmpTable3Size = 1504; + private const int XIdSmpTable3Log2BlockLength = 7; + + private static readonly byte[] DataArray = new byte[] { + 0,2,3,4,6,8,10,12,14,16,18,20,22,24,26,28,30,2,32,33, + 35,2,36,37,39,41,43,45,47,49,2,51,52,55,56,56,56,56,56,56, + 56,56,56,56,57,59,56,56,61,63,56,56,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,64,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,65, + 2,2,2,2,66,2,67,69,70,72,74,76,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,78,2,2,2,2, + 2,2,2,2,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56, + 56,56,56,56,56,56,56,56,56,2,79,80,82,83,84,86,1,2,3,5, + 7,9,11,13,15,17,19,21,23,25,27,29,31,2,32,34,35,2,36,38, + 40,42,44,46,48,50,2,51,53,55,56,56,56,56,56,56,56,56,56,56, + 58,60,56,56,62,63,56,56,2,2,2,2,2,2,2,2,2,2,2,2, + 
2,2,2,2,2,2,2,2,2,2,2,2,2,64,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,65,2,2,2,2, + 66,2,68,69,71,73,75,77,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,78,2,2,2,2,2,2,2,2, + 56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56, + 56,56,56,56,56,2,79,81,82,83,85,87,1,2,3,5,7,9,11,13, + 15,17,19,21,23,25,27,29,31,2,32,34,35,2,36,38,40,42,44,46, + 48,50,2,51,54,55,56,56,56,56,56,56,56,56,56,56,58,60,56,56, + 62,63,56,56,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,64,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,65,2,2,2,2,66,2,68,69, + 71,73,75,77,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,78,2,2,2,2,2,2,2,2,56,56,56,56, + 56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56, + 56,2,79,81,82,83,85,87,0,0,0,0,0,0,0,0,254,255,255,7, + 254,255,255,7,0,0,0,0,0,4,32,4,255,255,127,255,255,255,127,255, + 0,0,0,0,0,0,255,3,254,255,255,135,254,255,255,7,0,0,0,0, + 0,4,160,4,255,255,127,255,255,255,127,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,195,255,3,0,31,80,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,223,184,64,215,255,255,251,255,255,255, + 255,255,255,255,255,255,191,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,223,184,192,215,255,255,251,255,255,255,255,255,255,255,255,255,191,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,3,252,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 
255,255,255,255,255,255,255,255,251,252,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,254,255,255,255,127,2,254,255,255,255, + 255,0,0,0,0,0,0,0,0,0,255,255,255,7,7,0,255,255,255,255, + 255,255,254,255,255,255,127,2,254,255,255,255,255,0,254,255,255,255,255,191, + 182,0,255,255,255,7,7,0,0,0,0,0,255,255,255,255,255,7,0,0, + 0,192,254,255,255,255,255,255,255,255,255,255,255,255,47,0,96,192,0,156, + 0,0,255,7,255,255,255,255,255,255,255,255,255,195,255,255,255,255,255,255, + 255,255,255,255,255,255,239,159,255,253,255,159,0,0,253,255,255,255,0,0, + 0,224,255,255,255,255,255,255,255,255,255,255,63,0,2,0,0,252,255,255, + 255,7,48,4,0,0,255,255,255,255,255,255,255,231,255,255,255,255,255,255, + 255,255,255,255,255,255,3,0,255,255,255,255,255,255,63,4,255,255,63,4, + 16,1,0,0,255,255,255,1,0,0,0,0,0,0,0,0,255,255,31,0, + 0,0,0,0,0,0,0,0,255,255,255,255,255,63,0,0,255,255,255,15, + 0,0,0,0,0,0,0,0,255,255,31,0,0,0,0,0,248,255,255,255, + 240,255,255,255,255,255,255,35,0,0,1,255,3,0,254,255,225,159,249,255, + 255,253,197,35,0,64,0,176,3,0,3,0,255,255,255,255,255,255,255,255, + 255,255,255,255,207,255,254,255,239,159,249,255,255,253,197,243,159,121,128,176, + 207,255,3,0,224,135,249,255,255,253,109,3,0,0,0,94,0,0,28,0, + 224,191,251,255,255,253,237,35,0,0,1,0,3,0,0,2,238,135,249,255, + 255,253,109,211,135,57,2,94,192,255,63,0,238,191,251,255,255,253,237,243, + 191,59,1,0,207,255,0,2,224,159,249,255,255,253,237,35,0,0,0,176, + 3,0,2,0,232,199,61,214,24,199,255,3,0,0,1,0,0,0,0,0, + 238,159,249,255,255,253,237,243,159,57,192,176,207,255,2,0,236,199,61,214, + 24,199,255,195,199,61,129,0,192,255,0,0,224,223,253,255,255,253,255,35, + 0,0,0,7,3,0,0,0,224,223,253,255,255,253,239,35,0,0,0,64, + 3,0,6,0,239,223,253,255,255,253,255,227,223,61,96,7,207,255,0,0, + 238,223,253,255,255,253,239,243,223,61,96,64,207,255,6,0,224,223,253,255, + 255,255,255,39,0,64,0,128,3,0,0,252,224,255,127,252,255,255,251,47, + 
127,0,0,0,0,0,0,0,238,223,253,255,255,255,255,231,223,125,128,128, + 207,255,0,252,236,255,127,252,255,255,251,47,127,132,95,255,192,255,12,0, + 254,255,255,255,255,255,5,0,127,0,0,0,0,0,0,0,150,37,240,254, + 174,236,5,32,95,0,0,240,0,0,0,0,254,255,255,255,255,255,255,7, + 255,127,255,3,0,0,0,0,150,37,240,254,174,236,255,59,95,63,255,243, + 0,0,0,0,1,0,0,0,0,0,0,0,255,254,255,255,255,31,0,0, + 0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,3, + 255,3,160,194,255,254,255,255,255,31,254,255,223,255,255,254,255,255,255,31, + 64,0,0,0,0,0,0,0,255,255,255,255,255,7,0,128,0,0,63,60, + 98,192,225,255,3,64,0,0,255,255,255,255,191,32,255,255,255,255,255,247, + 255,255,255,255,255,255,255,255,255,3,255,255,255,255,255,255,255,255,255,63, + 255,255,255,255,191,32,255,255,255,255,255,247,255,255,255,255,255,255,255,255, + 255,61,127,61,255,255,255,255,255,61,255,255,255,255,61,127,61,255,127,255, + 255,255,255,255,255,255,61,255,255,255,255,255,255,255,255,7,0,0,0,0, + 255,255,0,0,255,255,255,255,255,255,255,255,255,255,63,63,255,255,61,255, + 255,255,255,255,255,255,255,231,0,254,3,0,255,255,0,0,255,255,255,255, + 255,255,255,255,255,255,63,63,254,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,159,255,255,254,255,255,7, + 255,255,255,255,255,255,255,255,255,199,255,1,255,223,3,0,255,255,3,0, + 255,255,3,0,255,223,1,0,255,255,255,255,255,255,15,0,0,0,128,16, + 0,0,0,0,255,223,31,0,255,255,31,0,255,255,15,0,255,223,13,0, + 255,255,255,255,255,255,255,255,255,255,143,48,255,3,0,0,0,0,0,0, + 255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,5,255,255, + 255,255,255,255,255,255,63,0,0,56,255,3,255,255,255,255,255,255,255,255, + 255,255,255,0,255,255,255,255,255,7,255,255,255,255,255,255,255,255,63,0, + 255,255,255,127,0,0,0,0,0,0,255,255,255,63,31,0,255,255,255,255, + 255,15,255,255,255,3,0,0,0,0,0,0,255,255,255,127,255,15,255,15, + 
192,255,255,255,255,63,31,0,255,255,255,255,255,15,255,255,255,3,255,7, + 0,0,0,0,255,255,127,0,255,255,255,255,255,255,31,0,0,0,0,0, + 0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,255,255,255,15, + 255,255,255,255,255,255,255,127,255,255,255,159,255,3,255,3,128,0,255,63, + 0,0,0,0,0,0,0,0,224,255,255,255,255,255,15,0,224,15,0,0, + 0,0,0,0,248,255,255,255,1,192,0,252,255,255,255,255,63,0,0,0, + 255,255,255,255,255,255,255,255,255,15,255,3,0,248,15,0,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,15,0,255,255,255,255,15,0,0,0, + 0,224,0,252,255,255,255,63,0,0,0,0,0,0,0,0,0,0,0,0, + 0,222,99,0,255,255,255,255,255,255,255,0,255,227,255,255,255,255,255,63, + 0,0,0,0,0,0,0,0,0,0,247,255,255,255,127,3,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,240, + 255,255,63,63,255,255,255,255,63,63,255,170,255,255,255,63,255,255,255,255, + 255,255,223,95,220,31,207,15,255,31,220,31,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,2,128,0,0,255,31,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,128,1,0,16,0,0,0,2,128, + 0,0,255,31,0,0,0,0,0,0,255,31,226,255,1,0,0,48,0,0, + 0,0,0,128,1,0,16,0,0,0,2,128,0,0,255,31,0,0,0,0, + 0,0,255,31,226,255,1,0,132,252,47,63,80,253,255,243,224,67,0,0, + 255,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,127,255,255, + 255,255,255,127,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 31,120,12,0,255,255,255,255,255,127,255,255,255,255,255,127,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,31,248,15,0,255,255,255,255, + 191,32,255,255,255,255,255,255,255,128,0,0,255,255,127,0,127,127,127,127, + 127,127,127,127,0,0,0,0,255,255,255,255,191,32,255,255,255,255,255,255, + 255,128,0,128,255,255,127,0,127,127,127,127,127,127,127,127,255,255,255,255, + 
224,0,0,0,254,3,62,31,254,255,255,255,255,255,255,255,255,255,127,224, + 254,255,255,255,255,255,255,255,255,255,255,247,224,0,0,0,254,255,62,31, + 254,255,255,255,255,255,255,255,255,255,127,230,254,255,255,255,255,255,255,255, + 255,255,255,247,224,255,255,255,255,63,254,255,255,255,255,255,255,255,255,255, + 255,127,0,0,255,255,255,7,0,0,0,0,0,0,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,0, + 0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,0,0,0,0,0, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,31,0,0, + 0,0,0,0,0,0,255,255,255,255,255,63,255,31,255,255,0,12,0,0, + 255,255,255,255,255,127,0,128,255,255,255,63,255,255,255,255,255,255,255,255, + 255,255,0,0,255,31,255,255,255,15,0,0,255,255,255,255,255,255,240,191, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,3,0,0,0,128,255, + 252,255,255,255,255,255,255,255,255,255,255,255,255,249,255,255,255,63,255,0, + 0,0,0,0,0,0,128,255,187,247,255,255,7,0,0,0,255,255,255,255, + 255,255,15,0,252,255,255,255,255,255,15,0,0,0,0,0,0,0,252,40, + 255,255,255,255,255,0,0,0,255,255,255,255,255,255,15,0,255,255,255,255, + 255,255,255,255,31,0,255,3,255,255,255,40,0,252,255,255,63,0,255,255, + 127,0,0,0,255,255,255,31,240,255,255,255,255,255,7,0,0,128,0,0, + 223,255,0,124,255,255,255,255,255,63,255,255,255,255,15,0,255,255,255,31, + 255,255,255,255,255,255,255,255,1,128,255,3,255,255,255,127,255,255,255,255, + 255,1,0,0,247,15,0,0,255,255,127,196,255,255,255,255,255,255,98,62, + 5,0,0,56,255,7,28,0,255,255,255,255,255,255,127,0,255,63,255,3, + 255,255,127,252,255,255,255,255,255,255,255,255,7,0,0,56,255,255,124,0, + 126,126,126,0,127,127,255,255,255,255,255,247,63,0,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,7,0,0,0,126,126,126,0,127,127,255,255, + 255,255,255,247,63,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 
255,55,255,3,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,15,0,255,255,127,248,255,255,255,255,255,15,255,255,255,255, + 255,255,255,255,255,255,255,255,255,63,255,255,255,255,255,255,255,255,255,255, + 255,255,255,3,0,0,0,0,127,0,248,160,255,253,127,95,219,255,255,255, + 255,255,255,255,255,255,255,255,255,255,3,0,0,0,248,255,255,255,255,255, + 127,0,248,224,255,253,127,95,219,255,255,255,255,255,255,255,255,255,255,255, + 255,255,3,0,0,0,248,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,63,240,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,63,0,0,255,255,255,255,255,255, + 255,255,252,255,255,255,255,255,255,0,0,0,0,0,255,3,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,138,170,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,31,255,255,0,0,255,255,24,0,0,224,0,0, + 0,0,138,170,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,31, + 0,0,0,0,254,255,255,7,254,255,255,7,192,255,255,255,255,255,255,63, + 255,255,255,127,252,252,252,28,0,0,0,0,0,0,255,3,254,255,255,135, + 254,255,255,7,192,255,255,255,255,255,255,255,255,255,255,127,252,252,252,28, + 0,0,0,0, + 0,2,4,5,6,7,8,7,7,7,7,10,7,12,14,7,16,16,16,16, + 16,16,16,16,16,16,17,18,19,7,7,20,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,1,3,4,5, + 6,7,9,7,7,7,7,11,7,13,15,7,16,16,16,16,16,16,16,16, + 16,16,17,18,19,7,7,20,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,21,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,0,1,2,3,3,5,7,9, + 10,11,13,3,10,10,14,3,15,16,17,18,19,21,23,24,25,26,3,3, + 3,3,3,3,0,1,2,4,3,6,8,9,10,12,13,3,10,10,14,3, + 15,16,17,18,20,22,23,24,25,26,3,3,3,3,3,3,27,29,31,33, + 35,37,39,3,3,41,3,43,45,47,49,3,3,51,3,3,3,53,3,3, + 3,3,3,3,3,3,3,3,28,30,32,34,36,38,40,3,3,42,3,44, + 46,48,50,3,3,52,3,3,3,53,3,3,3,3,3,3,3,3,3,3, + 10,10,10,10,10,10,10,49,54,10,55,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,10,10,10,10,10,10,10,10, + 56,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,10,10,10,10,57,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,10,10,10,10,58,60,62,64,3,3,3,3,3,3,65,67, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,10,10,10,10, + 59,61,63,64,3,3,3,3,3,3,66,68,69,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,70,71,3,3, + 3,3,3,3,69,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,70,72,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,76,77,78,10,10,79,80,81,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,73,74,75,3,3,3,76,77,78,10, + 10,79,80,82,3,3,3,3,83,84,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,10,85,3,3, + 3,3,3,3,3,3,3,3,87,88,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,10,86,3,3,3,3,3,3,3,3,3,3, + 87,88,3,3,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,89,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,90,10,91,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 
10,10,10,10,10,10,10,10,10,92,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,10,10,10,10,11,3,3,3,3,3,3,3, + 3,3,3,3,3,3,10,93,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,255,239,255,255, + 127,255,255,183,255,63,255,63,0,0,0,0,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,7,0,0,0,0,0,0,0,0,255,255,255,255, + 255,255,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,255,255,255,31, + 255,255,255,255,255,255,1,0,0,0,0,0,255,255,255,31,255,255,255,255, + 255,255,1,0,1,0,0,0,255,255,255,255,0,0,255,255,255,7,255,255, + 255,255,63,0,255,255,255,255,0,0,255,255,255,7,255,255,255,255,255,7, + 255,255,255,63,255,255,255,255,15,255,62,0,0,0,0,0,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,0,0,0,0, + 0,0,0,0,0,0,0,0,255,255,255,63,255,3,0,0,0,0,0,0, + 0,0,0,0,255,255,255,255,255,0,255,255,255,255,255,255,15,0,0,0, + 255,255,255,255,255,255,127,0,255,255,63,0,255,0,0,0,63,253,255,255, + 255,255,191,145,255,255,63,0,255,255,127,0,255,255,255,127,0,0,0,0, + 0,0,0,0,255,255,55,0,255,255,63,0,255,255,255,3,0,0,0,0, + 0,0,0,0,255,255,255,255,255,255,255,192,0,0,0,0,0,0,0,0, + 1,0,239,254,255,255,15,0,0,0,0,0,255,255,255,31,111,240,239,254, + 255,255,15,135,0,0,0,0,255,255,255,31,255,255,255,31,0,0,0,0, + 255,254,255,255,31,0,0,0,255,255,255,31,0,0,0,0,255,254,255,255, + 127,0,0,0,255,255,255,255,255,255,63,0,255,255,63,0,255,255,7,0, + 255,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, + 255,255,255,255,255,1,0,0,0,0,0,0,255,255,255,255,255,255,7,0, + 255,255,255,255,255,255,7,0,248,255,255,255,255,255,255,0,0,0,0,0, + 0,0,0,0,255,255,255,255,255,255,255,255,127,0,0,0,192,255,0,128, + 248,255,255,255,255,255,0,0,0,0,255,255,255,1,0,0,255,255,255,255, + 255,255,255,7,0,0,255,255,255,1,255,3,248,255,255,255,127,0,0,0, + 0,0,255,255,255,255,71,0,255,255,255,255,255,255,223,255,0,0,255,255, + 255,255,79,0,248,255,255,255,255,255,7,0,30,0,0,20,0,0,0,0, + 
255,255,255,255,255,255,255,255,31,28,255,23,0,0,0,0,255,255,251,255, + 255,15,0,0,0,0,0,0,0,0,0,0,255,255,251,255,255,255,255,0, + 0,0,0,0,0,0,0,0,127,189,255,191,255,1,255,255,255,255,255,127, + 0,0,0,0,127,189,255,191,255,1,255,255,255,255,255,255,255,7,255,3, + 224,159,249,255,255,253,237,35,0,0,1,224,3,0,0,0,239,159,249,255, + 255,253,237,243,159,57,129,224,207,31,31,0,255,255,255,255,255,255,0,0, + 176,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,191,0,255,3, + 0,0,0,0,255,255,255,255,255,127,0,0,0,0,0,15,0,0,0,0, + 255,255,255,255,255,255,63,255,1,0,0,63,0,0,0,0,255,255,255,255, + 255,255,0,0,16,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255, + 17,0,255,3,0,0,0,0,255,255,255,255,255,7,0,0,0,0,0,0, + 0,0,0,0,255,255,255,255,255,255,255,0,255,3,0,0,0,0,0,0, + 255,255,255,3,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,227, + 255,15,255,3,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, + 255,255,255,255,0,0,0,128,0,0,0,0,255,255,255,255,255,255,255,255, + 255,3,0,128,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,1, + 255,255,255,255,255,255,255,255,255,255,255,255,255,127,0,0,255,255,255,255, + 255,255,255,255,15,0,0,0,0,0,0,0,255,255,255,255,255,127,0,0, + 0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,127,0,0,0, + 0,0,0,0,255,255,255,255,255,255,255,1,255,255,255,127,0,0,0,0, + 255,255,255,255,255,255,255,1,255,255,255,127,255,3,0,0,0,0,0,0, + 0,0,0,0,0,0,255,255,255,63,0,0,0,0,0,0,0,0,0,0, + 0,0,255,255,255,63,31,0,255,255,255,255,255,255,0,0,15,0,0,0, + 248,255,255,224,255,255,255,255,255,255,127,0,15,0,255,3,248,255,255,224, + 255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, + 255,255,255,255,31,0,1,0,0,0,0,0,255,255,255,255,255,255,255,255, + 31,0,255,255,255,255,255,127,0,0,248,255,0,0,0,0,0,0,0,0, + 0,0,0,0,0,128,255,255,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, + 255,255,255,255,255,255,255,255,255,7,255,31,255,1,255,3,0,0,0,0, + 0,0,0,0,0,0,0,0,255,1,255,99,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,224,227,7,248, + 
231,15,0,0,0,60,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,28,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255, + 255,255,223,255,255,255,255,255,255,255,255,223,100,222,255,235,239,255,255,255, + 255,255,255,255,191,231,223,223,255,255,255,123,95,252,253,255,255,255,255,255, + 255,255,255,255,63,255,255,255,253,255,255,247,255,255,255,247,255,255,223,255, + 255,255,223,255,255,127,255,255,255,127,255,255,255,253,255,255,255,253,255,255, + 247,15,0,0,0,0,0,0,255,253,255,255,255,253,255,255,247,207,255,255, + 255,255,255,255,255,255,255,255,255,255,127,248,255,255,255,255,255,31,32,0, + 16,0,0,248,254,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255, + 255,255,255,255,31,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255, + 31,0,127,0,0,0,0,0,239,255,255,255,150,254,247,10,132,234,150,170, + 150,247,247,94,255,251,255,15,238,251,255,15,0,0,0,0,0,0,0,0, + 255,255,255,255,255,255,255,255,255,255,127,0,0,0,0,0,255,255,255,255, + 255,255,31,0,255,255,255,255,255,255,255,255,255,255,255,63,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,3,0,0,0,0,0,0,0, + 0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0, + + }; + +#if LOW_TRUST + private static readonly byte[] XIdStartBmpTable1 = Buffer.CopySubarray(DataArray, XIdStartBmpTable1Offset, XIdBmpTable1Size); + private static readonly byte[] XIdContinueBmpTable1 = Buffer.CopySubarray(DataArray, XIdContinueBmpTable1Offset, XIdBmpTable1Size); + private static readonly byte[] XIdContinueOrJoinerBmpTable1 = Buffer.CopySubarray(DataArray, XIdContinueOrJoinerBmpTable1Offset, XIdBmpTable1Size); + private static readonly uint[] XIdBmpTable2 = Buffer.CopyUIntsStoredInLittleEndianByteArray(DataArray, XIdBmpTable2Offset, XIdBmpTable2Size); + + private static readonly byte[] XIdStartSmpTable1 = Buffer.CopySubarray(DataArray, XIdStartSmpTable1Offset, XIdSmpTable1Size); + private static readonly byte[] XIdContinueSmpTable1 = Buffer.CopySubarray(DataArray, XIdContinueSmpTable1Offset, XIdSmpTable1Size); + private static 
readonly byte[] XIdSmpTable2 = Buffer.CopySubarray(DataArray, XIdSmpTable2Offset, XIdSmpTable2Size); + private static readonly uint[] XIdSmpTable3 = Buffer.CopyUIntsStoredInLittleEndianByteArray(DataArray, XIdSmpTable3Offset, XIdSmpTable3Size); +#else + /* Non-LOW_TRUST build: the tables below are raw pointers into a single unmanaged copy of DataArray. Data is declared first because the pointer fields that follow are computed from it in their initializers. */ private static byte* Data = LoadDataArrayIntoFixedBuffer(); + + private static readonly byte* XIdStartBmpTable1 = Data + XIdStartBmpTable1Offset; + private static readonly byte* XIdContinueBmpTable1 = Data + XIdContinueBmpTable1Offset; + private static readonly byte* XIdContinueOrJoinerBmpTable1 = Data + XIdContinueOrJoinerBmpTable1Offset; + private static readonly uint* XIdBmpTable2 = (uint*)(Data + XIdBmpTable2Offset); + + private static readonly byte* XIdStartSmpTable1 = Data + XIdStartSmpTable1Offset; + private static readonly byte* XIdContinueSmpTable1 = Data + XIdContinueSmpTable1Offset; + private static readonly byte* XIdSmpTable2 = Data + XIdSmpTable2Offset; + private static readonly uint* XIdSmpTable3 = (uint*)(Data + XIdSmpTable3Offset); + + /* Copies DataArray into a buffer obtained from UnmanagedMemoryPool and returns it as a byte pointer. The two tables that are read through uint* (XIdBmpTable2, XIdSmpTable3) are stored little-endian in DataArray, so on a big-endian host their words are byte-swapped in place after the copy. */ private static byte* LoadDataArrayIntoFixedBuffer() { + IntPtr buffer = UnmanagedMemoryPool.Allocate(DataArray.Length); + Marshal.Copy(DataArray, 0, buffer, DataArray.Length); + Debug.Assert(XIdBmpTable2Size%sizeof(uint) == 0); + Debug.Assert(XIdSmpTable3Size%sizeof(uint) == 0); + if (!System.BitConverter.IsLittleEndian) { + Buffer.SwapByteOrder((uint*)((byte*)buffer + XIdBmpTable2Offset), XIdBmpTable2Size/sizeof(uint)); + Buffer.SwapByteOrder((uint*)((byte*)buffer + XIdSmpTable3Offset), XIdSmpTable3Size/sizeof(uint)); + } + return (byte*)buffer; + } +#endif + +} +} \ No newline at end of file diff --git a/src/FParsecCS/ManyChars.cs b/src/FParsecCS/ManyChars.cs new file mode 100644 index 0000000..e4e1c50 --- /dev/null +++ b/src/FParsecCS/ManyChars.cs @@ -0,0 +1,255 @@ +// Copyright (c) Stephan Tolksdorf 2008-2010 +// License: Simplified BSD License. See accompanying documentation. 
+ +using System; +using System.Text; +using Microsoft.FSharp.Core; + +namespace FParsec { + +#if !LOW_TRUST +/* Four UInt64 fields = 32 bytes of stack storage; the ParseRestOfString methods below reinterpret it as a 16-char scratch buffer. */ internal unsafe struct _16CharBuffer { + public UInt64 UInt64_0; + public UInt64 UInt64_1; + public UInt64 UInt64_2; + public UInt64 UInt64_3; +} +#endif + +/* Parser object that applies CharParser1 once and then CharParser repeatedly, returning the collected chars as one string. Invoke passes on CharParser1's status and error when it does not reply Ok, so at least one char is required. */ internal class Many1Chars : FSharpFunc, Reply> { + protected FSharpFunc, Reply> CharParser1; + protected FSharpFunc, Reply> CharParser; + + public Many1Chars(FSharpFunc, Reply> charParser1, + FSharpFunc, Reply> charParser) + { + CharParser1 = charParser1; + CharParser = charParser; + } + + public override Reply Invoke(CharStream stream) { + var reply = CharParser1.Invoke(stream); + if (reply.Status == ReplyStatus.Ok) + return ParseRestOfString(stream, reply.Result, reply.Error); + else + return new Reply{Status = reply.Status, Error = reply.Error}; + } + +#if !LOW_TRUST + unsafe +#endif + protected Reply ParseRestOfString(CharStream stream, char firstChar, ErrorMessageList error) { + #if LOW_TRUST + var sb = new StringBuilder(16); + sb.Append(firstChar); + #else + _16CharBuffer buffer_; // produces more efficient code on .NET than stackalloc char[16] + char* buffer = (char*)(&buffer_); + buffer[0] = firstChar; + char[] chars = null; + uint n = 1; + #endif + for (;;) { + var tag = stream.StateTag; + var reply = CharParser.Invoke(stream); + if (reply.Status == ReplyStatus.Ok) { + /* An Ok reply that left the StateTag unchanged would make this loop spin forever, so it is turned into an exception. */ if (tag == stream.StateTag) + throw Internal.ParserCombinatorInInfiniteLoopHelper.CreateException("manyChars", stream); + error = reply.Error; + #if LOW_TRUST + sb.Append(reply.Result); + #else + /* n is the number of chars collected so far. The scratch buffer holds the newest chars (slot = index % 16); when it is full (n % 16 == 0) its 16 chars are flushed to the heap array, which is created or doubled as needed, before the new char is stored in slot 0. */ var i = n%16; + if (i != 0) { + buffer[i] = reply.Result; + ++n; + } else { + if (chars == null) chars = new char[32]; + else if (n == chars.Length) { + var newChars = new char[2*chars.Length]; + Array.Copy(chars, newChars, chars.Length); + chars = newChars; + } + for (i = 0; i < 16; ++i) + chars[n - 16 + i] = buffer[i]; + buffer[0] = reply.Result; + ++n; + } + #endif + } else if (reply.Status == ReplyStatus.Error && tag == 
stream.StateTag) { + /* CharParser replied Error without changing the StateTag: the string ends here and the chars collected so far are returned with status Ok. */ string str; + #if LOW_TRUST + str = sb.ToString(); + #else + if (n <= 16) str = new String(buffer, 0, (int)n); + else { + /* copy the not-yet-flushed tail (from the last multiple-of-16 boundary below n) out of the scratch buffer */ for (uint i = (n - 1) & 0x7ffffff0u; i < n; ++i) + chars[i] = buffer[i%16]; + str = new string(chars, 0, (int)n); + } + #endif + error = ErrorMessageList.Merge(error, reply.Error); + return new Reply{Status = ReplyStatus.Ok, Result = str, Error = error}; + } else { + /* Any other reply is passed on; the earlier error messages are merged in only when the StateTag is unchanged. */ error = tag == stream.StateTag ? ErrorMessageList.Merge(error, reply.Error) : reply.Error; + return new Reply{Status = reply.Status, Error = error}; + } + } + } + + public FSharpFunc, Reply> AsFSharpFunc { get { return this; } } +} + + +/* Same as Many1Chars, except that an Error reply from CharParser1 that left the StateTag unchanged yields the empty string with status Ok instead of a failure. */ internal class ManyChars : Many1Chars { + public ManyChars(FSharpFunc, Reply> charParser1, + FSharpFunc, Reply> charParser) + : base(charParser1, charParser) { } + + public override Reply Invoke(CharStream stream) { + var tag = stream.StateTag; + var reply = CharParser1.Invoke(stream); + if (reply.Status == ReplyStatus.Ok) + return ParseRestOfString(stream, reply.Result, reply.Error); + else if (reply.Status == ReplyStatus.Error && tag == stream.StateTag) + return new Reply{Status = ReplyStatus.Ok, Result = "", Error = reply.Error}; + else + return new Reply{Status = reply.Status, Error = reply.Error}; + } +} + +/* Parser object that collects chars with CharParser1 (first char) and CharParser (remaining chars) until EndParser replies Ok, then returns Mapping applied to the collected string and EndParser's result. This class's Invoke parses the first char before EndParser is tried for the first time. */ internal class Many1CharsTill : FSharpFunc, Reply> { + protected FSharpFunc, Reply> CharParser1; + protected FSharpFunc, Reply> CharParser; + protected FSharpFunc, Reply> EndParser; + protected OptimizedClosures.FSharpFunc Mapping; + + public Many1CharsTill(FSharpFunc, Reply> charParser1, + FSharpFunc, Reply> charParser, + FSharpFunc, Reply> endParser, + FSharpFunc> mapping) + { + CharParser1 = charParser1; + CharParser = charParser; + EndParser = endParser; + Mapping = (OptimizedClosures.FSharpFunc)(object)OptimizedClosures.FSharpFunc.Adapt(mapping); + } + + public override Reply Invoke(CharStream stream) { + var reply = CharParser1.Invoke(stream); + if (reply.Status == ReplyStatus.Ok) + return ParseRestOfString(stream, 
reply.Result, reply.Error); + else + return new Reply{Status = reply.Status, Error = reply.Error}; + } + +#if !LOW_TRUST + unsafe +#endif + protected Reply ParseRestOfString(CharStream stream, char firstChar, ErrorMessageList error) { + #if LOW_TRUST + var sb = new StringBuilder(16); + sb.Append(firstChar); + #else + _16CharBuffer buffer_; // produces more efficient code than stackalloc char[16] + char* buffer = (char*)(&buffer_); + buffer[0] = firstChar; + char[] chars = null; + uint n = 1; + #endif + for (;;) { + var tag = stream.StateTag; + /* EndParser is tried first on every iteration; CharParser only runs when EndParser replied Error and left the StateTag unchanged. */ var eReply = EndParser.Invoke(stream); + if (eReply.Status == ReplyStatus.Error && tag == stream.StateTag) { + var reply = CharParser.Invoke(stream); + if (reply.Status == ReplyStatus.Ok) { + if (tag == stream.StateTag) + throw Internal.ParserCombinatorInInfiniteLoopHelper.CreateException("manyCharsTill", stream); + error = reply.Error; + #if LOW_TRUST + sb.Append(reply.Result); + #else + /* same 16-char scratch buffer scheme as in Many1Chars.ParseRestOfString: flush to the heap array whenever n is a multiple of 16 */ var i = n%16; + if (i != 0) { + buffer[i] = reply.Result; + ++n; + } else { + if (chars == null) chars = new char[32]; + else if (n == chars.Length) { + var newChars = new char[2*chars.Length]; + Array.Copy(chars, newChars, chars.Length); + chars = newChars; + } + for (i = 0; i < 16; ++i) + chars[n - 16 + i] = buffer[i]; + buffer[0] = reply.Result; + ++n; + } + #endif + } else { + error = tag == stream.StateTag + ? ErrorMessageList.Merge(ErrorMessageList.Merge(error, eReply.Error), reply.Error) + : reply.Error; + return new Reply{Status = reply.Status, Error = error}; + } + } else if (eReply.Status == ReplyStatus.Ok) { + string str; + #if LOW_TRUST + str = sb.ToString(); + #else + if (n <= 16) str = new String(buffer, 0, (int)n); + else { + for (uint i = (n - 1) & 0x7ffffff0; i < n; ++i) + chars[i] = buffer[i%16]; + str = new string(chars, 0, (int)n); + } + #endif + var result = Mapping.Invoke(str, eReply.Result); + error = tag == stream.StateTag + 
? ErrorMessageList.Merge(error, eReply.Error) + : eReply.Error; + return new Reply{Status = ReplyStatus.Ok, Result = result, Error = error}; + } else { + /* EndParser failed after changing the StateTag, or replied with a status other than Ok/Error: its status is passed on. */ error = tag == stream.StateTag + ? ErrorMessageList.Merge(error, eReply.Error) + : eReply.Error; + return new Reply{Status = eReply.Status, Error = error}; + } + } + } + + public FSharpFunc, Reply> AsFSharpFunc { get { return this; } } +} + +/* Variant of Many1CharsTill whose Invoke tries EndParser before the first char, so the collected string may be empty (Mapping is then called with ""). */ internal class ManyCharsTill : Many1CharsTill { + public ManyCharsTill(FSharpFunc, Reply> charParser1, + FSharpFunc, Reply> charParser, + FSharpFunc, Reply> endParser, + FSharpFunc> mapping) + : base(charParser1, charParser, endParser, mapping) { } + + public override Reply Invoke(CharStream stream) { + var tag = stream.StateTag; + var eReply = EndParser.Invoke(stream); + if (eReply.Status == ReplyStatus.Error && tag == stream.StateTag) { + var reply = CharParser1.Invoke(stream); + if (reply.Status == ReplyStatus.Ok) { + return ParseRestOfString(stream, reply.Result, reply.Error); + } else { + var error = tag == stream.StateTag + ? ErrorMessageList.Merge(eReply.Error, reply.Error) + : reply.Error; + return new Reply{Status = reply.Status, Error = error}; + } + } else if (eReply.Status == ReplyStatus.Ok) { + var result = Mapping.Invoke("", eReply.Result); + return new Reply{Status = ReplyStatus.Ok, Result = result, Error = eReply.Error}; + } else { + return new Reply{Status = eReply.Status, Error = eReply.Error}; + } + } +} + + + +} \ No newline at end of file diff --git a/src/FParsecCS/OperatorPrecedenceParser.cs b/src/FParsecCS/OperatorPrecedenceParser.cs new file mode 100644 index 0000000..823b769 --- /dev/null +++ b/src/FParsecCS/OperatorPrecedenceParser.cs @@ -0,0 +1,771 @@ +// Copyright (c) Stephan Tolksdorf 2008-2011 +// License: Simplified BSD License. See accompanying documentation. 
+ +using System; + +using Microsoft.FSharp.Core; +using System.Diagnostics; +using System.Collections.Generic; + +namespace FParsec { + +/* The numeric values are significant: the parser combines two associativities with & and | and compares the result against None/Left/Right (Left & Right == None). */ public enum Associativity { + None = 0, + Left = 1, + Right = 2 +} + + +public enum OperatorType { + Infix = 0, + Prefix = 1, + Postfix = 2 +} + + +/* Describes one operator definition: its string(s), the parser(s) applied after the string(s), precedence, associativity and the mapping used to build the result term. The constructors set Mapping1 (prefix/postfix), Mapping2 (infix) or Mapping3 (ternary); ternary operators have Type Infix and a non-null TernaryRightString. */ public class Operator { + public OperatorType Type { get; private set; } + + public string String { get; protected set; } + internal FSharpFunc, Reply> AfterStringParser { get; private set; } + + public string TernaryRightString { get; protected set; } + internal FSharpFunc, Reply> AfterTernaryRightStringParser { get; private set; } + public bool IsTernary { get { return TernaryRightString != null; } } + + public int Precedence { get; protected set; } + public Associativity Associativity { get; protected set; } + public bool IsAssociative { get { return Associativity != Associativity.None; } } + + internal OptimizedClosures.FSharpFunc Mapping1 { get; private set; } + internal OptimizedClosures.FSharpFunc Mapping2 { get; private set; } + internal OptimizedClosures.FSharpFunc Mapping3 { get; private set; } + + private Operator() {} + /* Sentinel with Precedence 0 (real operators must have precedence >= 1) and Type Prefix. */ static readonly internal Operator ZeroPrecedenceOperator = new Operator{Type = OperatorType.Prefix}; + + /* Shared validation for the unary and binary constructors. Throws ArgumentException for a null/empty operator string, ArgumentNullException for a null after-string parser and ArgumentOutOfRangeException for a precedence below 1. */ private Operator(OperatorType type, + string operatorString, + FSharpFunc, Reply> afterStringParser, + int precedence) + { + Debug.Assert(type >= OperatorType.Infix && type <= OperatorType.Postfix); + Type = type; + /* ArgumentException takes (message, paramName) - the reverse of ArgumentNullException/ArgumentOutOfRangeException, whose first argument is the parameter name. */ if (string.IsNullOrEmpty(operatorString)) throw new ArgumentException("The operator string must not be empty.", "operatorString"); + String = operatorString; + if (afterStringParser == null) throw new ArgumentNullException("afterStringParser"); + AfterStringParser = afterStringParser; + if (precedence < 1) throw new ArgumentOutOfRangeException("precedence", "The operator precedence must be greater than 0."); + Precedence = precedence; + } + + /* Infix (binary) operator constructor; stores the adapted mapping in Mapping2. */ internal Operator(string operatorString, + FSharpFunc, Reply> afterStringParser, + int precedence, + 
Associativity associativity, + FSharpFunc>> mapping) + : this(OperatorType.Infix, operatorString, afterStringParser, precedence) + { + if (associativity < Associativity.None || associativity > Associativity.Right) + throw new ArgumentOutOfRangeException("associativity", "The associativity argument is invalid."); + Associativity = associativity; + if (mapping == null) throw new ArgumentNullException("mapping"); + Mapping2 = OptimizedClosures.FSharpFunc.Adapt(mapping); + } + + /* Prefix/postfix (unary) operator constructor; stores the adapted mapping in Mapping1. An associative prefix operator is recorded as Right-associative, an associative postfix operator as Left-associative. */ internal Operator(OperatorType type, + string operatorString, + FSharpFunc, Reply> afterStringParser, + int precedence, + bool isAssociative, + FSharpFunc> mapping) + : this(type, operatorString, afterStringParser, precedence) + { + Debug.Assert(type == OperatorType.Prefix || type == OperatorType.Postfix); + Associativity = !isAssociative ? Associativity.None : + type == OperatorType.Prefix ? Associativity.Right : Associativity.Left; + if (mapping == null) throw new ArgumentNullException("mapping"); + Mapping1 = OptimizedClosures.FSharpFunc.Adapt(mapping); + } + + + /* Ternary operator constructor; performs the same argument validation as the other constructors for both operator strings and stores the adapted mapping in Mapping3. */ internal Operator(string leftString, + FSharpFunc, Reply> afterLeftStringParser, + string rightString, + FSharpFunc, Reply> afterRightStringParser, + int precedence, + Associativity associativity, + FSharpFunc>>>> mapping) + { + Type = OperatorType.Infix; + if (string.IsNullOrEmpty(leftString)) throw new ArgumentException("The operator strings must not be empty.", "leftString"); + String = leftString; + if (afterLeftStringParser == null) throw new ArgumentNullException("afterLeftStringParser"); + AfterStringParser = afterLeftStringParser; + if (string.IsNullOrEmpty(rightString)) throw new ArgumentException("The operator strings must not be empty.", "rightString"); + TernaryRightString = rightString; + if (afterRightStringParser == null) throw new ArgumentNullException("afterRightStringParser"); + AfterTernaryRightStringParser = afterRightStringParser; + if (precedence < 1) throw new ArgumentOutOfRangeException("precedence", "The 
operator precedence must be greater than 0."); + Precedence = precedence; + if (associativity < Associativity.None || associativity > Associativity.Right) + throw new ArgumentOutOfRangeException("associativity", "The associativity argument is invalid."); + Associativity = associativity; + if (mapping == null) throw new ArgumentNullException("mapping"); + Mapping3 = OptimizedClosures.FSharpFunc.Adapt(mapping); + } + + /* The three adapters below wrap a mapping that takes only terms, so that it can be invoked through the Mapping1/2/3 signatures; the after-string value(s) are ignored. */ protected class NoAfterStringUnaryMappingAdapter + : OptimizedClosures.FSharpFunc + { + private FSharpFunc Mapping; + public NoAfterStringUnaryMappingAdapter(FSharpFunc mapping) { Mapping = mapping; } + public override TTerm Invoke(TAfterString afterString, TTerm term) { return Mapping.Invoke(term); } + } + + protected class NoAfterStringBinaryMappingAdapter + : OptimizedClosures.FSharpFunc + { + private OptimizedClosures.FSharpFunc Mapping; + public NoAfterStringBinaryMappingAdapter(OptimizedClosures.FSharpFunc mapping) { Mapping = mapping; } + public override TTerm Invoke(TAfterString afterString, TTerm leftTerm, TTerm rightTerm) { + return Mapping.Invoke(leftTerm, rightTerm); + } + } + + protected class NoAfterStringTernaryMappingAdapter + : OptimizedClosures.FSharpFunc + { + private OptimizedClosures.FSharpFunc Mapping; + public NoAfterStringTernaryMappingAdapter(OptimizedClosures.FSharpFunc mapping) { Mapping = mapping; } + public override TTerm Invoke(TAfterString afterLeftString, TAfterString afterRightString, + TTerm leftTerm, TTerm middleTerm, TTerm rightTerm) + { + return Mapping.Invoke(leftTerm, middleTerm, rightTerm); + } + } + +} + +/* Public infix operator. The first constructor takes a mapping over the two terms only (a null mapping is passed through so that the base constructor raises ArgumentNullException); the second also receives the after-string value. */ public sealed class InfixOperator : Operator { + public InfixOperator(string operatorString, + FSharpFunc, Reply> afterStringParser, + int precedence, + Associativity associativity, + FSharpFunc> mapping) + : base(operatorString, afterStringParser, precedence, associativity, + mapping == null ? 
null : new NoAfterStringBinaryMappingAdapter(OptimizedClosures.FSharpFunc.Adapt(mapping))) {} + + public InfixOperator(string operatorString, + FSharpFunc, Reply> afterStringParser, + int precedence, + Associativity associativity, + Unit dummy, // disambiguates overloads in F# + FSharpFunc>> mapping) + : base(operatorString, afterStringParser, precedence, associativity, mapping) {} +} + +public sealed class PrefixOperator : Operator { + public PrefixOperator(string operatorString, + FSharpFunc, Reply> afterStringParser, + int precedence, + bool isAssociative, + FSharpFunc mapping) + : base(OperatorType.Prefix, operatorString, afterStringParser, precedence, isAssociative, + mapping == null ? null : new NoAfterStringUnaryMappingAdapter(mapping)) {} + + public PrefixOperator(string operatorString, + FSharpFunc, Reply> afterStringParser, + int precedence, + bool isAssociative, + Unit dummy, // disambiguates overloads in F# + FSharpFunc> mapping) + : base(OperatorType.Prefix, operatorString, afterStringParser, precedence, isAssociative, mapping) {} +} + + public sealed class PostfixOperator : Operator { + public PostfixOperator(string operatorString, + FSharpFunc, Reply> afterStringParser, + int precedence, + bool isAssociative, + FSharpFunc mapping) + : base(OperatorType.Postfix, operatorString, afterStringParser, precedence, isAssociative, + mapping == null ? 
null : new NoAfterStringUnaryMappingAdapter(mapping)) {} + + public PostfixOperator(string operatorString, + FSharpFunc, Reply> afterStringParser, + int precedence, + bool isAssociative, + Unit dummy, // disambiguates overloads in F# + FSharpFunc> mapping) + : base(OperatorType.Postfix, operatorString, afterStringParser, precedence, isAssociative, mapping) {} +} + +public sealed class TernaryOperator : Operator { + public TernaryOperator(string leftString, + FSharpFunc, Reply> afterLeftStringParser, + string rightString, + FSharpFunc, Reply> afterRightStringParser, + int precedence, + Associativity associativity, + FSharpFunc>> mapping) + : base(leftString, afterLeftStringParser, rightString, afterRightStringParser, precedence, associativity, + mapping == null ? null : new NoAfterStringTernaryMappingAdapter(OptimizedClosures.FSharpFunc.Adapt(mapping))) {} + + public TernaryOperator(string leftString, + FSharpFunc, Reply> afterLeftStringParser, + string rightString, + FSharpFunc, Reply> afterRightStringParser, + int precedence, + Associativity associativity, + Unit dummy, // disambiguates overloads in F# + FSharpFunc>>>> mapping) + : base(leftString, afterLeftStringParser, rightString, afterRightStringParser, precedence, associativity, mapping) {} +} + + +public class OperatorPrecedenceParser : FSharpFunc, Reply> { + + internal struct OperatorData { // declared as struct, so we can allocate it on the stack + internal Operator Operator; + internal TAfterString AfterStringValue; + internal CharStreamIndexToken IndexToken; + internal long Line; + internal long LineBegin; + } + + /// The length of LhsOps and RhsOps. Must be a power of 2. + internal const int OpsArrayLength = 128; + + // LhsOps and RhsOps are arrays of operator arrays. LhsOps contains the prefix + // operator definitions, RhsOps contains all other operator definitions. + // Both have a fixed size of OpsArrayLength (which must be a power of 2). 
+ // All operators beginning with the same char modulo OpsArrayLength are + // grouped together in the same inner array. The inner arrays are sorted + // by the Operator.String property in descending lexical order. + // The index of an inner array in the outer array is given by the + // inner array's operator strings' first char modulo OpsArrayLength. + // An empty inner array is represented by null. + + private readonly Operator[][] LhsOps = new Operator[OpsArrayLength][]; + private readonly Operator[][] RhsOps = new Operator[OpsArrayLength][]; + + // initialized to 0 + private int PrefixOpCount; + private int InfixOpCount; + private int PostfixOpCount; + + private ErrorMessageList ExpectedInfixOrPostfixOperator; // initialized to null + + private readonly Dictionary> Reserved = new Dictionary>(); + + // The following two members aren't static because accessing static members of generic types is rather expensive. + + /// ParsePrefixOp returns this value to signal that it backtracked and we should try to parse a term. + private readonly Operator ErrorOp = Operator.ZeroPrecedenceOperator; + + /// Can not be readonly because it is passed as a ref (for performance reasons), but it is never mutated. 
+ private OperatorData ZeroPrecedenceOperatorData = new OperatorData{Operator = Operator.ZeroPrecedenceOperator}; + + public FSharpFunc, Reply> TermParser { get; set; } + + public FSharpFunc< + Tuple, TAfterString>, + ErrorMessageList> + MissingTernary2ndStringErrorFormatter { get; set; } + + + // C# really needs type abbreviations (or better type inference) + private OptimizedClosures.FSharpFunc< + Tuple, TAfterString>, + Tuple, TAfterString>, + ErrorMessageList> + _OperatorConflictErrorFormatter; + public FSharpFunc< + Tuple, TAfterString>, + FSharpFunc, TAfterString>, + ErrorMessageList>> + OperatorConflictErrorFormatter { + get { return _OperatorConflictErrorFormatter; } + set { _OperatorConflictErrorFormatter = OptimizedClosures.FSharpFunc, TAfterString>,Tuple, TAfterString>, ErrorMessageList> + .Adapt(value); } + } + + public OperatorPrecedenceParser() { + MissingTernary2ndStringErrorFormatter = new DefaultMissingTernary2ndStringErrorFormatter(); + OperatorConflictErrorFormatter = new DefaultOperatorConflictErrorFormatter(); + } + + public FSharpFunc, Reply> ExpressionParser { get { return this; } } + + /* Looks up str in the bucket selected by its first char. Returns true on an exact (ordinal) match. Otherwise indexInArray is the position at which str has to be inserted to keep the bucket in descending order. str must be non-empty because str[0] is read unconditionally. */ private bool FindPosition(Operator[][] ops, string str, out int arrayIndex, out int indexInArray) { + var c0 = str[0]; + int i = c0 & (OpsArrayLength - 1); + arrayIndex = i; + var array = ops[i]; + int c = -1; + int j = 0; + if (array != null) { + for (j = 0; j < array.Length; ++j) { + c = String.CompareOrdinal(str, array[j].String); + if (c >= 0) break; + } + } + indexInArray = j; + return c == 0; + } + + private void ThrowDefinitionConflictException(Operator op, + Operator oldOp) + { + throw new ArgumentException("The definition of the " + op.ToString() + " conflicts with (or duplicates) the previous definition of the " + oldOp.ToString() + "."); + } + + /* Registers op. Throws ArgumentException (via ThrowDefinitionConflictException) when one of its strings is reserved as a ternary right string, when an operator with the same string already exists in the same table, or when a ternary right string is already registered as an operator. The bucket array is replaced by a copy with op inserted at its sorted position. */ public void AddOperator(Operator op) { + Operator oldOp; + if ( Reserved.TryGetValue(op.String, out oldOp) + || (op.IsTernary && Reserved.TryGetValue(op.TernaryRightString, out oldOp))) + { + 
ThrowDefinitionConflictException(op, oldOp); + } + var ops = op.Type == OperatorType.Prefix ? LhsOps : RhsOps; + int i, j; + if (FindPosition(ops, op.String, out i, out j)) + ThrowDefinitionConflictException(op, ops[i][j]); + if (op.IsTernary) { + int i2, j2; + // make sure the Ternary2ndString isn't registered as an operator + if (FindPosition(LhsOps, op.TernaryRightString, out i2, out j2)) + ThrowDefinitionConflictException(op, LhsOps[i2][j2]); + if (FindPosition(RhsOps, op.TernaryRightString, out i2, out j2)) + ThrowDefinitionConflictException(op, RhsOps[i2][j2]); + Reserved.Add(op.TernaryRightString, op); + } + var array = ops[i]; + if (array == null) { + ops[i] = new Operator[1]{op}; + } else { + int n = array.Length; + var newArray = new Operator[n + 1]; + if (j != 0) Array.Copy(array, 0, newArray, 0, j); + newArray[j] = op; + if (j != n) Array.Copy(array, j, newArray, j + 1, n - j); + ops[i] = newArray; + } + if (op.Type == OperatorType.Infix) { + ++InfixOpCount; + if (InfixOpCount == 1) { + ExpectedInfixOrPostfixOperator = PostfixOpCount == 0 + ? Errors.ExpectedInfixOperator + : Errors.ExpectedInfixOrPostfixOperator; + } + } else if (op.Type == OperatorType.Postfix) { + ++PostfixOpCount; + if (PostfixOpCount == 1) { + ExpectedInfixOrPostfixOperator = InfixOpCount == 0 + ? 
Errors.ExpectedPostfixOperator + : Errors.ExpectedInfixOrPostfixOperator; + } + } else ++PrefixOpCount; + } + + public bool RemoveInfixOperator(string opString) { return Remove(OperatorType.Infix, opString); } + public bool RemovePrefixOperator(string opString) { return Remove(OperatorType.Prefix, opString); } + public bool RemovePostfixOperator(string opString) { return Remove(OperatorType.Postfix, opString); } + public bool RemoveTernaryOperator(string opStringLeft, string opStringRight) { + Operator reservedOp; + if (!Reserved.TryGetValue(opStringRight, out reservedOp) || opStringLeft != reservedOp.String) return false; + Reserved.Remove(opStringRight); + return Remove(OperatorType.Infix, opStringLeft); + } + + public bool RemoveOperator(Operator op) { + var ops = op.Type == OperatorType.Prefix ? LhsOps : RhsOps; + int i, j; + if (!FindPosition(ops, op.String, out i, out j)) return false; + if (op != ops[i][j]) return false; + return op.IsTernary ? RemoveTernaryOperator(op.String, op.TernaryRightString) + : Remove(op.Type, op.String); + } + + /* Removes the operator with the given string and type from its bucket and updates the per-type counters and the cached "expected operator" error. Returns false when no operator with that string and type is registered. NOTE(review): when called through RemoveInfixOperator with the left string of a ternary operator, the operator is removed here but its right string stays in Reserved - confirm whether that call should be rejected. */ private bool Remove(OperatorType operatorType, string opString) { + var ops = operatorType == OperatorType.Prefix ? LhsOps : RhsOps ; + int i, j; + if (!FindPosition(ops, opString, out i, out j)) return false; + var array = ops[i]; + /* RhsOps holds infix and postfix operators side by side, so a string match alone does not prove that the operator has the requested type; without this check the wrong operator would be removed and the wrong counter decremented. */ if (array[j].Type != operatorType) return false; + var n = array.Length; + if (n == 1) ops[i] = null; + else { + var newArray = new Operator[n - 1]; + if (j != 0) Array.Copy(array, 0, newArray, 0, j); + if (j + 1 != n) Array.Copy(array, j + 1, newArray, j, n - j - 1); + ops[i] = newArray; + } + if (operatorType == OperatorType.Infix) { + --InfixOpCount; + if (InfixOpCount == 0) { + ExpectedInfixOrPostfixOperator = PostfixOpCount == 0 ? null : Errors.ExpectedPostfixOperator; + } + } else if (operatorType == OperatorType.Postfix) { + --PostfixOpCount; + if (PostfixOpCount == 0) { + ExpectedInfixOrPostfixOperator = InfixOpCount == 0 ? 
null : Errors.ExpectedInfixOperator; + } + } else --PrefixOpCount; + return true; + } + + /* Returns a snapshot array of all registered operators: prefix operators first, then the infix/postfix ones. The array size comes from the three counters, which therefore have to match the table contents. */ public IEnumerable> Operators { get { + var result = new Operator[PrefixOpCount + InfixOpCount + PostfixOpCount]; + var n = 0; + if (PrefixOpCount != 0) { + foreach (var array in LhsOps) + if (array != null) + foreach (var op in array) + result[n++] = op; + } + if ((InfixOpCount | PostfixOpCount) != 0) { + foreach (var array in RhsOps) + if (array != null) + foreach (var op in array) + result[n++] = op; + } + Debug.Assert(n == result.Length); + return result; + } } + + /* Returns the first operator in the bucket for the next char whose string matches the stream at the current position, or null. One- and two-char strings are decided from the two peeked chars alone; longer ones are confirmed with stream.Match. The scan stops early once the bucket entries start with a smaller char, relying on the descending sort order. */ private + Operator + PeekOp(CharStream stream, Operator[][] ops) + { + var cs = stream.Peek2(); + var c1 = cs.Char1; + var c0 = cs.Char0; + var array = ops[c0 & (OpsArrayLength - 1)]; + if (array != null) { + foreach (var op in array) { + var s = op.String; + if (s[0] == c0) { + if ( s.Length <= 1 + || (s[1] == c1 && (s.Length == 2 || stream.Match(s)))) return op; + } else if (s[0] < c0) break; + } + } + return null; + } + + public override Reply Invoke(CharStream stream) { + Reply reply = new Reply(); + reply.Status = ReplyStatus.Ok; + var nextOp = ParseExpression(ref ZeroPrecedenceOperatorData, ref reply, stream); + Debug.Assert(nextOp == null); + return reply; + } + + // ============================================================================= + // NOTE: The main complication in the below code arises from the handling of the + // backtracking related to the after-string-parser. Please see the reference + // documentation for an explanation of the after-string-parser behaviour. 
+ // ============================================================================= + + internal + Operator + ParseExpression(ref OperatorData prevOpData, // prevOpData is passed as ref for performance reasons, but is not mutated + ref Reply reply, + CharStream stream) + { + Operator op; + if (PrefixOpCount != 0 && ((op = PeekOp(stream, LhsOps)) != null)) { + op = ParsePrefixOp(ref prevOpData, op, ref reply, stream); + // ParsePrefixOp returns ErrorOp when it backtracks and we should try to parse a term + if (op == null) goto Break; + if (op != ErrorOp) goto CheckNextOp; + } + var error = reply.Error; + var stateTag = stream.StateTag; + reply = TermParser.Invoke(stream); // <-- this is where we parse the terms + if (stateTag == stream.StateTag) { + error = ErrorMessageList.Merge(error, reply.Error); + if (PrefixOpCount != 0) error = ErrorMessageList.Merge(error, Errors.ExpectedPrefixOperator); + reply.Error = error; + } + if (reply.Status != ReplyStatus.Ok) goto ReturnNull; + op = PeekOp(stream, RhsOps); + CheckNextOp: + if (op != null) { + var prevOp = prevOpData.Operator; + if (prevOp.Precedence > op.Precedence) goto Break; + if (prevOp.Precedence < op.Precedence) goto Continue; + // prevOp.Precedence == op.Precedence + if (op.Type == OperatorType.Infix) { + var assoc = prevOp.Associativity & op.Associativity; + if (assoc == Associativity.Left || prevOp.Type == OperatorType.Prefix) goto Break; + if (assoc == Associativity.Right) goto Continue; + } else { + if (prevOp.Type == OperatorType.Infix) goto Continue; + Debug.Assert(prevOp.Type == OperatorType.Prefix && op.Type == OperatorType.Postfix); + if ((prevOp.Associativity | op.Associativity) != Associativity.None) goto Break; + } + HandlePossibleConflict(ref prevOpData, op, ref reply, stream); + } else { + error = ErrorMessageList.Merge(reply.Error, ExpectedInfixOrPostfixOperator); + reply.Error = error; + } + ReturnNull: + op = null; + Break: + return op; + Continue: + return ParseExpressionContinue(ref 
prevOpData, op, ref reply, stream); + } + + /// Parses the following prefix operators, plus the expression the operators apply to. + private + Operator + ParsePrefixOp(ref OperatorData prevOpData, + Operator op, + ref Reply reply, + CharStream stream) + { + var opData = new OperatorData(); + opData.Line = stream.Line; + opData.LineBegin = stream.LineBegin; + opData.IndexToken = stream.IndexToken; + opData.Operator = op; + var userState = stream.UserState; + #if DEBUG + var ok = stream.Skip(op.String); + Debug.Assert(ok); + #else + stream.Skip((uint)op.String.Length); + #endif + var stateTag = stream.StateTag; + var asReply = op.AfterStringParser.Invoke(stream); + if (asReply.Status == ReplyStatus.Ok) { + opData.AfterStringValue = asReply.Result; + var prevOp = prevOpData.Operator; + if ( prevOp.Precedence != op.Precedence + || prevOp.Type != OperatorType.Prefix + || (prevOp.Associativity | op.Associativity) != Associativity.None) + { + reply.Error = asReply.Error; + var nextOp = ParseExpression(ref opData, ref reply, stream); + if (reply.Status == ReplyStatus.Ok) + reply.Result = op.Mapping1.Invoke(opData.AfterStringValue, reply.Result); + return nextOp; + } + // backtrack to the beginning of the operator + stream.Seek(opData.IndexToken); + stream.SetLine_WithoutCheckAndWithoutIncrementingTheStateTag(opData.Line); + stream.SetLineBegin_WithoutCheckAndWithoutIncrementingTheStateTag(opData.LineBegin); + stream.UserState = userState; + stream.StateTag = stateTag - 1; + ReportConflict(ref prevOpData, op, asReply.Result, ref reply, stream); + return null; + } else if (asReply.Status == ReplyStatus.Error && stateTag == stream.StateTag) { + // backtrack to the beginning of the operator + stream.Seek(opData.IndexToken); + stream.StateTag = stateTag - 1; + return ErrorOp; + } else { + reply.Error = asReply.Error; + reply.Status = asReply.Status; + return null; + } + } + + /// Parses (higher-precedence) infix and postfix operators after the first term, together with the 
argument expressions. + private + Operator + ParseExpressionContinue(ref OperatorData prevOpData, + Operator op, + ref Reply reply, + CharStream stream) + { + var opData = new OperatorData(); + for (;;) { + opData.Line = stream.Line; + opData.LineBegin = stream.LineBegin; + opData.IndexToken = stream.IndexToken; + opData.Operator = op; + #if DEBUG + var ok = stream.Skip(op.String); + Debug.Assert(ok); + #else + stream.Skip((uint)op.String.Length); + #endif + var stateTag = stream.StateTag; + var asReply = op.AfterStringParser.Invoke(stream); + if (asReply.Status == ReplyStatus.Ok) { + opData.AfterStringValue = asReply.Result; + reply.Error = asReply.Error; + if (op.Type == OperatorType.Infix) { + var result1 = reply.Result; + if (!op.IsTernary) { + var nextOp = ParseExpression(ref opData, ref reply, stream); + if (reply.Status == ReplyStatus.Ok) + reply.Result = op.Mapping2.Invoke(opData.AfterStringValue, result1, reply.Result); + op = nextOp; + if (op == null) break; + goto CheckNextOp; + } else { + ParseExpression(ref ZeroPrecedenceOperatorData, ref reply, stream); + if (reply.Status != ReplyStatus.Ok) goto ReturnNull; + var result2 = reply.Result; + if (stream.Skip(op.TernaryRightString)) { + stateTag = stream.StateTag; + asReply = op.AfterTernaryRightStringParser.Invoke(stream); + if (asReply.Status == ReplyStatus.Ok) { + reply.Error = asReply.Error; + var nextOp = ParseExpression(ref opData, ref reply, stream); + if (reply.Status == ReplyStatus.Ok) + reply.Result = op.Mapping3.Invoke(opData.AfterStringValue, asReply.Result, result1, result2, reply.Result); + op = nextOp; + if (op == null) break; + goto CheckNextOp; + } else if (asReply.Status != ReplyStatus.Error || stateTag != stream.StateTag) { + reply.Error = asReply.Error; + reply.Status = asReply.Status; + goto ReturnNull; + } else { + // backtrack + stream.Skip(-op.TernaryRightString.Length); + stream.StateTag -= 2; + } + } + HandleMissingTernary2ndStringError(ref opData, ref reply, stream); + goto 
ReturnNull; + } + } else { + Debug.Assert(op.Type == OperatorType.Postfix); + reply.Result = op.Mapping1.Invoke(opData.AfterStringValue, reply.Result); + var lastOp = op; + op = PeekOp(stream, RhsOps); + // we check for adjacent postfix operators here ... + if (op != null) { + if (op.Type == OperatorType.Postfix && lastOp.Precedence <= op.Precedence) { + if ( lastOp.Precedence < op.Precedence + || (lastOp.Associativity | op.Associativity) != Associativity.None) continue; + // ... so we can report conflicting postfix operators + HandlePossibleConflict(ref opData, op, ref reply, stream); + goto ReturnNull; + } + } else { + reply.Error = ErrorMessageList.Merge(reply.Error, ExpectedInfixOrPostfixOperator); + break; + } + } + CheckNextOp: + var prevOp = prevOpData.Operator; + if (prevOp.Precedence < op.Precedence) continue; + if (prevOp.Precedence > op.Precedence) break; + // prevOp.Precedence == op.Precedence + if (op.Type == OperatorType.Infix) { + var assoc = prevOp.Associativity & op.Associativity; + if (assoc == Associativity.Left || prevOp.Type == OperatorType.Prefix) break; + if (assoc == Associativity.Right) continue; + } else { // op.OperatorType == OperatorType.Postfix + if (prevOp.Type == OperatorType.Infix) continue; + Debug.Assert(prevOp.Type == OperatorType.Prefix); + if ((prevOp.Associativity | op.Associativity) != Associativity.None) break; + } + HandlePossibleConflict(ref prevOpData, op, ref reply, stream); + } else { // asReply.Status != ReplyStatus.Ok + if (asReply.Status == ReplyStatus.Error && stateTag == stream.StateTag) { + // backtrack + stream.Seek(opData.IndexToken); + stream.StateTag -= 2; + reply.Error = ErrorMessageList.Merge(reply.Error, ExpectedInfixOrPostfixOperator); + } else { + reply.Error = asReply.Error; + reply.Status = asReply.Status; + } + } + ReturnNull: + op = null; + break; + } + return op; + } + + private void HandleMissingTernary2ndStringError(ref OperatorData opData, + ref Reply reply, + CharStream stream) + { + var 
firstStringIndex = opData.IndexToken.GetIndex(stream); + var firstStringColumn = firstStringIndex - opData.LineBegin + 1; + var firstStringPos = new Position(stream.Name, firstStringIndex, opData.Line, firstStringColumn); + var secondStringPos = stream.Position; + var error1 = ExpectedInfixOrPostfixOperator; + var error2 = MissingTernary2ndStringErrorFormatter.Invoke(Tuple.Create(firstStringPos, secondStringPos, (TernaryOperator)opData.Operator, opData.AfterStringValue)); + reply.Error = ErrorMessageList.Merge(reply.Error, ErrorMessageList.Merge(error1, error2)); + reply.Status = ReplyStatus.Error; + } + + private void HandlePossibleConflict(ref OperatorData prevOpData, + Operator op, + ref Reply reply, + CharStream stream) + { + // "possible" conflict, because it's not a conflict when the + // after-string-parser fails without changing the parser state. + var state = stream.State; + var ok = stream.Skip(op.String); + Debug.Assert(ok); + var stateTag = stream.StateTag; + var asReply = op.AfterStringParser.Invoke(stream); + if (asReply.Status == ReplyStatus.Ok) { + stream.BacktrackTo(ref state); + ReportConflict(ref prevOpData, op, asReply.Result, ref reply, stream); + } else if (asReply.Status == ReplyStatus.Error && stateTag == stream.StateTag) { + // backtrack and ignore the operator + stream.BacktrackTo(ref state); + reply.Error = ErrorMessageList.Merge(reply.Error, ExpectedInfixOrPostfixOperator); + } else { + // report AfterStringParser error instead of conflict + reply.Error = asReply.Error; + reply.Status = asReply.Status; + } + } + + private void ReportConflict(ref OperatorData prevOpData, + Operator op, + TAfterString afterStringValue, + ref Reply reply, + CharStream stream) + { + var prevOpIndex = prevOpData.IndexToken.GetIndex(stream); + var prevOpColumn = prevOpIndex - prevOpData.LineBegin + 1; + var prevOpPos = new Position(stream.Name, prevOpIndex, prevOpData.Line, prevOpColumn); + var error = _OperatorConflictErrorFormatter.Invoke( + 
Tuple.Create(prevOpPos, prevOpData.Operator, prevOpData.AfterStringValue), + Tuple.Create(stream.Position, op, afterStringValue)); + reply.Error = ErrorMessageList.Merge(reply.Error, error); + reply.Status = ReplyStatus.Error; + } + + private sealed class DefaultMissingTernary2ndStringErrorFormatter + : FSharpFunc, TAfterString>, ErrorMessageList> + { + public override ErrorMessageList Invoke(Tuple, TAfterString> value) { + var position1 = value.Item1; + var position2 = value.Item2; + var op = value.Item3; + return Errors.MissingTernary2ndString(position1, position2, op); + } + } + + private sealed class DefaultOperatorConflictErrorFormatter + : OptimizedClosures.FSharpFunc, TAfterString>, + Tuple, TAfterString>, + ErrorMessageList> + { + public override ErrorMessageList Invoke(Tuple, TAfterString> arg1, Tuple, TAfterString> arg2) { + return Errors.OperatorsConflict(arg1.Item1, arg1.Item2, arg2.Item1, arg2.Item2); + } + } + +} + + +} \ No newline at end of file diff --git a/src/FParsecCS/Position.cs b/src/FParsecCS/Position.cs new file mode 100644 index 0000000..495f8c1 --- /dev/null +++ b/src/FParsecCS/Position.cs @@ -0,0 +1,67 @@ +// Copyright (c) Stephan Tolksdorf 2007-2009 +// License: Simplified BSD License. See accompanying documentation. + +using System; + +namespace FParsec { + +public sealed class Position : IEquatable, IComparable, IComparable { + public long Index { get; private set; } + public long Line { get; private set; } + public long Column { get; private set; } + public string StreamName { get; private set; } + + public Position(string streamName, long index, long line, long column) { + StreamName = streamName; Index = index; Line = line; Column = column; + } + + public override string ToString() { + var ln = String.IsNullOrEmpty(StreamName) ? 
"(Ln: " : Text.Escape(StreamName, "", "(\"", "\", Ln: ", "", '"'); + return ln + Line.ToString() + ", Col: " + Column.ToString() + ")"; + } + + public override bool Equals(object obj) { + return Equals(obj as Position); + } + public bool Equals(Position other) { + return (object)this == (object)other + || ( (object)other != null + && Index == other.Index + && Line == other.Line + && Column == other.Column + && StreamName == other.StreamName); + } + public static bool operator==(Position left, Position right) { + return (object)left == null ? (object)right == null : left.Equals(right); + } + public static bool operator!=(Position left, Position right) { return !(left == right); } + + public override int GetHashCode() { + return Index.GetHashCode(); + } + + public static int Compare(Position left, Position right) { + if ((object)left != null) return left.CompareTo(right); + return (object)right == null ? 0 : -1; + } + + public int CompareTo(Position other) { + if ((object)this == (object)other) return 0; + if ((object)other == null) return 1; + int r = String.CompareOrdinal(StreamName, other.StreamName); + if (r != 0) return r; + r = Line.CompareTo(other.Line); + if (r != 0) return r; + r = Column.CompareTo(other.Column); + if (r != 0) return r; + return Index.CompareTo(other.Index); + } + int IComparable.CompareTo(object value) { + Position position = value as Position; + if ((object)position != null) return CompareTo(position); + if (value == null) return 1; + throw new ArgumentException("Object must be of type Position."); + } +} + +} diff --git a/src/FParsecCS/Properties/AssemblyInfo.cs b/src/FParsecCS/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..9c2b71c --- /dev/null +++ b/src/FParsecCS/Properties/AssemblyInfo.cs @@ -0,0 +1,32 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +[assembly: ComVisible(false)] + +#if LOW_TRUST + [assembly: System.Security.AllowPartiallyTrustedCallers] + 
[assembly: System.Security.SecurityTransparent] +#endif + +[assembly: InternalsVisibleTo ("FParsec" + FParsec.CommonAssemblyInfo.StrongNamePublicKey)] +[assembly: InternalsVisibleTo (FParsec.CommonAssemblyInfo.TestAssemblyName + FParsec.CommonAssemblyInfo.StrongNamePublicKey)] + +namespace FParsec { + +internal static partial class CommonAssemblyInfo { + public const string TestAssemblyName = "Test"; + +#if STRONG_NAME + public const string StrongNamePublicKey = + ", PublicKey=002400000480000094000000060200000024000052534131000400000100010077c6be48a40f5b" + + "194ec9f992e5b512bbbba33e211354d9ee50c3214decddad8356470a9a19a9ee84637cbd6ff690" + + "9527d3973741dbe0a69b1461eeae774af9a78de45618ffd6fe7c7d52e0441b92f3bc7e8fb5757f" + + "b8b1611a0b6b8c9f9ef64edcf51d44218ae040f3015373fd261d30f8e1f5a1f914fd9ebcde7d7e" + + "f42dbaa5"; +#else + public const string StrongNamePublicKey = ""; +#endif +}; + +} diff --git a/src/FParsecCS/Reply.cs b/src/FParsecCS/Reply.cs new file mode 100644 index 0000000..ddb7318 --- /dev/null +++ b/src/FParsecCS/Reply.cs @@ -0,0 +1,79 @@ +// Copyright (c) Stephan Tolksdorf 2008-2010 +// License: Simplified BSD License. See accompanying documentation. 
+ +using System; + +namespace FParsec { + +public enum ReplyStatus { + Ok = 1, + Error = 0, + FatalError = -1 +} + +[System.Diagnostics.DebuggerDisplay("{GetDebuggerDisplay(),nq}")] +public struct Reply : IEquatable> { + public ErrorMessageList Error; + public TResult Result; + public ReplyStatus Status; + + public Reply(TResult result) { + Result = result; + Error = null; + Status = ReplyStatus.Ok; + } + + public Reply(ReplyStatus status, ErrorMessageList error) { + Status = status; + Error = error; + Result = default(TResult); + } + + public Reply(ReplyStatus status, TResult result, ErrorMessageList error) { + Status = status; + Error = error; + Result = result; + } + + public override bool Equals(object other) { + if (!(other is Reply)) return false; + return Equals((Reply) other); + } + public bool Equals(Reply other) { + return Status == other.Status + && (Status != ReplyStatus.Ok || FastGenericEqualityERComparer.Instance.Equals(Result, other.Result)) + && Error == other.Error; + } + public override int GetHashCode() { + return (int)Status + ^ (Status != ReplyStatus.Ok ? 0 : FastGenericEqualityERComparer.Instance.GetHashCode(Result)); + } + public static bool operator==(Reply r1, Reply r2) { return r1.Equals(r2); } + public static bool operator!=(Reply r1, Reply r2) { return !r1.Equals(r2); } + + private string GetDebuggerDisplay() { + if (Status == ReplyStatus.Ok) { + string result; + if (Result == null) + result = typeof(TResult) == typeof(Microsoft.FSharp.Core.Unit) ? "()" : "null"; + else if (typeof(TResult) == typeof(string)) + result = Text.DoubleQuote(Result.ToString()); + else + result = Result.ToString(); + + return Error == null + ? "Reply(" + result + ")" + : "Reply(Ok, " + result + ", " + ErrorMessageList.GetDebuggerDisplay(Error) + ")"; + } else { + var status = Status == ReplyStatus.Error ? "Error" : + Status == ReplyStatus.FatalError ? "FatalError" : + "(ReplyStatus)" + ((int)Status).ToString(); + + return Error == null + ? 
"Reply(" + status + ", NoErrorMessages)" + : "Reply(" + status + ", " + ErrorMessageList.GetDebuggerDisplay(Error) + ")"; + } + } +} + +} \ No newline at end of file diff --git a/src/FParsecCS/StringBuffer.cs b/src/FParsecCS/StringBuffer.cs new file mode 100644 index 0000000..6b9ae52 --- /dev/null +++ b/src/FParsecCS/StringBuffer.cs @@ -0,0 +1,380 @@ +// Copyright (c) Stephan Tolksdorf 2009 +// License: Simplified BSD License. See accompanying documentation. + +#if !LOW_TRUST + +#if DEBUG + #define DEBUG_STRINGBUFFER +#endif + +using System; +using System.Runtime.InteropServices; +using System.Diagnostics; + +namespace FParsec { + +/// A substring of a pinned string on the large object heap. +/// StringBuffers are cached in a pool and hence need to be properly disposed. +internal unsafe sealed class StringBuffer : IDisposable { + private PoolSegment Segment; + public string String { get { return Segment == null ? "" : Segment.String; } } + public char* StringPointer { get { return Segment == null ? 
null : Segment.StringPointer; } } + public int Index { get; private set; } + public int Length { get; private set; } + + private StringBuffer(PoolSegment segment, int index, int length) { + Segment = segment; + Index = index; + Length = length; + } + + private sealed class FreeChunk { + public PoolSegment Segment; + + // free chunks in each segment form a doubly-linked list ordered by index + public FreeChunk PrevInSegment; + public FreeChunk NextInSegment; + + public static FreeChunk Smallest; + public static FreeChunk Largest; + + // all free chunks together form a doubly-linked list ordered by size + public FreeChunk PrevInSize; + public FreeChunk NextInSize; + + public int Index; + public int Size; + + public FreeChunk(PoolSegment segment, int index, int size) { + Debug.Assert(segment.FirstFreeChunk == null && index >= 0 && size > 0 && index + size <= segment.Size); + Segment = segment; + Index = index; + Size = size; + segment.FirstFreeChunk = this; + InsertIntoSizeList(); + } + + public FreeChunk(PoolSegment segment, FreeChunk prevInSegment, FreeChunk nextInSegment, int index, int size) { + Debug.Assert(index >= 0 && size > 0 && index + size <= segment.Size); + Segment = segment; + Index = index; + Size = size; + PrevInSegment = prevInSegment; + NextInSegment = nextInSegment; + if (prevInSegment != null) { + Debug.Assert(prevInSegment.Index + prevInSegment.Size < index); + prevInSegment.NextInSegment = this; + } else { + Debug.Assert(segment.FirstFreeChunk == nextInSegment); + segment.FirstFreeChunk = this; + } + if (nextInSegment != null) { + Debug.Assert(index + size < nextInSegment.Index); + nextInSegment.PrevInSegment = this; + } + InsertIntoSizeList(); + } + + private void InsertIntoSizeList() { + var largest = FreeChunk.Largest; + if (largest != null) { + if (largest.Size <= Size) { + largest.NextInSize = this; + PrevInSize = largest; + FreeChunk.Largest = this; + } else { + NextInSize = largest; + var prev = largest.PrevInSize; + largest.PrevInSize = 
this; + if (prev != null) { + PrevInSize = prev; + prev.NextInSize = this; + if (Size < prev.Size) MoveAfterSizeHasDecreased(); + } else FreeChunk.Smallest = this; + } + } else { + FreeChunk.Smallest = this; + FreeChunk.Largest = this; + } + } + + public void Remove() { + var prev = PrevInSegment; + var next = NextInSegment; + if (prev != null) prev.NextInSegment = next; + else Segment.FirstFreeChunk = next; + if (next != null) next.PrevInSegment = prev; + + prev = PrevInSize; + next = NextInSize; + if (prev != null) prev.NextInSize = next; + else Smallest = next; + if (next != null) next.PrevInSize = prev; + else Largest = prev; + } + + // the following two methods are dual to each other, + // i.e. one can be transformed into the other by way of simple search & replace + public void MoveAfterSizeHasDecreased() { + Debug.Assert(Size < PrevInSize.Size); + var prev = PrevInSize; + var next = NextInSize; + if (next != null) next.PrevInSize = prev; + else Largest = prev; + prev.NextInSize = next; + next = prev; + prev = prev.PrevInSize; + while (prev != null && prev.Size > Size) { + next = prev; + prev = prev.PrevInSize; + } + NextInSize = next; + next.PrevInSize = this; + PrevInSize = prev; + if (prev != null) prev.NextInSize = this; + else Smallest = this; + } + + public void MoveAfterSizeHasIncreased() { + Debug.Assert(Size > NextInSize.Size); + var next = NextInSize; + var prev = PrevInSize; + if (prev != null) prev.NextInSize = next; + else Smallest = next; + next.PrevInSize = prev; + prev = next; + next = next.NextInSize; + while (next != null && next.Size < Size) { + prev = next; + next = next.NextInSize; + } + PrevInSize = prev; + prev.NextInSize = this; + NextInSize = next; + if (next != null) next.PrevInSize = this; + else Largest = this; + } + } + + private const int MinChunkSize = 1536; // 3 * 2^9 + // segment sizes must be multiple of MinChunkSize and large enough to allocated on the LargeObjectHeap + private const int FirstSegmentSmallSize = 42 * 
MinChunkSize; // 64 512 + private const int FirstSegmentLargeSize = 128 * MinChunkSize; // 3 * 2^16 = 196 608 (default CharStream block size) + private const int MaxSegmentSize = 640 * MinChunkSize; // 983 040 + + private static int MaxNumberOfUnusedSegments = 3; + + private static int NumberOfUnusedSegments; + + private sealed class PoolSegment : IDisposable { + // segments form a doubly-linked list in the order they were constructed + + /// the last allocated segment + private static PoolSegment Last; + + private PoolSegment Next; + private PoolSegment Prev; + + public string String { get; private set; } + /// String.Length - x, where x > 0 + public int Size { get; private set; } + public char* StringPointer { get; private set; } + private GCHandle StringHandle; + + public FreeChunk FirstFreeChunk; + + public PoolSegment(int size, int firstBufferSize) { + Debug.Assert(firstBufferSize > 0 && firstBufferSize <= size && (size <= MaxSegmentSize || firstBufferSize == size)); + // + 1, so that no chunk can span the full string, which helps avoiding accidentally passing a reference to the internal buffer string to the "outside world" + String = new String('\u0000', size + 1); + Size = size; + StringHandle = GCHandle.Alloc(String, GCHandleType.Pinned); + StringPointer = (char*)StringHandle.AddrOfPinnedObject(); + if (Last != null) { + Last.Next = this; + Prev = Last; + } + Last = this; + if (firstBufferSize < size) + new FreeChunk(this, firstBufferSize, size - firstBufferSize); // inserts itself into the lists + } + + public void Dispose() { + if (StringPointer != null) { + Debug.Assert(FirstFreeChunk == null); + if (FirstFreeChunk != null) throw new InvalidOperationException(); + if (Prev != null) Prev.Next = Next; + if (Next != null) Next.Prev = Prev; + else Last = Prev; + StringPointer = null; + StringHandle.Free(); + } + } + + public static StringBuffer AllocateStringBufferInNewSegment(int length) { + int segmentSize = length > MaxSegmentSize + ? 
length + : (Last == null && length <= FirstSegmentLargeSize) + ? (length <= FirstSegmentSmallSize ? FirstSegmentSmallSize : FirstSegmentLargeSize) + : MaxSegmentSize; + return new StringBuffer(new PoolSegment(segmentSize, length), 0, length); + } + + [Conditional("DEBUG_STRINGBUFFER")] + public void AssertIntegrity() { + Debug.Assert(StringPointer != null); + int sumOfSegmentSizes = 0; + { // check list of segments + var segment = Last; + Debug.Assert(segment.Next == null); + var prev = segment.Prev; + sumOfSegmentSizes += segment.Size; + bool visitedThis = segment == this; + while (prev != null) { + Debug.Assert(segment == prev.Next); + segment = prev; + prev = prev.Prev; + sumOfSegmentSizes += segment.Size; + visitedThis = visitedThis || segment == this; + } + Debug.Assert(visitedThis); + } + { // check segment list of free chunks ordered by index + var chunk = FirstFreeChunk; + if (chunk != null) { + Debug.Assert( chunk.Index >= 0 && chunk.Size > 0 + && (chunk.PrevInSize != null ? chunk.Size >= chunk.PrevInSize.Size : chunk == FreeChunk.Smallest) + && (chunk.NextInSize != null ? chunk.Size <= chunk.NextInSize.Size : chunk == FreeChunk.Largest)); + int chunkEnd = chunk.Index + chunk.Size; + var next = chunk.NextInSegment; + while (next != null) { + Debug.Assert( (chunk == next.PrevInSegment && chunkEnd < next.Index && next.Size > 0) + && (next.PrevInSize != null ? next.Size >= next.PrevInSize.Size : next == FreeChunk.Smallest) + && (next.NextInSize != null ? 
next.Size <= next.NextInSize.Size : next == FreeChunk.Largest)); + chunk = next; + chunkEnd = chunk.Index + chunk.Size; + next = chunk.NextInSegment; + } + Debug.Assert(chunkEnd <= Size); + } + } + { // check global list of free chunks ordered by size + int free = 0; + var chunk = FreeChunk.Smallest; + if (chunk == null) Debug.Assert(FreeChunk.Largest == null); + else { + Debug.Assert(chunk.Size > 0 && chunk.PrevInSize == null); + free += chunk.Size; + var next = chunk.NextInSize; + while (next != null) { + Debug.Assert(chunk == next.PrevInSize && chunk.Size <= next.Size); + chunk = next; + free += chunk.Size; + next = chunk.NextInSize; + } + Debug.Assert(chunk == FreeChunk.Largest); + } + Debug.Assert(Allocated == sumOfSegmentSizes - free); + } + } + } + + + /// Sum of the lengths of all currently allocated StringBuffers + private static int Allocated = 0; + private static object SyncRoot = new Object(); + + public static StringBuffer Create(int minLength) { + int size = unchecked(minLength + (MinChunkSize - 1)); + if (size > (MinChunkSize - 1)) { // minLength > 0 && minLength <= System.Int32.MaxValue - (MinChunkSize - 1) + size -= (int)((uint)size%(uint)MinChunkSize); // round down to multiple of MinChunkSize + lock (SyncRoot) { + Allocated += size; + FreeChunk chunk = FreeChunk.Largest; + if (chunk != null) { // find smallest free chunk that is large enough to hold the buffer + if (size > 10*MinChunkSize) { + var prev = chunk.PrevInSize; + while (prev != null && prev.Size >= size) { + chunk = prev; + prev = prev.PrevInSize; + } + } else { + chunk = FreeChunk.Smallest; + var next = chunk.NextInSize; + while (chunk.Size < size && next != null) { + chunk = next; + next = next.NextInSize; + } + } + if (size <= chunk.Size) { + int index = chunk.Index; + if (index == 0 && chunk.Size == chunk.Segment.Size) --NumberOfUnusedSegments; + if (size != chunk.Size) { + chunk.Index += size; + chunk.Size -= size; + var prev = chunk.PrevInSize; + if (prev != null && chunk.Size < 
prev.Size) chunk.MoveAfterSizeHasDecreased(); + } else chunk.Remove(); + chunk.Segment.AssertIntegrity(); + return new StringBuffer(chunk.Segment, index, size); + } + } + return PoolSegment.AllocateStringBufferInNewSegment(size); + } + } else { + if (minLength < 0) throw new ArgumentOutOfRangeException("minLength", "minLength is negative."); + else if (minLength > 0) throw new ArgumentOutOfRangeException("minLength", "minLength is too large. The maximum string buffer length is approximately 2^30."); + return new StringBuffer(null, 0, 0); + } + } + + public void Dispose() { + int size = Length; + Length = -1; + if (size > 0) { + lock (SyncRoot) { + Allocated -= size; + if (size <= MaxSegmentSize) { + FreeChunk prev = null; + FreeChunk next = Segment.FirstFreeChunk; + while (next != null && Index > next.Index) { + prev = next; + next = next.NextInSegment; + } + if (prev == null || prev.Index + prev.Size != Index) { + if (next != null && Index + size == next.Index) { + next.Index = Index; + next.Size += size; + var nextNext = next.NextInSize; + if (nextNext != null && next.Size > nextNext.Size) next.MoveAfterSizeHasIncreased(); + } else { + new FreeChunk(Segment, prev, next, Index, size); // inserts itself into the lists + } + } else { + if (next != null && Index + size == next.Index) { + prev.Size += size + next.Size; + next.Remove(); + } else { + prev.Size += size; + } + if (prev.NextInSize != null && prev.Size > prev.NextInSize.Size) prev.MoveAfterSizeHasIncreased(); + } + Segment.AssertIntegrity(); + var first = Segment.FirstFreeChunk; + if (first.Size == Segment.Size && ++NumberOfUnusedSegments > MaxNumberOfUnusedSegments) { + --NumberOfUnusedSegments; + first.Remove(); + Segment.Dispose(); + } + } else { // size > MaxSegmentSize + Debug.Assert(size == Segment.Size); + Segment.Dispose(); + } + } + } + } +} + +} + +#endif \ No newline at end of file diff --git a/src/FParsecCS/Strings.cs b/src/FParsecCS/Strings.cs new file mode 100644 index 0000000..c54af3c --- 
/dev/null +++ b/src/FParsecCS/Strings.cs @@ -0,0 +1,315 @@ +// Copyright (c) Stephan Tolksdorf 2010-2011 +// License: Simplified BSD License. See accompanying documentation. + +using System; + +namespace FParsec { + +internal static class Strings { + + static internal string Quote(string stringToQuote) { + return Text.SingleQuote(stringToQuote); + } + static internal string Quote(string prefix, string stringToQuote, string postfix) { + return Text.SingleQuote(prefix, stringToQuote, postfix); + } + + static internal string AsciiQuote(string prefix, string stringToQuote, string postfix) { + return Text.AsciiEscape(stringToQuote, prefix, "'", "'", postfix, '\''); + } + + static internal string QuoteCaseInsensitive(string caseInsensitiveStringToQuote) { + return Quote("", caseInsensitiveStringToQuote, " (case-insensitive)"); + } + + static private string OrdinalEnding(int value) { + if (value < 1) throw new ArgumentOutOfRangeException("value", "The value must be greater than 0."); + var n100 = value%100; + var n10 = value%10; + if (n100 < 11 || n100 > 13) { + if (n10 == 1) return "st"; + if (n10 == 2) return "nd"; + if (n10 == 3) return "rd"; + } + return "th"; + } + + public static readonly string EndOfInput = "end of input"; + public static readonly string AnyChar = "any char"; + public static readonly string Whitespace = "whitespace"; + public static readonly string AsciiUppercaseLetter = "Ascii uppercase letter"; + public static readonly string AsciiLowercaseLetter = "Ascii lowercase letter"; + public static readonly string AsciiLetter = "Ascii letter"; + public static readonly string UppercaseLetter = "uppercase letter"; + public static readonly string LowercaseLetter = "lowercase letter"; + public static readonly string Letter = "letter"; + public static readonly string BinaryDigit = "binary digit"; + public static readonly string OctalDigit = "octal digit"; + public static readonly string DecimalDigit = "decimal digit"; + public static readonly string 
HexadecimalDigit = "hexadecimal digit"; + public static readonly string Newline = "newline"; + public static readonly string Tab = "tab"; + public static readonly string FloatingPointNumber = "floating-point number"; + public static readonly string Int64 = "integer number (64-bit, signed)"; + public static readonly string Int32 = "integer number (32-bit, signed)"; + public static readonly string Int16 = "integer number (16-bit, signed)"; + public static readonly string Int8 = "integer number (8-bit, signed)"; + public static readonly string UInt64 = "integer number (64-bit, unsigned)"; + public static readonly string UInt32 = "integer number (32-bit, unsigned)"; + public static readonly string UInt16 = "integer number (16-bit, unsigned)"; + public static readonly string UInt8 = "integer number (8-bit, unsigned)"; + + public static readonly string Identifier = "identifier"; + public static readonly string IdentifierContainsInvalidCharacterAtIndicatedPosition = "The identifier contains an invalid character at the indicated position."; + + + public static readonly string NumberOutsideOfDoubleRange = "This number is outside the allowable range for double precision floating-pointer numbers."; + + public static readonly string NumberOutsideOfInt64Range = "This number is outside the allowable range for signed 64-bit integers."; + public static readonly string NumberOutsideOfInt32Range = "This number is outside the allowable range for signed 32-bit integers."; + public static readonly string NumberOutsideOfInt16Range = "This number is outside the allowable range for signed 16-bit integers."; + public static readonly string NumberOutsideOfInt8Range = "This number is outside the allowable range for signed 8-bit integers."; + + public static readonly string NumberOutsideOfUInt64Range = "This number is outside the allowable range for unsigned 64-bit integers."; + public static readonly string NumberOutsideOfUInt32Range = "This number is outside the allowable range for unsigned 
32-bit integers."; + public static readonly string NumberOutsideOfUInt16Range = "This number is outside the allowable range for unsigned 16-bit integers."; + public static readonly string NumberOutsideOfUInt8Range = "This number is outside the allowable range for unsigned 8-bit integers."; + + public static readonly string InfixOperator = "infix operator"; + public static readonly string TernaryOperator = "ternary operator"; + public static readonly string PrefixOperator = "prefix operator"; + public static readonly string PostfixOperator = "postfix operator"; + + private static readonly string AnyCharIn1 = "any char in "; + private static readonly string AnyCharIn2 = ""; + + private static readonly string AnyCharNotIn1 = "any char not in "; + private static readonly string AnyCharNotIn2 = ""; + + private static readonly string AnySequenceOfNChars1 = "any sequence of "; + private static readonly string AnySequenceOfNChars2 = " chars"; + + private static readonly string CouldNotFindString1 = "Could not find the string "; + private static readonly string CouldNotFindString2 = "."; + + private static readonly string CouldNotFindCaseInsensitiveString1 = "Could not find the case-insensitive string "; + private static readonly string CouldNotFindCaseInsensitiveString2 = "."; + + private static readonly string StringMatchingRegex1 = "string matching the regex "; + private static readonly string StringMatchingRegex2 = ""; + + private static readonly string ErrorPositionStreamNameFormat = " {0}:"; + private static readonly string ErrorPositionUnaccountedNewlinesFormat = " (+{0})"; + private static readonly string ErrorPositionUtf16ColumnFormat = " (UTF16-Col: {0})"; + private static readonly string ErrorPositionFormat = "Error in{0} Ln: {1}{2} Col: {3}{4}"; + // 0: ErrorPositionStreamName or "" + // 1: line + // 2: ErrorPositionUnaccountedNewlines or "" + // 3: column + // 4: ErrorPositionUtf16Col + + public static string ErrorPosition(Position position) { + var name = 
string.IsNullOrEmpty(position.StreamName) ? "" : string.Format(ErrorPositionStreamNameFormat, position.StreamName); + return string.Format(ErrorPositionFormat, name, position.Line, "", position.Column, ""); + } + + public static string ErrorPosition(Position position, int unaccountedNewlines, long column, long utf16Column) { + var name = string.IsNullOrEmpty(position.StreamName) ? "" : string.Format(ErrorPositionStreamNameFormat, position.StreamName); + var nlCorrection = unaccountedNewlines == 0 ? "" : string.Format(ErrorPositionUnaccountedNewlinesFormat, unaccountedNewlines); + var utf16Col = column == utf16Column ? "" : string.Format(ErrorPositionUtf16ColumnFormat, utf16Column); + return string.Format(ErrorPositionFormat, name, position.Line, nlCorrection, column, utf16Col); + } + + public static readonly string Note = "Note: "; + public static readonly string Expecting = "Expecting: "; + public static readonly string Unexpected = "Unexpected: "; + public static readonly string Comma = ", "; + public static readonly string Or = " or "; + public static readonly string And = " and "; + private static readonly string CompoundCouldNotBeParsedBecauseFormat = "{0} could not be parsed because: "; + + public static string CompoundCouldNotBeParsedBecause(string compoundLabel) { + return string.Format(CompoundCouldNotBeParsedBecauseFormat, compoundLabel); + } + + public static readonly string ParserBacktrackedAfter = "The parser backtracked after: "; + public static readonly string OtherErrors = "Other error messages: "; + public static readonly string UnknownErrors = "Unknown Error(s)"; + public static readonly string Utf16ColumnCountOnlyCountsEachTabAs1Char = " The UTF-16 column count only counts each tab as 1 char."; + public static readonly string ExactPositionBetweenCaretsDependsOnDisplayUnicodeCapabilities = "The exact error position between the two ^ depends on the unicode capabilities of the display."; + public static readonly string 
ErrorOccurredAtEndOfInputStream = "The error occurred at the end of the input stream."; + public static readonly string ErrorOccurredOnAnEmptyLine = "The error occurred on an empty line."; + public static readonly string ErrorOccurredAtEndOfLine = "The error occurred at the end of the line."; + public static readonly string ErrorOccurredAtSecondCharInNewline = "The error occured at the 2nd char in the newline char sequence '\r\n'."; + + private static readonly string NonAssociative = "non-associative"; + private static readonly string LeftAssociative = "left-associative"; + private static readonly string RightAssociative = "right-associative"; + + private static readonly string OperatorToStringFormat = "{0} {1} (precedence: {2}{3}{4})"; + // 0: InfixOperator/TernaryOperator/... + // 1: operator strings + // 2: precedence + // 3: Comma if 4 is not empty, otherwise empty + // 4: LeftAssociative/RightAssociative/... or empty if operator is an associative prefix or postfix operator + + // It would be more precise to write "UTF-16 colum" here, + // but that would probably only confuse users in most situations. + private static readonly string RelativePositionOnTheSameLine = "on the same line at column {0}"; + private static readonly string RelativePositionOnPreviousLine = "on the previous line column {0}"; + private static readonly string RelativePositionOnLineAbove = "{0} lines above column {1}"; + private static readonly string RelativePositionOnDifferentLine = "at (Ln: {0}, Col: {1} )"; + private static readonly string RelativePositionInDifferentFile = "at ({0}, Ln: {1}, Col: {2})"; + + private static readonly string OperatorsConflictsFormat = "The {1} conflicts with the {0} {2}."; + // 0: previous operator + // 1: current operator + // 2: relative position of previous operator + + private static readonly string OperatorStringIsRightPartOfTernaryOperatorFormat = "{0} is the right part of the ternary operator {1}. 
The left part is {2}."; + + + private static readonly string ColumnCountAssumesTabStopDistanceOfNChars1 = "The column count assumes a tab stop distance of "; + private static readonly string ColumnCountAssumesTabStopDistanceOfNChars2 = " chars."; + + private static readonly string ErrorOccurredAtNthCharInCombiningCharacterSequence1 = "The error occurred at the "; + private static readonly string ErrorOccurredAtNthCharInCombiningCharacterSequence2 = " char in the combining character sequence "; + private static readonly string ErrorOccurredAtNthCharInCombiningCharacterSequence3 = "."; + + private static readonly string InputContainsAtLeastNUnaccountedNewlines1 = "The input contains at least "; + private static readonly string InputContainsAtLeastNUnaccountedNewlines2Singular = " newline in the input that wasn't properly registered in the parser stream state."; + private static readonly string InputContainsAtLeastNUnaccountedNewlines2Plural = " newlines in the input that weren't properly registered in the parser stream state."; + + private static readonly string ErrorOccurredAtBeginningOfSurrogatePair1 = "The error occurred at the beginning of the surrogate pair "; + private static readonly string ErrorOccurredAtBeginningOfSurrogatePair2 = "."; + + private static readonly string ErrorOccurredAtSecondCharInSurrogatePair1 = "The error occurred at the second char in the surrogate pair "; + private static readonly string ErrorOccurredAtSecondCharInSurrogatePair2 = "."; + + private static readonly string CharAtErrorPositionIsIsolatedHighSurrogate1 = "The char at the error position ('"; + private static readonly string CharAtErrorPositionIsIsolatedHighSurrogate2 = "') is an isolated high surrogate."; + + private static readonly string CharAtErrorPositionIsIsolatedLowSurrogate1 = "The char at the error position ('"; + private static readonly string CharAtErrorPositionIsIsolatedLowSurrogate2 = "') is an isolated low surrogate."; + + private static readonly string 
CharBeforeErrorPositionIsIsolatedHighSurrogate1 = "The char before the error position ('"; + private static readonly string CharBeforeErrorPositionIsIsolatedHighSurrogate2 = "') is an isolated high surrogate."; + + private static readonly string CharBeforeErrorPositionIsIsolatedLowSurrogate1 = "The char before the error position ('"; + private static readonly string CharBeforeErrorPositionIsIsolatedLowSurrogate2 = "') is an isolated low surrogate."; + + + public static string AnyCharIn(string chars) { + //return Quote(Strings.AnyCharIn1, chars, Strings.AnyCharIn2); + return Strings.AnyCharIn1 + "‘" + chars + "’" + Strings.AnyCharIn2; // Review: Should we use different quotes if the string contains ‘ or ’ chars? + } + + public static string AnyCharNotIn(string chars) { + //return Quote(Strings.AnyCharNotIn1, chars, Strings.AnyCharNotIn2); + return Strings.AnyCharNotIn1 + "‘" + chars + "’" + Strings.AnyCharNotIn2; + } + + public static string StringMatchingRegex(string regexPattern) { + return Quote(Strings.StringMatchingRegex1, regexPattern, Strings.StringMatchingRegex2); + } + + public static string ExpectedAnySequenceOfNChars(int n) { + return Strings.AnySequenceOfNChars1 + n.ToString() + Strings.AnySequenceOfNChars2; + } + + public static string CouldNotFindString(string str) { + return Quote(Strings.CouldNotFindString1, str, Strings.CouldNotFindString2); + } + + public static string CouldNotFindCaseInsensitiveString(string str) { + return Quote(Strings.CouldNotFindCaseInsensitiveString1, str, Strings.CouldNotFindCaseInsensitiveString2); + } + + internal static string OperatorToString(Operator op) { + var type = op.Type == OperatorType.Infix ? (op.IsTernary ? TernaryOperator : InfixOperator) : + op.Type == OperatorType.Prefix ? PrefixOperator : PostfixOperator; + var opString = op.IsTernary ? Quote(Quote("", op.String, " "), op.TernaryRightString, "") : Quote(op.String); + var comma = op.Type != OperatorType.Infix && op.IsAssociative ? 
"" : Comma; + var assoc = op.Type != OperatorType.Infix + ? (op.IsAssociative ? "" : NonAssociative) + : (op.Associativity == Associativity.Left ? LeftAssociative : + op.Associativity == Associativity.Right ? RightAssociative : NonAssociative); + return String.Format(OperatorToStringFormat, type, opString, op.Precedence, comma, assoc); + } + + private static string RelativePosition(Position previousPosition, Position currentPosition) { + if (previousPosition.StreamName == currentPosition.StreamName) { + if (previousPosition.Line == currentPosition.Line) + return String.Format(RelativePositionOnTheSameLine, previousPosition.Column); + long diff = currentPosition.Line - previousPosition.Line; + if (diff == 1) + return String.Format(RelativePositionOnPreviousLine, previousPosition.Column); + if (diff <= 3) + return String.Format(RelativePositionOnLineAbove, diff, previousPosition.Column); + return String.Format(RelativePositionOnDifferentLine, previousPosition.Line, previousPosition.Column); + } + return String.Format(RelativePositionInDifferentFile, Quote(previousPosition.StreamName), previousPosition.Line, previousPosition.Column); + } + + public static string OperatorsConflict(Position previousPosition, Operator previousOperator, + Position currentPosition, Operator currentOperator) + { + var prevOpString = OperatorToString(previousOperator); + var currentOpString = OperatorToString(currentOperator); + var relativePosition = RelativePosition(previousPosition, currentPosition); + return String.Format(OperatorsConflictsFormat, prevOpString, currentOpString, relativePosition); + } + + public static string OperatorStringIsRightPartOfTernaryOperator(Position position1, Position position2, Operator op) { + return String.Format(OperatorStringIsRightPartOfTernaryOperatorFormat, + Quote(op.TernaryRightString), + Quote(Quote("", op.String, " "), op.TernaryRightString, ""), + RelativePosition(position1, position2)); + } + + public static string 
ColumnCountAssumesTabStopDistanceOfNChars(int n) { + return ColumnCountAssumesTabStopDistanceOfNChars1 + n.ToString() + ColumnCountAssumesTabStopDistanceOfNChars2; + } + + public static string ErrorOccurredAtNthCharInCombiningCharacterSequence(int n, string textElement) { + return AsciiQuote(ErrorOccurredAtNthCharInCombiningCharacterSequence1 + n.ToString() + OrdinalEnding(n) + ErrorOccurredAtNthCharInCombiningCharacterSequence2, + textElement, + ErrorOccurredAtNthCharInCombiningCharacterSequence3); + } + + public static string InputContainsAtLeastNUnaccountedNewlines(int n) { + return InputContainsAtLeastNUnaccountedNewlines1 + n.ToString() + (n == 1 ? InputContainsAtLeastNUnaccountedNewlines2Singular + : InputContainsAtLeastNUnaccountedNewlines2Plural); + } + + public static string ErrorOccurredAtBeginningOfSurrogatePair(string surrogatePair) { + return AsciiQuote(ErrorOccurredAtBeginningOfSurrogatePair1, surrogatePair, ErrorOccurredAtBeginningOfSurrogatePair2); + } + + + public static string ErrorOccurredAtSecondCharInSurrogatePair(string surrogatePair) { + return AsciiQuote(ErrorOccurredAtSecondCharInSurrogatePair1, surrogatePair, ErrorOccurredAtSecondCharInSurrogatePair2); + } + + + public static string CharAtErrorPositionIsIsolatedHighSurrogate(char ch) { + return CharAtErrorPositionIsIsolatedHighSurrogate1 + Text.HexEscape(ch) + CharAtErrorPositionIsIsolatedHighSurrogate2; + } + + public static string CharAtErrorPositionIsIsolatedLowSurrogate(char ch) { + return CharAtErrorPositionIsIsolatedLowSurrogate1 + Text.HexEscape(ch) + CharAtErrorPositionIsIsolatedLowSurrogate2; + } + + public static string CharBeforeErrorPositionIsIsolatedHighSurrogate(char ch) { + return CharBeforeErrorPositionIsIsolatedHighSurrogate1 + Text.HexEscape(ch) + CharBeforeErrorPositionIsIsolatedHighSurrogate2; + } + + public static string CharBeforeErrorPositionIsIsolatedLowSurrogate(char ch) { + return CharBeforeErrorPositionIsIsolatedLowSurrogate1 + Text.HexEscape(ch) + 
CharBeforeErrorPositionIsIsolatedLowSurrogate2; + } + + +} + +} + diff --git a/src/FParsecCS/Text.cs b/src/FParsecCS/Text.cs new file mode 100644 index 0000000..21c5ced --- /dev/null +++ b/src/FParsecCS/Text.cs @@ -0,0 +1,679 @@ +// Copyright (c) Stephan Tolksdorf 2009-2010 +// License: Simplified BSD License. See accompanying documentation. + +using System; +using System.Globalization; +using System.Text; +using System.Diagnostics; +using System.Runtime.InteropServices; + +using Microsoft.FSharp.Core; + +using FParsec; + +namespace FParsec { + +public static class Text { + +/// Detects the presence of an encoding preamble in the first count bytes of the byte buffer. +/// If detectEncoding is false, this function only searches for the preamble of the given default encoding, +/// otherwise also for any of the standard unicode byte order marks (UTF-8, UTF-16 LE/BE, UTF-32 LE/BE). +/// If an encoding different from the given default encoding is detected, the new encoding +/// is assigned to the encoding reference. +/// Returns the number of bytes in the detected preamble, or 0 if no preamble is detected. 
+/// +internal static int DetectPreamble(byte[] buffer, int count, ref Encoding encoding, bool detectEncoding) { + Debug.Assert(count >= 0); + if (detectEncoding && count >= 2) { + switch (buffer[0]) { + case 0xEF: + if (buffer[1] == 0xBB && count > 2 && buffer[2] == 0xBF) { + #if !PCL + if (encoding.CodePage != 65001) + #else + if (encoding.WebName != "utf-8") + #endif + encoding = Encoding.UTF8; + return 3; + } + break; + case 0xFE: + if (buffer[1] == 0xFF) { + #if !PCL + if (encoding.CodePage != 1201) + #else + if (encoding.WebName != "utf-16BE") + #endif + encoding = Encoding.BigEndianUnicode; + return 2; + } + break; + case 0xFF: + if (buffer[1] == 0xFE) { + if (count >= 4 && buffer[2] == 0x00 && buffer[3] == 0x00) { + #if !PCL + if (encoding.CodePage != 12000) + encoding = Encoding.UTF32; // UTF-32 little endian + #else + if (encoding.WebName != "utf-32") { + try { + encoding = Encoding.GetEncoding("utf-32"); + } catch { + throw new NotSupportedException("An UTF-32 input encoding was detected, which is not supported on this system."); + } + } + #endif + return 4; + } else { + #if !PCL + if (encoding.CodePage != 1200) + #else + if (encoding.WebName != "utf-16") + #endif + encoding = Encoding.Unicode; // UTF-16 little endian + return 2; + } + } + break; + case 0x00: + if (buffer[1] == 0x00 && count >= 4 && buffer[2] == 0xFE && buffer[3] == 0xFF) { + #if !PCL + if (encoding.CodePage != 12001) + encoding = new UTF32Encoding(true, true); // UTF-32 big endian + #else + if (encoding.WebName != "utf-32BE") { + try { + encoding = Encoding.GetEncoding("utf-32BE"); + } catch { + throw new NotSupportedException("An UTF-32 (big endian) input encoding was detected, which is not supported on this system."); + } + } + #endif + return 4; + } + break; + } + } + byte[] preamble = encoding.GetPreamble(); + if (preamble.Length > 0 && count >= preamble.Length) { + int i = 0; + while (buffer[i] == preamble[i]) { + if (++i == preamble.Length) return preamble.Length; + } + } + return 
0; +} + +#if !LOW_TRUST +/// Reads all remaining chars into the given buffer. If the remaining stream +/// content holds more than the given maximum number of chars, an exception will be thrown. +internal unsafe static int ReadAllRemainingCharsFromStream(char* buffer, int maxCount, byte[] byteBuffer, int byteBufferIndex, int byteBufferCount, System.IO.Stream stream, long streamPosition, Decoder decoder, bool flush) { + Debug.Assert(maxCount > 0 && byteBufferIndex >= 0 && byteBufferIndex < byteBufferCount); + fixed (byte* pByteBuffer = byteBuffer) { + int bufferCount = 0; + for (;;) { + try { + bufferCount += decoder.GetChars(pByteBuffer + byteBufferIndex, byteBufferCount - byteBufferIndex, + buffer + bufferCount, maxCount - bufferCount, flush); + } catch (DecoderFallbackException e) { + e.Data.Add("Stream.Position", streamPosition - (byteBufferCount - byteBufferIndex) + e.Index); + throw; + } + if (flush) break; + byteBufferIndex = 0; // GetChars consumed all bytes in the byte buffer + byteBufferCount = stream.Read(byteBuffer, 0, byteBuffer.Length); + streamPosition += byteBufferCount; + flush = byteBufferCount == 0; + } + return bufferCount; + } +} +#endif + + +/// Returns a case-folded copy of the string argument. All chars are mapped +/// using the (non-Turkic) 1-to-1 case folding mappings (v. 6.0) for Unicode code +/// points in the Basic Multilingual Plane, i.e. code points below 0x10000. +/// If the argument is null, null is returned. 
+#if LOW_TRUST +static public string FoldCase(string str) { + char[] cftable = CaseFoldTable.FoldedChars; + if (str != null) { + for (int i = 0; i < str.Length; ++i) { + char c = str[i]; + char cfc = cftable[c]; + if (c != cfc) { + StringBuilder sb = new StringBuilder(str); + sb[i++] = cfc; + for (; i < str.Length; ++i) { + c = str[i]; + cfc = cftable[c]; + if (c != cfc) sb[i] = cfc; + } + return sb.ToString(); + } + } + } + return str; +} +#else +static unsafe public string FoldCase(string str) { + if (str != null) { + fixed (char* src0 = str) { + char* end = src0 + str.Length; + char* cftable = CaseFoldTable.FoldedChars; + char* src = src0; + for (;;) { // src is null-terminated, so we can always read one char + char c = *src; + if (c == cftable[c]) { + if (++src >= end) break; + } else { + string newString = new String('\u0000', str.Length); + fixed (char* dst_ = newString) { + src = src0; + char* dst = dst_; + do { + *dst = cftable[*src]; + ++src; ++dst; + } while (src != end); + } + return newString; + } + } + } + } + return str; +} +#endif + +#if !LOW_TRUST + unsafe +#endif +static public char FoldCase(char ch) { + return CaseFoldTable.FoldedChars[ch]; +} + +internal static int FindNewlineOrEOSChar(string str) { + int i; + for (i = 0; i < str.Length; ++i) { + char c = str[i]; + // '\n' = '\u000A', '\r' = '\u000D' + if (unchecked((uint)c - 0xEu) < 0xFFFFu - 0xEu) continue; + if (c == '\n' || c == '\r' || c == '\uffff') goto Return; + } + i = -1; +Return: + return i; +} + +/// Returns the given string with all occurrences of "\r\n" and "\r" replaced +/// by "\n". If the argument is null, null is returned. +#if LOW_TRUST +static public string NormalizeNewlines(string str) { + if (str == null || str.Length == 0) return str; + int nCR = 0; + int nCRLF = 0; + for (int i = 0; i < str.Length; ++i) { + if (str[i] == '\r') { + if (i + 1 < str.Length && str[i + 1] == '\n') ++nCRLF; + else ++nCR; + } + } + if (nCRLF == 0) { + return nCR == 0 ? 
str : str.Replace('\r', '\n'); + } else { + return CopyWithNormalizedNewlines(str, 0, str.Length, nCRLF, nCR); + } +} +static internal string CopyWithNormalizedNewlines(string src, int index, int length, int nCRLF, int nCR) { + Debug.Assert(length > 0 && nCRLF >= 0 && nCR >= 0 && (nCRLF | nCR) != 0); + if (nCRLF != 0) { + StringBuilder sb = new StringBuilder(length - nCRLF); + int end = index + length; + int i0 = index; + if (nCR == 0) { + int nn = nCRLF; + int i = index; + for (;;) { + char c = src[i++]; + if (c == '\r') { + sb.Append(src, i0, i - i0 - 1).Append('\n'); + ++i; // skip over the '\n' in "\r\n" + i0 = i; + if (--nn == 0) break; + } + } + } else { + int nn = nCRLF + nCR; + int i = index; + for (;;) { + char c = src[i++]; + if (c == '\r') { + sb.Append(src, i0, i - i0 - 1).Append('\n'); + if (i < end && src[i] == '\n') ++i; // skip over the '\n' in "\r\n" + i0 = i; + if (--nn == 0) break; + } + } + } + if (i0 < end) sb.Append(src, i0, end - i0); + return sb.ToString(); + } else { + return new StringBuilder(src, index, length, length).Replace('\r', '\n').ToString(); + } +} +#else +static unsafe public string NormalizeNewlines(string str) { + int length; + if (str == null || (length = str.Length) == 0) return str; + fixed (char* src = str) { // the char buffer is guaranteed to be null-terminated (C# language specification on fixed statement) + int nCR = 0; + int nCRLF = 0; + for (int i = 0; i < length; ++i) { + if (src[i] == '\r') { + if (src[i + 1] == '\n') ++nCRLF; // relies on null-termination + else ++nCR; + } + } + if (nCRLF == 0) { + return nCR == 0 ? 
str : str.Replace('\r', '\n'); + } else { + return CopyWithNormalizedNewlines(src, length, nCRLF, nCR); + } + } +} +static unsafe internal string CopyWithNormalizedNewlines(char* src, int length, int nCRLF, int nCR) { + Debug.Assert(length > 0 && nCRLF >= 0 && nCR >= 0 && (nCRLF | nCR) != 0); + string newString = new String('\n', length - nCRLF); + fixed (char* dst_ = newString) { + char* dst = dst_; + char* end = src + length; + if (nCRLF != 0) { + if (nCR == 0) { + int nn = nCRLF; + for (;;) { + char c = *src; + ++src; + if (c != '\r') { + *dst = c; + ++dst; + } else { + ++src; // skip over the '\n' in "\r\n" + *dst = '\n'; + ++dst;; + if (--nn == 0) break; + } + } + } else { + int nn = nCRLF + nCR; + for (;;) { + char c = *src; + ++src; + if (c != '\r') { + *dst = c; + ++dst; + } else { + if (*src == '\n') ++src; // skip over the '\n' in "\r\n" (relies on null-termination) + *dst = '\n'; + ++dst; + if (--nn == 0) break; + } + } + } + } else { + int nn = nCR; + for (;;) { + char c = *src; + ++src; + if (c != '\r') { + *dst = c; + ++dst; + } else { + *dst = '\n'; + ++dst; + if (--nn == 0) break; + } + } + } + // copy remaining chars + #if UNALIGNED_READS + if (src != end) { + uint len = Buffer.PositiveDistance(src, end); + if ((unchecked((int)dst) & 2) != 0) { // align dest + *dst = *src; + ++src; ++dst; --len; + } + while (len >= 8) { + ((int*)dst)[0] = ((int*)src)[0]; + ((int*)dst)[1] = ((int*)src)[1]; + ((int*)dst)[2] = ((int*)src)[2]; + ((int*)dst)[3] = ((int*)src)[3]; + src += 8; dst += 8; len -= 8; + } + if ((len & 4) != 0) { + ((int*)dst)[0] = ((int*)src)[0]; + ((int*)dst)[1] = ((int*)src)[1]; + src += 4; dst += 4; + } + if ((len & 2) != 0) { + ((int*)dst)[0] = ((int*)src)[0]; + src += 2; dst += 2; + } + if ((len & 1) != 0) { + *dst = *src; + } + } + #else + while (src < end) { + *dst = *src; + ++src; ++dst; + } + #endif + } + return newString; +} +#endif + + +/// A faster implementation of System.Globalization.StringInfo(str).LengthInTextElements. 
+public static int CountTextElements(string str) { + int count = 0; + int end = str.Length; + int i = 0; + for (;;) { + SkipBaseCharacter: + if (i >= end) break; + char c = str[i]; + ++i; + ++count; + if (c < ' ') continue; // control char + if (c > '~') { + var uc = CharUnicodeInfo.GetUnicodeCategory(c); + Switch: + switch (uc) { + case UnicodeCategory.Surrogate: + uc = CharUnicodeInfo.GetUnicodeCategory(str, i - 1); + if (uc == UnicodeCategory.Surrogate) continue; + ++i; + goto Switch; + case UnicodeCategory.NonSpacingMark: + case UnicodeCategory.SpacingCombiningMark: + case UnicodeCategory.EnclosingMark: + case UnicodeCategory.Control: + case UnicodeCategory.Format: + case UnicodeCategory.OtherNotAssigned: + continue; + // adding these cases to the default branch prevents + // the MS C# compiler from splitting the jump table + case UnicodeCategory.OtherNumber: + case UnicodeCategory.DashPunctuation: + case UnicodeCategory.ClosePunctuation: + case UnicodeCategory.InitialQuotePunctuation: + case UnicodeCategory.OtherPunctuation: + case UnicodeCategory.ModifierSymbol: + default: + break; // exits the switch, not the loop + } + } + // SkipMoreBaseCharactersOrCombiningMarks: + for (;;) { + if (i >= end) break; + c = str[i]; + ++i; + if (c >= ' ') { + if (c <= '~') { + ++count; + continue; + } + } else { // control char + ++count; + goto SkipBaseCharacter; + } + var uc = CharUnicodeInfo.GetUnicodeCategory(c); + Switch: + switch (uc) { + case UnicodeCategory.NonSpacingMark: + case UnicodeCategory.SpacingCombiningMark: + case UnicodeCategory.EnclosingMark: + continue; + case UnicodeCategory.Surrogate: + uc = CharUnicodeInfo.GetUnicodeCategory(str, i - 1); + if (uc != UnicodeCategory.Surrogate) { + ++i; + goto Switch; + } + ++count; + goto SkipBaseCharacter; + case UnicodeCategory.Control: + case UnicodeCategory.Format: + case UnicodeCategory.OtherNotAssigned: + ++count; + goto SkipBaseCharacter; + // adding these cases to the default branch prevents + // the MS C# 
compiler from splitting the jump table + case UnicodeCategory.OtherNumber: + case UnicodeCategory.DashPunctuation: + case UnicodeCategory.ClosePunctuation: + case UnicodeCategory.InitialQuotePunctuation: + case UnicodeCategory.OtherPunctuation: + case UnicodeCategory.ModifierSymbol: + default: + ++count; + continue; + } + } + break; + } + return count; +} + +// Apparently System.Char.Is(High|Low)Surrogate is not safe for consumption by Silverlight developers + +public static bool IsSurrogate(char ch) { return (ch & 0xF800) == 0xD800; } +public static bool IsHighSurrogate(char ch) { return (ch & 0xFC00) == 0xD800; } +public static bool IsLowSurrogate(char ch) { return (ch & 0xFC00) == 0xDC00; } + +#if LOW_TRUST + +public static bool IsWhitespace(char ch) { + return System.Char.IsWhiteSpace(ch); +} + +#else + +internal unsafe struct IsWhitespaceHelper { + + // we use the same data structure and algorithm as for IdentifierValidator + + private static readonly byte[] DataArray = { + 0,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,3,1,1,1, + 1,1,1,1,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,0,1,2,2,3,1,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,4,5,6,2, + 2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,0,62,0,0,1,0,0,0,0,0,0,0, + 32,0,0,0,255,7,0,0,0,131,0,0,0,0,0,128, + }; + + private const int Table1Offset = 0; + private const int Table1Size = 128; + private const int Table1Log2Length = 7; + private const int Table2Offset = 128; + private const int Table2Size = 80; + private const int Table2Log2BlockLength = 4; + private const int Table3Offset = Table2Offset + Table2Size; + private const int Table3Size = 28; + private const int Table3Log2BlockLength = 5; + +#if LOW_TRUST + private static readonly byte[] Table1 = BufferHelpers.CopySubarray(DataArray, 
0, Table2Offset); + private static readonly byte[] Table2 = BufferHelpers.CopySubarray(DataArray, Table2Offset, Table2Size); + private static readonly uint[] Table3 = BufferHelpers.CopyUIntsStoredInLittleEndianByteArray(DataArray, Table3Offset, Table3Size); +#else + private static readonly byte* Data = LoadDataArrayIntoFixedBuffer(); + private static readonly byte* Table1 = Data + Table1Offset; + private static readonly byte* Table2 = Data + Table2Offset; + private static readonly uint* Table3 = (uint*)(Data + Table3Offset); + + private static byte* LoadDataArrayIntoFixedBuffer() { + var buffer = UnmanagedMemoryPool.Allocate(DataArray.Length); + Marshal.Copy(DataArray, 0, buffer, DataArray.Length); + Debug.Assert(Table3Size%sizeof(uint) == 0); + if (!System.BitConverter.IsLittleEndian) + Buffer.SwapByteOrder((uint*)((byte*)buffer + Table3Offset), Table3Size/sizeof(uint)); + return (byte*)buffer; + } +#endif + + public static uint IsWhitespace_(char ch) { + uint cp = (uint)ch; + uint idx1 = cp >> (Table2Log2BlockLength + Table3Log2BlockLength); + const uint f2 = 1u << Table2Log2BlockLength; + const uint m2 = f2 - 1; + uint idx2 = Table1[idx1]*f2 + ((cp >> Table3Log2BlockLength) & m2); + uint idx3 = Table2[idx2]; + return Table3[idx3] >> (int)(cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too + } +} + +/// A faster implementation of System.Char.IsWhiteSpace. 
+public static bool IsWhitespace(char ch) { // should get inlined + return (IsWhitespaceHelper.IsWhitespace_(ch) & 1u) != 0; +} + +#endif + +#if !LOW_TRUST + unsafe +#endif +internal static string HexEscape(char c) { +#if LOW_TRUST + char[] cs = new char[6]; +#else + char* cs = stackalloc char[6]; +#endif + cs[0] = '\\'; + cs[1] = 'u'; + int n = c; + for (int j = 0; j < 4; ++j) { + cs[5 - j] = "0123456789abcdef"[n & 0xf]; + n >>= 4; + } + return new string(cs, 0, 6); +} + +internal static string EscapeChar(char c) { + switch (c) { + case '\\': return "\\\\"; + case '\'': return "\\\'"; + case '\"': return "\\\""; + case '\r': return "\\r"; + case '\n': return "\\n"; + case '\t': return "\\t"; + case '\f': return "\\f"; + case '\v': return "\\v"; + case '\a': return "\\a"; + case '\b': return "\\b"; + default: return HexEscape(c); + } +} + +#if !LOW_TRUST + unsafe +#endif +internal static string Concat(string str0, string str1, string str2, string str3, string str4) { +#if LOW_TRUST + return str0 + str1 + str2 + str3 + str4; +#else + int length = str0.Length + str1.Length + str2.Length + str3.Length + str4.Length; + var str = new string('\u0000', length); + fixed (char* pStr = str) { + int i = 0; + for (int j = 0; j < str0.Length; ++i, ++j) pStr[i] = str0[j]; + for (int j = 0; j < str1.Length; ++i, ++j) pStr[i] = str1[j]; + for (int j = 0; j < str2.Length; ++i, ++j) pStr[i] = str2[j]; + for (int j = 0; j < str3.Length; ++i, ++j) pStr[i] = str3[j]; + for (int j = 0; j < str4.Length; ++i, ++j) pStr[i] = str4[j]; + } + return str; +#endif +} + +internal static string Escape(string str, string prefix1, string prefix2, string postfix1, string postfix2, char escapedQuoteChar) { + Debug.Assert(str != null && prefix1 != null && prefix2 != null && postfix1 != null && postfix2 != null); + StringBuilder sb = null; + int i0 = 0; + int i = 0; + for (;;) { + if (i >= str.Length) break; + char c = str[i]; + ++i; + if (c > '\'' && c < '\u007f') { + if (c != '\\') continue; + } else 
if (c == ' ' || ( !Char.IsControl(c) && c != escapedQuoteChar + && (c < '\u2028' || c > '\u2029'))) continue; + if ((object)sb == null) { + sb = new StringBuilder(str.Length + prefix1.Length + prefix2.Length + postfix1.Length + postfix2.Length + 8); + sb.Append(prefix1).Append(prefix2); + } + int n = i - i0 - 1; + if (n != 0) sb.Append(str, i0, n); + i0 = i; + sb.Append(EscapeChar(c)); + } + if ((object)sb == null) return Concat(prefix1, prefix2, str, postfix1, postfix2); + if (i0 != i) sb.Append(str, i0, i - i0); + return sb.Append(postfix1).Append(postfix2).ToString(); +} + +internal static string AsciiEscape(string str, string prefix1, string prefix2, string postfix1, string postfix2, char escapedQuoteChar) { + Debug.Assert(str != null && prefix1 != null && prefix2 != null && postfix1 != null && postfix2 != null); + StringBuilder sb = null; + int i0 = 0; + int i = 0; + for (;;) { + if (i >= str.Length) break; + char c = str[i]; + ++i; + if (c > '\'' && c < '\u007f') { + if (c != '\\') continue; + } else if (c == ' ' || (c >= ' ' && c <= '\'' && c != escapedQuoteChar)) continue; + if ((object)sb == null) { + sb = new StringBuilder(str.Length + prefix1.Length + prefix2.Length + postfix1.Length + postfix2.Length + 8); + sb.Append(prefix1).Append(prefix2); + } + int n = i - i0 - 1; + if (n != 0) sb.Append(str, i0, n); + i0 = i; + sb.Append(EscapeChar(c)); + } + if ((object)sb == null) return Concat(prefix1, prefix2, str, postfix1, postfix2); + if (i0 != i) sb.Append(str, i0, i - i0); + return sb.Append(postfix1).Append(postfix2).ToString(); +} + +internal static string SingleQuote(string str) { + return Escape(str, "", "'", "'", "", '\''); +} + +internal static string SingleQuote(string prefix, string str, string postfix) { + return Escape(str, prefix, "'", "'", postfix, '\''); +} + +internal static string DoubleQuote(string str) { + return Escape(str, "", "\"", "\"", "", '"'); +} + +internal static string DoubleQuote(string prefix, string str, string postfix) { + 
return Escape(str, prefix, "\"", "\"", postfix, '"'); +} + + +} // class Text + +} \ No newline at end of file diff --git a/src/FParsecCS/UnmanagedMemoryPool.cs b/src/FParsecCS/UnmanagedMemoryPool.cs new file mode 100644 index 0000000..d5caa37 --- /dev/null +++ b/src/FParsecCS/UnmanagedMemoryPool.cs @@ -0,0 +1,40 @@ +// Copyright (c) Stephan Tolksdorf 2010 +// License: Simplified BSD License. See accompanying documentation. + +#if !LOW_TRUST + +using System; +using System.Runtime.InteropServices; +using System.Collections.Generic; + +namespace FParsec { + +/// +/// Allocates and keeps references to chunks of unmanaged memory that we +/// intend to keep around for the lifetime of the AppDomain. +/// +internal sealed class UnmanagedMemoryPool { + private static List Handles = new List(); + + static public IntPtr Allocate(int size) { + lock (Handles) { + var h = Marshal.AllocHGlobal(size); + Handles.Add(h); + return h; + } + } + + // implementation of a "static finalizer" + private UnmanagedMemoryPool() { } + private static readonly UnmanagedMemoryPool Instance = new UnmanagedMemoryPool(); + ~UnmanagedMemoryPool() { + var hs = Handles; + Handles = null; + foreach (var h in hs) + Marshal.FreeHGlobal(h); + } +} + +} + +#endif \ No newline at end of file diff --git a/src/NpgsqlFSharpAnalyzer.Core/NpgsqlFSharpAnalyzer.Core.fsproj b/src/NpgsqlFSharpAnalyzer.Core/NpgsqlFSharpAnalyzer.Core.fsproj index 3581f13..306396a 100644 --- a/src/NpgsqlFSharpAnalyzer.Core/NpgsqlFSharpAnalyzer.Core.fsproj +++ b/src/NpgsqlFSharpAnalyzer.Core/NpgsqlFSharpAnalyzer.Core.fsproj @@ -15,6 +15,7 @@ + diff --git a/src/NpgsqlFSharpAnalyzer.Core/SqlAnalysis.fs b/src/NpgsqlFSharpAnalyzer.Core/SqlAnalysis.fs index 2e20335..25bdf24 100644 --- a/src/NpgsqlFSharpAnalyzer.Core/SqlAnalysis.fs +++ b/src/NpgsqlFSharpAnalyzer.Core/SqlAnalysis.fs @@ -5,6 +5,7 @@ open FSharp.Compiler.Range open F23.StringSimilarity open NpgsqlFSharpParser open InformationSchema +open Npgsql module SqlAnalysis = @@ -309,8 
+310,10 @@ module SqlAnalysis = let parametersWithNullability = determineParameterNullability parameters dbSchemaLookups commandText Result.Ok (parametersWithNullability, output) with - | ex -> - Result.Error ex.Message + | :? PostgresException as databaseError -> + Result.Error databaseError.Message + | error -> + Result.Error (sprintf "%s\n%s" error.Message error.StackTrace) let createWarning (message: string) (range: range) : Message = { Message = message; diff --git a/src/NpgsqlFSharpParser/NpgsqlFSharpParser.fsproj b/src/NpgsqlFSharpParser/NpgsqlFSharpParser.fsproj index 775ef00..69e2431 100644 --- a/src/NpgsqlFSharpParser/NpgsqlFSharpParser.fsproj +++ b/src/NpgsqlFSharpParser/NpgsqlFSharpParser.fsproj @@ -2,6 +2,7 @@ netstandard2.0 + true @@ -10,7 +11,11 @@ - + + + + + diff --git a/src/NpgsqlFSharpVs/NpgsqlFSharpVs.csproj b/src/NpgsqlFSharpVs/NpgsqlFSharpVs.csproj index b08fc8c..6ccbe98 100644 --- a/src/NpgsqlFSharpVs/NpgsqlFSharpVs.csproj +++ b/src/NpgsqlFSharpVs/NpgsqlFSharpVs.csproj @@ -6,6 +6,9 @@ 3 + + false + Debug @@ -106,6 +109,9 @@ 0.43.0 + + 4.7.2 + compile; build; native; contentfiles; analyzers; buildtransitive @@ -130,6 +136,14 @@ + + {c5eb813f-4278-4ee7-925b-6757bad0fe9b} + FParsecCS + + + {9c8e7641-9dc8-470c-8009-71a747c01dc5} + FParsec + {5964bb56-97b8-4fae-9933-8113db11438d} NpgsqlFSharpAnalyzer.Core @@ -141,16 +155,8 @@ - - \ No newline at end of file diff --git a/src/NpgsqlFSharpVs/paket.references b/src/NpgsqlFSharpVs/paket.references index e69de29..6f627f4 100644 --- a/src/NpgsqlFSharpVs/paket.references +++ b/src/NpgsqlFSharpVs/paket.references @@ -0,0 +1 @@ +FSharp.Core diff --git a/tests/NpgsqlFSharpAnalyzer.Tests/NpgsqlFSharpAnalyzer.Tests.fsproj b/tests/NpgsqlFSharpAnalyzer.Tests/NpgsqlFSharpAnalyzer.Tests.fsproj index cc36c7c..8d8e0bc 100644 --- a/tests/NpgsqlFSharpAnalyzer.Tests/NpgsqlFSharpAnalyzer.Tests.fsproj +++ b/tests/NpgsqlFSharpAnalyzer.Tests/NpgsqlFSharpAnalyzer.Tests.fsproj @@ -22,7 +22,6 @@ -