From 95233e9f4f4825863d1ab932ce633e2c9a8fb918 Mon Sep 17 00:00:00 2001 From: David Millington Date: Tue, 4 Aug 2015 14:20:27 +0200 Subject: [PATCH 01/10] Update README.md --- README.md | 81 ++----------------------------------------------------- 1 file changed, 2 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index fb05af21..09d3b115 100644 --- a/README.md +++ b/README.md @@ -1,86 +1,9 @@ ### Abstract Syntax Tree Builder for Delphi With DelphiAST you can take real Delphi code and get an abstract syntax tree. One unit at time and without a symbol table though. -FreePascal and Lazarus compatible. +This is forked from https://github.com/RomanYankovsky/DelphiAST -#### Sample input -```delphi -unit Unit1; - -interface - -uses - Unit2; - -function Sum(A, B: Integer): Integer; - -implementation - -function Sum(A, B: Integer): Integer; -begin - Result := A + B; -end; - -end. -``` - -#### Sample outcome -```xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -``` +Please use that branch, not this one. #### Copyright Copyright (c) 2014-2015 Roman Yankovsky (roman@yankovsky.me) From e6baee29d27b3427ddb4cb6b42ed81870f895cf4 Mon Sep 17 00:00:00 2001 From: David Millington Date: Fri, 22 Apr 2016 15:57:52 +0300 Subject: [PATCH 02/10] Re-adding string caching (memory optimization, for memory usage and also very importantly memory fragmentation). Added StringCache.pas. Changed attributes to use the string cache. Note required a define USESTRINGCACHE which is off by default. The cache is not threadsafe. --- Source/DelphiAST.Classes.pas | 44 ++- Source/DelphiAST.Writer.pas | 12 +- .../SimpleParser/SimpleParser.StringCache.pas | 365 ++++++++++++++++++ 3 files changed, 413 insertions(+), 8 deletions(-) create mode 100644 Source/SimpleParser/SimpleParser.StringCache.pas diff --git a/Source/DelphiAST.Classes.pas b/Source/DelphiAST.Classes.pas index fbccda28..7ede2c9c 100644 --- a/Source/DelphiAST.Classes.pas +++ b/Source/DelphiAST.Classes.pas @@ -5,7 +5,8 @@ interface uses - SysUtils, Generics.Collections, SimpleParser.Lexer.Types, DelphiAST.Consts; + SysUtils, Generics.Collections, SimpleParser.Lexer.Types, DelphiAST.Consts + {$ifdef USESTRINGCACHE}, SimpleParser.StringCache{$endif}; type EParserException = class(Exception) @@ -19,8 +20,14 @@ EParserException = class(Exception) property Line: Integer read FLine; property Col: Integer read FCol; end; + + {$ifdef USESTRINGCACHE} + TAttributeEntryValue = TStringId; + {$else} + TAttributeEntryValue = string; + {$endif} - TAttributeEntry = TPair; + TAttributeEntry = TPair; PAttributeEntry = ^TAttributeEntry; TSyntaxNodeClass = class of TSyntaxNode; @@ -32,6 +39,8 @@ TSyntaxNode = class function GetHasChildren: Boolean; function GetHasAttributes: Boolean; function TryGetAttributeEntry(const Key: TAttributeName; var AttributeEntry: PAttributeEntry): boolean; + procedure SetAttributeInternal(const Key: TAttributeName; const Value: TAttributeEntryValue); + {$ifdef USESTRINGCACHE}procedure SetAttribute(const Key: TAttributeName; const Value: TStringId); overload;{$endif} protected FAttributes: TArray; FChildNodes: TArray; @@ -45,7 +54,7 @@ TSyntaxNode = class function GetAttribute(const Key: TAttributeName): string; function HasAttribute(const Key: TAttributeName): Boolean; - procedure SetAttribute(const Key: TAttributeName; const Value: string); + procedure SetAttribute(const Key: TAttributeName; const Value: string); {$ifdef USESTRINGCACHE}overload;{$endif} procedure ClearAttributes; function AddChild(Node: TSyntaxNode): TSyntaxNode; overload; @@ -360,6 +369,27 @@ class procedure TExpressionTools.RawNodeListToTree(RawParentNode: TSyntaxNode; R { TSyntaxNode } procedure TSyntaxNode.SetAttribute(const Key: TAttributeName; const Value: string); +{$ifdef USESTRINGCACHE} + var + NewValue : TAttributeEntryValue; +{$endif} +begin + {$ifdef USESTRINGCACHE} + NewValue := TStringCache.Instance.Add(Value); + SetAttributeInternal(Key, NewValue); + {$else} + SetAttributeInternal(Key, Value); + {$endif} +end; + +{$ifdef USESTRINGCACHE} + procedure TSyntaxNode.SetAttribute(const Key: TAttributeName; const Value: TStringId); + begin + SetAttributeInternal(Key, Value); + end; +{$endif} + +procedure TSyntaxNode.SetAttributeInternal(const Key: TAttributeName; const Value: TAttributeEntryValue); var AttributeEntry: PAttributeEntry; NewAttributeEntry: TAttributeEntry; @@ -409,7 +439,7 @@ function TSyntaxNode.AddChild(Typ: TSyntaxNodeType): TSyntaxNode; function TSyntaxNode.Clone: TSyntaxNode; var ChildNode: TSyntaxNode; - Attr: TPair; + Attr: TPair; begin Result := TSyntaxNodeClass(Self.ClassType).Create(FTyp); @@ -489,7 +519,11 @@ function TSyntaxNode.GetAttribute(const Key: TAttributeName): string; AttributeEntry: PAttributeEntry; begin if TryGetAttributeEntry(Key, AttributeEntry) then - Result := AttributeEntry.Value + {$ifdef USESTRINGCACHE} + Result := TStringCache.Instance.Get(AttributeEntry.Value) + {$else} + Result := AttributeEntry.Value + {$endif} else Result := ''; end; diff --git a/Source/DelphiAST.Writer.pas b/Source/DelphiAST.Writer.pas index 144fdc18..aaae77b8 100644 --- a/Source/DelphiAST.Writer.pas +++ b/Source/DelphiAST.Writer.pas @@ -21,7 +21,7 @@ TSyntaxTreeWriter = class implementation uses - Generics.Collections, DelphiAST.Consts; + Generics.Collections, DelphiAST.Consts, SimpleParser.StringCache; {$I SimpleParser.inc} {$IFDEF D18_NEWER} @@ -64,7 +64,7 @@ class procedure TSyntaxTreeWriter.NodeToXML(const Builder: TStringBuilder; var HasChildren: Boolean; NewIndent: string; - Attr: TPair; + Attr: TPair; ChildNode: TSyntaxNode; begin HasChildren := Node.HasChildren; @@ -94,7 +94,13 @@ class procedure TSyntaxTreeWriter.NodeToXML(const Builder: TStringBuilder; Builder.Append(' value="' + XMLEncode(TValuedSyntaxNode(Node).Value) + '"'); for Attr in Node.Attributes do - Builder.Append(' ' + AttributeNameToStr(Attr.Key) + '="' + XMLEncode(Attr.Value) + '"'); + Builder.Append(' ' + AttributeNameToStr(Attr.Key) + '="' + {$ifdef USESTRINGCACHE} + + XMLEncode(TStringCache.Instance.Get(Attr.Value)) + {$else} + + XMLEncode(Attr.Value) + {$endif} + + '"'); if HasChildren then Builder.Append('>') else diff --git a/Source/SimpleParser/SimpleParser.StringCache.pas b/Source/SimpleParser/SimpleParser.StringCache.pas new file mode 100644 index 00000000..2055ae97 --- /dev/null +++ b/Source/SimpleParser/SimpleParser.StringCache.pas @@ -0,0 +1,365 @@ +unit SimpleParser.StringCache; + +interface + +uses + System.Generics.Defaults, System.Generics.Collections; + +type + TStringId = type NativeInt; + + TStringCache = class + type + TStringRec = class + strict private + FValue : string; + FUsageCount : NativeUInt; + public + constructor Create(const AValue : string); + procedure IncUsageCount; + property UsageCount : NativeUInt read FUsageCount; + property Value : string read FValue; + end; + private + type + TStringRecValueEqualityComparer = class(TEqualityComparer) + private + FStringComparer : IEqualityComparer; + public + constructor Create(); + function Equals(const Left, Right: TStringRec): Boolean; overload; override; + function GetHashCode(const Value: TStringRec): Integer; overload; override; + end; + TStringRecUsageComparer = class(TInterfacedObject, IComparer) + function Compare(const Left, Right: TStringRec): Integer; + end; + strict private + FStringToId : TDictionary; + FRefCount : NativeUInt; + FIsPersistent : Boolean; + + class var FInstance : TStringCache; + class constructor ClassCreate; + class destructor ClassDestroy; + private + FIdToString : TList; // ID is index + public + constructor Create; + destructor Destroy; override; + + function Add(const Value : string) : TStringId; + function AddAndGet(const P : PChar; const Length : Integer) : string; + function Get(const ID : TStringId) : string; + procedure Clear; + procedure ByUsage(InOrder : TList); + + procedure IncRef; + procedure DecRef; + + property Persistent : Boolean read FIsPersistent write FIsPersistent; + class property Instance : TStringCache read FInstance; + end; + + TStringCacheDictionary = class(TEnumerable>) + strict private + type + TKeyStringEnumerator = class(TEnumerator>) + private + FDictionary: TStringCacheDictionary; + FInternalEnum : TDictionary.TPairEnumerator; + protected + function DoGetCurrent: TPair; override; + function DoMoveNext: Boolean; override; + public + constructor Create(const ADictionary: TStringCacheDictionary); + destructor Destroy; override; + end; + private + FKeyToId : TDictionary; + + function GetItem(const Key: TKey): string; + procedure SetItem(const Key: TKey; const Value: string); + function GetCount : Integer; + protected + function DoGetEnumerator: TEnumerator>; override; + public + constructor Create; + destructor Destroy; override; + + function TryGetValue(const Key: TKey; out Value: string): Boolean; + procedure AddOrSetValue(const Key: TKey; const Value: string); + function ContainsKey(const Key: TKey): Boolean; + function ToArray: TArray>; override; + + property Items[const Key: TKey]: string read GetItem write SetItem; default; + property Count: Integer read GetCount; + end; + +implementation + +uses + SysUtils, Types; + +{ TStringCache.TStringRecValueEqualityComparer } + +constructor TStringCache.TStringRecValueEqualityComparer.Create; +begin + inherited Create(); + FStringComparer := TEqualityComparer.Default; +end; + +function TStringCache.TStringRecValueEqualityComparer.Equals(const Left, + Right: TStringRec): Boolean; +begin + // Compare by the string it holds only + Result := FStringComparer.Equals(Left.Value, Right.Value); +end; + +function TStringCache.TStringRecValueEqualityComparer.GetHashCode( + const Value: TStringRec): Integer; +begin + // Compare by the string it holds only + Result := FStringComparer.GetHashCode(Value.Value); +end; + +{ TStringCache.TStringRecUsageComparer } + +function TStringCache.TStringRecUsageComparer.Compare(const Left, + Right: TStringRec): Integer; +begin + if Left.UsageCount < Right.UsageCount then + Exit(LessThanValue) + else if Left.UsageCount > Right.UsageCount then + Exit(GreaterThanValue) + else // Usage is the same, sort by string + Exit(TComparer.Default.Compare(Left.Value, Right.Value)); +end; + +{ TStringCache } + +class constructor TStringCache.ClassCreate; +begin + FInstance := TStringCache.Create; +end; + +class destructor TStringCache.ClassDestroy; +begin + FInstance.Free; +end; + +constructor TStringCache.Create; +begin + inherited; + FRefCount := 0; + FIsPersistent := false; // Clear the cache when no longer needed + FStringToId := TDictionary.Create( + TStringCache.TStringRecValueEqualityComparer.Create); + FIdToString := TList.Create; + + Add(''); // Empty string is always item 0 +end; + +destructor TStringCache.Destroy; +begin + assert(FRefCount = 0, 'String cache destroyed with live objects still relying on it'); + Clear; + FStringToId.Free; + FIdToString.Free; + inherited; +end; + +function TStringCache.Add(const Value: string): TStringId; +var + Item : TStringRec; +begin + Result := 0; + Item := TStringRec.Create(Value); + + if FStringToId.TryGetValue(Item, Result) then begin + // Already exists. Increment the usage count of the existing one, and return + FIdToString[Result].IncUsageCount; + Item.Free; // Already exists, Item was search key only + Exit; + end; + + // Item does not yet exist + Result := FIdToString.Add(Item); + FStringToId.Add(Item, Result); +end; + +function TStringCache.AddAndGet(const P : PChar; const Length : Integer) : string; +var + SearchStr : string; +begin + SetString(SearchStr, P, Length); + Result := Get(Add(SearchStr)); +end; + +function TStringCache.Get(const ID: TStringId): string; +begin + if ID < FIdToString.Count then + Exit(FIdToString[ID].Value) + else + raise Exception.Create(Format('String cache entry with ID %d does not exist', [ID])); +end; + +procedure TStringCache.Clear; +var + I : Integer; +begin + if FRefCount <> 0 then + raise Exception.Create(Format('Clearing the string cache while objects still rely on it (%d)', [FRefCount])); + + // One instance of TStringRec, but stored in two lists. Free from only one + for I := 0 to Pred(FIdToString.Count) do + FIdToString[I].Free; + + FStringToId.Clear; + FIdToString.Clear; +end; + +procedure TStringCache.ByUsage(InOrder: TList); +begin + InOrder.InsertRange(0, FIdToString); + InOrder.Sort(TStringCache.TStringRecUsageComparer.Create); +end; + +procedure TStringCache.IncRef; +begin + // Keep a count of how many objects are using the string cache. This lets it + // clear itself when the last one is freed - ie, free all the strings when + // they are no longer needed. (The alternative, controlled by Persistent, + // is to keep them - ie make the cache persistent over multiple runs - useful + // for parsing the same or similar files over and over.) + Inc(FRefCount); +end; + +procedure TStringCache.DecRef; +begin + if FRefCount = 0 then + raise Exception.Create('String cache refcount cannot be decremented below zero'); + Dec(FRefCount); + + // Unless want to keep the strings around for next parse, clear now nothing is + // using any of them. + if (FRefCount = 0) and (not FIsPersistent) then + Clear; +end; + +{ TStringCache.TStringRec } + +constructor TStringCache.TStringRec.Create(const AValue: string); +begin + inherited Create; + FValue := AValue; + FUsageCount := 1; +end; + +procedure TStringCache.TStringRec.IncUsageCount; +begin + Inc(FUsageCount); +end; + +{ TStringCacheDictionary } + +constructor TStringCacheDictionary.Create; +begin + inherited; + FKeyToId := TDictionary.Create; + TStringCache.Instance.IncRef; // Uses the cache +end; + +destructor TStringCacheDictionary.Destroy; +begin + FKeyToId.Free; + TStringCache.Instance.DecRef; + inherited; +end; + +function TStringCacheDictionary.GetItem(const Key: TKey): string; +var + ID : TStringId; +begin + if FKeyToId.TryGetValue(Key, ID) then + Result := TStringCache.Instance.Get(ID) + else + Result := ''; +end; + +procedure TStringCacheDictionary.SetItem(const Key: TKey; const Value: string); +begin + FKeyToId.AddOrSetValue(Key, TStringCache.Instance.Add(Value)); +end; + +function TStringCacheDictionary.GetCount : Integer; +begin + Result := FKeyToId.Count; +end; + +function TStringCacheDictionary.TryGetValue(const Key: TKey; out Value: string): Boolean; +var + ID : TStringId; +begin + Result := FKeyToId.TryGetValue(Key, ID); + if Result then + Value := TStringCache.Instance.Get(ID); +end; + +procedure TStringCacheDictionary.AddOrSetValue(const Key: TKey; const Value: string); +begin + SetItem(Key, Value); +end; + +function TStringCacheDictionary.ContainsKey(const Key: TKey): Boolean; +begin + Result := FKeyToId.ContainsKey(Key); +end; + +function TStringCacheDictionary.ToArray: TArray>; +var + Value: TPair; + I : Integer; +begin + SetLength(Result, Count); + I := 0; + for Value in Self do begin + Result[I] := Value; + Inc(I); + end; +end; + +function TStringCacheDictionary.DoGetEnumerator: TEnumerator>; +begin + Result := TKeyStringEnumerator.Create(Self); +end; + +{ TStringCacheDictionary.TKeyStringEnumerator } + +constructor TStringCacheDictionary.TKeyStringEnumerator.Create(const ADictionary: TStringCacheDictionary); +begin + inherited Create(); + FDictionary := ADictionary; + FInternalEnum := FDictionary.FKeyToId.GetEnumerator; +end; + +destructor TStringCacheDictionary.TKeyStringEnumerator.Destroy; +begin + FInternalEnum.Free; + inherited; +end; + +function TStringCacheDictionary.TKeyStringEnumerator.DoGetCurrent: TPair; +var + Pair : TPair; +begin + // Wrap the owner dictionary's internal FKeyToId enumerator, converting ID to string + Pair := FInternalEnum.Current; + Result := TPair.Create(Pair.Key, FDictionary.Items[Pair.Key]); +end; + +function TStringCacheDictionary.TKeyStringEnumerator.DoMoveNext: Boolean; +begin + Result := FInternalEnum.MoveNext; +end; + +end. From 3228e5b87fc4521918b9157be6235b5ceb4606f6 Mon Sep 17 00:00:00 2001 From: David Millington Date: Fri, 22 Apr 2016 16:13:00 +0300 Subject: [PATCH 03/10] TValuedSyntaxNode uses the string cache --- Source/DelphiAST.Classes.pas | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/Source/DelphiAST.Classes.pas b/Source/DelphiAST.Classes.pas index 7ede2c9c..b084eab5 100644 --- a/Source/DelphiAST.Classes.pas +++ b/Source/DelphiAST.Classes.pas @@ -89,11 +89,13 @@ TCompoundSyntaxNode = class(TSyntaxNode) TValuedSyntaxNode = class(TSyntaxNode) private - FValue: string; + FValue: {$ifdef USESTRINGCACHE}TStringId{$else}string{$endif}; + function GetValue: string; + procedure SetValue(const Value: string); public function Clone: TSyntaxNode; override; - property Value: string read FValue write FValue; + property Value: string read GetValue write SetValue; end; TCommentNode = class(TSyntaxNode) @@ -461,6 +463,7 @@ constructor TSyntaxNode.Create(Typ: TSyntaxNodeType); SetLength(FAttributes, 0); SetLength(FChildNodes, 0); FParentNode := nil; + {$ifdef USESTRINGCACHE}TStringCache.Instance.IncRef;{$endif} end; procedure TSyntaxNode.ExtractChild(Node: TSyntaxNode); @@ -493,6 +496,8 @@ destructor TSyntaxNode.Destroy; var i: integer; begin + {$ifdef USESTRINGCACHE}TStringCache.Instance.DecRef;{$endif} + for i := 0 to Length(FChildNodes) - 1 do FChildNodes[i].Free; SetLength(FChildNodes, 0); @@ -569,6 +574,24 @@ function TValuedSyntaxNode.Clone: TSyntaxNode; TValuedSyntaxNode(Result).Value := Self.Value; end; +function TValuedSyntaxNode.GetValue: string; +begin + {$ifdef USESTRINGCACHE} + Result := TStringCache.Instance.Get(FValue); + {$else} + Result := FValue; + {$endif} +end; + +procedure TValuedSyntaxNode.SetValue(const Value: string); +begin + {$ifdef USESTRINGCACHE} + FValue := TStringCache.Instance.Add(Value); + {$else} + FValue := Value; + {$endif} +end; + { TCommentNode } function TCommentNode.Clone: TSyntaxNode; From 66d852efe8d3d02e1ad80a1dc11b1802089b4ae6 Mon Sep 17 00:00:00 2001 From: David Millington Date: Fri, 22 Apr 2016 16:26:41 +0300 Subject: [PATCH 04/10] Lexer uses the string cache for tokens --- Source/SimpleParser/SimpleParser.Lexer.pas | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/Source/SimpleParser/SimpleParser.Lexer.pas b/Source/SimpleParser/SimpleParser.Lexer.pas index bcfbcbec..829118b7 100644 --- a/Source/SimpleParser/SimpleParser.Lexer.pas +++ b/Source/SimpleParser/SimpleParser.Lexer.pas @@ -397,7 +397,8 @@ TmwPasLex = class(TmwBasePasLex) implementation uses - StrUtils; + StrUtils + {$ifdef USESTRINGCACHE}, SimpleParser.StringCache{$endif}; type TmwPasLexExpressionEvaluation = (leeNone, leeAnd, leeOr); @@ -1313,10 +1314,18 @@ constructor TmwBasePasLex.Create; New(FBuffer); FillChar(FBuffer^, SizeOf(TBufferRec), 0); + + {$ifdef USESTRINGCACHE} + TStringCache.Instance.IncRef; + {$endif} end; destructor TmwBasePasLex.Destroy; begin + {$ifdef USESTRINGCACHE} + TStringCache.Instance.DecRef; + {$endif} + if not FBuffer.SharedBuffer then FreeMem(FBuffer.Buf); @@ -2224,7 +2233,11 @@ function TmwBasePasLex.GetIsSpace: Boolean; function TmwBasePasLex.GetToken: string; begin - SetString(Result, (FBuffer.Buf + FTokenPos), GetTokenLen); + {$ifdef USESTRINGCACHE} + Result := TStringCache.Instance.AddAndGet(FBuffer.Buf + FTokenPos, GetTokenLen); + {$else} + SetString(Result, (FBuffer.Buf + FTokenPos), GetTokenLen); + {$endif} end; function TmwBasePasLex.GetTokenLen: Integer; From 1aa71d67215f561273ba7be82e5b5f50371aa6ec Mon Sep 17 00:00:00 2001 From: David Millington Date: Fri, 22 Apr 2016 16:30:12 +0300 Subject: [PATCH 05/10] TStringCacheDictionary is not used (was originally written for a dictionary of attributes, but attributes no longer work as a dictionary.) --- .../SimpleParser/SimpleParser.StringCache.pas | 139 +----------------- 1 file changed, 1 insertion(+), 138 deletions(-) diff --git a/Source/SimpleParser/SimpleParser.StringCache.pas b/Source/SimpleParser/SimpleParser.StringCache.pas index 2055ae97..20938c08 100644 --- a/Source/SimpleParser/SimpleParser.StringCache.pas +++ b/Source/SimpleParser/SimpleParser.StringCache.pas @@ -35,7 +35,7 @@ TStringRecUsageComparer = class(TInterfacedObject, IComparer) end; strict private FStringToId : TDictionary; - FRefCount : NativeUInt; + FRefCount : NativeInt; FIsPersistent : Boolean; class var FInstance : TStringCache; @@ -60,41 +60,6 @@ TStringRecUsageComparer = class(TInterfacedObject, IComparer) class property Instance : TStringCache read FInstance; end; - TStringCacheDictionary = class(TEnumerable>) - strict private - type - TKeyStringEnumerator = class(TEnumerator>) - private - FDictionary: TStringCacheDictionary; - FInternalEnum : TDictionary.TPairEnumerator; - protected - function DoGetCurrent: TPair; override; - function DoMoveNext: Boolean; override; - public - constructor Create(const ADictionary: TStringCacheDictionary); - destructor Destroy; override; - end; - private - FKeyToId : TDictionary; - - function GetItem(const Key: TKey): string; - procedure SetItem(const Key: TKey; const Value: string); - function GetCount : Integer; - protected - function DoGetEnumerator: TEnumerator>; override; - public - constructor Create; - destructor Destroy; override; - - function TryGetValue(const Key: TKey; out Value: string): Boolean; - procedure AddOrSetValue(const Key: TKey; const Value: string); - function ContainsKey(const Key: TKey): Boolean; - function ToArray: TArray>; override; - - property Items[const Key: TKey]: string read GetItem write SetItem; default; - property Count: Integer read GetCount; - end; - implementation uses @@ -260,106 +225,4 @@ procedure TStringCache.TStringRec.IncUsageCount; Inc(FUsageCount); end; -{ TStringCacheDictionary } - -constructor TStringCacheDictionary.Create; -begin - inherited; - FKeyToId := TDictionary.Create; - TStringCache.Instance.IncRef; // Uses the cache -end; - -destructor TStringCacheDictionary.Destroy; -begin - FKeyToId.Free; - TStringCache.Instance.DecRef; - inherited; -end; - -function TStringCacheDictionary.GetItem(const Key: TKey): string; -var - ID : TStringId; -begin - if FKeyToId.TryGetValue(Key, ID) then - Result := TStringCache.Instance.Get(ID) - else - Result := ''; -end; - -procedure TStringCacheDictionary.SetItem(const Key: TKey; const Value: string); -begin - FKeyToId.AddOrSetValue(Key, TStringCache.Instance.Add(Value)); -end; - -function TStringCacheDictionary.GetCount : Integer; -begin - Result := FKeyToId.Count; -end; - -function TStringCacheDictionary.TryGetValue(const Key: TKey; out Value: string): Boolean; -var - ID : TStringId; -begin - Result := FKeyToId.TryGetValue(Key, ID); - if Result then - Value := TStringCache.Instance.Get(ID); -end; - -procedure TStringCacheDictionary.AddOrSetValue(const Key: TKey; const Value: string); -begin - SetItem(Key, Value); -end; - -function TStringCacheDictionary.ContainsKey(const Key: TKey): Boolean; -begin - Result := FKeyToId.ContainsKey(Key); -end; - -function TStringCacheDictionary.ToArray: TArray>; -var - Value: TPair; - I : Integer; -begin - SetLength(Result, Count); - I := 0; - for Value in Self do begin - Result[I] := Value; - Inc(I); - end; -end; - -function TStringCacheDictionary.DoGetEnumerator: TEnumerator>; -begin - Result := TKeyStringEnumerator.Create(Self); -end; - -{ TStringCacheDictionary.TKeyStringEnumerator } - -constructor TStringCacheDictionary.TKeyStringEnumerator.Create(const ADictionary: TStringCacheDictionary); -begin - inherited Create(); - FDictionary := ADictionary; - FInternalEnum := FDictionary.FKeyToId.GetEnumerator; -end; - -destructor TStringCacheDictionary.TKeyStringEnumerator.Destroy; -begin - FInternalEnum.Free; - inherited; -end; - -function TStringCacheDictionary.TKeyStringEnumerator.DoGetCurrent: TPair; -var - Pair : TPair; -begin - // Wrap the owner dictionary's internal FKeyToId enumerator, converting ID to string - Pair := FInternalEnum.Current; - Result := TPair.Create(Pair.Key, FDictionary.Items[Pair.Key]); -end; - -function TStringCacheDictionary.TKeyStringEnumerator.DoMoveNext: Boolean; -begin - Result := FInternalEnum.MoveNext; -end; - end. From fc0470b471697c05e9391c658ee8953c59d5279f Mon Sep 17 00:00:00 2001 From: David Millington Date: Fri, 22 Apr 2016 17:27:30 +0300 Subject: [PATCH 06/10] Basic threadsafety for the string cache. Not extensively reviewed. When threadsafe, any individual instance has get/add method contents wrapped in a lock, to lock the internal structures. The instance is never cleared when the count of objects using it drops to 0, since that's a bit more complex to get right (should only clear while the refcount is 0, but have to lock that to ensure it's not changed while clearing is happening, which severely slows inc/decrementing the count. I couldn't think of a good compare-lock-exchange algorithm. So just don't bother; it's never cleared while alive, when threadsafe.) --- .../SimpleParser/SimpleParser.StringCache.pas | 146 ++++++++++++++---- 1 file changed, 116 insertions(+), 30 deletions(-) diff --git a/Source/SimpleParser/SimpleParser.StringCache.pas b/Source/SimpleParser/SimpleParser.StringCache.pas index 20938c08..ad6a701e 100644 --- a/Source/SimpleParser/SimpleParser.StringCache.pas +++ b/Source/SimpleParser/SimpleParser.StringCache.pas @@ -3,7 +3,14 @@ interface uses - System.Generics.Defaults, System.Generics.Collections; + System.Generics.Defaults, System.Generics.Collections, SyncObjs; + +// Use STRINGCACHE_THREADSAFE to ensure one instance can be accessed by multiple +// threads at once. This prevents clearing - it keeps all added elements for the +// life of the instance (life of the program if using TStringCache.Instance) +// and locks around adding / getting items. +// This is one by default +{$define STRINGCACHE_THREADSAFE} type TStringId = type NativeInt; @@ -36,13 +43,23 @@ TStringRecUsageComparer = class(TInterfacedObject, IComparer) strict private FStringToId : TDictionary; FRefCount : NativeInt; - FIsPersistent : Boolean; + {$ifdef STRINGCACHE_THREADSAFE} + FLock : TCriticalSection; + {$else} + // If threadsafe, always persistent + FIsPersistent : Boolean; + {$endif} class var FInstance : TStringCache; class constructor ClassCreate; class destructor ClassDestroy; + + procedure Lock; inline; + procedure Unlock; inline; private - FIdToString : TList; // ID is index + FIdToString : TList; + function GetIsPersistent: Boolean; + procedure SetIsPersistent(const Value: Boolean); // ID is index public constructor Create; destructor Destroy; override; @@ -50,13 +67,13 @@ TStringRecUsageComparer = class(TInterfacedObject, IComparer) function Add(const Value : string) : TStringId; function AddAndGet(const P : PChar; const Length : Integer) : string; function Get(const ID : TStringId) : string; - procedure Clear; + procedure Clear(const OnDestruction : Boolean = false); procedure ByUsage(InOrder : TList); procedure IncRef; procedure DecRef; - property Persistent : Boolean read FIsPersistent write FIsPersistent; + property Persistent : Boolean read GetIsPersistent write SetIsPersistent; class property Instance : TStringCache read FInstance; end; @@ -116,7 +133,11 @@ constructor TStringCache.Create; begin inherited; FRefCount := 0; - FIsPersistent := false; // Clear the cache when no longer needed + {$ifdef STRINGCACHE_THREADSAFE} + FLock := TCriticalSection.Create; + {$else} + FIsPersistent := false; // Clear the cache when no longer needed + {$endif} FStringToId := TDictionary.Create( TStringCache.TStringRecValueEqualityComparer.Create); FIdToString := TList.Create; @@ -127,9 +148,12 @@ constructor TStringCache.Create; destructor TStringCache.Destroy; begin assert(FRefCount = 0, 'String cache destroyed with live objects still relying on it'); - Clear; + Clear(true); FStringToId.Free; FIdToString.Free; + {$ifdef STRINGCACHE_THREADSAFE} + FLock.Free; + {$endif} inherited; end; @@ -140,16 +164,21 @@ function TStringCache.Add(const Value: string): TStringId; Result := 0; Item := TStringRec.Create(Value); - if FStringToId.TryGetValue(Item, Result) then begin - // Already exists. Increment the usage count of the existing one, and return - FIdToString[Result].IncUsageCount; - Item.Free; // Already exists, Item was search key only - Exit; - end; + Lock; + try + if FStringToId.TryGetValue(Item, Result) then begin + // Already exists. Increment the usage count of the existing one, and return + FIdToString[Result].IncUsageCount; + Item.Free; // Already exists, Item was search key only + Exit; + end; - // Item does not yet exist - Result := FIdToString.Add(Item); - FStringToId.Add(Item, Result); + // Item does not yet exist + Result := FIdToString.Add(Item); + FStringToId.Add(Item, Result); + finally + Unlock; + end; end; function TStringCache.AddAndGet(const P : PChar; const Length : Integer) : string; @@ -162,16 +191,23 @@ function TStringCache.AddAndGet(const P : PChar; const Length : Integer) : strin function TStringCache.Get(const ID: TStringId): string; begin - if ID < FIdToString.Count then - Exit(FIdToString[ID].Value) - else - raise Exception.Create(Format('String cache entry with ID %d does not exist', [ID])); + Lock; + try + if ID < FIdToString.Count then + Exit(FIdToString[ID].Value) + else + raise Exception.Create(Format('String cache entry with ID %d does not exist', [ID])); + finally + Unlock; + end; end; -procedure TStringCache.Clear; +procedure TStringCache.Clear(const OnDestruction : Boolean); var I : Integer; begin + // This doesn't need a lock. When threadsafe, never cleared except on destruction + if FRefCount <> 0 then raise Exception.Create(Format('Clearing the string cache while objects still rely on it (%d)', [FRefCount])); @@ -181,12 +217,41 @@ procedure TStringCache.Clear; FStringToId.Clear; FIdToString.Clear; + + if not OnDestruction then begin + // Add emtpy string - it's always item 0 - unless this is being called as + // part of destruction + Add(''); + assert(Get(0) = ''); + end; end; procedure TStringCache.ByUsage(InOrder: TList); begin - InOrder.InsertRange(0, FIdToString); - InOrder.Sort(TStringCache.TStringRecUsageComparer.Create); + Lock; + try + InOrder.InsertRange(0, FIdToString); + InOrder.Sort(TStringCache.TStringRecUsageComparer.Create); + finally + Unlock; + end; +end; + +function TStringCache.GetIsPersistent: Boolean; +begin + {$ifdef STRINGCACHE_THREADSAFE} + Result := true; // Never clears + {$else} + Result := FPersistent; + {$endif} +end; + +procedure TStringCache.SetIsPersistent(const Value: Boolean); +begin + // If threadsafe, always persistent (never clears) so don't set anything + {$ifndef STRINGCACHE_THREADSAFE} + FPersistent := Value; + {$endif} end; procedure TStringCache.IncRef; @@ -196,21 +261,42 @@ procedure TStringCache.IncRef; // they are no longer needed. (The alternative, controlled by Persistent, // is to keep them - ie make the cache persistent over multiple runs - useful // for parsing the same or similar files over and over.) - Inc(FRefCount); + AtomicIncrement(FRefCount); end; procedure TStringCache.DecRef; begin - if FRefCount = 0 then + if AtomicDecrement(FRefCount) < 0 then raise Exception.Create('String cache refcount cannot be decremented below zero'); - Dec(FRefCount); - // Unless want to keep the strings around for next parse, clear now nothing is - // using any of them. - if (FRefCount = 0) and (not FIsPersistent) then - Clear; + // When threadsafe, synchronizing clearing while ensuring the refcount is 0 + // (ie an addref dosn't occur while clearing) is hard without locking around + // IncRef and DecRef, which is expensive. So just don't clear. + {$ifndef STRINGCACHE_THREADSAFE} + // Unless want to keep the strings around for next parse, clear now nothing is + // using any of them. + if (FRefCount = 0) and (not Persistent) then + Clear; + {$endif} end; +procedure TStringCache.Lock; +begin + // If not threadsafe, nothing to do here + {$ifdef STRINGCACHE_THREADSAFE} + FLock.Acquire; + {$endif} +end; + +procedure TStringCache.Unlock; +begin + // If not threadsafe, nothing to do here + {$ifdef STRINGCACHE_THREADSAFE} + FLock.Release; + {$endif} +end; + + { TStringCache.TStringRec } constructor TStringCache.TStringRec.Create(const AValue: string); From 5b583701659eb70822406e4a2574be08b0997385 Mon Sep 17 00:00:00 2001 From: David Millington Date: Fri, 22 Apr 2016 17:31:38 +0300 Subject: [PATCH 07/10] Fixed typos for case where the ifdef to be threadsafe was turned off --- Source/SimpleParser/SimpleParser.StringCache.pas | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/SimpleParser/SimpleParser.StringCache.pas b/Source/SimpleParser/SimpleParser.StringCache.pas index ad6a701e..2506a81b 100644 --- a/Source/SimpleParser/SimpleParser.StringCache.pas +++ b/Source/SimpleParser/SimpleParser.StringCache.pas @@ -242,7 +242,7 @@ function TStringCache.GetIsPersistent: Boolean; {$ifdef STRINGCACHE_THREADSAFE} Result := true; // Never clears {$else} - Result := FPersistent; + Result := FIsPersistent; {$endif} end; @@ -250,7 +250,7 @@ procedure TStringCache.SetIsPersistent(const Value: Boolean); begin // If threadsafe, always persistent (never clears) so don't set anything {$ifndef STRINGCACHE_THREADSAFE} - FPersistent := Value; + FIsPersistent := Value; {$endif} end; From 668552462ed10f74205822743b90a5d03db52e89 Mon Sep 17 00:00:00 2001 From: David Millington Date: Fri, 22 Apr 2016 17:34:03 +0300 Subject: [PATCH 08/10] Comment; also holds lock for AddAndGet (enters lock individually for both operations; now holds it for both. Safe to enter a CS twice.) --- Source/SimpleParser/SimpleParser.StringCache.pas | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Source/SimpleParser/SimpleParser.StringCache.pas b/Source/SimpleParser/SimpleParser.StringCache.pas index 2506a81b..95bf1fa4 100644 --- a/Source/SimpleParser/SimpleParser.StringCache.pas +++ b/Source/SimpleParser/SimpleParser.StringCache.pas @@ -46,7 +46,7 @@ TStringRecUsageComparer = class(TInterfacedObject, IComparer) {$ifdef STRINGCACHE_THREADSAFE} FLock : TCriticalSection; {$else} - // If threadsafe, always persistent + // If threadsafe, always persistent, so only allow it to be changed when not threadsafe FIsPersistent : Boolean; {$endif} @@ -186,7 +186,13 @@ function TStringCache.AddAndGet(const P : PChar; const Length : Integer) : strin SearchStr : string; begin SetString(SearchStr, P, Length); - Result := Get(Add(SearchStr)); + + Lock; // Will enter in Get and Add too, but a CS can be entered multiple times + try + Result := Get(Add(SearchStr)); + finally + Unlock; + end; end; function TStringCache.Get(const ID: TStringId): string; From 38e35bab8acf65703f0066a9cb2a4007b506e77c Mon Sep 17 00:00:00 2001 From: David Millington Date: Fri, 22 Apr 2016 17:41:50 +0300 Subject: [PATCH 09/10] Revert "Update README.md" This reverts commit 95233e9f4f4825863d1ab932ce633e2c9a8fb918. --- README.md | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 09d3b115..fb05af21 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,86 @@ ### Abstract Syntax Tree Builder for Delphi With DelphiAST you can take real Delphi code and get an abstract syntax tree. One unit at time and without a symbol table though. -This is forked from https://github.com/RomanYankovsky/DelphiAST +FreePascal and Lazarus compatible. -Please use that branch, not this one. +#### Sample input +```delphi +unit Unit1; + +interface + +uses + Unit2; + +function Sum(A, B: Integer): Integer; + +implementation + +function Sum(A, B: Integer): Integer; +begin + Result := A + B; +end; + +end. +``` + +#### Sample outcome +```xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +``` #### Copyright Copyright (c) 2014-2015 Roman Yankovsky (roman@yankovsky.me) From b799dd814b958e851e92e214b357f56f674dd524 Mon Sep 17 00:00:00 2001 From: David Millington Date: Sat, 23 Apr 2016 15:07:08 +0300 Subject: [PATCH 10/10] Information comment at the top of StringCache.pas --- Source/SimpleParser/SimpleParser.StringCache.pas | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Source/SimpleParser/SimpleParser.StringCache.pas b/Source/SimpleParser/SimpleParser.StringCache.pas index 95bf1fa4..21547316 100644 --- a/Source/SimpleParser/SimpleParser.StringCache.pas +++ b/Source/SimpleParser/SimpleParser.StringCache.pas @@ -1,5 +1,16 @@ unit SimpleParser.StringCache; +{ + String cache: provides a global class to keep unique string instances, which + are then referred to by an ID. There are methods to then get a string given + an ID. This can greatly reduce the number of strings in memory, since all + strings with the same content will be the same actual string, stored in the + cache. + + Originally written by David Millington: vintagedave@gmail.com or dave@parnassus.co + Code donated to the DelphiAST project, April 2016. +} + interface uses