From d7738ccaf185a6851dfacb8e348bb61115070c4b Mon Sep 17 00:00:00 2001 From: sebres Date: Sat, 1 Jun 2019 00:48:22 +0200 Subject: [PATCH 01/16] eliminate iPtr->lineBCPtr hash table (replaced with reference in BCExtLineInfo which is allocated with ByteCode now during compile process). --- generic/tclBasic.c | 40 ++++------------------------------------ generic/tclCompile.c | 42 +++++++++++++++++++++++------------------- generic/tclCompile.h | 19 +++++++++++++++++-- generic/tclExecute.c | 19 +++++++++---------- generic/tclInt.h | 7 +------ 5 files changed, 54 insertions(+), 73 deletions(-) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index a03f1315c8f8..3199b6053323 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -541,11 +541,9 @@ Tcl_CreateInterp(void) iPtr->cmdFramePtr = NULL; iPtr->linePBodyPtr = ckalloc(sizeof(Tcl_HashTable)); - iPtr->lineBCPtr = ckalloc(sizeof(Tcl_HashTable)); iPtr->lineLAPtr = ckalloc(sizeof(Tcl_HashTable)); iPtr->lineLABCPtr = ckalloc(sizeof(Tcl_HashTable)); Tcl_InitHashTable(iPtr->linePBodyPtr, TCL_ONE_WORD_KEYS); - Tcl_InitHashTable(iPtr->lineBCPtr, TCL_ONE_WORD_KEYS); Tcl_InitHashTable(iPtr->lineLAPtr, TCL_ONE_WORD_KEYS); Tcl_InitHashTable(iPtr->lineLABCPtr, TCL_ONE_WORD_KEYS); iPtr->scriptCLLocPtr = NULL; @@ -1388,7 +1386,6 @@ DeleteInterpProc( Tcl_HashSearch search; Tcl_HashTable *hTablePtr; ResolverScheme *resPtr, *nextResPtr; - int i; /* * Punt if there is an error in the Tcl_Release/Tcl_Preserve matchup, @@ -1612,33 +1609,6 @@ DeleteInterpProc( ckfree(iPtr->linePBodyPtr); iPtr->linePBodyPtr = NULL; - /* - * See also tclCompile.c, TclCleanupByteCode - */ - - for (hPtr = Tcl_FirstHashEntry(iPtr->lineBCPtr, &search); - hPtr != NULL; - hPtr = Tcl_NextHashEntry(&search)) { - ExtCmdLoc *eclPtr = Tcl_GetHashValue(hPtr); - - if (eclPtr->type == TCL_LOCATION_SOURCE) { - Tcl_DecrRefCount(eclPtr->path); - } - for (i=0; i< eclPtr->nuloc; i++) { - ckfree(eclPtr->loc[i].line); - } - - if (eclPtr->loc != NULL) { - ckfree(eclPtr->loc); - } - - ckfree(eclPtr); - Tcl_DeleteHashEntry(hPtr); - } - Tcl_DeleteHashTable(iPtr->lineBCPtr); - ckfree(iPtr->lineBCPtr); - iPtr->lineBCPtr = NULL; - /* * Location stack for uplevel/eval/... scripts which were passed through * proc arguments. Actually we track all arguments as we do not and cannot @@ -5719,19 +5689,17 @@ TclArgumentBCEnter( int cmd, int pc) { - ExtCmdLoc *eclPtr; + BCExtLineInfo *bcLI; int word; ECL *ePtr; CFWordBC *lastPtr = NULL; Interp *iPtr = (Interp *) interp; - Tcl_HashEntry *hePtr = - Tcl_FindHashEntry(iPtr->lineBCPtr, (char *) codePtr); - if (!hePtr) { + bcLI = TclByteCodeGetELI((ByteCode *)codePtr); + if (!bcLI || !bcLI->eclPtr) { return; } - eclPtr = Tcl_GetHashValue(hePtr); - ePtr = &eclPtr->loc[cmd]; + ePtr = &bcLI->eclPtr->loc[cmd]; /* * ePtr->nline is the number of words originally parsed. diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 41c81afc93d7..6bd351bc18e2 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -1002,15 +1002,16 @@ TclCleanupByteCode( register ByteCode *codePtr) /* Points to the ByteCode to free. */ { Tcl_Interp *interp = (Tcl_Interp *) *codePtr->interpHandle; - Interp *iPtr = (Interp *) interp; int numLitObjects = codePtr->numLitObjects; int numAuxDataItems = codePtr->numAuxDataItems; register Tcl_Obj **objArrayPtr, *objPtr; register const AuxData *auxDataPtr; + BCExtLineInfo * bcLI; int i; #ifdef TCL_COMPILE_STATS if (interp != NULL) { + Interp *iPtr = (Interp *) interp; ByteCodeStats *statsPtr; Tcl_Time destroyTime; int lifetimeSec, lifetimeMicroSec, log2; @@ -1096,19 +1097,15 @@ TclCleanupByteCode( /* * TIP #280. Release the location data associated with this byte code - * structure, if any. NOTE: The interp we belong to may be gone already, - * and the data with it. - * - * See also tclBasic.c, DeleteInterpProc + * structure, if any. */ - if (iPtr) { - Tcl_HashEntry *hePtr = Tcl_FindHashEntry(iPtr->lineBCPtr, - (char *) codePtr); - - if (hePtr) { - ReleaseCmdWordData(Tcl_GetHashValue(hePtr)); - Tcl_DeleteHashEntry(hePtr); + bcLI = TclByteCodeGetELI(codePtr); + if (bcLI) { + ExtCmdLoc *eclPtr = bcLI->eclPtr; + if (eclPtr) { + bcLI->eclPtr = NULL; + ReleaseCmdWordData(eclPtr); } } @@ -1117,6 +1114,11 @@ TclCleanupByteCode( } TclHandleRelease(codePtr->interpHandle); + + /* Correct code pointer to free */ + if (bcLI) { + codePtr = (ByteCode *)bcLI; + } ckfree(codePtr); } @@ -2753,6 +2755,7 @@ TclInitByteCodeObj( * which to create a ByteCode structure. */ { register ByteCode *codePtr; + BCExtLineInfo * bcLI; size_t codeBytes, objArrayBytes, exceptArrayBytes, cmdLocBytes; size_t auxDataArrayBytes, structureSize; register unsigned char *p; @@ -2761,7 +2764,7 @@ TclInitByteCodeObj( #endif int numLitObjects = envPtr->literalArrayNext; Namespace *namespacePtr; - int i, isNew; + int i; Interp *iPtr; if (envPtr->iPtr == NULL) { @@ -2780,20 +2783,22 @@ TclInitByteCodeObj( * Compute the total number of bytes needed for this bytecode. */ - structureSize = sizeof(ByteCode); + structureSize = sizeof(BCExtLineInfo) + sizeof(ByteCode); structureSize += TCL_ALIGN(codeBytes); /* align object array */ structureSize += TCL_ALIGN(objArrayBytes); /* align exc range arr */ structureSize += TCL_ALIGN(exceptArrayBytes); /* align AuxData array */ structureSize += auxDataArrayBytes; structureSize += cmdLocBytes; - if (envPtr->iPtr->varFramePtr != NULL) { - namespacePtr = envPtr->iPtr->varFramePtr->nsPtr; + if (iPtr->varFramePtr != NULL) { + namespacePtr = iPtr->varFramePtr->nsPtr; } else { - namespacePtr = envPtr->iPtr->globalNsPtr; + namespacePtr = iPtr->globalNsPtr; } p = ckalloc(structureSize); + bcLI = (BCExtLineInfo *)p; + p += sizeof(BCExtLineInfo); codePtr = (ByteCode *) p; codePtr->interpHandle = TclHandlePreserve(iPtr->handle); codePtr->compileEpoch = iPtr->compileEpoch; @@ -2903,8 +2908,7 @@ TclInitByteCodeObj( * byte code object (internal rep), for use with the bc compiler. */ - Tcl_SetHashValue(Tcl_CreateHashEntry(iPtr->lineBCPtr, codePtr, - &isNew), envPtr->extCmdMapPtr); + bcLI->eclPtr = envPtr->extCmdMapPtr; envPtr->extCmdMapPtr = NULL; /* We've used up the CompileEnv. Mark as uninitialized. */ diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 1d657a73e7bd..2da31471734d 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -174,8 +174,7 @@ typedef struct CmdLocation { * Structure to record additional location information for byte code. This * information is internal and not saved. i.e. tbcload'ed code will not have * this information. It records the lines for all words of all commands found - * in the byte code. The association with a ByteCode structure BC is done - * through the 'lineBCPtr' HashTable in Interp, keyed by the address of BC. + * in the byte code. This is stored as reference in a BCExtLineInfo structure. * Also recorded is information coming from the context, i.e. type of the * frame and associated information, like the path of a sourced file. */ @@ -514,6 +513,22 @@ typedef struct ByteCode { * created. */ #endif /* TCL_COMPILE_STATS */ } ByteCode; + +/* + * TIP #280: This provides a facilities for the bytecode extended line info, + * which is allocated as a BCExtLineInfo structure together with a ByteCode, + * if it is compiled from source (not precompiled). + */ +typedef struct BCExtLineInfo { + ExtCmdLoc *eclPtr; /* Holds location information of byte code. */ +} BCExtLineInfo; + +#define TclByteCodeHasELI(codePtr) \ + (!((codePtr)->flags & TCL_BYTECODE_PRECOMPILED)) +#define TclByteCodeGetELI(codePtr) \ + (TclByteCodeHasELI(codePtr) \ + ? ((BCExtLineInfo*)(codePtr))-1 : NULL) + /* * Opcodes for the Tcl bytecode instructions. These must correspond to the diff --git a/generic/tclExecute.c b/generic/tclExecute.c index f86cb504ac8d..d5c438f10333 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -1755,17 +1755,17 @@ TclCompileObj( if (invoker == NULL) { return codePtr; } else { - Tcl_HashEntry *hePtr = - Tcl_FindHashEntry(iPtr->lineBCPtr, codePtr); + BCExtLineInfo *bcLI; ExtCmdLoc *eclPtr; CmdFrame *ctxCopyPtr; int redo; - if (!hePtr) { + bcLI = TclByteCodeGetELI(codePtr); + if (!bcLI || !bcLI->eclPtr) { return codePtr; } + eclPtr = bcLI->eclPtr; - eclPtr = Tcl_GetHashValue(hePtr); redo = 0; ctxCopyPtr = TclStackAlloc(interp, sizeof(CmdFrame)); *ctxCopyPtr = *invoker; @@ -9794,19 +9794,18 @@ TclGetSrcInfoForPc( * there find the list of word locations for this command. */ - ExtCmdLoc *eclPtr; + BCExtLineInfo *bcLI; + ExtCmdLoc *eclPtr; ECL *locPtr = NULL; int srcOffset, i; - Interp *iPtr = (Interp *) *codePtr->interpHandle; - Tcl_HashEntry *hePtr = - Tcl_FindHashEntry(iPtr->lineBCPtr, codePtr); - if (!hePtr) { + bcLI = TclByteCodeGetELI(codePtr); + if (!bcLI || !bcLI->eclPtr) { return; } + eclPtr = bcLI->eclPtr; srcOffset = cfPtr->cmd - codePtr->source; - eclPtr = Tcl_GetHashValue(hePtr); for (i=0; i < eclPtr->nuloc; i++) { if (eclPtr->loc[i].srcOffset == srcOffset) { diff --git a/generic/tclInt.h b/generic/tclInt.h index 62fd71b0193c..aaa19338c168 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2031,12 +2031,7 @@ typedef struct Interp { * for its body. It is keyed by the address of * the Proc structure for a procedure. The * values are "struct CmdFrame*". */ - Tcl_HashTable *lineBCPtr; /* This table remembers for each ByteCode - * object the location information for its - * body. It is keyed by the address of the - * Proc structure for a procedure. The values - * are "struct ExtCmdLoc*". (See - * tclCompile.h) */ + Tcl_HashTable *unused_LBCP; /* No longer used (was lineBCPtr) */ Tcl_HashTable *lineLABCPtr; Tcl_HashTable *lineLAPtr; /* This table remembers for each argument of a * command on the execution stack the index of From 7b2d534223e06e442941099de6c315331724932e Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 3 Jun 2019 09:13:38 +0200 Subject: [PATCH 02/16] eliminate iPtr->linePBodyPtr hash table (replaced with reference cfPtr in Proc structure now) --- generic/tclBasic.c | 27 ---- generic/tclCmdMZ.c | 32 ++-- generic/tclDisassemble.c | 32 +++- generic/tclInt.h | 13 +- generic/tclOOMethod.c | 122 +-------------- generic/tclProc.c | 321 +++++++++++++++------------------------ 6 files changed, 170 insertions(+), 377 deletions(-) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index 3199b6053323..d5ec2881061a 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -540,10 +540,8 @@ Tcl_CreateInterp(void) */ iPtr->cmdFramePtr = NULL; - iPtr->linePBodyPtr = ckalloc(sizeof(Tcl_HashTable)); iPtr->lineLAPtr = ckalloc(sizeof(Tcl_HashTable)); iPtr->lineLABCPtr = ckalloc(sizeof(Tcl_HashTable)); - Tcl_InitHashTable(iPtr->linePBodyPtr, TCL_ONE_WORD_KEYS); Tcl_InitHashTable(iPtr->lineLAPtr, TCL_ONE_WORD_KEYS); Tcl_InitHashTable(iPtr->lineLABCPtr, TCL_ONE_WORD_KEYS); iPtr->scriptCLLocPtr = NULL; @@ -1584,31 +1582,6 @@ DeleteInterpProc( TclDeleteLiteralTable(interp, &iPtr->literalTable); - /* - * TIP #280 - Release the arrays for ByteCode/Proc extension, and - * contents. - */ - - for (hPtr = Tcl_FirstHashEntry(iPtr->linePBodyPtr, &search); - hPtr != NULL; - hPtr = Tcl_NextHashEntry(&search)) { - CmdFrame *cfPtr = Tcl_GetHashValue(hPtr); - Proc *procPtr = (Proc *) Tcl_GetHashKey(iPtr->linePBodyPtr, hPtr); - - procPtr->iPtr = NULL; - if (cfPtr) { - if (cfPtr->type == TCL_LOCATION_SOURCE) { - Tcl_DecrRefCount(cfPtr->data.eval.path); - } - ckfree(cfPtr->line); - ckfree(cfPtr); - } - Tcl_DeleteHashEntry(hPtr); - } - Tcl_DeleteHashTable(iPtr->linePBodyPtr); - ckfree(iPtr->linePBodyPtr); - iPtr->linePBodyPtr = NULL; - /* * Location stack for uplevel/eval/... scripts which were passed through * proc arguments. Actually we track all arguments as we do not and cannot diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 193eac411d60..fb18a72e7a90 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -5455,10 +5455,11 @@ TclNRWhileObjCmd( * * TclListLines -- * - * ??? + * Retrieve line(s) inside of given listObj from its source considering + * continuations. * * Results: - * Filled in array of line numbers? + * Last line found, filled in array of line numbers if not NULL. * * Side effects: * None. @@ -5466,16 +5467,16 @@ TclNRWhileObjCmd( *---------------------------------------------------------------------- */ -void +int TclListLines( Tcl_Obj *listObj, /* Pointer to obj holding a string with list * structure. Assumed to be valid. Assumed to * contain n elements. */ int line, /* Line the list as a whole starts on. */ int n, /* #elements in lines */ - int *lines, /* Array of line numbers, to fill. */ + int *lines, /* Array of line numbers, to fill (or NULL). */ Tcl_Obj *const *elems) /* The list elems as Tcl_Obj*, in need of - * derived continuation data */ + * derived continuation data (or NULL) */ { const char *listStr = Tcl_GetString(listObj); const char *listHead = listStr; @@ -5484,26 +5485,27 @@ TclListLines( ContLineLoc *clLocPtr = TclContinuationsGet(listObj); int *clNext = (clLocPtr ? &clLocPtr->loc[0] : NULL); - for (i = 0; i < n; i++) { + n--; + for (i = 0; i <= n; i++) { TclFindElement(NULL, listStr, length, &element, &next, NULL, NULL); - TclAdvanceLines(&line, listStr, element); - /* Leading whitespace */ + TclAdvanceLines(&line, listStr, element); /* Leading whitespace */ TclAdvanceContinuations(&line, &clNext, element - listHead); if (elems && clNext) { TclContinuationsEnterDerived(elems[i], element-listHead, clNext); } - lines[i] = line; - length -= (next - listStr); - TclAdvanceLines(&line, element, next); - /* Element */ - listStr = next; - - if (*element == 0) { + if (lines) { + lines[i] = line; + } + if (*element == 0 || i == n) { /* ASSERT i == n */ break; } + length -= (next - listStr); + TclAdvanceLines(&line, element, next); /* Element */ + listStr = next; } + return line; } /* diff --git a/generic/tclDisassemble.c b/generic/tclDisassemble.c index f62c2600fcc7..17c7587f594c 100644 --- a/generic/tclDisassemble.c +++ b/generic/tclDisassemble.c @@ -27,7 +27,7 @@ static Tcl_Obj * DisassembleByteCodeObj(Tcl_Interp *interp, Tcl_Obj *objPtr); static int FormatInstruction(ByteCode *codePtr, const unsigned char *pc, Tcl_Obj *bufferObj); -static void GetLocationInformation(Proc *procPtr, +static void GetLocationInformation(ByteCode *codePtr, Tcl_Obj **fileObjPtr, int *linePtr); static void PrintSourceToObj(Tcl_Obj *appendObj, const char *stringPtr, int maxChars); @@ -72,7 +72,7 @@ static const Tcl_ObjType tclInstNameType = { static void GetLocationInformation( - Proc *procPtr, /* What to look up the information for. */ + ByteCode *codePtr, /* What to look up the information for. */ Tcl_Obj **fileObjPtr, /* Where to write the information about what * file the code came from. Will be written * to, either with the object (assume shared!) @@ -85,11 +85,31 @@ GetLocationInformation( * either with the line number or with -1 if * the information is not available. */ { - CmdFrame *cfPtr = TclGetCmdFrameForProcedure(procPtr); + Proc *procPtr = codePtr->procPtr; + CmdFrame *cfPtr; + *fileObjPtr = NULL; *linePtr = -1; - if (cfPtr == NULL) { + + if (!procPtr || !(cfPtr = procPtr->cfPtr)) { + + /* todo: retrive from BC if ready */ + #if 0 + BCExtLineInfo *bcLI = TclByteCodeGetELI(codePtr); + ExtCmdLoc *eclPtr; + + if (!bcLI || !(eclPtr = bcLI->eclPtr)) { + return; + } + + if (eclPtr->nloc) { + *linePtr = eclPtr->loc[0].line[0]; + } + if (eclPtr->type == TCL_LOCATION_SOURCE) { + *fileObjPtr = eclPtr->path; + } + #endif return; } @@ -276,7 +296,7 @@ DisassembleByteCodeObj( Tcl_AppendToObj(bufferObj, " Source ", -1); PrintSourceToObj(bufferObj, codePtr->source, TclMin(codePtr->numSrcBytes, 55)); - GetLocationInformation(codePtr->procPtr, &fileObj, &line); + GetLocationInformation(codePtr, &fileObj, &line); if (line > -1 && fileObj != NULL) { Tcl_AppendPrintfToObj(bufferObj, "\n File \"%s\" Line %d", Tcl_GetString(fileObj), line); @@ -1219,7 +1239,7 @@ DisassembleByteCodeAsDicts( * system if it is available. */ - GetLocationInformation(codePtr->procPtr, &file, &line); + GetLocationInformation(codePtr, &file, &line); /* * Build the overall result. diff --git a/generic/tclInt.h b/generic/tclInt.h index aaa19338c168..8cc34e2d3614 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -960,6 +960,8 @@ typedef struct Proc { CompiledLocal *lastLocalPtr;/* Pointer to the last allocated local * variable or NULL if none. This has frame * index (numCompiledLocals-1). */ + struct CmdFrame * cfPtr; /* Holds the location information for proc's + * body. */ } Proc; /* @@ -2026,11 +2028,7 @@ typedef struct Interp { * active. */ int invokeWord; /* Index of the word in the command which * is getting compiled. */ - Tcl_HashTable *linePBodyPtr;/* This table remembers for each statically - * defined procedure the location information - * for its body. It is keyed by the address of - * the Proc structure for a procedure. The - * values are "struct CmdFrame*". */ + Tcl_HashTable *unused_LPBP; /* No longer used (was linePBodyPtr) */ Tcl_HashTable *unused_LBCP; /* No longer used (was lineBCPtr) */ Tcl_HashTable *lineLABCPtr; Tcl_HashTable *lineLAPtr; /* This table remembers for each argument of a @@ -2975,7 +2973,6 @@ MODULE_SCOPE Tcl_Obj * TclGetBgErrorHandler(Tcl_Interp *interp); MODULE_SCOPE int TclGetChannelFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Channel *chanPtr, int *modePtr, int flags); -MODULE_SCOPE CmdFrame * TclGetCmdFrameForProcedure(Proc *procPtr); MODULE_SCOPE int TclGetCompletionCodeFromObj(Tcl_Interp *interp, Tcl_Obj *value, int *code); MODULE_SCOPE int TclGetNumberFromObj(Tcl_Interp *interp, @@ -3031,7 +3028,7 @@ MODULE_SCOPE Tcl_Obj * TclLindexList(Tcl_Interp *interp, MODULE_SCOPE Tcl_Obj * TclLindexFlat(Tcl_Interp *interp, Tcl_Obj *listPtr, int indexCount, Tcl_Obj *const indexArray[]); /* TIP #280 */ -MODULE_SCOPE void TclListLines(Tcl_Obj *listObj, int line, int n, +MODULE_SCOPE int TclListLines(Tcl_Obj *listObj, int line, int n, int *lines, Tcl_Obj *const *elems); MODULE_SCOPE Tcl_Obj * TclListObjCopy(Tcl_Interp *interp, Tcl_Obj *listPtr); MODULE_SCOPE Tcl_Obj * TclLsetList(Tcl_Interp *interp, Tcl_Obj *listPtr, @@ -3069,6 +3066,8 @@ MODULE_SCOPE int TclParseNumber(Tcl_Interp *interp, Tcl_Obj *objPtr, MODULE_SCOPE void TclParseInit(Tcl_Interp *interp, const char *string, int numBytes, Tcl_Parse *parsePtr); MODULE_SCOPE int TclParseAllWhiteSpace(const char *src, int numBytes); +MODULE_SCOPE void TclProcCmdFrameFree(CmdFrame *cfPtr); +MODULE_SCOPE CmdFrame * TclProcCmdFrameSet(Proc *procPtr, int lineIdx, int adj); MODULE_SCOPE int TclProcessReturn(Tcl_Interp *interp, int code, int level, Tcl_Obj *returnOpts); MODULE_SCOPE int TclpObjLstat(Tcl_Obj *pathPtr, Tcl_StatBuf *buf); diff --git a/generic/tclOOMethod.c b/generic/tclOOMethod.c index 3e64ba20b64a..08d58c1e1971 100644 --- a/generic/tclOOMethod.c +++ b/generic/tclOOMethod.c @@ -467,66 +467,7 @@ TclOOMakeProcInstanceMethod( procPtr->cmdPtr = NULL; if (iPtr->cmdFramePtr) { - CmdFrame context = *iPtr->cmdFramePtr; - - if (context.type == TCL_LOCATION_BC) { - /* - * Retrieve source information from the bytecode, if possible. If - * the information is retrieved successfully, context.type will be - * TCL_LOCATION_SOURCE and the reference held by - * context.data.eval.path will be counted. - */ - - TclGetSrcInfoForPc(&context); - } else if (context.type == TCL_LOCATION_SOURCE) { - /* - * The copy into 'context' up above has created another reference - * to 'context.data.eval.path'; account for it. - */ - - Tcl_IncrRefCount(context.data.eval.path); - } - - if (context.type == TCL_LOCATION_SOURCE) { - /* - * We can account for source location within a proc only if the - * proc body was not created by substitution. - * (FIXME: check that this is sane and correct!) - */ - - if (context.line - && (context.nline >= 4) && (context.line[3] >= 0)) { - int isNew; - CmdFrame *cfPtr = ckalloc(sizeof(CmdFrame)); - Tcl_HashEntry *hPtr; - - cfPtr->level = -1; - cfPtr->type = context.type; - cfPtr->line = ckalloc(sizeof(int)); - cfPtr->line[0] = context.line[3]; - cfPtr->nline = 1; - cfPtr->framePtr = NULL; - cfPtr->nextPtr = NULL; - - cfPtr->data.eval.path = context.data.eval.path; - Tcl_IncrRefCount(cfPtr->data.eval.path); - - cfPtr->cmd = NULL; - cfPtr->len = 0; - - hPtr = Tcl_CreateHashEntry(iPtr->linePBodyPtr, - (char *) procPtr, &isNew); - Tcl_SetHashValue(hPtr, cfPtr); - } - - /* - * 'context' is going out of scope; account for the reference that - * it's holding to the path name. - */ - - Tcl_DecrRefCount(context.data.eval.path); - context.data.eval.path = NULL; - } + TclProcCmdFrameSet(procPtr, 3, 0); } return Tcl_NewInstanceMethod(interp, (Tcl_Object) oPtr, nameObj, flags, @@ -580,66 +521,7 @@ TclOOMakeProcMethod( procPtr->cmdPtr = NULL; if (iPtr->cmdFramePtr) { - CmdFrame context = *iPtr->cmdFramePtr; - - if (context.type == TCL_LOCATION_BC) { - /* - * Retrieve source information from the bytecode, if possible. If - * the information is retrieved successfully, context.type will be - * TCL_LOCATION_SOURCE and the reference held by - * context.data.eval.path will be counted. - */ - - TclGetSrcInfoForPc(&context); - } else if (context.type == TCL_LOCATION_SOURCE) { - /* - * The copy into 'context' up above has created another reference - * to 'context.data.eval.path'; account for it. - */ - - Tcl_IncrRefCount(context.data.eval.path); - } - - if (context.type == TCL_LOCATION_SOURCE) { - /* - * We can account for source location within a proc only if the - * proc body was not created by substitution. - * (FIXME: check that this is sane and correct!) - */ - - if (context.line - && (context.nline >= 4) && (context.line[3] >= 0)) { - int isNew; - CmdFrame *cfPtr = ckalloc(sizeof(CmdFrame)); - Tcl_HashEntry *hPtr; - - cfPtr->level = -1; - cfPtr->type = context.type; - cfPtr->line = ckalloc(sizeof(int)); - cfPtr->line[0] = context.line[3]; - cfPtr->nline = 1; - cfPtr->framePtr = NULL; - cfPtr->nextPtr = NULL; - - cfPtr->data.eval.path = context.data.eval.path; - Tcl_IncrRefCount(cfPtr->data.eval.path); - - cfPtr->cmd = NULL; - cfPtr->len = 0; - - hPtr = Tcl_CreateHashEntry(iPtr->linePBodyPtr, - (char *) procPtr, &isNew); - Tcl_SetHashValue(hPtr, cfPtr); - } - - /* - * 'context' is going out of scope; account for the reference that - * it's holding to the path name. - */ - - Tcl_DecrRefCount(context.data.eval.path); - context.data.eval.path = NULL; - } + TclProcCmdFrameSet(procPtr, 3, 0); } return Tcl_NewMethod(interp, (Tcl_Class) clsPtr, nameObj, flags, typePtr, diff --git a/generic/tclProc.c b/generic/tclProc.c index f050d542b0dc..af76155c1bcb 100644 --- a/generic/tclProc.c +++ b/generic/tclProc.c @@ -197,85 +197,7 @@ Tcl_ProcObjCmd( */ if (iPtr->cmdFramePtr) { - CmdFrame *contextPtr = TclStackAlloc(interp, sizeof(CmdFrame)); - - *contextPtr = *iPtr->cmdFramePtr; - if (contextPtr->type == TCL_LOCATION_BC) { - /* - * Retrieve source information from the bytecode, if possible. If - * the information is retrieved successfully, context.type will be - * TCL_LOCATION_SOURCE and the reference held by - * context.data.eval.path will be counted. - */ - - TclGetSrcInfoForPc(contextPtr); - } else if (contextPtr->type == TCL_LOCATION_SOURCE) { - /* - * The copy into 'context' up above has created another reference - * to 'context.data.eval.path'; account for it. - */ - - Tcl_IncrRefCount(contextPtr->data.eval.path); - } - - if (contextPtr->type == TCL_LOCATION_SOURCE) { - /* - * We can account for source location within a proc only if the - * proc body was not created by substitution. - */ - - if (contextPtr->line - && (contextPtr->nline >= 4) && (contextPtr->line[3] >= 0)) { - int isNew; - Tcl_HashEntry *hePtr; - CmdFrame *cfPtr = ckalloc(sizeof(CmdFrame)); - - cfPtr->level = -1; - cfPtr->type = contextPtr->type; - cfPtr->line = ckalloc(sizeof(int)); - cfPtr->line[0] = contextPtr->line[3]; - cfPtr->nline = 1; - cfPtr->framePtr = NULL; - cfPtr->nextPtr = NULL; - - cfPtr->data.eval.path = contextPtr->data.eval.path; - Tcl_IncrRefCount(cfPtr->data.eval.path); - - cfPtr->cmd = NULL; - cfPtr->len = 0; - - hePtr = Tcl_CreateHashEntry(iPtr->linePBodyPtr, - procPtr, &isNew); - if (!isNew) { - /* - * Get the old command frame and release it. See also - * TclProcCleanupProc in this file. Currently it seems as - * if only the procbodytest::proc command of the testsuite - * is able to trigger this situation. - */ - - CmdFrame *cfOldPtr = Tcl_GetHashValue(hePtr); - - if (cfOldPtr->type == TCL_LOCATION_SOURCE) { - Tcl_DecrRefCount(cfOldPtr->data.eval.path); - cfOldPtr->data.eval.path = NULL; - } - ckfree(cfOldPtr->line); - cfOldPtr->line = NULL; - ckfree(cfOldPtr); - } - Tcl_SetHashValue(hePtr, cfPtr); - } - - /* - * 'contextPtr' is going out of scope; account for the reference - * that it's holding to the path name. - */ - - Tcl_DecrRefCount(contextPtr->data.eval.path); - contextPtr->data.eval.path = NULL; - } - TclStackFree(interp, contextPtr); + TclProcCmdFrameSet(procPtr, 3, 0); } /* @@ -629,6 +551,8 @@ TclCreateProc( } } + procPtr->cfPtr = NULL; + *procPtrPtr = procPtr; return TCL_OK; @@ -1927,7 +1851,6 @@ TclProcCompileProc( } if (bodyPtr->typePtr != &tclByteCodeType) { - Tcl_HashEntry *hePtr; #ifdef TCL_COMPILE_DEBUG if (tclTraceCompile >= 1) { @@ -1994,19 +1917,12 @@ TclProcCompileProc( (void) TclPushStackFrame(interp, &framePtr, (Tcl_Namespace *) nsPtr, /* isProcCallFrame */ 0); - /* - * TIP #280: We get the invoking context from the cmdFrame which - * was saved by 'Tcl_ProcObjCmd' (using linePBodyPtr). - */ - - hePtr = Tcl_FindHashEntry(iPtr->linePBodyPtr, (char *) procPtr); - /* * Constructed saved frame has body as word 0. See Tcl_ProcObjCmd. */ iPtr->invokeWord = 0; - iPtr->invokeCmdFramePtr = (hePtr ? Tcl_GetHashValue(hePtr) : NULL); + iPtr->invokeCmdFramePtr = procPtr->cfPtr; TclSetByteCodeFromAny(interp, bodyPtr, NULL, NULL); iPtr->invokeCmdFramePtr = NULL; TclPopStackFrame(interp); @@ -2113,9 +2029,7 @@ TclProcCleanupProc( Tcl_Obj *bodyPtr = procPtr->bodyPtr; Tcl_Obj *defPtr; Tcl_ResolvedVarInfo *resVarInfo; - Tcl_HashEntry *hePtr = NULL; - CmdFrame *cfPtr = NULL; - Interp *iPtr = procPtr->iPtr; + CmdFrame *cfPtr; if (bodyPtr != NULL) { Tcl_DecrRefCount(bodyPtr); @@ -2139,35 +2053,13 @@ TclProcCleanupProc( ckfree(localPtr); localPtr = nextPtr; } + + cfPtr = procPtr->cfPtr; ckfree(procPtr); - /* - * TIP #280: Release the location data associated with this Proc - * structure, if any. The interpreter may not exist (For example for - * procbody structures created by tbcload. - */ - - if (iPtr == NULL) { - return; - } - - hePtr = Tcl_FindHashEntry(iPtr->linePBodyPtr, (char *) procPtr); - if (!hePtr) { - return; - } - - cfPtr = Tcl_GetHashValue(hePtr); - if (cfPtr) { - if (cfPtr->type == TCL_LOCATION_SOURCE) { - Tcl_DecrRefCount(cfPtr->data.eval.path); - cfPtr->data.eval.path = NULL; - } - ckfree(cfPtr->line); - cfPtr->line = NULL; - ckfree(cfPtr); + TclProcCmdFrameFree(cfPtr); } - Tcl_DeleteHashEntry(hePtr); } /* @@ -2396,8 +2288,7 @@ SetLambdaFromAny( Interp *iPtr = (Interp *) interp; const char *name; Tcl_Obj *argsPtr, *bodyPtr, *nsObjPtr, **objv; - int isNew, objc, result; - CmdFrame *cfPtr = NULL; + int objc, result; Proc *procPtr; if (interp == NULL) { @@ -2455,79 +2346,17 @@ SetLambdaFromAny( * available already through 'name'. Use 'TclListLines', see 'switch' * (tclCmdMZ.c). * - * This code is nearly identical to the #280 code in Tcl_ProcObjCmd, see - * this file. The differences are the different index of the body in the - * line array of the context, and the special processing mentioned in the - * previous paragraph to track into the list. Find a way to factor the - * common elements into a single function. */ if (iPtr->cmdFramePtr) { - CmdFrame *contextPtr = TclStackAlloc(interp, sizeof(CmdFrame)); - - *contextPtr = *iPtr->cmdFramePtr; - if (contextPtr->type == TCL_LOCATION_BC) { - /* - * Retrieve the source context from the bytecode. This call - * accounts for the reference to the source file, if any, held in - * 'context.data.eval.path'. - */ - - TclGetSrcInfoForPc(contextPtr); - } else if (contextPtr->type == TCL_LOCATION_SOURCE) { - /* - * We created a new reference to the source file path name when we - * created 'context' above. Account for the reference. - */ - - Tcl_IncrRefCount(contextPtr->data.eval.path); - - } - - if (contextPtr->type == TCL_LOCATION_SOURCE) { - /* - * We can record source location within a lambda only if the body - * was not created by substitution. - */ - - if (contextPtr->line - && (contextPtr->nline >= 2) && (contextPtr->line[1] >= 0)) { - int buf[2]; - - /* - * Move from approximation (line of list cmd word) to actual - * location (line of 2nd list element). - */ - - cfPtr = ckalloc(sizeof(CmdFrame)); - TclListLines(objPtr, contextPtr->line[1], 2, buf, NULL); - - cfPtr->level = -1; - cfPtr->type = contextPtr->type; - cfPtr->line = ckalloc(sizeof(int)); - cfPtr->line[0] = buf[1]; - cfPtr->nline = 1; - cfPtr->framePtr = NULL; - cfPtr->nextPtr = NULL; - - cfPtr->data.eval.path = contextPtr->data.eval.path; - Tcl_IncrRefCount(cfPtr->data.eval.path); - - cfPtr->cmd = NULL; - cfPtr->len = 0; - } - - /* - * 'contextPtr' is going out of scope. Release the reference that - * it's holding to the source file path - */ + /* + * Get from approximation (line of list cmd word) to actual + * location (line of 2nd list element). + */ - Tcl_DecrRefCount(contextPtr->data.eval.path); - } - TclStackFree(interp, contextPtr); + TclProcCmdFrameSet(procPtr, 1, + TclListLines(objPtr, 0, 1+1, NULL, NULL)); } - Tcl_SetHashValue(Tcl_CreateHashEntry(iPtr->linePBodyPtr, procPtr, - &isNew), cfPtr); /* * Set the namespace for this lambda: given by objv[2] understood as a @@ -2741,14 +2570,12 @@ MakeLambdaError( /* *---------------------------------------------------------------------- * - * TclGetCmdFrameForProcedure -- + * TclProcCmdFrameFree -- * - * How to get the CmdFrame information for a procedure. + * Free CmdFrame information of a procedure. * * Results: - * A pointer to the CmdFrame (only guaranteed to be valid until the next - * Tcl command is processed or the interpreter's state is otherwise - * modified) or a NULL if the information is not available. + * none. * * Side effects: * none. @@ -2756,23 +2583,113 @@ MakeLambdaError( *---------------------------------------------------------------------- */ +void +TclProcCmdFrameFree( + CmdFrame *cfPtr) /* The cmd-frame is to be released. */ +{ + /* + * TIP #280: Release the location data associated with this Proc + * structure, if any. + */ + + if (cfPtr->type == TCL_LOCATION_SOURCE) { + Tcl_DecrRefCount(cfPtr->data.eval.path); + cfPtr->data.eval.path = NULL; + } + ckfree(cfPtr->line); + ckfree(cfPtr); +} + +/* + *---------------------------------------------------------------------- + * + * TclProcCmdFrameSet -- + * + * Create CmdFrame information for a procedure or lambda (from context). + * + * Results: + * New created CmdFrame of procedure. + * + * Side effects: + * Previous CmdFrame of procedure may be released. + * + *---------------------------------------------------------------------- + */ + CmdFrame * -TclGetCmdFrameForProcedure( - Proc *procPtr) /* The procedure whose cmd-frame is to be - * looked up. */ +TclProcCmdFrameSet( + Proc *procPtr, /* Proc to create CmdFrame */ + int lineIdx, /* Index of line in line buffer of context */ + int adjustLine) /* Offset to body in lines to adjust its number */ { - Tcl_HashEntry *hePtr; + CmdFrame *cfPtr = NULL; + CmdFrame *contextPtr = procPtr->iPtr->cmdFramePtr; - if (procPtr == NULL || procPtr->iPtr == NULL) { - return NULL; + if (contextPtr->type == TCL_LOCATION_BC) { + + /* + * Retrieve source information from the bytecode, if possible. If + * the information is retrieved successfully, context.type will be + * TCL_LOCATION_SOURCE and the reference held by + * context.data.eval.path will be counted. + */ + + contextPtr = TclStackAlloc( + (Tcl_Interp*)procPtr->iPtr, sizeof(CmdFrame)); + *contextPtr = *procPtr->iPtr->cmdFramePtr; + TclGetSrcInfoForPc(contextPtr); } - hePtr = Tcl_FindHashEntry(procPtr->iPtr->linePBodyPtr, procPtr); - if (hePtr == NULL) { - return NULL; + + /* + * We can account for source location within a proc only if the + * proc body was not created by substitution. + */ + + if ((contextPtr->type == TCL_LOCATION_SOURCE) + && contextPtr->line + && (contextPtr->nline > lineIdx) && (contextPtr->line[lineIdx] >= 0) + ) { + cfPtr = ckalloc(sizeof(CmdFrame)); + + cfPtr->level = -1; + cfPtr->type = contextPtr->type; + cfPtr->line = ckalloc(sizeof(int)); + cfPtr->line[0] = contextPtr->line[lineIdx] + adjustLine; + cfPtr->nline = 1; + cfPtr->framePtr = NULL; + cfPtr->nextPtr = NULL; + + cfPtr->data.eval.path = contextPtr->data.eval.path; + Tcl_IncrRefCount(cfPtr->data.eval.path); + + cfPtr->cmd = NULL; + cfPtr->len = 0; } - return (CmdFrame *) Tcl_GetHashValue(hePtr); + + if (procPtr->cfPtr) { + /* + * Get the old command frame and release it. + */ + + TclProcCmdFrameFree(procPtr->cfPtr); + } + + if (contextPtr != procPtr->iPtr->cmdFramePtr) { + + /* + * 'contextPtr' is going out of scope; account for the reference + * that it's holding to the path name. + */ + + if (contextPtr->type == TCL_LOCATION_SOURCE) { + Tcl_DecrRefCount(contextPtr->data.eval.path); + } + TclStackFree((Tcl_Interp*)procPtr->iPtr, contextPtr); + } + + procPtr->cfPtr = cfPtr; + return cfPtr; } - /* * Local Variables: * mode: c From 3fff9dd7709318513a8f2e46cc3f9d4fc174afe0 Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 3 Jun 2019 21:59:38 +0200 Subject: [PATCH 03/16] **interim commit** try to cherry-pick and back-port from future branch an implementation of code- and string-segment objects (amend/review needed) --- generic/tcl.h | 4 + generic/tclAssembly.c | 12 +- generic/tclCompile.c | 365 +++++++++++++++++++++++++++++++++++++++--- generic/tclCompile.h | 32 ++++ generic/tclExecute.c | 15 +- generic/tclInt.h | 7 + generic/tclLiteral.c | 16 +- generic/tclObj.c | 67 +++++++- generic/tclProc.c | 17 +- 9 files changed, 481 insertions(+), 54 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index 1dfb2004475e..d4d443d98169 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -469,6 +469,10 @@ typedef unsigned TCL_WIDE_INT_TYPE Tcl_WideUInt; #else typedef struct stat Tcl_StatBuf; #endif + +#ifndef TCL_HASH_TYPE +# define TCL_HASH_TYPE unsigned +#endif /* *---------------------------------------------------------------------------- diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index f05814fadfba..2c7a94c50a89 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -847,8 +847,11 @@ CompileAssembleObj( int status; /* Status return from Tcl_AssembleCode */ const char* source; /* String representation of the source code */ int sourceLen; /* Length of the source code in bytes */ + StringSegment *strSegPtr; + source = Tcl_GetUtfFromObj(objPtr, &sourceLen); + /* * Get the expression ByteCode from the object. If it exists, make sure it * is valid in the current context. @@ -867,18 +870,23 @@ CompileAssembleObj( } /* - * Not valid, so free it and regenerate. + * Not valid, so obtain string segment, free code and regenerate. */ + strSegPtr = codePtr->strSegPtr; + strSegPtr->refCount++; FreeAssembleCodeInternalRep(objPtr); + } else { + strSegPtr = TclGetStringSegmentFromObj(objPtr); + strSegPtr->refCount++; } /* * Set up the compilation environment, and assemble the code. */ - source = TclGetStringFromObj(objPtr, &sourceLen); TclInitCompileEnv(interp, &compEnv, source, sourceLen, NULL, 0); + compEnv.strSegPtr = strSegPtr; status = TclAssembleCode(&compEnv, source, sourceLen, TCL_EVAL_DIRECT); if (status != TCL_OK) { /* diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 6bd351bc18e2..5fbb314803ca 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -707,7 +707,7 @@ const Tcl_ObjType tclByteCodeType = { "bytecode", /* name */ FreeByteCodeInternalRep, /* freeIntRepProc */ DupByteCodeInternalRep, /* dupIntRepProc */ - NULL, /* updateStringProc */ + TclUpdateStringOfByteCode, /* updateStringProc */ SetByteCodeFromAny /* setFromAnyProc */ }; @@ -716,11 +716,11 @@ const Tcl_ObjType tclByteCodeType = { * compiled bytecode for the [subst]itution of Tcl values. */ -static const Tcl_ObjType substCodeType = { +const Tcl_ObjType tclSubstCodeType = { "substcode", /* name */ FreeSubstCodeInternalRep, /* freeIntRepProc */ DupByteCodeInternalRep, /* dupIntRepProc - shared with bytecode */ - NULL, /* updateStringProc */ + TclUpdateStringOfByteCode, /* updateStringProc */ NULL, /* setFromAnyProc */ }; @@ -731,6 +731,327 @@ static const Tcl_ObjType substCodeType = { #define TclIncrUInt4AtPtr(ptr, delta) \ TclStoreInt4AtPtr(TclGetUInt4AtPtr(ptr)+(delta), (ptr)); + +void TclFreeStringSegment(StringSegment *strSegPtr) { + while (strSegPtr->refCount-- <= 1) { + StringSegment *parentPtr = strSegPtr->parentPtr; + char *bytes = strSegPtr->bytes.ptr; + ckfree(strSegPtr); + if (!parentPtr) { + ckfree(bytes); + break; + } + /* parent reference "recursively" */ + strSegPtr = parentPtr; + } +} + +/* + * Code segment facilities: + */ +const Tcl_ObjType tclCodeSegmentType; + +static void +DupCodeSegmentInternalRep( + Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ + Tcl_Obj *copyPtr) /* Object with internal rep to set. */ +{ + StringSegment *strSegPtr = srcPtr->internalRep.twoPtrValue.ptr1; + copyPtr->typePtr = &tclCodeSegmentType; + copyPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; + copyPtr->internalRep.twoPtrValue.ptr2 = + srcPtr->internalRep.twoPtrValue.ptr2; + copyPtr->length = srcPtr->length; + strSegPtr->refCount++; +} + +static inline void +TclObtainObjStringSegmentBytes( + register Tcl_Obj *objPtr, /* Object whose string rep to obtain. */ + StringSegment *strSegPtr, /* String segment to obtain bytes from. */ + size_t offset) /* Offset of string in segment. */ +{ + /* segment bytes are still used - copy it */ + char * bytes = ckalloc(objPtr->length + 1); + + memcpy(bytes, TclGetStringSegmentBytes(strSegPtr) + offset, + objPtr->length); + bytes[objPtr->length] = '\0'; + objPtr->bytes = bytes; + /* objPtr->length = strSegPtr->length; */ + + /* *****todo**** remove this check */ + if (objPtr->length > 1000) { + Tcl_Panic("unexpected codeSegment 2 string!!!"); + } +} + +static void +FreeCodeSegmentInternalRep( + register Tcl_Obj *objPtr) /* Object whose internal rep to free. */ +{ + StringSegment *strSegPtr = objPtr->internalRep.twoPtrValue.ptr1; + + objPtr->typePtr = NULL; + + /* + * If string of object may be still needed, update it right now. + */ + if (objPtr->refCount) { + /* rather a type switch (shimmering), string rep may be needed */ + if (!objPtr->bytes || objPtr->bytes == strSegPtr->bytes.ptr) { + /* only possible on root segment, so check if segment is not shared + * obtain last reference and simply free segment */ + if ( strSegPtr->refCount == 1 + && strSegPtr->length == objPtr->length + ) { + /* + * we don't need to check whether strSegPtr has parent + * or the offset by equal lengths and bytes (pointers) + */ + objPtr->bytes = strSegPtr->bytes.ptr; + ckfree(strSegPtr); + return; + } + + /* segment bytes are still used - copy it */ + TclObtainObjStringSegmentBytes(objPtr, strSegPtr, + (size_t)objPtr->internalRep.twoPtrValue.ptr2); + } + } else { + /* release object - we don't need string rep at all */ + if (objPtr->bytes == strSegPtr->bytes.ptr) { + objPtr->bytes = NULL; + } + } + + /* free reference(s) */ + TclFreeStringSegment(strSegPtr); +} + +/* + *---------------------------------------------------------------------- + * + * UpdateStringOfCodeSegment -- + * + * Update the string representation for a code segment object. + * + * Results: + * None. + * + * Side effects: + * The object's string is set to a valid NUL-terminated string that is + * copied from the code segment. Use Tcl_GetUtfFromObj to avoid that. + * + *---------------------------------------------------------------------- + */ + +static void +UpdateStringOfCodeSegment( + Tcl_Obj *objPtr) /* Object whose string rep to update */ +{ + StringSegment *strSegPtr = objPtr->internalRep.twoPtrValue.ptr1; + size_t offset = (size_t)objPtr->internalRep.twoPtrValue.ptr2; + + char *bytes = TclGetStringSegmentBytes(strSegPtr) + offset; + + /* + * If we can obtain the source string from code-segment (whole bytes of the + * top level code segment is NTS), use it directly (don't copy it). + */ + if (!bytes[objPtr->length]) { + /* use it directly (as long as object retains segment reference) */ + objPtr->bytes = bytes; + } else { + /* obtain a copy to bytes (we need NTS for backwards compatibility) */ + TclObtainObjStringSegmentBytes(objPtr, strSegPtr, offset); + } +} + +const Tcl_ObjType tclCodeSegmentType = { + "bytecodesegment", /* name */ + FreeCodeSegmentInternalRep, /* freeIntRepProc */ + DupCodeSegmentInternalRep, /* dupIntRepProc */ + UpdateStringOfCodeSegment, /* updateStringProc */ + NULL /* setFromAnyProc */ +}; + +/* + *---------------------------------------------------------------------- + * + * TclNewCodeSegmentObj -- + * + * This function creates a new object which string representation is + * normally a part of byte-code and it initializes object from the + * byte pointer and length arguments without a copy. + * + * Results: + * A newly created object is returned that has ref count zero. + * + * Side effects: + * The new object's internal representation will be set to given part + * offset and of the length starting from offset. This is basically not + * a C-style NUL-terminated string. + * + *---------------------------------------------------------------------- + */ + +Tcl_Obj * +TclNewCodeSegmentObj( + StringSegment *strSegPtr, /* Points to string segment to share. */ + const char *bytes, /* Points to the first of the length bytes + * used to initialize the new object. */ + unsigned long length) /* The length (in bytes) of "bytes" string + * when initializing the new object. */ +{ + register Tcl_Obj *objPtr; + size_t offset = 0; + + + TclNewObj(objPtr); + + if (strSegPtr) { + const char *segBytes = TclGetStringSegmentBytes(strSegPtr); + offset = bytes - segBytes; + if (offset) { + if (strSegPtr->parentPtr) { + strSegPtr = strSegPtr->parentPtr; + } + assert(offset < segBytes + strSegPtr->length); + } + strSegPtr->refCount++; + } else { + strSegPtr = ckalloc(sizeof(StringSegment)); + strSegPtr->refCount = 1; + strSegPtr->parentPtr = NULL; + strSegPtr->bytes.ptr = (char *)bytes; + strSegPtr->length = length; + strSegPtr->line = 0; + } + + objPtr->bytes = NULL; + objPtr->length = length; + objPtr->typePtr = &tclCodeSegmentType; + objPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; + objPtr->internalRep.twoPtrValue.ptr2 = (void *)offset; + + return objPtr; +} + +/* + *---------------------------------------------------------------------- + * + * TclUpdateStringOfByteCode -- + * + * Update the string representation for any byte code object. Note: + * The handle of this procedure used also to distinguish byte code objects + * in order to avoid generation of new NTS representation. + * + * Results: + * None. + * + * Side effects: + * The object's string is set to a valid NUL-terminated string that is + * copied from the code segment. Use Tcl_GetUtfFromObj to avoid that. + * + *---------------------------------------------------------------------- + */ + +void +TclUpdateStringOfByteCode( + Tcl_Obj *objPtr) /* Object whose string rep to update */ +{ + ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; + StringSegment *strSegPtr = codePtr->strSegPtr; + + /* + * If we can obtain the source string from code-segment (whole bytes of the + * top level code segment is NTS), use it directly (don't copy it). + */ + if (csegPtr && !csegPtr->parentPtr) { + /* use it directly as reference (as long as object retains codeSegment) */ + objPtr->bytes = csegPtr->bytes; + objPtr->length = csegPtr->length; + } else { + /* use it directly as reference (as long as object retains codeSegment) */ + char * bytes = ckalloc(codePtr->numSrcBytes + 1); + + memcpy(bytes, codePtr->source, codePtr->numSrcBytes); + bytes[codePtr->numSrcBytes] = '\0'; + objPtr->bytes = bytes; + objPtr->length = codePtr->numSrcBytes; + + /* *****todo**** remove this check */ + if (objPtr->length > 1000) { + Tcl_Panic("unexpected byteCode 2 string!!!"); + } + } +} + +/* + *---------------------------------------------------------------------- + * + * TclCopyByteCodeObject -- + * + * Copy the part of internal representation of code segment for any type + * of byte code object. + * + * Results: + * A newly created object is returned that has ref count zero. + * + * Side effects: + * The refCount of related segment may be incremented after this operation. + * + *---------------------------------------------------------------------- + */ + +Tcl_Obj * +TclCopyByteCodeObject( + Tcl_Obj *objPtr) /* Object with internal rep to copy. */ +{ + const char *bytes; + int length; + Tcl_Obj *cpyPtr; + StringSegment *strSegPtr = TclGetStringSegmentFromObj(objPtr); + + bytes = Tcl_GetUtfFromObj(objPtr, &length); + cpyPtr = TclNewCodeSegmentObj(strSegPtr, bytes, length); + + /* + * TIP #280. + * Ensure that the continuation line data for the original body is + * not lost and applies to the new body as well. + */ + + TclContinuationsCopy(cpyPtr, objPtr); + + return cpyPtr; +} + +StringSegment * +TclGetStringSegmentFromObj( + Tcl_Obj *objPtr) +{ + if (objPtr->typePtr == &tclCodeSegmentType) { + /* CodeSegment */ + return (StringSegment *)objPtr->internalRep.twoPtrValue.ptr1; + } + + if (objPtr->typePtr + && (objPtr->typePtr->updateStringProc == TclUpdateStringOfByteCode) + ) { + /* ByteCode */ + ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; + return codePtr->strSegPtr; + } + + /* *****todo**** rewrite this - wrap obj to a string segment or return new (rather impossible because of bytes sharing & offsets) */ + if (objPtr->length > 1000) { + Tcl_Panic("unexpected, TclGetStringSegmentFromObj not yet implemented for %s!!!", objPtr->typePtr ? objPtr->typePtr->name : "NONE"); + } + return NULL; +} + /* *---------------------------------------------------------------------- * @@ -783,7 +1104,7 @@ TclSetByteCodeFromAny( } #endif - stringPtr = TclGetStringFromObj(objPtr, &length); + stringPtr = Tcl_GetUtfFromObj(objPtr, &length); /* * TIP #280: Pick up the CmdFrame in which the BC compiler was invoked and @@ -793,6 +1114,9 @@ TclSetByteCodeFromAny( TclInitCompileEnv(interp, &compEnv, stringPtr, length, iPtr->invokeCmdFramePtr, iPtr->invokeWord); + compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr); + compEnv.strSegPtr->refCount++; + /* * Now we check if we have data about invisible continuation lines for the @@ -811,6 +1135,7 @@ TclSetByteCodeFromAny( compEnv.clNext = &clLocPtr->loc[0]; } + //!!!!!! printf("**** byte-code : %d, %p, %.80s\n", clLocPtr != NULL, objPtr, Tcl_GetString(objPtr)); TclCompileScript(interp, stringPtr, length, &compEnv); /* @@ -834,6 +1159,8 @@ TclSetByteCodeFromAny( iPtr->compiledProcPtr = procPtr; TclInitCompileEnv(interp, &compEnv, stringPtr, length, iPtr->invokeCmdFramePtr, iPtr->invokeWord); + compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr); + compEnv.strSegPtr->refCount++; if (clLocPtr) { compEnv.clNext = &clLocPtr->loc[0]; } @@ -1288,7 +1615,7 @@ CompileSubstObj( Interp *iPtr = (Interp *) interp; ByteCode *codePtr = NULL; - if (objPtr->typePtr == &substCodeType) { + if (objPtr->typePtr == &tclSubstCodeType) { Namespace *nsPtr = iPtr->varFramePtr->nsPtr; codePtr = objPtr->internalRep.twoPtrValue.ptr1; @@ -1302,19 +1629,21 @@ CompileSubstObj( FreeSubstCodeInternalRep(objPtr); } } - if (objPtr->typePtr != &substCodeType) { + if (objPtr->typePtr != &tclSubstCodeType) { CompileEnv compEnv; int numBytes; - const char *bytes = Tcl_GetStringFromObj(objPtr, &numBytes); + const char *bytes = Tcl_GetUtfFromObj(objPtr, &numBytes); /* TODO: Check for more TIP 280 */ TclInitCompileEnv(interp, &compEnv, bytes, numBytes, NULL, 0); TclSubstCompile(interp, bytes, numBytes, flags, 1, &compEnv); + compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr); + compEnv.strSegPtr->refCount++; TclEmitOpcode(INST_DONE, &compEnv); TclInitByteCodeObj(objPtr, &compEnv); - objPtr->typePtr = &substCodeType; + objPtr->typePtr = &tclSubstCodeType; TclFreeCompileEnv(&compEnv); codePtr = objPtr->internalRep.twoPtrValue.ptr1; @@ -1422,6 +1751,7 @@ TclInitCompileEnv( envPtr->iPtr = iPtr; envPtr->source = stringPtr; envPtr->numSrcBytes = numBytes; + envPtr->strSegPtr = NULL; envPtr->procPtr = iPtr->compiledProcPtr; iPtr->compiledProcPtr = NULL; envPtr->numCommands = 0; @@ -1666,6 +1996,9 @@ TclFreeCompileEnv( ReleaseCmdWordData(envPtr->extCmdMapPtr); envPtr->extCmdMapPtr = NULL; } + if (envPtr->strSegPtr) { + TclFreeStringSegment(envPtr->strSegPtr); + } } /* @@ -1833,7 +2166,7 @@ TclCompileInvocation( continue; } - objIdx = TclRegisterNewLiteral(envPtr, + objIdx = TclRegisterCodeSegmentLiteral(envPtr, tokenPtr[1].start, tokenPtr[1].size); if (envPtr->clNext) { TclContinuationsEnterDerived(TclFetchLiteral(envPtr, objIdx), @@ -2831,8 +3164,7 @@ TclInitByteCodeObj( codePtr->objArrayPtr = (Tcl_Obj **) p; for (i = 0; i < numLitObjects; i++) { Tcl_Obj *fetched = TclFetchLiteral(envPtr, i); - - if (objPtr == fetched) { + if (fetched == objPtr) { /* * Prevent circular reference where the bytecode intrep of * a value contains a literal which is that same value. @@ -2844,15 +3176,12 @@ TclInitByteCodeObj( * can be sure we do not have any lingering cycles hiding in * the intrep. */ - int numBytes; - const char *bytes = Tcl_GetStringFromObj(objPtr, &numBytes); - - codePtr->objArrayPtr[i] = Tcl_NewStringObj(bytes, numBytes); - Tcl_IncrRefCount(codePtr->objArrayPtr[i]); + fetched = TclCopyByteCodeObject(fetched); + Tcl_IncrRefCount(fetched); + /* Release old fetched (it calls Tcl_DecrRefCount() for us) */ TclReleaseLiteral((Tcl_Interp *)iPtr, objPtr); - } else { - codePtr->objArrayPtr[i] = fetched; } + codePtr->objArrayPtr[i] = fetched; } p += TCL_ALIGN(objArrayBytes); /* align exception range array */ diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 2da31471734d..b5dcab49fa78 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -201,6 +201,30 @@ typedef struct ExtCmdLoc { int nuloc; /* Number of used entries in 'loc'. */ } ExtCmdLoc; +typedef struct StringSegment StringSegment; + +typedef struct StringSegment { + TCL_HASH_TYPE hash; /* Hash of this string segment. */ + size_t refCount; /* Count all references of this segment. */ + StringSegment *parentPtr; /* Parent segment (sharing string memory). */ + union { + char *ptr; /* Pointer to string (parentPtr == NULL) or */ + size_t offset; /* offset to string part (parentPtr != NULL). */ + } bytes; + int length; /* Size of string in bytes. */ + unsigned int line; /* Line in source this object can be found. */ +} StringSegment; + +#define TclGetStringSegmentBytes(strSegPtr) \ + (!(strSegPtr)->parentPtr ? (strSegPtr)->bytes.ptr : \ + (strSegPtr)->parentPtr->bytes.ptr + (strSegPtr)->bytes.offset) + +MODULE_SCOPE Tcl_Obj * TclNewCodeSegmentObj(StringSegment *strSegPtr, + const char *bytes, unsigned long length); + +MODULE_SCOPE StringSegment *TclGetStringSegmentFromObj(Tcl_Obj *objPtr); +MODULE_SCOPE void TclFreeStringSegment(StringSegment *strSegPtr); + /* * CompileProcs need the ability to record information during compilation that * can be used by bytecode instructions during execution. The AuxData @@ -290,6 +314,8 @@ typedef struct CompileEnv { * owned by the CompileEnv and must not be * freed or changed by it. */ int numSrcBytes; /* Number of bytes in source. */ + StringSegment *strSegPtr; /* Top level code segment compiling currently, + * mostly conforms with source/numSrcBytes. */ Proc *procPtr; /* If a procedure is being compiled, a pointer * to its Proc structure; otherwise NULL. Used * to compile local variables. Set from @@ -439,6 +465,8 @@ typedef struct ByteCode { * was compiled. Note that this pointer is not * owned by the ByteCode and must not be freed * or modified by it. */ + StringSegment *strSegPtr; /* Top level code segment of the byte code, + * mostly conforms with source/numSrcBytes. */ Proc *procPtr; /* If the ByteCode was compiled from a * procedure body, this is a pointer to its * Proc structure; otherwise NULL. This @@ -1271,6 +1299,10 @@ MODULE_SCOPE int TclPushProcCallFrame(ClientData clientData, #define TclRegisterNewCmdLiteral(envPtr, bytes, length) \ TclRegisterLiteral(envPtr, (char *)(bytes), length, LITERAL_CMD_NAME) +#define TclRegisterCodeSegmentLiteral(envPtr, bytes, length) \ + TclAddLiteralObj((envPtr), \ + TclNewCodeSegmentObj(envPtr->strSegPtr, (bytes), (length)), 0) + /* * Macro used to manually adjust the stack requirements; used in cases where * the stack effect cannot be computed from the opcode and its operands, but diff --git a/generic/tclExecute.c b/generic/tclExecute.c index d5c438f10333..9d3690345bb4 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -778,11 +778,11 @@ static Tcl_NRPostProc TEBCresume; * compiled bytecode for Tcl expressions. */ -static const Tcl_ObjType exprCodeType = { +const Tcl_ObjType tclExprCodeType = { "exprcode", FreeExprCodeInternalRep, /* freeIntRepProc */ DupExprCodeInternalRep, /* dupIntRepProc */ - NULL, /* updateStringProc */ + TclUpdateStringOfByteCode, /* updateStringProc */ NULL /* setFromAnyProc */ }; @@ -1524,7 +1524,7 @@ CompileExprObj( * Get the expression ByteCode from the object. If it exists, make sure it * is valid in the current context. */ - if (objPtr->typePtr == &exprCodeType) { + if (objPtr->typePtr == &tclExprCodeType) { Namespace *namespacePtr = iPtr->varFramePtr->nsPtr; codePtr = objPtr->internalRep.twoPtrValue.ptr1; @@ -1536,15 +1536,18 @@ CompileExprObj( FreeExprCodeInternalRep(objPtr); } } - if (objPtr->typePtr != &exprCodeType) { + if (objPtr->typePtr != &tclExprCodeType) { /* * TIP #280: No invoker (yet) - Expression compilation. */ int length; - const char *string = TclGetStringFromObj(objPtr, &length); + const char *string = Tcl_GetUtfFromObj(objPtr, &length); TclInitCompileEnv(interp, &compEnv, string, length, NULL, 0); + compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr); + compEnv.strSegPtr->refCount++; + TclCompileExpr(interp, string, length, &compEnv, 0); /* @@ -1565,7 +1568,7 @@ CompileExprObj( TclEmitOpcode(INST_DONE, &compEnv); TclInitByteCodeObj(objPtr, &compEnv); - objPtr->typePtr = &exprCodeType; + objPtr->typePtr = &tclExprCodeType; TclFreeCompileEnv(&compEnv); codePtr = objPtr->internalRep.twoPtrValue.ptr1; if (iPtr->varFramePtr->localCachePtr) { diff --git a/generic/tclInt.h b/generic/tclInt.h index 8cc34e2d3614..8414b1ca94ad 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2681,13 +2681,16 @@ MODULE_SCOPE const Tcl_ObjType tclBignumType; MODULE_SCOPE const Tcl_ObjType tclBooleanType; MODULE_SCOPE const Tcl_ObjType tclByteArrayType; MODULE_SCOPE const Tcl_ObjType tclByteCodeType; +MODULE_SCOPE const Tcl_ObjType tclCodeSegmentType; MODULE_SCOPE const Tcl_ObjType tclDoubleType; MODULE_SCOPE const Tcl_ObjType tclEndOffsetType; +MODULE_SCOPE const Tcl_ObjType tclExprCodeType; MODULE_SCOPE const Tcl_ObjType tclIntType; MODULE_SCOPE const Tcl_ObjType tclListType; MODULE_SCOPE const Tcl_ObjType tclDictType; MODULE_SCOPE const Tcl_ObjType tclProcBodyType; MODULE_SCOPE const Tcl_ObjType tclStringType; +MODULE_SCOPE const Tcl_ObjType tclSubstCodeType; MODULE_SCOPE const Tcl_ObjType tclArraySearchType; MODULE_SCOPE const Tcl_ObjType tclEnsembleCmdType; #ifndef TCL_WIDE_INT_IS_LONG @@ -3297,6 +3300,10 @@ MODULE_SCOPE int Tcl_DisassembleObjCmd(ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); +MODULE_SCOPE const char * Tcl_GetUtfFromObj(Tcl_Obj *objPtr, int *lengthPtr); +MODULE_SCOPE void TclUpdateStringOfByteCode(Tcl_Obj *objPtr); +MODULE_SCOPE Tcl_Obj * TclCopyByteCodeObject(Tcl_Obj *objPtr); + /* Assemble command function */ MODULE_SCOPE int Tcl_AssembleObjCmd(ClientData clientData, Tcl_Interp *interp, int objc, diff --git a/generic/tclLiteral.c b/generic/tclLiteral.c index 16185e6070b4..d716bc3659c8 100644 --- a/generic/tclLiteral.c +++ b/generic/tclLiteral.c @@ -201,15 +201,8 @@ TclCreateLiteral( globalPtr = globalPtr->nextPtr) { objPtr = globalPtr->objPtr; if (globalPtr->nsPtr == nsPtr) { - /* - * Literals should always have UTF-8 representations... but this - * is not guaranteed so we need to be careful anyway. - * - * https://stackoverflow.com/q/54337750/301832 - */ - int objLength; - char *objBytes = TclGetStringFromObj(objPtr, &objLength); + const char *objBytes = Tcl_GetUtfFromObj(objPtr, &objLength); if ((objLength == length) && ((length == 0) || ((objBytes[0] == bytes[0]) @@ -607,12 +600,11 @@ TclAddLiteralObj( * NULL. */ { register LiteralEntry *lPtr; - int objIndex; + int objIndex = envPtr->literalArrayNext; - if (envPtr->literalArrayNext >= envPtr->literalArrayEnd) { + if (objIndex >= envPtr->literalArrayEnd) { ExpandLocalLiteralArray(envPtr); } - objIndex = envPtr->literalArrayNext; envPtr->literalArrayNext++; lPtr = &envPtr->literalArrayPtr[objIndex]; @@ -822,7 +814,7 @@ TclReleaseLiteral( } globalTablePtr = &iPtr->literalTable; - bytes = TclGetStringFromObj(objPtr, &length); + bytes = Tcl_GetUtfFromObj(objPtr, &length); index = (HashString(bytes, length) & globalTablePtr->mask); /* diff --git a/generic/tclObj.c b/generic/tclObj.c index 255614a60e93..cbc305fcb345 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -15,6 +15,7 @@ */ #include "tclInt.h" +#include "tclCompile.h" #include "tommath.h" #include @@ -577,6 +578,7 @@ TclContinuationsEnter( Tcl_HashEntry *hPtr = Tcl_CreateHashEntry(tsdPtr->lineCLPtr, objPtr, &newEntry); ContLineLoc *clLocPtr = ckalloc(sizeof(ContLineLoc) + num*sizeof(int)); + //!!!!!! printf("++++ cont-enter: %d, %p, %.80s\n", newEntry, objPtr, Tcl_GetString(objPtr)); if (!newEntry) { /* @@ -663,7 +665,7 @@ TclContinuationsEnterDerived( * better way which doesn't shimmer?) */ - TclGetStringFromObj(objPtr, &length); + Tcl_GetUtfFromObj(objPtr, &length); end = start + length; /* First char after the word */ /* @@ -1697,6 +1699,69 @@ Tcl_GetStringFromObj( return objPtr->bytes; } +/* + *---------------------------------------------------------------------- + * + * Tcl_GetUtfFromObj -- + * + * Returns the non-NTS utf-8 string representation's pointer and length + * for an object. + * + * Results: + * Returns a pointer to the unalterable string representation of objPtr, + * which in opposite to Tcl_GetStringFromObj (Tcl_GetString) is not + * guaranteed null-terminated string (NTS) string for example could be + * a part of some buffer (or code). + * + * Side effects: + * May call the object's updateStringProc to update the string + * representation from the internal representation. + * + *---------------------------------------------------------------------- + */ + +const char * +Tcl_GetUtfFromObj( + register Tcl_Obj *objPtr, /* Object whose string rep byte pointer should + * be returned. */ + register int *lengthPtr) /* The location where the length (in bytes) + * should be stored. */ +{ + const char *bytes; + + /* + * Prefer direct string rep, use type-related mechanisms to obtain it. + */ + if (objPtr->typePtr == &tclCodeSegmentType) { + *lengthPtr = (int)objPtr->internalRep.ptrAndLongRep.value; + return (const char *)objPtr->internalRep.ptrAndLongRep.ptr; + } + + if (objPtr->typePtr + && (objPtr->typePtr->updateStringProc == TclUpdateStringOfByteCode) + ) { + /* ByteCode */ + ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; + *lengthPtr = codePtr->numSrcBytes; + return codePtr->source; + } + + /* + * Already available. + */ + if ((bytes = objPtr->bytes)) { + *lengthPtr = objPtr->length; + return bytes; + } + + /* + * Fallback to retrieve string representation. + */ + bytes = Tcl_GetString(objPtr); + *lengthPtr = objPtr->length; + return bytes; +} + /* *---------------------------------------------------------------------- * diff --git a/generic/tclProc.c b/generic/tclProc.c index af76155c1bcb..1e639ab0f039 100644 --- a/generic/tclProc.c +++ b/generic/tclProc.c @@ -332,20 +332,7 @@ TclCreateProc( */ if (Tcl_IsShared(bodyPtr)) { - const char *bytes; - int length; - Tcl_Obj *sharedBodyPtr = bodyPtr; - - bytes = TclGetStringFromObj(bodyPtr, &length); - bodyPtr = Tcl_NewStringObj(bytes, length); - - /* - * TIP #280. - * Ensure that the continuation line data for the original body is - * not lost and applies to the new body as well. - */ - - TclContinuationsCopy(bodyPtr, sharedBodyPtr); + bodyPtr = TclCopyByteCodeObject(bodyPtr); } /* @@ -373,7 +360,7 @@ TclCreateProc( * in the Proc. */ - result = Tcl_ListObjGetElements(interp , argsPtr ,&numArgs ,&argArray); + result = Tcl_ListObjGetElements(interp, argsPtr, &numArgs, &argArray); if (result != TCL_OK) { goto procError; } From a9401c5ed0284dcf85bc7423a5ca5f8832527e7d Mon Sep 17 00:00:00 2001 From: sebres Date: Tue, 18 Jun 2019 20:08:05 +0200 Subject: [PATCH 04/16] review (small fixes after back-porting) --- generic/tclAssembly.c | 36 +---- generic/tclCompile.c | 328 ++++++++++++++++++++++++++++++------------ generic/tclExecute.c | 41 +----- generic/tclInt.h | 2 + generic/tclListObj.c | 2 +- generic/tclLiteral.c | 2 +- generic/tclOOMethod.c | 2 - generic/tclObj.c | 34 ++++- generic/tclProc.c | 4 +- 9 files changed, 275 insertions(+), 176 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index 2c7a94c50a89..3cb102b11856 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -279,7 +279,6 @@ static int CreateMirrorJumpTable(AssemblyEnv* assemEnvPtr, static int FindLocalVar(AssemblyEnv* envPtr, Tcl_Token** tokenPtrPtr); static int FinishAssembly(AssemblyEnv*); -static void FreeAssembleCodeInternalRep(Tcl_Obj *objPtr); static void FreeAssemblyEnv(AssemblyEnv*); static int GetBooleanOperand(AssemblyEnv*, Tcl_Token**, int*); static int GetListIndexOperand(AssemblyEnv*, Tcl_Token**, int*); @@ -319,7 +318,7 @@ static void UnstackExpiredCatches(CompileEnv*, BasicBlock*, int, static const Tcl_ObjType assembleCodeType = { "assemblecode", - FreeAssembleCodeInternalRep, /* freeIntRepProc */ + TclFreeByteCodeInternalRep, /* freeIntRepProc */ DupAssembleCodeInternalRep, /* dupIntRepProc */ NULL, /* updateStringProc */ NULL /* setFromAnyProc */ @@ -875,7 +874,7 @@ CompileAssembleObj( strSegPtr = codePtr->strSegPtr; strSegPtr->refCount++; - FreeAssembleCodeInternalRep(objPtr); + TclInvalidateByteCodeInternalRep(objPtr); } else { strSegPtr = TclGetStringSegmentFromObj(objPtr); strSegPtr->refCount++; @@ -4320,37 +4319,6 @@ DupAssembleCodeInternalRep( return; } -/* - *----------------------------------------------------------------------------- - * - * FreeAssembleCodeInternalRep -- - * - * Part of the Tcl object type implementation for Tcl expression - * bytecode. Frees the storage allocated to hold the internal rep, unless - * ref counts indicate bytecode execution is still in progress. - * - * Results: - * None. - * - * Side effects: - * May free allocated memory. Leaves objPtr untyped. - * - *----------------------------------------------------------------------------- - */ - -static void -FreeAssembleCodeInternalRep( - Tcl_Obj *objPtr) -{ - ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; - - codePtr->refCount--; - if (codePtr->refCount <= 0) { - TclCleanupByteCode(codePtr); - } - objPtr->typePtr = NULL; -} - /* * Local Variables: * mode: c diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 5fbb314803ca..e44ec1abac1b 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -676,8 +676,6 @@ static void EnterCmdExtentData(CompileEnv *envPtr, int cmdNumber, int numSrcBytes, int numCodeBytes); static void EnterCmdStartData(CompileEnv *envPtr, int cmdNumber, int srcOffset, int codeOffset); -static void FreeByteCodeInternalRep(Tcl_Obj *objPtr); -static void FreeSubstCodeInternalRep(Tcl_Obj *objPtr); static int GetCmdLocEncodingSize(CompileEnv *envPtr); static int IsCompactibleCompileEnv(Tcl_Interp *interp, CompileEnv *envPtr); @@ -705,7 +703,7 @@ static void ReleaseCmdWordData(ExtCmdLoc *eclPtr); const Tcl_ObjType tclByteCodeType = { "bytecode", /* name */ - FreeByteCodeInternalRep, /* freeIntRepProc */ + TclFreeByteCodeInternalRep, /* freeIntRepProc */ DupByteCodeInternalRep, /* dupIntRepProc */ TclUpdateStringOfByteCode, /* updateStringProc */ SetByteCodeFromAny /* setFromAnyProc */ @@ -718,7 +716,7 @@ const Tcl_ObjType tclByteCodeType = { const Tcl_ObjType tclSubstCodeType = { "substcode", /* name */ - FreeSubstCodeInternalRep, /* freeIntRepProc */ + TclFreeByteCodeInternalRep, /* freeIntRepProc */ DupByteCodeInternalRep, /* dupIntRepProc - shared with bytecode */ TclUpdateStringOfByteCode, /* updateStringProc */ NULL, /* setFromAnyProc */ @@ -738,7 +736,9 @@ void TclFreeStringSegment(StringSegment *strSegPtr) { char *bytes = strSegPtr->bytes.ptr; ckfree(strSegPtr); if (!parentPtr) { - ckfree(bytes); + if (bytes != tclEmptyStringRep) { + ckfree(bytes); + } break; } /* parent reference "recursively" */ @@ -768,38 +768,39 @@ DupCodeSegmentInternalRep( static inline void TclObtainObjStringSegmentBytes( register Tcl_Obj *objPtr, /* Object whose string rep to obtain. */ - StringSegment *strSegPtr, /* String segment to obtain bytes from. */ - size_t offset) /* Offset of string in segment. */ + const char *segBytes) /* String of segment to obtain bytes from. */ { /* segment bytes are still used - copy it */ char * bytes = ckalloc(objPtr->length + 1); - memcpy(bytes, TclGetStringSegmentBytes(strSegPtr) + offset, - objPtr->length); + memcpy(bytes, segBytes, objPtr->length); bytes[objPtr->length] = '\0'; objPtr->bytes = bytes; - /* objPtr->length = strSegPtr->length; */ +#if 0 /* *****todo**** remove this check */ if (objPtr->length > 1000) { - Tcl_Panic("unexpected codeSegment 2 string!!!"); + Tcl_Panic("unexpected codeSegment 2 string!!!"); } +#endif } static void -FreeCodeSegmentInternalRep( - register Tcl_Obj *objPtr) /* Object whose internal rep to free. */ +FreeStringSegmentInternalRep( + register Tcl_Obj *objPtr, /* Object whose internal rep loses segment. */ + StringSegment *strSegPtr, + size_t offset) /* String segment to dereference. */ { - StringSegment *strSegPtr = objPtr->internalRep.twoPtrValue.ptr1; - - objPtr->typePtr = NULL; - + const char *bytes = TclGetStringSegmentBytes(strSegPtr) + offset; /* * If string of object may be still needed, update it right now. + * Don't use TclObjBeingDeleted unless TclFreeObj uses special + * semantic to delete this type of objects: + * (typePtr == NULL) instead of (length == -1) */ - if (objPtr->refCount) { + if (objPtr->typePtr) { /* rather a type switch (shimmering), string rep may be needed */ - if (!objPtr->bytes || objPtr->bytes == strSegPtr->bytes.ptr) { + if (!offset && (!objPtr->bytes || objPtr->bytes == strSegPtr->bytes.ptr)) { /* only possible on root segment, so check if segment is not shared * obtain last reference and simply free segment */ if ( strSegPtr->refCount == 1 @@ -813,14 +814,25 @@ FreeCodeSegmentInternalRep( ckfree(strSegPtr); return; } - - /* segment bytes are still used - copy it */ - TclObtainObjStringSegmentBytes(objPtr, strSegPtr, - (size_t)objPtr->internalRep.twoPtrValue.ptr2); } + + /* segment bytes are still used (or gets dereferenced below) - copy it */ + if ( !objPtr->bytes + || (objPtr->bytes >= bytes && objPtr->bytes <= bytes + strSegPtr->length) + ) { + TclObtainObjStringSegmentBytes(objPtr, bytes); + } + + objPtr->typePtr = NULL; } else { - /* release object - we don't need string rep at all */ - if (objPtr->bytes == strSegPtr->bytes.ptr) { + /* + * Check if bytes points to the shared string segment area, in order to avoid + * calling of ckfree for this pointer. + */ + if (objPtr->bytes >= bytes + && objPtr->bytes <= bytes + strSegPtr->length + ) { + /* reset string rep - we don't need it at all */ objPtr->bytes = NULL; } } @@ -829,6 +841,16 @@ FreeCodeSegmentInternalRep( TclFreeStringSegment(strSegPtr); } +static void +FreeCodeSegmentInternalRep( + register Tcl_Obj *objPtr) /* Object whose internal rep to free. */ +{ + StringSegment *strSegPtr = objPtr->internalRep.twoPtrValue.ptr1; + + FreeStringSegmentInternalRep(objPtr, strSegPtr, + (size_t)objPtr->internalRep.twoPtrValue.ptr2); +} + /* *---------------------------------------------------------------------- * @@ -864,7 +886,7 @@ UpdateStringOfCodeSegment( objPtr->bytes = bytes; } else { /* obtain a copy to bytes (we need NTS for backwards compatibility) */ - TclObtainObjStringSegmentBytes(objPtr, strSegPtr, offset); + TclObtainObjStringSegmentBytes(objPtr, bytes); } } @@ -914,10 +936,7 @@ TclNewCodeSegmentObj( const char *segBytes = TclGetStringSegmentBytes(strSegPtr); offset = bytes - segBytes; if (offset) { - if (strSegPtr->parentPtr) { - strSegPtr = strSegPtr->parentPtr; - } - assert(offset < segBytes + strSegPtr->length); + assert(offset < (size_t)strSegPtr->length); } strSegPtr->refCount++; } else { @@ -964,27 +983,24 @@ TclUpdateStringOfByteCode( ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; StringSegment *strSegPtr = codePtr->strSegPtr; + char *bytes; + + if (!strSegPtr) { + Tcl_Panic("update string of byte code object without string segment"); + return; + } + bytes = TclGetStringSegmentBytes(strSegPtr); + /* * If we can obtain the source string from code-segment (whole bytes of the * top level code segment is NTS), use it directly (don't copy it). */ - if (csegPtr && !csegPtr->parentPtr) { - /* use it directly as reference (as long as object retains codeSegment) */ - objPtr->bytes = csegPtr->bytes; - objPtr->length = csegPtr->length; - } else { - /* use it directly as reference (as long as object retains codeSegment) */ - char * bytes = ckalloc(codePtr->numSrcBytes + 1); - - memcpy(bytes, codePtr->source, codePtr->numSrcBytes); - bytes[codePtr->numSrcBytes] = '\0'; + if (!bytes[strSegPtr->length]) { + /* use it directly (as long as object retains segment reference) */ objPtr->bytes = bytes; - objPtr->length = codePtr->numSrcBytes; - - /* *****todo**** remove this check */ - if (objPtr->length > 1000) { - Tcl_Panic("unexpected byteCode 2 string!!!"); - } + } else { + /* obtain a copy to bytes (we need NTS for backwards compatibility) */ + TclObtainObjStringSegmentBytes(objPtr, bytes); } } @@ -1032,24 +1048,48 @@ StringSegment * TclGetStringSegmentFromObj( Tcl_Obj *objPtr) { - if (objPtr->typePtr == &tclCodeSegmentType) { - /* CodeSegment */ + const Tcl_ObjType *typePtr = objPtr->typePtr; + + if (typePtr == &tclCodeSegmentType) { + /* CodeSegment */ return (StringSegment *)objPtr->internalRep.twoPtrValue.ptr1; } - if (objPtr->typePtr - && (objPtr->typePtr->updateStringProc == TclUpdateStringOfByteCode) - ) { + if (!objPtr->typePtr) { + StringSegment *strSegPtr; + +wrapObj: + /* No type, wrap it to tclCodeSegmentType*/ + strSegPtr = ckalloc(sizeof(StringSegment)); + + strSegPtr->refCount = 1; + strSegPtr->parentPtr = NULL; + strSegPtr->bytes.ptr = (char *)objPtr->bytes; + strSegPtr->length = objPtr->length; + strSegPtr->line = 0; + + objPtr->typePtr = &tclCodeSegmentType; + objPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; + objPtr->internalRep.twoPtrValue.ptr2 = (void *)0; /* offset is 0 */ + return strSegPtr; + } + + if (typePtr->updateStringProc == TclUpdateStringOfByteCode) { /* ByteCode */ ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; - return codePtr->strSegPtr; + if (codePtr->strSegPtr) { + return codePtr->strSegPtr; + } } +#if 1 + TclFreeIntRep(objPtr); + goto wrapObj; +#else /* *****todo**** rewrite this - wrap obj to a string segment or return new (rather impossible because of bytes sharing & offsets) */ - if (objPtr->length > 1000) { - Tcl_Panic("unexpected, TclGetStringSegmentFromObj not yet implemented for %s!!!", objPtr->typePtr ? objPtr->typePtr->name : "NONE"); - } + Tcl_Panic("unexpected, TclGetStringSegmentFromObj not yet implemented for %s!!!", objPtr->typePtr ? objPtr->typePtr->name : "NONE"); return NULL; +#endif } /* @@ -1106,6 +1146,11 @@ TclSetByteCodeFromAny( stringPtr = Tcl_GetUtfFromObj(objPtr, &length); + if (objPtr->typePtr && objPtr->typePtr->updateStringProc == TclUpdateStringOfByteCode) { + /* already byte code - invalidate it (switch to string segment and free code) */ + TclInvalidateByteCodeInternalRep(objPtr); + } + /* * TIP #280: Pick up the CmdFrame in which the BC compiler was invoked and * use to initialize the tracking in the compiler. This information was @@ -1253,7 +1298,7 @@ SetByteCodeFromAny( * * Part of the bytecode Tcl object type implementation. However, it does * not copy the internal representation of a bytecode Tcl_Obj, but - * instead leaves the new object untyped (with a NULL type pointer). + * instead tries to set the type of new object to tclCodeSegmentType. * Code will be compiled for the new object only if necessary. * * Results: @@ -1270,13 +1315,70 @@ DupByteCodeInternalRep( Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ Tcl_Obj *copyPtr) /* Object with internal rep to set. */ { - return; + register ByteCode *codePtr = srcPtr->internalRep.twoPtrValue.ptr1; + StringSegment *strSegPtr; + + if ((strSegPtr = codePtr->strSegPtr)) { + const char *bytes = TclGetStringSegmentBytes(strSegPtr); + size_t offset = codePtr->source - bytes; + + strSegPtr->refCount++; + copyPtr->typePtr = &tclCodeSegmentType; + copyPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; + copyPtr->internalRep.twoPtrValue.ptr2 = (void *)offset; + copyPtr->length = codePtr->numSrcBytes; + return; + } +} + +/* + *---------------------------------------------------------------------- + * + * TclInvalidateByteCodeInternalRep -- + * + * Part of the bytecode Tcl object type implementation. Invalidates rep + * associated with a bytecode object. + * + * Results: + * None. + * + * Side effects: + * The bytecode object's internal rep is marked invalid (actually it + * switches the internal representation to code segment type). + * + *---------------------------------------------------------------------- + */ + +void +TclInvalidateByteCodeInternalRep( + register Tcl_Obj *objPtr) /* Object whose internal rep to free. */ +{ + register ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; + StringSegment *strSegPtr; + + objPtr->typePtr = NULL; + + if ((strSegPtr = codePtr->strSegPtr)) { + const char *bytes = TclGetStringSegmentBytes(strSegPtr); + size_t offset = codePtr->source - bytes; + + codePtr->strSegPtr = NULL; /* obtain reference to string segment */ + objPtr->typePtr = &tclCodeSegmentType; + objPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; + objPtr->internalRep.twoPtrValue.ptr2 = (void *)offset; + objPtr->length = codePtr->numSrcBytes; + } + + /* free byte code internal representation */ + if (codePtr->refCount-- <= 1) { + TclCleanupByteCode(codePtr); + } } /* *---------------------------------------------------------------------- * - * FreeByteCodeInternalRep -- + * TclFreeByteCodeInternalRep -- * * Part of the bytecode Tcl object type implementation. Frees the storage * associated with a bytecode object's internal representation unless its @@ -1293,13 +1395,43 @@ DupByteCodeInternalRep( *---------------------------------------------------------------------- */ -static void -FreeByteCodeInternalRep( +void +TclFreeByteCodeInternalRep( register Tcl_Obj *objPtr) /* Object whose internal rep to free. */ { register ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; + StringSegment *strSegPtr = codePtr->strSegPtr; - objPtr->typePtr = NULL; + /* + * If string of object may be still needed, update it right now. + * Don't use TclObjBeingDeleted unless TclFreeObj uses special + * semantic to delete this type of objects: + * (typePtr == NULL) instead of (length == -1) + */ + if (objPtr->typePtr) { + /* object seems to switch its representation (not a free) */ + + codePtr->strSegPtr = NULL; /* obtain reference to string segment */ + if (strSegPtr) { + FreeStringSegmentInternalRep(objPtr, strSegPtr, 0); + } + + objPtr->typePtr = NULL; + } else if (objPtr->bytes) { + /* check string rep is shared with a string segment */ + if (strSegPtr) { + const char *bytes = TclGetStringSegmentBytes(strSegPtr); + + if ( !objPtr->bytes + || (objPtr->bytes >= bytes && objPtr->bytes <= bytes + strSegPtr->length) + ) { + /* shared - simply reset it */ + objPtr->bytes = NULL; + } + } + } + + /* free byte code internal representation */ if (codePtr->refCount-- <= 1) { TclCleanupByteCode(codePtr); } @@ -1441,6 +1573,10 @@ TclCleanupByteCode( } TclHandleRelease(codePtr->interpHandle); + + if (codePtr->strSegPtr) { + TclFreeStringSegment(codePtr->strSegPtr); + } /* Correct code pointer to free */ if (bcLI) { @@ -1626,7 +1762,7 @@ CompileSubstObj( || (codePtr->nsEpoch != nsPtr->resolverEpoch) || (codePtr->localCachePtr != iPtr->varFramePtr->localCachePtr)) { - FreeSubstCodeInternalRep(objPtr); + TclInvalidateByteCodeInternalRep(objPtr); } } if (objPtr->typePtr != &tclSubstCodeType) { @@ -1663,38 +1799,6 @@ CompileSubstObj( return codePtr; } -/* - *---------------------------------------------------------------------- - * - * FreeSubstCodeInternalRep -- - * - * Part of the substcode Tcl object type implementation. Frees the - * storage associated with a substcode object's internal representation - * unless its code is actively being executed. - * - * Results: - * None. - * - * Side effects: - * The substcode object's internal rep is marked invalid and its code - * gets freed unless the code is actively being executed. In that case - * the cleanup is delayed until the last execution of the code completes. - * - *---------------------------------------------------------------------- - */ - -static void -FreeSubstCodeInternalRep( - register Tcl_Obj *objPtr) /* Object whose internal rep to free. */ -{ - register ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; - - objPtr->typePtr = NULL; - if (codePtr->refCount-- <= 1) { - TclCleanupByteCode(codePtr); - } -} - static void ReleaseCmdWordData( ExtCmdLoc *eclPtr) @@ -3228,9 +3332,43 @@ TclInitByteCodeObj( * by making its internal rep point to the just compiled ByteCode. */ - TclFreeIntRep(objPtr); + codePtr->strSegPtr = NULL; + if (objPtr->typePtr != &tclCodeSegmentType) { + TclFreeIntRep(objPtr); + } else { + size_t offset; + + codePtr->strSegPtr = objPtr->internalRep.twoPtrValue.ptr1; + offset = (size_t)objPtr->internalRep.twoPtrValue.ptr2; + /* if offset is not 0 - create new part of segment */ + if (offset) { + StringSegment *strSegPtr = ckalloc(sizeof(StringSegment)); + + strSegPtr->refCount = 1; + strSegPtr->parentPtr = codePtr->strSegPtr; + /* hold it simple referenced (only one parent deeply) */ + if (strSegPtr->parentPtr->parentPtr) { + offset += strSegPtr->parentPtr->bytes.offset; + strSegPtr->parentPtr = strSegPtr->parentPtr->parentPtr; + } + strSegPtr->parentPtr->refCount++; + strSegPtr->bytes.offset = offset; + strSegPtr->length = objPtr->length; + strSegPtr->line = 0; + + codePtr->strSegPtr = strSegPtr; + objPtr->bytes = NULL; + } + } objPtr->internalRep.twoPtrValue.ptr1 = codePtr; objPtr->typePtr = &tclByteCodeType; + if (!codePtr->strSegPtr) { + codePtr->strSegPtr = envPtr->strSegPtr; + if (codePtr->strSegPtr) { + codePtr->strSegPtr->refCount++; + objPtr->length = codePtr->strSegPtr->length; + } + } /* * TIP #280. Associate the extended per-word line information with the diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 9d3690345bb4..57e9ec19791d 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -751,7 +751,6 @@ static Tcl_Obj * ExecuteExtendedBinaryMathOp(Tcl_Interp *interp, Tcl_Obj *valuePtr, Tcl_Obj *value2Ptr); static Tcl_Obj * ExecuteExtendedUnaryMathOp(int opcode, Tcl_Obj *valuePtr); -static void FreeExprCodeInternalRep(Tcl_Obj *objPtr); static ExceptionRange * GetExceptRangeForPc(const unsigned char *pc, int searchMode, ByteCode *codePtr); static const char * GetSrcInfoForPc(const unsigned char *pc, @@ -780,7 +779,7 @@ static Tcl_NRPostProc TEBCresume; const Tcl_ObjType tclExprCodeType = { "exprcode", - FreeExprCodeInternalRep, /* freeIntRepProc */ + TclFreeByteCodeInternalRep, /* freeIntRepProc */ DupExprCodeInternalRep, /* dupIntRepProc */ TclUpdateStringOfByteCode, /* updateStringProc */ NULL /* setFromAnyProc */ @@ -1533,7 +1532,7 @@ CompileExprObj( || (codePtr->nsPtr != namespacePtr) || (codePtr->nsEpoch != namespacePtr->resolverEpoch) || (codePtr->localCachePtr != iPtr->varFramePtr->localCachePtr)) { - FreeExprCodeInternalRep(objPtr); + TclInvalidateByteCodeInternalRep(objPtr); } } if (objPtr->typePtr != &tclExprCodeType) { @@ -1547,7 +1546,7 @@ CompileExprObj( TclInitCompileEnv(interp, &compEnv, string, length, NULL, 0); compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr); compEnv.strSegPtr->refCount++; - + TclCompileExpr(interp, string, length, &compEnv, 0); /* @@ -1620,36 +1619,6 @@ DupExprCodeInternalRep( return; } -/* - *---------------------------------------------------------------------- - * - * FreeExprCodeInternalRep -- - * - * Part of the Tcl object type implementation for Tcl expression - * bytecode. Frees the storage allocated to hold the internal rep, unless - * ref counts indicate bytecode execution is still in progress. - * - * Results: - * None. - * - * Side effects: - * May free allocated memory. Leaves objPtr untyped. - * - *---------------------------------------------------------------------- - */ - -static void -FreeExprCodeInternalRep( - Tcl_Obj *objPtr) -{ - ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; - - objPtr->typePtr = NULL; - if (codePtr->refCount-- <= 1) { - TclCleanupByteCode(codePtr); - } -} - /* *---------------------------------------------------------------------- * @@ -8146,9 +8115,7 @@ TEBCresume( } iPtr->cmdFramePtr = bcFramePtr->nextPtr; - if (codePtr->refCount-- <= 1) { - TclCleanupByteCode(codePtr); - } + TclReleaseByteCode(codePtr); TclStackFree(interp, TD); /* free my stack */ return result; diff --git a/generic/tclInt.h b/generic/tclInt.h index 8414b1ca94ad..dc9d563d14b3 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3301,6 +3301,8 @@ MODULE_SCOPE int Tcl_DisassembleObjCmd(ClientData clientData, Tcl_Obj *const objv[]); MODULE_SCOPE const char * Tcl_GetUtfFromObj(Tcl_Obj *objPtr, int *lengthPtr); +MODULE_SCOPE void TclFreeByteCodeInternalRep(Tcl_Obj *objPtr); +MODULE_SCOPE void TclInvalidateByteCodeInternalRep(Tcl_Obj *objPtr); MODULE_SCOPE void TclUpdateStringOfByteCode(Tcl_Obj *objPtr); MODULE_SCOPE Tcl_Obj * TclCopyByteCodeObject(Tcl_Obj *objPtr); diff --git a/generic/tclListObj.c b/generic/tclListObj.c index 6eb6780ff3bb..10a5412d2d9e 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -1846,7 +1846,7 @@ SetListFromAny( } } else { int estCount, length; - const char *limit, *nextElem = TclGetStringFromObj(objPtr, &length); + const char *limit, *nextElem = Tcl_GetUtfFromObj(objPtr, &length); /* * Allocate enough space to hold a (Tcl_Obj *) for each diff --git a/generic/tclLiteral.c b/generic/tclLiteral.c index d716bc3659c8..f0d9fd639db5 100644 --- a/generic/tclLiteral.c +++ b/generic/tclLiteral.c @@ -988,7 +988,7 @@ RebuildLiteralTable( for (oldChainPtr=oldBuckets ; oldSize>0 ; oldSize--,oldChainPtr++) { for (entryPtr=*oldChainPtr ; entryPtr!=NULL ; entryPtr=*oldChainPtr) { - bytes = TclGetStringFromObj(entryPtr->objPtr, &length); + bytes = Tcl_GetUtfFromObj(entryPtr->objPtr, &length); index = (HashString(bytes, length) & tablePtr->mask); *oldChainPtr = entryPtr->nextPtr; diff --git a/generic/tclOOMethod.c b/generic/tclOOMethod.c index 08d58c1e1971..219b2d0db796 100644 --- a/generic/tclOOMethod.c +++ b/generic/tclOOMethod.c @@ -1194,8 +1194,6 @@ CloneProcedureMethod( */ bodyObj = Tcl_DuplicateObj(pmPtr->procPtr->bodyPtr); - Tcl_GetString(bodyObj); - TclFreeIntRep(bodyObj); /* * Create the actual copy of the method record, manufacturing a new proc diff --git a/generic/tclObj.c b/generic/tclObj.c index cbc305fcb345..3376a7ba8ad1 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -18,6 +18,7 @@ #include "tclCompile.h" #include "tommath.h" #include +#include /* * Table of all object types. @@ -1420,8 +1421,27 @@ void TclFreeObj( register Tcl_Obj *objPtr) /* The object to be freed. */ { + const Tcl_ObjType *typePtr = objPtr->typePtr; + + if (objPtr->length == 40) { + int i = 0; + i++; + } /* - * Invalidate the string rep first so we can use the bytes value for our + * Firstly check special cases where string rep could be shared with + * objects internal representation, + */ + if ( typePtr == &tclCodeSegmentType + || (typePtr && typePtr->freeIntRepProc == TclFreeByteCodeInternalRep) + ) { + /* signal the object will be deleted and free it */ + objPtr->typePtr = NULL; + typePtr->freeIntRepProc(objPtr); + typePtr = NULL; + } + + /* + * Then invalidate the string rep so we can use the bytes value for our * pointer chain, and signal an obj deletion (as opposed to shimmering) * with 'length == -1'. */ @@ -1429,7 +1449,7 @@ TclFreeObj( TclInvalidateStringRep(objPtr); objPtr->length = -1; - if (!objPtr->typePtr || !objPtr->typePtr->freeIntRepProc) { + if (!typePtr || !typePtr->freeIntRepProc) { /* * objPtr can be freed safely, as it will not attempt to free any * other objects: it will not cause recursive calls to this function. @@ -1634,6 +1654,9 @@ Tcl_GetString( return objPtr->bytes; } + /* obscure case no string rep and no object type. */ + assert(objPtr->typePtr != NULL); + /* * Note we do not check for objPtr->typePtr == NULL. An invariant of * a properly maintained Tcl_Obj is that at least one of objPtr->bytes @@ -1733,8 +1756,11 @@ Tcl_GetUtfFromObj( * Prefer direct string rep, use type-related mechanisms to obtain it. */ if (objPtr->typePtr == &tclCodeSegmentType) { - *lengthPtr = (int)objPtr->internalRep.ptrAndLongRep.value; - return (const char *)objPtr->internalRep.ptrAndLongRep.ptr; + StringSegment *strSegPtr = objPtr->internalRep.twoPtrValue.ptr1; + size_t offset = (size_t)objPtr->internalRep.twoPtrValue.ptr2; + *lengthPtr = (int)objPtr->length; + bytes = (const char *)TclGetStringSegmentBytes(strSegPtr) + offset; + return bytes; } if (objPtr->typePtr diff --git a/generic/tclProc.c b/generic/tclProc.c index 1e639ab0f039..952e0b8c35c0 100644 --- a/generic/tclProc.c +++ b/generic/tclProc.c @@ -241,7 +241,7 @@ Tcl_ProcObjCmd( * The argument list is just "args"; check the body */ - procBody = Tcl_GetStringFromObj(objv[3], &numBytes); + procBody = Tcl_GetUtfFromObj(objv[3], &numBytes); if (TclParseAllWhiteSpace(procBody, numBytes) < numBytes) { goto done; } @@ -1833,7 +1833,7 @@ TclProcCompileProc( codePtr->compileEpoch = iPtr->compileEpoch; codePtr->nsPtr = nsPtr; } else { - TclFreeIntRep(bodyPtr); + TclInvalidateByteCodeInternalRep(bodyPtr); } } From e6f9b97c495024f2fa16671ed9dc17141135265b Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 24 Jun 2019 13:40:17 +0200 Subject: [PATCH 05/16] fixed wrapping to the code segment object (info.test) --- generic/tclCompile.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/generic/tclCompile.c b/generic/tclCompile.c index e44ec1abac1b..b0ac2b119482 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -932,6 +932,7 @@ TclNewCodeSegmentObj( TclNewObj(objPtr); + assert(bytes != NULL); if (strSegPtr) { const char *segBytes = TclGetStringSegmentBytes(strSegPtr); offset = bytes - segBytes; @@ -1060,6 +1061,7 @@ TclGetStringSegmentFromObj( wrapObj: /* No type, wrap it to tclCodeSegmentType*/ + assert(objPtr->bytes != NULL); strSegPtr = ckalloc(sizeof(StringSegment)); strSegPtr->refCount = 1; @@ -1083,6 +1085,7 @@ TclGetStringSegmentFromObj( } #if 1 + TclGetString(objPtr); TclFreeIntRep(objPtr); goto wrapObj; #else From 58c790645ee6bfc1222e8a03dfbfb5b13924c952 Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 24 Jun 2019 16:25:57 +0200 Subject: [PATCH 06/16] introduced new function to check obj has bytes (now also if obj->bytes is NULL), fixed info.test --- generic/tclBasic.c | 2 +- generic/tclInt.h | 1 + generic/tclObj.c | 11 +++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index d5ec2881061a..7f7fb78523a4 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -5828,7 +5828,7 @@ TclArgumentGet( * up by the caller. It knows better than us. */ - if ((obj->bytes == NULL) || TclListObjIsCanonical(obj)) { + if (!Tcl_ObjHasBytes(obj) || TclListObjIsCanonical(obj)) { return; } diff --git a/generic/tclInt.h b/generic/tclInt.h index dc9d563d14b3..91bdbfe87e54 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3301,6 +3301,7 @@ MODULE_SCOPE int Tcl_DisassembleObjCmd(ClientData clientData, Tcl_Obj *const objv[]); MODULE_SCOPE const char * Tcl_GetUtfFromObj(Tcl_Obj *objPtr, int *lengthPtr); +MODULE_SCOPE int Tcl_ObjHasBytes(Tcl_Obj *objPtr); MODULE_SCOPE void TclFreeByteCodeInternalRep(Tcl_Obj *objPtr); MODULE_SCOPE void TclInvalidateByteCodeInternalRep(Tcl_Obj *objPtr); MODULE_SCOPE void TclUpdateStringOfByteCode(Tcl_Obj *objPtr); diff --git a/generic/tclObj.c b/generic/tclObj.c index 3376a7ba8ad1..caabf3fc3cb3 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -1722,6 +1722,17 @@ Tcl_GetStringFromObj( return objPtr->bytes; } +int +Tcl_ObjHasBytes( + Tcl_Obj *objPtr) +{ + return ( + objPtr->bytes + || (objPtr->typePtr == &tclCodeSegmentType) + || (objPtr->typePtr && objPtr->typePtr->updateStringProc == TclUpdateStringOfByteCode) + ); +} + /* *---------------------------------------------------------------------- * From 179e582a8d1a34105e69850dcbf13974c9d3bd5b Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 24 Jun 2019 21:47:30 +0200 Subject: [PATCH 07/16] fixed encoding.test (allow sharing of small literals) --- generic/tclCompile.c | 2 +- generic/tclCompile.h | 4 +-- generic/tclLiteral.c | 72 +++++++++++++++++++++++++++++++++----------- 3 files changed, 58 insertions(+), 20 deletions(-) diff --git a/generic/tclCompile.c b/generic/tclCompile.c index b0ac2b119482..a287c3b889ff 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -1090,7 +1090,7 @@ TclGetStringSegmentFromObj( goto wrapObj; #else /* *****todo**** rewrite this - wrap obj to a string segment or return new (rather impossible because of bytes sharing & offsets) */ - Tcl_Panic("unexpected, TclGetStringSegmentFromObj not yet implemented for %s!!!", objPtr->typePtr ? objPtr->typePtr->name : "NONE"); + Tcl_Panic("unexpected, TclGetStringSegmentFromObj not yet implemented for %s: %.80s!!!", objPtr->typePtr ? objPtr->typePtr->name : "NONE", TclGetString(objPtr)); return NULL; #endif } diff --git a/generic/tclCompile.h b/generic/tclCompile.h index b5dcab49fa78..bc4e87c90f59 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -1275,6 +1275,7 @@ MODULE_SCOPE int TclPushProcCallFrame(ClientData clientData, #define LITERAL_ON_HEAP 0x01 #define LITERAL_CMD_NAME 0x02 #define LITERAL_UNSHARED 0x04 +#define LITERAL_CODE_SEGMENT 0x08 /* * Form of TclRegisterLiteral with flags == 0. In that case, it is safe to @@ -1300,8 +1301,7 @@ MODULE_SCOPE int TclPushProcCallFrame(ClientData clientData, TclRegisterLiteral(envPtr, (char *)(bytes), length, LITERAL_CMD_NAME) #define TclRegisterCodeSegmentLiteral(envPtr, bytes, length) \ - TclAddLiteralObj((envPtr), \ - TclNewCodeSegmentObj(envPtr->strSegPtr, (bytes), (length)), 0) + TclRegisterLiteral(envPtr, (char *)(bytes), length, LITERAL_CODE_SEGMENT) /* * Macro used to manually adjust the stack requirements; used in cases where diff --git a/generic/tclLiteral.c b/generic/tclLiteral.c index f0d9fd639db5..b8d297c3f4dd 100644 --- a/generic/tclLiteral.c +++ b/generic/tclLiteral.c @@ -172,8 +172,9 @@ TclDeleteLiteralTable( */ Tcl_Obj * -TclCreateLiteral( +TclCreateLiteralEx( Interp *iPtr, + CompileEnv *envPtr, char *bytes, /* The start of the string. Note that this is * not a NUL-terminated string. */ int length, /* Number of bytes in the string. */ @@ -237,12 +238,16 @@ TclCreateLiteral( * table. */ - TclNewObj(objPtr); - if ((flags & LITERAL_ON_HEAP)) { - objPtr->bytes = bytes; - objPtr->length = length; + if (envPtr && (flags & LITERAL_CODE_SEGMENT)) { + objPtr = TclNewCodeSegmentObj(envPtr->strSegPtr, bytes, length); } else { - TclInitStringRep(objPtr, bytes, length); + TclNewObj(objPtr); + if ((flags & LITERAL_ON_HEAP)) { + objPtr->bytes = bytes; + objPtr->length = length; + } else { + TclInitStringRep(objPtr, bytes, length); + } } if ((flags & LITERAL_UNSHARED)) { @@ -315,6 +320,23 @@ TclCreateLiteral( *newPtr = 1; return objPtr; } + +Tcl_Obj * +TclCreateLiteral( + Interp *iPtr, + char *bytes, /* The start of the string. Note that this is + * not a NUL-terminated string. */ + int length, /* Number of bytes in the string. */ + unsigned hash, /* The string's hash. If -1, it will be + * computed here. */ + int *newPtr, + Namespace *nsPtr, + int flags, + LiteralEntry **globalPtrPtr) +{ + return TclCreateLiteralEx(iPtr, NULL, bytes, length, hash, newPtr, nsPtr, + flags, globalPtrPtr); +} /* *---------------------------------------------------------------------- @@ -408,10 +430,11 @@ TclRegisterLiteral( localHash = (hash & localTablePtr->mask); for (localPtr=localTablePtr->buckets[localHash] ; localPtr!=NULL; localPtr = localPtr->nextPtr) { - objPtr = localPtr->objPtr; - if ((objPtr->length == length) && ((length == 0) - || ((objPtr->bytes[0] == bytes[0]) - && (memcmp(objPtr->bytes, bytes, (unsigned) length) == 0)))) { + int objLength; + const char *objBytes = Tcl_GetUtfFromObj(localPtr->objPtr, &objLength); + if ((objLength == length) && ((length == 0) + || ((objBytes[0] == bytes[0]) + && (memcmp(objBytes, bytes, (unsigned) length) == 0)))) { if ((flags & LITERAL_ON_HEAP)) { ckfree(bytes); } @@ -419,16 +442,22 @@ TclRegisterLiteral( #ifdef TCL_COMPILE_DEBUG TclVerifyLocalLiteralTable(envPtr); #endif /*TCL_COMPILE_DEBUG*/ - +#if 0 + if (length < 10) { + printf("*****found**** %.*s for %.*s ... %d\n", objLength, objLength ? objBytes : "", length, length ? bytes : "", objIndex); + } +#endif return objIndex; } } /* - * The literal is new to this CompileEnv. If it is a command name, avoid - * sharing it accross namespaces, and try not to share it with non-cmd - * literals. Note that FQ command names can be shared, so that we register - * the namespace as the interp's global NS. + * The literal is new to this CompileEnv. + * If it is a code segment, avoid sharing if possible (could belong to + * different root code segments). + * If it is a command name, avoid sharing it accross namespaces, and try + * not to share it with non-cmd literals. Note that FQ command names can + * be shared, so that we register the namespace as the interp's global NS. */ if ((flags & LITERAL_CMD_NAME)) { @@ -446,8 +475,8 @@ TclRegisterLiteral( */ globalPtr = NULL; - objPtr = TclCreateLiteral(iPtr, bytes, length, hash, &new, nsPtr, flags, - &globalPtr); + objPtr = TclCreateLiteralEx(iPtr, envPtr, bytes, length, hash, &new, nsPtr, + flags, &globalPtr); objIndex = AddLocalLiteralEntry(envPtr, objPtr, localHash); #ifdef TCL_COMPILE_DEBUG @@ -617,6 +646,15 @@ TclAddLiteralObj( *litPtrPtr = lPtr; } +#if 0 + if (1) { + int length; + const char * bytes = Tcl_GetUtfFromObj(objPtr, &length); + if (length < 10) { + printf("*****ad+lt**** %.*s ... %d\n", length, length ? bytes : "", objIndex); + } + } +#endif return objIndex; } From 541127576f9e90e17eb7471705b8ce3ecf182fbf Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 16 Sep 2019 21:25:02 +0200 Subject: [PATCH 08/16] fixed compile lambda and CompileSubstObj - share segment of compiled object in compEnv.strSegPtr (env.test, exec.test) --- generic/tclCompile.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/generic/tclCompile.c b/generic/tclCompile.c index a287c3b889ff..99b54bf75a6e 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -800,7 +800,9 @@ FreeStringSegmentInternalRep( */ if (objPtr->typePtr) { /* rather a type switch (shimmering), string rep may be needed */ - if (!offset && (!objPtr->bytes || objPtr->bytes == strSegPtr->bytes.ptr)) { + if (!offset && (!objPtr->bytes || objPtr->bytes == strSegPtr->bytes.ptr) + && !strSegPtr->bytes.ptr[strSegPtr->length] + ) { /* only possible on root segment, so check if segment is not shared * obtain last reference and simply free segment */ if ( strSegPtr->refCount == 1 @@ -1775,11 +1777,11 @@ CompileSubstObj( /* TODO: Check for more TIP 280 */ TclInitCompileEnv(interp, &compEnv, bytes, numBytes, NULL, 0); - - TclSubstCompile(interp, bytes, numBytes, flags, 1, &compEnv); compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr); compEnv.strSegPtr->refCount++; + TclSubstCompile(interp, bytes, numBytes, flags, 1, &compEnv); + TclEmitOpcode(INST_DONE, &compEnv); TclInitByteCodeObj(objPtr, &compEnv); objPtr->typePtr = &tclSubstCodeType; From fa49025ca2faf45164f569918db303f72d9be646 Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 18 Sep 2019 13:34:17 +0200 Subject: [PATCH 09/16] TclContinuationsEnter/TclContinuationsGet rewritten without cont-line table (replacement of TIP 530 applied), still shimmering problem with list objects becoming continuation (amend expected); extend List to avoid possible shimmering issues on invisible continuations. --- generic/tclAssembly.c | 2 +- generic/tclCompile.c | 129 +++++++++++++++++++++++++++++------- generic/tclCompile.h | 24 ------- generic/tclExecute.c | 2 +- generic/tclInt.h | 36 ++++++++++ generic/tclListObj.c | 12 ++++ generic/tclLiteral.c | 3 +- generic/tclObj.c | 149 ++++++++++-------------------------------- generic/tclParse.c | 8 +++ generic/tclResult.c | 7 +- 10 files changed, 208 insertions(+), 164 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index 3cb102b11856..09ad569d0a3b 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -876,7 +876,7 @@ CompileAssembleObj( strSegPtr->refCount++; TclInvalidateByteCodeInternalRep(objPtr); } else { - strSegPtr = TclGetStringSegmentFromObj(objPtr); + strSegPtr = TclGetStringSegmentFromObj(objPtr, 0); strSegPtr->refCount++; } diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 99b54bf75a6e..af30f746629d 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -734,9 +734,12 @@ void TclFreeStringSegment(StringSegment *strSegPtr) { while (strSegPtr->refCount-- <= 1) { StringSegment *parentPtr = strSegPtr->parentPtr; char *bytes = strSegPtr->bytes.ptr; + if (strSegPtr->clLocPtr) { + ckfree(strSegPtr->clLocPtr); + } ckfree(strSegPtr); if (!parentPtr) { - if (bytes != tclEmptyStringRep) { + if (bytes && bytes != tclEmptyStringRep) { ckfree(bytes); } break; @@ -800,8 +803,9 @@ FreeStringSegmentInternalRep( */ if (objPtr->typePtr) { /* rather a type switch (shimmering), string rep may be needed */ - if (!offset && (!objPtr->bytes || objPtr->bytes == strSegPtr->bytes.ptr) - && !strSegPtr->bytes.ptr[strSegPtr->length] + if (!offset && !strSegPtr->parentPtr + && (!objPtr->bytes || objPtr->bytes == bytes) + && !bytes[strSegPtr->length] ) { /* only possible on root segment, so check if segment is not shared * obtain last reference and simply free segment */ @@ -813,13 +817,13 @@ FreeStringSegmentInternalRep( * or the offset by equal lengths and bytes (pointers) */ objPtr->bytes = strSegPtr->bytes.ptr; - ckfree(strSegPtr); - return; + strSegPtr->bytes.ptr = NULL; + goto freeSeg; } } /* segment bytes are still used (or gets dereferenced below) - copy it */ - if ( !objPtr->bytes + if ( !objPtr->bytes || offset || (objPtr->bytes >= bytes && objPtr->bytes <= bytes + strSegPtr->length) ) { TclObtainObjStringSegmentBytes(objPtr, bytes); @@ -840,6 +844,7 @@ FreeStringSegmentInternalRep( } /* free reference(s) */ + freeSeg: TclFreeStringSegment(strSegPtr); } @@ -925,33 +930,72 @@ TclNewCodeSegmentObj( StringSegment *strSegPtr, /* Points to string segment to share. */ const char *bytes, /* Points to the first of the length bytes * used to initialize the new object. */ - unsigned long length) /* The length (in bytes) of "bytes" string + unsigned long length, /* The length (in bytes) of "bytes" string * when initializing the new object. */ + int flags) /* Flags of segment creation. */ { - register Tcl_Obj *objPtr; + Tcl_Obj *objPtr; size_t offset = 0; TclNewObj(objPtr); + objPtr->bytes = NULL; + assert(bytes != NULL); if (strSegPtr) { const char *segBytes = TclGetStringSegmentBytes(strSegPtr); + /* check bytes is included in segment (duplicate on demand only) */ + if ( (flags & TCLSEG_DUP_STRREP) + && (bytes < segBytes || bytes + length > segBytes + strSegPtr->length) + ) { + /* outside of parent/root segment */ + goto dupStrRep; + } + assert(bytes >= segBytes); offset = bytes - segBytes; if (offset) { - assert(offset < (size_t)strSegPtr->length); + assert(offset < (size_t)strSegPtr->length); } + /* object would share parent segment (directly or indirectly) */ strSegPtr->refCount++; + /* if fully included requested, check it (duplicate segment on demand) */ + if ( (flags & TCLSEG_FULL_SEGREP) + && (offset || length != strSegPtr->length) + ) { + /* not fully included - duplicate reference to parent */ + StringSegment *parSegPtr = strSegPtr; + + strSegPtr = ckalloc(sizeof(StringSegment)); + strSegPtr->refCount = 1; + strSegPtr->parentPtr = parSegPtr; /* refCount already incremented */ + strSegPtr->bytes.offset = offset; + strSegPtr->length = length; + strSegPtr->line = 0; + strSegPtr->clLocPtr = NULL; + offset = 0; + } } else { + if (flags & TCLSEG_DUP_STRREP) { + char *newBytes; + + dupStrRep: + newBytes = ckalloc(length+1); + memcpy(newBytes, bytes, length); + newBytes[length] = '\0'; + bytes = newBytes; + objPtr->bytes = newBytes; + } + strSegPtr = ckalloc(sizeof(StringSegment)); strSegPtr->refCount = 1; strSegPtr->parentPtr = NULL; strSegPtr->bytes.ptr = (char *)bytes; strSegPtr->length = length; strSegPtr->line = 0; + strSegPtr->clLocPtr = NULL; } - objPtr->bytes = NULL; objPtr->length = length; objPtr->typePtr = &tclCodeSegmentType; objPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; @@ -1031,10 +1075,10 @@ TclCopyByteCodeObject( const char *bytes; int length; Tcl_Obj *cpyPtr; - StringSegment *strSegPtr = TclGetStringSegmentFromObj(objPtr); + StringSegment *strSegPtr = TclGetStringSegmentFromObj(objPtr, 0); bytes = Tcl_GetUtfFromObj(objPtr, &length); - cpyPtr = TclNewCodeSegmentObj(strSegPtr, bytes, length); + cpyPtr = TclNewCodeSegmentObj(strSegPtr, bytes, length, 0); /* * TIP #280. @@ -1049,17 +1093,47 @@ TclCopyByteCodeObject( StringSegment * TclGetStringSegmentFromObj( - Tcl_Obj *objPtr) + Tcl_Obj *objPtr, /* Object to get segment from. */ + int flags) /* Flags to obtain/create segment inplace. */ { const Tcl_ObjType *typePtr = objPtr->typePtr; + StringSegment *strSegPtr; if (typePtr == &tclCodeSegmentType) { /* CodeSegment */ - return (StringSegment *)objPtr->internalRep.twoPtrValue.ptr1; + strSegPtr = (StringSegment *)objPtr->internalRep.twoPtrValue.ptr1; +#if 1 + if (flags & TCLSEG_FULL_SEGREP) { + /* check it owns fully included segment */ + if ( !strSegPtr->parentPtr + && (objPtr->internalRep.twoPtrValue.ptr2 /* offset */ + || objPtr->length != strSegPtr->length + ) + ) { + StringSegment *parSegPtr = strSegPtr; + + parSegPtr->refCount++; + strSegPtr = ckalloc(sizeof(StringSegment)); + strSegPtr->refCount = 1; + strSegPtr->parentPtr = parSegPtr; + strSegPtr->bytes.offset = (size_t)objPtr->internalRep.twoPtrValue.ptr2; + strSegPtr->length = objPtr->length; + strSegPtr->line = 0; + strSegPtr->clLocPtr = NULL; + + objPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; + objPtr->internalRep.twoPtrValue.ptr2 = (void *)0; /* no offset anymore */ + } + } +#endif + return strSegPtr; } if (!objPtr->typePtr) { - StringSegment *strSegPtr; + + if (flags & TCLSEG_EXISTS) { + return NULL; + } wrapObj: /* No type, wrap it to tclCodeSegmentType*/ @@ -1071,6 +1145,7 @@ TclGetStringSegmentFromObj( strSegPtr->bytes.ptr = (char *)objPtr->bytes; strSegPtr->length = objPtr->length; strSegPtr->line = 0; + strSegPtr->clLocPtr = NULL; objPtr->typePtr = &tclCodeSegmentType; objPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; @@ -1087,9 +1162,12 @@ TclGetStringSegmentFromObj( } #if 1 - TclGetString(objPtr); - TclFreeIntRep(objPtr); - goto wrapObj; + if (!(flags & TCLSEG_EXISTS)) { + TclGetString(objPtr); + TclFreeIntRep(objPtr); + goto wrapObj; + } + return NULL; #else /* *****todo**** rewrite this - wrap obj to a string segment or return new (rather impossible because of bytes sharing & offsets) */ Tcl_Panic("unexpected, TclGetStringSegmentFromObj not yet implemented for %s: %.80s!!!", objPtr->typePtr ? objPtr->typePtr->name : "NONE", TclGetString(objPtr)); @@ -1164,7 +1242,7 @@ TclSetByteCodeFromAny( TclInitCompileEnv(interp, &compEnv, stringPtr, length, iPtr->invokeCmdFramePtr, iPtr->invokeWord); - compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr); + compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr, 0); compEnv.strSegPtr->refCount++; @@ -1209,7 +1287,7 @@ TclSetByteCodeFromAny( iPtr->compiledProcPtr = procPtr; TclInitCompileEnv(interp, &compEnv, stringPtr, length, iPtr->invokeCmdFramePtr, iPtr->invokeWord); - compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr); + compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr, 0); compEnv.strSegPtr->refCount++; if (clLocPtr) { compEnv.clNext = &clLocPtr->loc[0]; @@ -1777,7 +1855,7 @@ CompileSubstObj( /* TODO: Check for more TIP 280 */ TclInitCompileEnv(interp, &compEnv, bytes, numBytes, NULL, 0); - compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr); + compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr, 0); compEnv.strSegPtr->refCount++; TclSubstCompile(interp, bytes, numBytes, flags, 1, &compEnv); @@ -2959,7 +3037,13 @@ TclCompileTokens( */ if (Tcl_DStringLength(&textBuffer) > 0) { - int literal = TclRegisterDStringLiteral(envPtr, &textBuffer); + int literal; + if (1) { + literal = TclRegisterCodeSegmentLiteral(envPtr, + Tcl_DStringValue(&textBuffer), Tcl_DStringLength(&textBuffer)); + } else { + literal = TclRegisterDStringLiteral(envPtr, &textBuffer); + } TclEmitPush(literal, envPtr); numObjsToConcat++; @@ -3360,6 +3444,7 @@ TclInitByteCodeObj( strSegPtr->bytes.offset = offset; strSegPtr->length = objPtr->length; strSegPtr->line = 0; + strSegPtr->clLocPtr = NULL; codePtr->strSegPtr = strSegPtr; objPtr->bytes = NULL; diff --git a/generic/tclCompile.h b/generic/tclCompile.h index bc4e87c90f59..e4953c873f50 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -201,30 +201,6 @@ typedef struct ExtCmdLoc { int nuloc; /* Number of used entries in 'loc'. */ } ExtCmdLoc; -typedef struct StringSegment StringSegment; - -typedef struct StringSegment { - TCL_HASH_TYPE hash; /* Hash of this string segment. */ - size_t refCount; /* Count all references of this segment. */ - StringSegment *parentPtr; /* Parent segment (sharing string memory). */ - union { - char *ptr; /* Pointer to string (parentPtr == NULL) or */ - size_t offset; /* offset to string part (parentPtr != NULL). */ - } bytes; - int length; /* Size of string in bytes. */ - unsigned int line; /* Line in source this object can be found. */ -} StringSegment; - -#define TclGetStringSegmentBytes(strSegPtr) \ - (!(strSegPtr)->parentPtr ? (strSegPtr)->bytes.ptr : \ - (strSegPtr)->parentPtr->bytes.ptr + (strSegPtr)->bytes.offset) - -MODULE_SCOPE Tcl_Obj * TclNewCodeSegmentObj(StringSegment *strSegPtr, - const char *bytes, unsigned long length); - -MODULE_SCOPE StringSegment *TclGetStringSegmentFromObj(Tcl_Obj *objPtr); -MODULE_SCOPE void TclFreeStringSegment(StringSegment *strSegPtr); - /* * CompileProcs need the ability to record information during compilation that * can be used by bytecode instructions during execution. The AuxData diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 57e9ec19791d..adce065533e8 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -1544,7 +1544,7 @@ CompileExprObj( const char *string = Tcl_GetUtfFromObj(objPtr, &length); TclInitCompileEnv(interp, &compEnv, string, length, NULL, 0); - compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr); + compEnv.strSegPtr = TclGetStringSegmentFromObj(objPtr, 0); compEnv.strSegPtr->refCount++; TclCompileExpr(interp, string, length, &compEnv, 0); diff --git a/generic/tclInt.h b/generic/tclInt.h index 91bdbfe87e54..4ecc613123b4 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -877,6 +877,8 @@ typedef struct VarInHash { *---------------------------------------------------------------- */ +typedef struct StringSegment StringSegment; + /* * Forward declaration to prevent an error when the forward reference to * Command is encountered in the Proc and ImportRef types declared below. @@ -2361,6 +2363,7 @@ typedef struct List { * derived from the list representation. May * be ignored if there is no string rep at * all.*/ + StringSegment *strSegPtr; /* String segment of the list (avoid shimmering). */ Tcl_Obj *elements; /* First list element; the struct is grown to * accommodate all elements. */ } List; @@ -4034,6 +4037,39 @@ MODULE_SCOPE unsigned TclHashObjKey(Tcl_HashTable *tablePtr, void *keyPtr); MODULE_SCOPE int TclFullFinalizationRequested(void); +/* + * tclObj.c / tclCompile.c shared implementation of code segment object. + */ + +typedef struct StringSegment { + TCL_HASH_TYPE hash; /* Hash of this string segment. */ + size_t refCount; /* Count all references of this segment. */ + StringSegment *parentPtr; /* Parent segment (sharing string memory). */ + union { + char *ptr; /* Pointer to string (parentPtr == NULL) or */ + size_t offset; /* offset to string part (parentPtr != NULL). */ + } bytes; + int length; /* Size of string in bytes. */ + unsigned int line; /* Line in source this object can be found. */ + ContLineLoc *clLocPtr; /* Locations of invisible continuation lines. */ +} StringSegment; + +#define TclGetStringSegmentBytes(strSegPtr) \ + (!(strSegPtr)->parentPtr ? (strSegPtr)->bytes.ptr : \ + (strSegPtr)->parentPtr->bytes.ptr + (strSegPtr)->bytes.offset) + +#define TCLSEG_EXISTS 0x01 /* Only if segment exists (avoid creation & shimmering) */ +#define TCLSEG_FULL_SEGREP 0x02 /* Force new segment representation if not fully + * included (offset > 0 or length < parent.length). */ +#define TCLSEG_DUP_STRREP 0x08 /* Create new string representation if not included in parent */ + +MODULE_SCOPE Tcl_Obj * TclNewCodeSegmentObj(StringSegment *strSegPtr, + const char *bytes, unsigned long length, int flags); + +MODULE_SCOPE StringSegment *TclGetStringSegmentFromObj(Tcl_Obj *objPtr, + int flags); +MODULE_SCOPE void TclFreeStringSegment(StringSegment *strSegPtr); + /* * Utility routines for encoding index values as integers. Used by both * some of the command compilers and by [lsort] and [lsearch]. diff --git a/generic/tclListObj.c b/generic/tclListObj.c index 10a5412d2d9e..74178c193b0d 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -114,6 +114,7 @@ NewListIntRep( listRepPtr->canonicalFlag = 0; listRepPtr->refCount = 0; listRepPtr->maxElemCount = objc; + listRepPtr->strSegPtr = NULL; if (objv) { Tcl_Obj **elemPtrs; @@ -1740,6 +1741,9 @@ FreeListInternalRep( for (i = 0; i < numElems; i++) { Tcl_DecrRefCount(elemPtrs[i]); } + if (listRepPtr->strSegPtr) { + TclFreeStringSegment(listRepPtr->strSegPtr); + } ckfree(listRepPtr); } @@ -1846,6 +1850,7 @@ SetListFromAny( } } else { int estCount, length; + StringSegment *strSegPtr; const char *limit, *nextElem = Tcl_GetUtfFromObj(objPtr, &length); /* @@ -1862,6 +1867,13 @@ SetListFromAny( } elemPtrs = &listRepPtr->elements; + /* try to obtain original string segment if we can retain sharing this */ + strSegPtr = TclGetStringSegmentFromObj(objPtr, TCLSEG_EXISTS); + if (strSegPtr) { + strSegPtr->refCount++; + listRepPtr->strSegPtr = strSegPtr; + } + /* * Each iteration, parse and store a list element. */ diff --git a/generic/tclLiteral.c b/generic/tclLiteral.c index b8d297c3f4dd..9d05f5f4c71c 100644 --- a/generic/tclLiteral.c +++ b/generic/tclLiteral.c @@ -239,7 +239,8 @@ TclCreateLiteralEx( */ if (envPtr && (flags & LITERAL_CODE_SEGMENT)) { - objPtr = TclNewCodeSegmentObj(envPtr->strSegPtr, bytes, length); + objPtr = TclNewCodeSegmentObj(envPtr->strSegPtr, bytes, length, + TCLSEG_DUP_STRREP); } else { TclNewObj(objPtr); if ((flags & LITERAL_ON_HEAP)) { diff --git a/generic/tclObj.c b/generic/tclObj.c index caabf3fc3cb3..cb8e8bebcb4c 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -99,9 +99,6 @@ typedef struct ThreadSpecificData { static Tcl_ThreadDataKey dataKey; -static void TclThreadFinalizeContLines(ClientData clientData); -static ThreadSpecificData *TclGetContLineTable(void); - /* * Nested Tcl_Obj deletion management support * @@ -510,46 +507,7 @@ TclFinalizeObjects(void) Tcl_MutexUnlock(&tclObjMutex); } -/* - *---------------------------------------------------------------------- - * - * TclGetContLineTable -- - * - * This procedure is a helper which returns the thread-specific - * hash-table used to track continuation line information associated with - * Tcl_Obj*, and the objThreadMap, etc. - * - * Results: - * A reference to the thread-data. - * - * Side effects: - * May allocate memory for the thread-data. - * - * TIP #280 - *---------------------------------------------------------------------- - */ - -static ThreadSpecificData * -TclGetContLineTable(void) -{ - /* - * Initialize the hashtable tracking invisible continuation lines. For - * the release we use a thread exit handler to ensure that this is done - * before TSD blocks are made invalid. The TclFinalizeObjects() which - * would be the natural place for this is invoked afterwards, meaning that - * we try to operate on a data structure already gone. - */ - ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); - - if (!tsdPtr->lineCLPtr) { - tsdPtr->lineCLPtr = ckalloc(sizeof(Tcl_HashTable)); - Tcl_InitHashTable(tsdPtr->lineCLPtr, TCL_ONE_WORD_KEYS); - Tcl_CreateThreadExitHandler(TclThreadFinalizeContLines,NULL); - } - return tsdPtr; -} - /* *---------------------------------------------------------------------- * @@ -574,14 +532,11 @@ TclContinuationsEnter( int num, int *loc) { - int newEntry; - ThreadSpecificData *tsdPtr = TclGetContLineTable(); - Tcl_HashEntry *hPtr = - Tcl_CreateHashEntry(tsdPtr->lineCLPtr, objPtr, &newEntry); - ContLineLoc *clLocPtr = ckalloc(sizeof(ContLineLoc) + num*sizeof(int)); - //!!!!!! printf("++++ cont-enter: %d, %p, %.80s\n", newEntry, objPtr, Tcl_GetString(objPtr)); - - if (!newEntry) { + ContLineLoc *clLocPtr; + StringSegment *strSegPtr; + + strSegPtr = TclGetStringSegmentFromObj(objPtr, TCLSEG_FULL_SEGREP); + if (strSegPtr->clLocPtr != NULL) { /* * We're entering ContLineLoc data for the same value more than one * time. Taking care not to leak the old entry. @@ -593,23 +548,23 @@ TclContinuationsEnter( * locations (offset) of invisible continuation lines in the literal * are the same for all occurences. * - * Note that while reusing the existing entry is possible it requires - * the same actions as for a new entry because we have to copy the - * incoming num/loc data even so. Because we are called from - * TclContinuationsEnterDerived for this case, which modified the - * stored locations (Rebased to the proper relative offset). Just - * returning the stored entry would rebase them a second time, or - * more, hosing the data. It is easier to simply replace, as we are - * doing. + * We will try to reuse the old entry memory here (and simply replace + * a content). */ - ckfree(Tcl_GetHashValue(hPtr)); + clLocPtr = strSegPtr->clLocPtr; + if (clLocPtr->num != num) { + clLocPtr = ckrealloc(clLocPtr, + sizeof(ContLineLoc) + num*sizeof(int)); + } + } else { + clLocPtr = ckalloc(sizeof(ContLineLoc) + num*sizeof(int)); } clLocPtr->num = num; memcpy(&clLocPtr->loc, loc, num*sizeof(int)); clLocPtr->loc[num] = CLL_END; /* Sentinel */ - Tcl_SetHashValue(hPtr, clLocPtr); + strSegPtr->clLocPtr = clLocPtr; return clLocPtr; } @@ -731,14 +686,22 @@ TclContinuationsCopy( Tcl_Obj *objPtr, Tcl_Obj *originObjPtr) { - ThreadSpecificData *tsdPtr = TclGetContLineTable(); - Tcl_HashEntry *hPtr = - Tcl_FindHashEntry(tsdPtr->lineCLPtr, originObjPtr); + StringSegment *origSegPtr, *strSegPtr; - if (hPtr) { - ContLineLoc *clLocPtr = Tcl_GetHashValue(hPtr); + /* only if segment can be obtained (also avoid shimmering problems) */ + origSegPtr = TclGetStringSegmentFromObj(originObjPtr, TCLSEG_EXISTS); + if (!origSegPtr) { + return; + } + strSegPtr = TclGetStringSegmentFromObj(objPtr, TCLSEG_FULL_SEGREP); + + /* if both objects don't share same segment */ + if (origSegPtr != strSegPtr) { + ContLineLoc *clLocPtr = origSegPtr->clLocPtr; - TclContinuationsEnter(objPtr, clLocPtr->num, clLocPtr->loc); + if (clLocPtr) { + TclContinuationsEnter(objPtr, clLocPtr->num, clLocPtr->loc); + } } } @@ -765,54 +728,11 @@ ContLineLoc * TclContinuationsGet( Tcl_Obj *objPtr) { - ThreadSpecificData *tsdPtr = TclGetContLineTable(); - Tcl_HashEntry *hPtr = - Tcl_FindHashEntry(tsdPtr->lineCLPtr, objPtr); + StringSegment *strSegPtr; - if (!hPtr) { - return NULL; - } - return Tcl_GetHashValue(hPtr); -} - -/* - *---------------------------------------------------------------------- - * - * TclThreadFinalizeContLines -- - * - * This procedure is a helper which releases all continuation line - * information currently known. It is run as a thread exit handler. - * - * Results: - * None. - * - * Side effects: - * Releases memory. - * - * TIP #280 - *---------------------------------------------------------------------- - */ - -static void -TclThreadFinalizeContLines( - ClientData clientData) -{ - /* - * Release the hashtable tracking invisible continuation lines. - */ - - ThreadSpecificData *tsdPtr = TclGetContLineTable(); - Tcl_HashEntry *hPtr; - Tcl_HashSearch hSearch; - - for (hPtr = Tcl_FirstHashEntry(tsdPtr->lineCLPtr, &hSearch); - hPtr != NULL; hPtr = Tcl_NextHashEntry(&hSearch)) { - ckfree(Tcl_GetHashValue(hPtr)); - Tcl_DeleteHashEntry(hPtr); - } - Tcl_DeleteHashTable(tsdPtr->lineCLPtr); - ckfree(tsdPtr->lineCLPtr); - tsdPtr->lineCLPtr = NULL; + /* only if segment can be obtained (also avoid shimmering problems) */ + strSegPtr = TclGetStringSegmentFromObj(objPtr, TCLSEG_EXISTS); + return strSegPtr ? strSegPtr->clLocPtr : NULL; } /* @@ -1744,8 +1664,9 @@ Tcl_ObjHasBytes( * Results: * Returns a pointer to the unalterable string representation of objPtr, * which in opposite to Tcl_GetStringFromObj (Tcl_GetString) is not - * guaranteed null-terminated string (NTS) string for example could be + * guaranteed null-terminated string (NTS) for example could be * a part of some buffer (or code). + * Also it may contain original not wrapped line continuations (\\\n). * * Side effects: * May call the object's updateStringProc to update the string diff --git a/generic/tclParse.c b/generic/tclParse.c index 1532c056e40d..b3fda54fcf68 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -2177,6 +2177,8 @@ TclSubstTokens( Interp *iPtr = (Interp *) interp; int inFile = iPtr->evalFlags & TCL_EVAL_FILE; + StringSegment *parSegPtr = NULL; /* reserved for future dev */ + /* * Each pass through this loop will substitute one token, and its * components, if any. The only thing tricky here is that we go to some @@ -2373,7 +2375,13 @@ TclSubstTokens( if (appendObj != NULL) { result = appendObj; } else { + #if 1 + result = TclNewCodeSegmentObj(parSegPtr, + append, appendByteLength, + TCLSEG_DUP_STRREP | (numCL?TCLSEG_FULL_SEGREP:0)); + #else result = Tcl_NewStringObj(append, appendByteLength); + #endif } Tcl_IncrRefCount(result); } else { diff --git a/generic/tclResult.c b/generic/tclResult.c index 9d0714c0e449..5664b4e04770 100644 --- a/generic/tclResult.c +++ b/generic/tclResult.c @@ -982,6 +982,12 @@ ResetObjResult( Tcl_IncrRefCount(objResultPtr); iPtr->objResultPtr = objResultPtr; } else { + /* + * Free internal representation firstly, be sure it does not share + * bytes reference + */ + TclFreeIntRep(objResultPtr); + /* now string representation */ if (objResultPtr->bytes != tclEmptyStringRep) { if (objResultPtr->bytes) { ckfree(objResultPtr->bytes); @@ -989,7 +995,6 @@ ResetObjResult( objResultPtr->bytes = tclEmptyStringRep; objResultPtr->length = 0; } - TclFreeIntRep(objResultPtr); } } From f62058bebe0f6821a81badf636499fb58090e82e Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 18 Sep 2019 17:28:12 +0200 Subject: [PATCH 10/16] fix segment handling in lists and extended TclGetStringSegmentFromObj with support of list-type (allows to find string segment of list) --- generic/tclCompile.c | 21 +++++++++++++++------ generic/tclListObj.c | 24 +++++++++++++++++++++++- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/generic/tclCompile.c b/generic/tclCompile.c index af30f746629d..66e7443cd814 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -1160,14 +1160,23 @@ TclGetStringSegmentFromObj( return codePtr->strSegPtr; } } + if (typePtr == &tclListType) { + /* List */ + List *listRepPtr = ListRepPtr(objPtr); + if (listRepPtr->strSegPtr) { + return listRepPtr->strSegPtr; + } + } #if 1 - if (!(flags & TCLSEG_EXISTS)) { - TclGetString(objPtr); - TclFreeIntRep(objPtr); - goto wrapObj; - } - return NULL; + /* if requested only if exists */ + if (flags & TCLSEG_EXISTS) { + return NULL; /* no segment */ + } + /* wrap to object containing segment */ + TclGetString(objPtr); + TclFreeIntRep(objPtr); + goto wrapObj; #else /* *****todo**** rewrite this - wrap obj to a string segment or return new (rather impossible because of bytes sharing & offsets) */ Tcl_Panic("unexpected, TclGetStringSegmentFromObj not yet implemented for %s: %.80s!!!", objPtr->typePtr ? objPtr->typePtr->name : "NONE", TclGetString(objPtr)); diff --git a/generic/tclListObj.c b/generic/tclListObj.c index 74178c193b0d..5d78121de748 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -676,6 +676,10 @@ Tcl_ListObjAppendElement( */ TclInvalidateStringRep(listPtr); + if (listRepPtr->strSegPtr) { + TclFreeStringSegment(listRepPtr->strSegPtr); + listRepPtr->strSegPtr = NULL; + } return TCL_OK; } @@ -1074,6 +1078,10 @@ Tcl_ListObjReplace( */ TclInvalidateStringRep(listPtr); + if (listRepPtr->strSegPtr) { + TclFreeStringSegment(listRepPtr->strSegPtr); + listRepPtr->strSegPtr = NULL; + } return TCL_OK; } @@ -1516,12 +1524,20 @@ TclLsetFlat( Tcl_Obj *objPtr = chainPtr; if (result == TCL_OK) { + List *listRepPtr = ListRepPtr(objPtr); + /* * We're going to store valuePtr, so spoil string reps of all * containing lists. */ TclInvalidateStringRep(objPtr); + + /* invalidate string segment referenced in the list */ + if (listRepPtr->strSegPtr) { + TclFreeStringSegment(listRepPtr->strSegPtr); + listRepPtr->strSegPtr = NULL; + } } /* @@ -1557,7 +1573,7 @@ TclLsetFlat( } else { TclListObjSetElement(NULL, subListPtr, index, valuePtr); } - TclInvalidateStringRep(subListPtr); + Tcl_IncrRefCount(retValuePtr); return retValuePtr; } @@ -1708,6 +1724,12 @@ TclListObjSetElement( elemPtrs[index] = valuePtr; + /* invalidate string segment referenced in the list */ + if (listRepPtr->strSegPtr) { + TclFreeStringSegment(listRepPtr->strSegPtr); + listRepPtr->strSegPtr = NULL; + } + return TCL_OK; } From f80a3ec97bd026dd924cd78ccb0e54aee3670243 Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 18 Sep 2019 17:43:48 +0200 Subject: [PATCH 11/16] info.test: fixed mistakenly recognized (artificial) line in info-30.18, this change illustrating "broken" behavior of previous implementation of TclContinuations* routines: clLocPtr binding on the object address does not affected by in-place modification of the object (basically other object then), so if unshared string gets amended or previously not canonical unshared list gets new elements (or it shrinks), then its string representation changes, but it still retains its old clLocPtr (bound in global hash table on the address of object only), this is worse and can produce very unexpected results by delivering of invisible continuation lines (inclusive SF or panic "Derived ICL data for object using offsets from before the script"). --- tests/info.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/info.test b/tests/info.test index 5fe2240efadd..a491f9bbffec 100644 --- a/tests/info.test +++ b/tests/info.test @@ -1610,11 +1610,11 @@ test info-30.18 {bs+nl, literal word, uplevel through proc, appended, loss of pr } set res [abra { return "\ -[reduce [info frame 0]]";# line 1613, still line of 3 appended script +[reduce [info frame 0]]";# line 1613, still line 2 of appended script (script changing before evaluation, because of append in abra) }] rename abra {} set res -} { type eval line 3 cmd {info frame 0} proc ::abra} +} { type eval line 2 cmd {info frame 0} proc ::abra} # { type source line 1606 file info.test cmd {info frame 0} proc ::abra} test info-30.19 {bs+nl in single-body switch, compiled} { From 5faa3056cb43efb7b6cc541f085ceee74eab2ab3 Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 18 Sep 2019 20:58:39 +0200 Subject: [PATCH 12/16] **temp commit** several fixes, review needed (**SF/BO** in string.test, **ICL panic** in encoding.test) --- generic/tclCompile.c | 100 +++++++++++++++++++++++++++---------------- generic/tclListObj.c | 9 +++- generic/tclObj.c | 68 ++++++++++++++++++----------- 3 files changed, 113 insertions(+), 64 deletions(-) diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 66e7443cd814..284051ed1e81 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -754,6 +754,32 @@ void TclFreeStringSegment(StringSegment *strSegPtr) { */ const Tcl_ObjType tclCodeSegmentType; +static StringSegment * +DupStringSegment( + StringSegment *parSegPtr, + size_t offset, + int length) +{ + StringSegment *strSegPtr; + + /* hold it simple referenced (only one parent deeply) */ + if (parSegPtr->parentPtr) { + offset += parSegPtr->bytes.offset; /* consider offset of parent */ + parSegPtr = parSegPtr->parentPtr; /* use root object */ + } + + strSegPtr = ckalloc(sizeof(StringSegment)); + strSegPtr->refCount = 0; + strSegPtr->parentPtr = parSegPtr; + parSegPtr->refCount++; + strSegPtr->bytes.offset = offset; + strSegPtr->length = length; + strSegPtr->line = 0; + strSegPtr->clLocPtr = NULL; + + return strSegPtr; +} + static void DupCodeSegmentInternalRep( Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ @@ -761,11 +787,22 @@ DupCodeSegmentInternalRep( { StringSegment *strSegPtr = srcPtr->internalRep.twoPtrValue.ptr1; copyPtr->typePtr = &tclCodeSegmentType; +#if 1 + /* be sure we have clean (full included) segment */ + strSegPtr = DupStringSegment(strSegPtr, + (size_t)srcPtr->internalRep.twoPtrValue.ptr2 /* offset */, + srcPtr->length); + copyPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; + copyPtr->internalRep.twoPtrValue.ptr2 = (void *)0; /* no offset */ + copyPtr->length = srcPtr->length; + strSegPtr->refCount++; +#else copyPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; copyPtr->internalRep.twoPtrValue.ptr2 = srcPtr->internalRep.twoPtrValue.ptr2; copyPtr->length = srcPtr->length; strSegPtr->refCount++; +#endif } static inline void @@ -957,24 +994,19 @@ TclNewCodeSegmentObj( if (offset) { assert(offset < (size_t)strSegPtr->length); } - /* object would share parent segment (directly or indirectly) */ - strSegPtr->refCount++; /* if fully included requested, check it (duplicate segment on demand) */ if ( (flags & TCLSEG_FULL_SEGREP) && (offset || length != strSegPtr->length) ) { /* not fully included - duplicate reference to parent */ - StringSegment *parSegPtr = strSegPtr; - - strSegPtr = ckalloc(sizeof(StringSegment)); - strSegPtr->refCount = 1; - strSegPtr->parentPtr = parSegPtr; /* refCount already incremented */ - strSegPtr->bytes.offset = offset; - strSegPtr->length = length; - strSegPtr->line = 0; - strSegPtr->clLocPtr = NULL; + StringSegment *orgSegPtr = strSegPtr; + + strSegPtr = DupStringSegment(strSegPtr, offset, length); + TclFreeStringSegment(orgSegPtr); /* we'll replace it in object (decr/free) */ offset = 0; } + /* object would share this segment */ + strSegPtr->refCount++; } else { if (flags & TCLSEG_DUP_STRREP) { char *newBytes; @@ -1078,6 +1110,7 @@ TclCopyByteCodeObject( StringSegment *strSegPtr = TclGetStringSegmentFromObj(objPtr, 0); bytes = Tcl_GetUtfFromObj(objPtr, &length); + /* it should be safe to create code segment without TCLSEG_DUP_STRREP */ cpyPtr = TclNewCodeSegmentObj(strSegPtr, bytes, length, 0); /* @@ -1110,17 +1143,13 @@ TclGetStringSegmentFromObj( || objPtr->length != strSegPtr->length ) ) { - StringSegment *parSegPtr = strSegPtr; - - parSegPtr->refCount++; - strSegPtr = ckalloc(sizeof(StringSegment)); - strSegPtr->refCount = 1; - strSegPtr->parentPtr = parSegPtr; - strSegPtr->bytes.offset = (size_t)objPtr->internalRep.twoPtrValue.ptr2; - strSegPtr->length = objPtr->length; - strSegPtr->line = 0; - strSegPtr->clLocPtr = NULL; + /* not fully included - duplicate reference to parent */ + StringSegment *orgSegPtr = strSegPtr; + size_t offset = (size_t)objPtr->internalRep.twoPtrValue.ptr2; + strSegPtr = DupStringSegment(strSegPtr, offset, objPtr->length); + strSegPtr->refCount++; + TclFreeStringSegment(orgSegPtr); /* we'll replace it in object (decr/free) */ objPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; objPtr->internalRep.twoPtrValue.ptr2 = (void *)0; /* no offset anymore */ } @@ -1412,14 +1441,18 @@ DupByteCodeInternalRep( if ((strSegPtr = codePtr->strSegPtr)) { const char *bytes = TclGetStringSegmentBytes(strSegPtr); - size_t offset = codePtr->source - bytes; - + size_t offset = bytes - codePtr->source; /* normally always 0 */ + assert(bytes >= codePtr->source); +#if 1 + /* be sure we have clean (full included) segment */ + strSegPtr = DupStringSegment(strSegPtr, offset, codePtr->numSrcBytes); + offset = 0; +#endif strSegPtr->refCount++; copyPtr->typePtr = &tclCodeSegmentType; copyPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; copyPtr->internalRep.twoPtrValue.ptr2 = (void *)offset; copyPtr->length = codePtr->numSrcBytes; - return; } } @@ -3440,23 +3473,14 @@ TclInitByteCodeObj( offset = (size_t)objPtr->internalRep.twoPtrValue.ptr2; /* if offset is not 0 - create new part of segment */ if (offset) { - StringSegment *strSegPtr = ckalloc(sizeof(StringSegment)); - - strSegPtr->refCount = 1; - strSegPtr->parentPtr = codePtr->strSegPtr; - /* hold it simple referenced (only one parent deeply) */ - if (strSegPtr->parentPtr->parentPtr) { - offset += strSegPtr->parentPtr->bytes.offset; - strSegPtr->parentPtr = strSegPtr->parentPtr->parentPtr; - } - strSegPtr->parentPtr->refCount++; - strSegPtr->bytes.offset = offset; - strSegPtr->length = objPtr->length; - strSegPtr->line = 0; - strSegPtr->clLocPtr = NULL; + StringSegment *strSegPtr = codePtr->strSegPtr; + strSegPtr = DupStringSegment(strSegPtr, offset, objPtr->length); + strSegPtr->refCount++; + TclFreeStringSegment(codePtr->strSegPtr); /* we'll replace it in code (decr/free) */ codePtr->strSegPtr = strSegPtr; objPtr->bytes = NULL; + objPtr->internalRep.twoPtrValue.ptr2 = (void *)0; /* no offset */ } } objPtr->internalRep.twoPtrValue.ptr1 = codePtr; diff --git a/generic/tclListObj.c b/generic/tclListObj.c index 5d78121de748..e11f57d6e4bb 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -12,6 +12,7 @@ */ #include "tclInt.h" +#include /* * Prototypes for functions defined later in this file: @@ -1524,7 +1525,10 @@ TclLsetFlat( Tcl_Obj *objPtr = chainPtr; if (result == TCL_OK) { - List *listRepPtr = ListRepPtr(objPtr); + List *listRepPtr; + + assert(objPtr->typePtr == &tclListType); + listRepPtr = ListRepPtr(objPtr); /* * We're going to store valuePtr, so spoil string reps of all @@ -1724,6 +1728,9 @@ TclListObjSetElement( elemPtrs[index] = valuePtr; + /* Invalidate object string representation */ + TclInvalidateStringRep(listPtr); + /* invalidate string segment referenced in the list */ if (listRepPtr->strSegPtr) { TclFreeStringSegment(listRepPtr->strSegPtr); diff --git a/generic/tclObj.c b/generic/tclObj.c index cb8e8bebcb4c..345397307c3e 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -1501,24 +1501,26 @@ TclObjBeingDeleted( *---------------------------------------------------------------------- */ -#define SetDuplicateObj(dupPtr, objPtr) \ - { \ - const Tcl_ObjType *typePtr = (objPtr)->typePtr; \ - const char *bytes = (objPtr)->bytes; \ - if (bytes) { \ - TclInitStringRep((dupPtr), bytes, (objPtr)->length); \ - } else { \ - (dupPtr)->bytes = NULL; \ - } \ - if (typePtr) { \ - if (typePtr->dupIntRepProc) { \ - typePtr->dupIntRepProc((objPtr), (dupPtr)); \ - } else { \ - (dupPtr)->internalRep = (objPtr)->internalRep; \ - (dupPtr)->typePtr = typePtr; \ - } \ - } \ +static inline void +SetDuplicateObj(Tcl_Obj *dupPtr, Tcl_Obj *objPtr) +{ + const Tcl_ObjType *typePtr = (objPtr)->typePtr; + const char *bytes = (objPtr)->bytes; + if (bytes && typePtr != &tclCodeSegmentType) { + TclInitStringRep((dupPtr), bytes, (objPtr)->length); + } else { + (dupPtr)->bytes = NULL; + (dupPtr)->length = (objPtr)->length; + } + if (typePtr) { + if (typePtr->dupIntRepProc) { + typePtr->dupIntRepProc((objPtr), (dupPtr)); + } else { + (dupPtr)->internalRep = (objPtr)->internalRep; + (dupPtr)->typePtr = typePtr; + } } +} Tcl_Obj * Tcl_DuplicateObj( @@ -1649,7 +1651,13 @@ Tcl_ObjHasBytes( return ( objPtr->bytes || (objPtr->typePtr == &tclCodeSegmentType) - || (objPtr->typePtr && objPtr->typePtr->updateStringProc == TclUpdateStringOfByteCode) + || (objPtr->typePtr && ( + (objPtr->typePtr->updateStringProc == TclUpdateStringOfByteCode) +#if 0 /* still unsafe to use it from string segment by lists */ + || (objPtr->typePtr == &tclListType && ListRepPtr(objPtr)->strSegPtr) +#endif + ) + ) ); } @@ -1695,13 +1703,23 @@ Tcl_GetUtfFromObj( return bytes; } - if (objPtr->typePtr - && (objPtr->typePtr->updateStringProc == TclUpdateStringOfByteCode) - ) { - /* ByteCode */ - ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; - *lengthPtr = codePtr->numSrcBytes; - return codePtr->source; + if (objPtr->typePtr) { + if (objPtr->typePtr->updateStringProc == TclUpdateStringOfByteCode) { + /* ByteCode */ + ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; + *lengthPtr = codePtr->numSrcBytes; + return codePtr->source; + } +#if 0 /* still unsafe to use it from string segment by lists */ + if (objPtr->typePtr == &tclListType) { + List *listPtr = ListRepPtr(objPtr); + if (listPtr->strSegPtr) { + bytes = (const char *)TclGetStringSegmentBytes(listPtr->strSegPtr); + assert(!objPtr->bytes || memcmp(objPtr->bytes, bytes, 0) == 0); + return bytes; + } + } +#endif } /* From 9ba9f329e62e8dcb0bb5866415747d84d06cc349 Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 18 Sep 2019 22:57:17 +0200 Subject: [PATCH 13/16] list internals obtain only ICL now (no sharing of string segment possible with this cut implementation) --- generic/tclCompile.c | 7 --- generic/tclInt.h | 3 +- generic/tclListObj.c | 45 +++++++--------- generic/tclObj.c | 122 +++++++++++++++++++++++-------------------- 4 files changed, 88 insertions(+), 89 deletions(-) diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 284051ed1e81..94ea1e188f87 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -1189,13 +1189,6 @@ TclGetStringSegmentFromObj( return codePtr->strSegPtr; } } - if (typePtr == &tclListType) { - /* List */ - List *listRepPtr = ListRepPtr(objPtr); - if (listRepPtr->strSegPtr) { - return listRepPtr->strSegPtr; - } - } #if 1 /* if requested only if exists */ diff --git a/generic/tclInt.h b/generic/tclInt.h index 4ecc613123b4..599bad7bb43f 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2363,7 +2363,7 @@ typedef struct List { * derived from the list representation. May * be ignored if there is no string rep at * all.*/ - StringSegment *strSegPtr; /* String segment of the list (avoid shimmering). */ + ContLineLoc *clLocPtr; /* Locations of invisible continuation lines. */ Tcl_Obj *elements; /* First list element; the struct is grown to * accommodate all elements. */ } List; @@ -2883,6 +2883,7 @@ MODULE_SCOPE int TclChanCaughtErrorBypass(Tcl_Interp *interp, Tcl_Channel chan); MODULE_SCOPE Tcl_ObjCmdProc TclChannelNamesCmd; MODULE_SCOPE Tcl_NRPostProc TclClearRootEnsemble; +MODULE_SCOPE ContLineLoc *TclContinuationsDupICL(ContLineLoc *clLocPtr); MODULE_SCOPE ContLineLoc *TclContinuationsEnter(Tcl_Obj *objPtr, int num, int *loc); MODULE_SCOPE void TclContinuationsEnterDerived(Tcl_Obj *objPtr, diff --git a/generic/tclListObj.c b/generic/tclListObj.c index e11f57d6e4bb..d1928569cf1f 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -115,7 +115,7 @@ NewListIntRep( listRepPtr->canonicalFlag = 0; listRepPtr->refCount = 0; listRepPtr->maxElemCount = objc; - listRepPtr->strSegPtr = NULL; + listRepPtr->clLocPtr = NULL; if (objv) { Tcl_Obj **elemPtrs; @@ -677,9 +677,9 @@ Tcl_ListObjAppendElement( */ TclInvalidateStringRep(listPtr); - if (listRepPtr->strSegPtr) { - TclFreeStringSegment(listRepPtr->strSegPtr); - listRepPtr->strSegPtr = NULL; + if (listRepPtr->clLocPtr) { + ckfree(listRepPtr->clLocPtr); + listRepPtr->clLocPtr = NULL; } return TCL_OK; } @@ -1079,9 +1079,9 @@ Tcl_ListObjReplace( */ TclInvalidateStringRep(listPtr); - if (listRepPtr->strSegPtr) { - TclFreeStringSegment(listRepPtr->strSegPtr); - listRepPtr->strSegPtr = NULL; + if (listRepPtr->clLocPtr) { + ckfree(listRepPtr->clLocPtr); + listRepPtr->clLocPtr = NULL; } return TCL_OK; } @@ -1536,11 +1536,9 @@ TclLsetFlat( */ TclInvalidateStringRep(objPtr); - - /* invalidate string segment referenced in the list */ - if (listRepPtr->strSegPtr) { - TclFreeStringSegment(listRepPtr->strSegPtr); - listRepPtr->strSegPtr = NULL; + if (listRepPtr->clLocPtr) { + ckfree(listRepPtr->clLocPtr); + listRepPtr->clLocPtr = NULL; } } @@ -1730,11 +1728,9 @@ TclListObjSetElement( /* Invalidate object string representation */ TclInvalidateStringRep(listPtr); - - /* invalidate string segment referenced in the list */ - if (listRepPtr->strSegPtr) { - TclFreeStringSegment(listRepPtr->strSegPtr); - listRepPtr->strSegPtr = NULL; + if (listRepPtr->clLocPtr) { + ckfree(listRepPtr->clLocPtr); + listRepPtr->clLocPtr = NULL; } return TCL_OK; @@ -1770,8 +1766,8 @@ FreeListInternalRep( for (i = 0; i < numElems; i++) { Tcl_DecrRefCount(elemPtrs[i]); } - if (listRepPtr->strSegPtr) { - TclFreeStringSegment(listRepPtr->strSegPtr); + if (listRepPtr->clLocPtr) { + ckfree(listRepPtr->clLocPtr); } ckfree(listRepPtr); } @@ -1879,7 +1875,7 @@ SetListFromAny( } } else { int estCount, length; - StringSegment *strSegPtr; + ContLineLoc *clLocPtr; const char *limit, *nextElem = Tcl_GetUtfFromObj(objPtr, &length); /* @@ -1896,11 +1892,10 @@ SetListFromAny( } elemPtrs = &listRepPtr->elements; - /* try to obtain original string segment if we can retain sharing this */ - strSegPtr = TclGetStringSegmentFromObj(objPtr, TCLSEG_EXISTS); - if (strSegPtr) { - strSegPtr->refCount++; - listRepPtr->strSegPtr = strSegPtr; + /* try to obtain original ICL if object contains that */ + clLocPtr = TclContinuationsGet(objPtr); + if (clLocPtr) { + listRepPtr->clLocPtr = TclContinuationsDupICL(clLocPtr); } /* diff --git a/generic/tclObj.c b/generic/tclObj.c index 345397307c3e..4663432341c9 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -526,37 +526,29 @@ TclFinalizeObjects(void) *---------------------------------------------------------------------- */ -ContLineLoc * -TclContinuationsEnter( - Tcl_Obj *objPtr, +static inline ContLineLoc* +FillICL( + ContLineLoc *clLocPtr, int num, int *loc) { - ContLineLoc *clLocPtr; - StringSegment *strSegPtr; - - strSegPtr = TclGetStringSegmentFromObj(objPtr, TCLSEG_FULL_SEGREP); - if (strSegPtr->clLocPtr != NULL) { - /* - * We're entering ContLineLoc data for the same value more than one - * time. Taking care not to leak the old entry. - * - * This can happen when literals in a proc body are shared. See for - * example test info-30.19 where the action (code) for all branches of - * the switch command is identical, mapping them all to the same - * literal. An interesting result of this is that the number and - * locations (offset) of invisible continuation lines in the literal - * are the same for all occurences. - * - * We will try to reuse the old entry memory here (and simply replace - * a content). - */ - - clLocPtr = strSegPtr->clLocPtr; - if (clLocPtr->num != num) { - clLocPtr = ckrealloc(clLocPtr, + /* + * We can enter ContLineLoc data for the same value more than one + * time. Taking care not to leak the old entry. + * + * This can happen when literals in a proc body are shared. See for + * example test info-30.19 where the action (code) for all branches of + * the switch command is identical, mapping them all to the same + * literal. An interesting result of this is that the number and + * locations (offset) of invisible continuation lines in the literal + * are the same for all occurences. + * + * We will try to reuse the old entry memory here (and simply replace + * a content). + */ + if (clLocPtr && clLocPtr->num != num) { + clLocPtr = ckrealloc(clLocPtr, sizeof(ContLineLoc) + num*sizeof(int)); - } } else { clLocPtr = ckalloc(sizeof(ContLineLoc) + num*sizeof(int)); } @@ -564,10 +556,35 @@ TclContinuationsEnter( clLocPtr->num = num; memcpy(&clLocPtr->loc, loc, num*sizeof(int)); clLocPtr->loc[num] = CLL_END; /* Sentinel */ - strSegPtr->clLocPtr = clLocPtr; - return clLocPtr; } + +ContLineLoc * +TclContinuationsDupICL( + ContLineLoc *clLocPtr) +{ + return FillICL(NULL, clLocPtr->num, clLocPtr->loc); +} + +ContLineLoc * +TclContinuationsEnter( + Tcl_Obj *objPtr, + int num, + int *loc) +{ + StringSegment *strSegPtr; + + if (objPtr->typePtr == &tclListType) { + /* List */ + List *listRepPtr = ListRepPtr(objPtr); + listRepPtr->clLocPtr = FillICL(listRepPtr->clLocPtr, num, loc); + return listRepPtr->clLocPtr; + } + + strSegPtr = TclGetStringSegmentFromObj(objPtr, TCLSEG_FULL_SEGREP); + strSegPtr->clLocPtr = FillICL(strSegPtr->clLocPtr, num, loc); + return strSegPtr->clLocPtr; +} /* *---------------------------------------------------------------------- @@ -686,22 +703,22 @@ TclContinuationsCopy( Tcl_Obj *objPtr, Tcl_Obj *originObjPtr) { - StringSegment *origSegPtr, *strSegPtr; - - /* only if segment can be obtained (also avoid shimmering problems) */ - origSegPtr = TclGetStringSegmentFromObj(originObjPtr, TCLSEG_EXISTS); - if (!origSegPtr) { - return; + StringSegment *strSegPtr; + ContLineLoc *clLocPtr = TclContinuationsGet(originObjPtr); + + if (objPtr->typePtr == &tclListType) { + /* List */ + List *listRepPtr = ListRepPtr(objPtr); + if (listRepPtr->clLocPtr != clLocPtr) { + listRepPtr->clLocPtr = FillICL(listRepPtr->clLocPtr, + clLocPtr->num, clLocPtr->loc); + } } - strSegPtr = TclGetStringSegmentFromObj(objPtr, TCLSEG_FULL_SEGREP); - /* if both objects don't share same segment */ - if (origSegPtr != strSegPtr) { - ContLineLoc *clLocPtr = origSegPtr->clLocPtr; - - if (clLocPtr) { - TclContinuationsEnter(objPtr, clLocPtr->num, clLocPtr->loc); - } + strSegPtr = TclGetStringSegmentFromObj(objPtr, TCLSEG_FULL_SEGREP); + if (strSegPtr->clLocPtr != clLocPtr) { + strSegPtr->clLocPtr = FillICL(strSegPtr->clLocPtr, + clLocPtr->num, clLocPtr->loc); } } @@ -730,6 +747,12 @@ TclContinuationsGet( { StringSegment *strSegPtr; + if (objPtr->typePtr == &tclListType) { + /* List */ + List *listRepPtr = ListRepPtr(objPtr); + return listRepPtr->clLocPtr; + } + /* only if segment can be obtained (also avoid shimmering problems) */ strSegPtr = TclGetStringSegmentFromObj(objPtr, TCLSEG_EXISTS); return strSegPtr ? strSegPtr->clLocPtr : NULL; @@ -1653,9 +1676,6 @@ Tcl_ObjHasBytes( || (objPtr->typePtr == &tclCodeSegmentType) || (objPtr->typePtr && ( (objPtr->typePtr->updateStringProc == TclUpdateStringOfByteCode) -#if 0 /* still unsafe to use it from string segment by lists */ - || (objPtr->typePtr == &tclListType && ListRepPtr(objPtr)->strSegPtr) -#endif ) ) ); @@ -1710,16 +1730,6 @@ Tcl_GetUtfFromObj( *lengthPtr = codePtr->numSrcBytes; return codePtr->source; } -#if 0 /* still unsafe to use it from string segment by lists */ - if (objPtr->typePtr == &tclListType) { - List *listPtr = ListRepPtr(objPtr); - if (listPtr->strSegPtr) { - bytes = (const char *)TclGetStringSegmentBytes(listPtr->strSegPtr); - assert(!objPtr->bytes || memcmp(objPtr->bytes, bytes, 0) == 0); - return bytes; - } - } -#endif } /* From 0b8156a35490241e5521adcab644c51dfd38b822 Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 19 Sep 2019 00:34:08 +0200 Subject: [PATCH 14/16] stability fix - be sure TclGetStringSegmentFromObj with TCLSEG_FULL_SEGREP really create full (own) segment representation, that avoid rewrite of ICL in shared segments (supplied as clNext to TclContinuationsEnterDerived), todo: rewrite this without usage of TclContinuationsEnterDerived at all (wouldn't need if segments completely back-ported); + protect against several cases could occur very rarely; --- generic/tclCompile.c | 27 +++++++++++++++++++-------- generic/tclObj.c | 24 +++++++++++++++++++++--- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 94ea1e188f87..52983aef92e8 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -975,11 +975,19 @@ TclNewCodeSegmentObj( size_t offset = 0; + if (bytes == NULL) { + assert(length == 0); /* possible only in this case */ + if (!strSegPtr) { + return Tcl_NewStringObj(NULL, 0); + } + bytes = TclGetStringSegmentBytes(strSegPtr); + } + TclNewObj(objPtr); + assert(bytes != NULL); objPtr->bytes = NULL; - assert(bytes != NULL); if (strSegPtr) { const char *segBytes = TclGetStringSegmentBytes(strSegPtr); /* check bytes is included in segment (duplicate on demand only) */ @@ -1137,15 +1145,18 @@ TclGetStringSegmentFromObj( strSegPtr = (StringSegment *)objPtr->internalRep.twoPtrValue.ptr1; #if 1 if (flags & TCLSEG_FULL_SEGREP) { - /* check it owns fully included segment */ - if ( !strSegPtr->parentPtr - && (objPtr->internalRep.twoPtrValue.ptr2 /* offset */ - || objPtr->length != strSegPtr->length - ) - ) { + + const char *segBytes = TclGetStringSegmentBytes(strSegPtr); + size_t offset = 0; + if (objPtr->bytes) { + offset = objPtr->bytes - segBytes; + } else { + offset = (size_t)objPtr->internalRep.twoPtrValue.ptr2; + } + /* fully included requested, check it (duplicate segment on demand) */ + if (offset || objPtr->length != strSegPtr->length) { /* not fully included - duplicate reference to parent */ StringSegment *orgSegPtr = strSegPtr; - size_t offset = (size_t)objPtr->internalRep.twoPtrValue.ptr2; strSegPtr = DupStringSegment(strSegPtr, offset, objPtr->length); strSegPtr->refCount++; diff --git a/generic/tclObj.c b/generic/tclObj.c index 4663432341c9..a35ef212c4ae 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -546,9 +546,23 @@ FillICL( * We will try to reuse the old entry memory here (and simply replace * a content). */ - if (clLocPtr && clLocPtr->num != num) { - clLocPtr = ckrealloc(clLocPtr, - sizeof(ContLineLoc) + num*sizeof(int)); + if (clLocPtr) { + if (clLocPtr->num != num) { + if (clLocPtr->loc == loc) { + /* we can't copy inplace - duplicate and free original */ + ContLineLoc *orgLocPtr = clLocPtr; + + clLocPtr = ckalloc(sizeof(ContLineLoc) + num * sizeof(int)); + clLocPtr->num = num; + memcpy(&clLocPtr->loc, loc, num * sizeof(int)); + clLocPtr->loc[num] = CLL_END; /* Sentinel */ + ckfree(orgLocPtr); + return clLocPtr; + } + /* we can safe realocate it */ + clLocPtr = ckrealloc(clLocPtr, + sizeof(ContLineLoc) + num * sizeof(int)); + } } else { clLocPtr = ckalloc(sizeof(ContLineLoc) + num*sizeof(int)); } @@ -1720,6 +1734,7 @@ Tcl_GetUtfFromObj( size_t offset = (size_t)objPtr->internalRep.twoPtrValue.ptr2; *lengthPtr = (int)objPtr->length; bytes = (const char *)TclGetStringSegmentBytes(strSegPtr) + offset; + assert(bytes != NULL); return bytes; } @@ -1728,6 +1743,7 @@ Tcl_GetUtfFromObj( /* ByteCode */ ByteCode *codePtr = objPtr->internalRep.twoPtrValue.ptr1; *lengthPtr = codePtr->numSrcBytes; + assert(codePtr->source != NULL); return codePtr->source; } } @@ -1737,6 +1753,7 @@ Tcl_GetUtfFromObj( */ if ((bytes = objPtr->bytes)) { *lengthPtr = objPtr->length; + assert(bytes != NULL); return bytes; } @@ -1745,6 +1762,7 @@ Tcl_GetUtfFromObj( */ bytes = Tcl_GetString(objPtr); *lengthPtr = objPtr->length; + assert(bytes != NULL); return bytes; } From d92408b93c086894c4c45e547432c5c24a45cd0e Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 12 Mar 2020 22:40:39 +0100 Subject: [PATCH 15/16] code cleanup (removed unused code and old unneeded handling) --- generic/tclCompile.c | 16 ------------ generic/tclObj.c | 61 ++------------------------------------------ 2 files changed, 2 insertions(+), 75 deletions(-) diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 52983aef92e8..268e2a2f6535 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -787,22 +787,11 @@ DupCodeSegmentInternalRep( { StringSegment *strSegPtr = srcPtr->internalRep.twoPtrValue.ptr1; copyPtr->typePtr = &tclCodeSegmentType; -#if 1 - /* be sure we have clean (full included) segment */ - strSegPtr = DupStringSegment(strSegPtr, - (size_t)srcPtr->internalRep.twoPtrValue.ptr2 /* offset */, - srcPtr->length); - copyPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; - copyPtr->internalRep.twoPtrValue.ptr2 = (void *)0; /* no offset */ - copyPtr->length = srcPtr->length; - strSegPtr->refCount++; -#else copyPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; copyPtr->internalRep.twoPtrValue.ptr2 = srcPtr->internalRep.twoPtrValue.ptr2; copyPtr->length = srcPtr->length; strSegPtr->refCount++; -#endif } static inline void @@ -1447,11 +1436,6 @@ DupByteCodeInternalRep( const char *bytes = TclGetStringSegmentBytes(strSegPtr); size_t offset = bytes - codePtr->source; /* normally always 0 */ assert(bytes >= codePtr->source); -#if 1 - /* be sure we have clean (full included) segment */ - strSegPtr = DupStringSegment(strSegPtr, offset, codePtr->numSrcBytes); - offset = 0; -#endif strSegPtr->refCount++; copyPtr->typePtr = &tclCodeSegmentType; copyPtr->internalRep.twoPtrValue.ptr1 = strSegPtr; diff --git a/generic/tclObj.c b/generic/tclObj.c index a35ef212c4ae..4b1ba7a00814 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -78,26 +78,15 @@ typedef struct ObjData { * The structure defined below is used in this file only. */ -typedef struct ThreadSpecificData { - Tcl_HashTable *lineCLPtr; /* This table remembers for each Tcl_Obj - * generated by a call to the function - * TclSubstTokens() from a literal text - * where bs+nl sequences occured in it, if - * any. I.e. this table keeps track of - * invisible and stripped continuation lines. - * Its keys are Tcl_Obj pointers, the values - * are ContLineLoc pointers. See the file - * tclCompile.h for the definition of this - * structure, and for references to all - * related places in the core. */ #if defined(TCL_MEM_DEBUG) && defined(TCL_THREADS) +typedef struct ThreadSpecificData { Tcl_HashTable *objThreadMap;/* Thread local table that is used to check * that a Tcl_Obj was not allocated by some * other thread. */ -#endif /* TCL_MEM_DEBUG && TCL_THREADS */ } ThreadSpecificData; static Tcl_ThreadDataKey dataKey; +#endif /* TCL_MEM_DEBUG && TCL_THREADS */ /* * Nested Tcl_Obj deletion management support @@ -1348,29 +1337,6 @@ TclFreeObj( } ObjDeletionUnlock(context); } - - /* - * We cannot use TclGetContinuationTable() here, because that may - * re-initialize the thread-data for calls coming after the finalization. - * We have to access it using the low-level call and then check for - * validity. This function can be called after TclFinalizeThreadData() has - * already killed the thread-global data structures. Performing - * TCL_TSD_INIT will leave us with an un-initialized memory block upon - * which we crash (if we where to access the uninitialized hashtable). - */ - - { - ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); - Tcl_HashEntry *hPtr; - - if (tsdPtr->lineCLPtr) { - hPtr = Tcl_FindHashEntry(tsdPtr->lineCLPtr, objPtr); - if (hPtr) { - ckfree(Tcl_GetHashValue(hPtr)); - Tcl_DeleteHashEntry(hPtr); - } - } - } } #else /* TCL_MEM_DEBUG */ @@ -1458,29 +1424,6 @@ TclFreeObj( ObjDeletionUnlock(context); } } - - /* - * We cannot use TclGetContinuationTable() here, because that may - * re-initialize the thread-data for calls coming after the finalization. - * We have to access it using the low-level call and then check for - * validity. This function can be called after TclFinalizeThreadData() has - * already killed the thread-global data structures. Performing - * TCL_TSD_INIT will leave us with an un-initialized memory block upon - * which we crash (if we where to access the uninitialized hashtable). - */ - - { - ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); - Tcl_HashEntry *hPtr; - - if (tsdPtr->lineCLPtr) { - hPtr = Tcl_FindHashEntry(tsdPtr->lineCLPtr, objPtr); - if (hPtr) { - ckfree(Tcl_GetHashValue(hPtr)); - Tcl_DeleteHashEntry(hPtr); - } - } - } } #endif /* TCL_MEM_DEBUG */ From 4c47fbe23b4a2eebc6e7ea72b1959f46197b19f9 Mon Sep 17 00:00:00 2001 From: "Sergey G. Brester" Date: Fri, 29 Oct 2021 16:13:05 +0200 Subject: [PATCH 16/16] review: remove forgotten debugging stuff (no functional changes) --- generic/tclObj.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/generic/tclObj.c b/generic/tclObj.c index 4b1ba7a00814..40ff26a7b848 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -1346,10 +1346,6 @@ TclFreeObj( { const Tcl_ObjType *typePtr = objPtr->typePtr; - if (objPtr->length == 40) { - int i = 0; - i++; - } /* * Firstly check special cases where string rep could be shared with * objects internal representation,