Skip to content

Commit

Permalink
Add bytearray checking to TclCheckEmptyString(), and then use
Browse files Browse the repository at this point in the history
TclCheckEmptyString() in Tcl_AppendObjToObj and TclStringCat() to reduce string
generation.
  • Loading branch information
pooryorick committed Apr 23, 2023
2 parents 4ee1252 + ce214e9 commit 21039aa
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 22 deletions.
2 changes: 1 addition & 1 deletion doc/StringObj.3
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ The index of the last Unicode character in the Unicode range to be
returned as a new value. If negative, take all characters up to
the last one available.
.AP Tcl_Obj *objPtr in/out
Points to a value to manipulate.
A pointer to a value to read, or to an unshared value to modify.
.AP Tcl_Obj *appendObjPtr in
The value to append to \fIobjPtr\fR in \fBTcl_AppendObjToObj\fR.
.AP "Tcl_Size \&| int" *lengthPtr out
Expand Down
1 change: 1 addition & 0 deletions doc/encoding.n
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'\"
'\" Copyright (c) 1998 Scriptics Corporation.
'\" Copyright (c) 2023 Nathan Coulter
'\"
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Expand Down
45 changes: 26 additions & 19 deletions generic/tclStringObj.c
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,11 @@ TclCheckEmptyString(
return TCL_EMPTYSTRING_YES;
}

if (TclIsPureByteArray(objPtr)
&& Tcl_GetCharLength(objPtr) == 0) {
return TCL_EMPTYSTRING_YES;
}

if (TclListObjIsCanonical(objPtr)) {
TclListObjLengthM(NULL, objPtr, &length);
return length == 0;
Expand Down Expand Up @@ -1431,26 +1436,26 @@ Tcl_AppendObjToObj(
Tcl_Size appendNumChars = TCL_INDEX_NONE;
const char *bytes;

/*
* Special case: second object is standard-empty is fast case. We know
* that appending nothing to anything leaves that starting anything...
*/
if (TclCheckEmptyString(appendObjPtr) == TCL_EMPTYSTRING_YES) {
return;
}

if (appendObjPtr->bytes == &tclEmptyString) {
if (TclCheckEmptyString(objPtr) == TCL_EMPTYSTRING_YES) {
TclSetDuplicateObj(objPtr, appendObjPtr);
return;
}

/*
* Handle append of one ByteArray object to another as a special case.
* Note that we only do this when the objects are pure so that the
* bytearray faithfully represent the true value; Otherwise appending the
* byte arrays together could lose information;
*/
if (
TclIsPureByteArray(appendObjPtr)
&& (TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString)
) {
/*
* Both bytearray objects are pure, so the second internal bytearray value
* can be appended to the first, with no need to modify the "bytes" field.
*/

if ((TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString)
&& TclIsPureByteArray(appendObjPtr)) {
/*
* You might expect the code here to be
* One might expect the code here to be
*
* bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length);
* TclAppendBytesToByteArray(objPtr, bytes, length);
Expand Down Expand Up @@ -3126,7 +3131,7 @@ TclStringCat(
int allowUniChar = 1, requestUniChar = 0, forceUniChar = 0;
Tcl_Size first = objc - 1; /* Index of first value possibly not empty */
Tcl_Size last = 0; /* Index of last value possibly not empty */
int inPlace = flags & TCL_STRING_IN_PLACE;
int inPlace = (flags & TCL_STRING_IN_PLACE) && !Tcl_IsShared(*objv);

/* assert ( objc >= 0 ) */

Expand Down Expand Up @@ -3254,7 +3259,8 @@ TclStringCat(

Tcl_Obj *objPtr = *ov++;

if (objPtr->bytes == NULL) {
if (objPtr->bytes == NULL
&& TclCheckEmptyString(objPtr) != TCL_EMPTYSTRING_YES) {
/* No string rep; Take the chance we can avoid making it */
pendingPtr = objPtr;
} else {
Expand Down Expand Up @@ -3330,6 +3336,7 @@ TclStringCat(
}

objv += first; objc = (last - first + 1);
inPlace = (flags & TCL_STRING_IN_PLACE) && !Tcl_IsShared(*objv);

if (binary) {
/* Efficiently produce a pure byte array result */
Expand All @@ -3340,7 +3347,7 @@ TclStringCat(
* failure to allocate enough space. Following stanza may panic.
*/

if (inPlace && !Tcl_IsShared(*objv)) {
if (inPlace) {
Tcl_Size start = 0;

objResultPtr = *objv++; objc--;
Expand Down Expand Up @@ -3370,7 +3377,7 @@ TclStringCat(
/* Efficiently produce a pure Tcl_UniChar array result */
Tcl_UniChar *dst;

if (inPlace && !Tcl_IsShared(*objv)) {
if (inPlace) {
Tcl_Size start;

objResultPtr = *objv++; objc--;
Expand Down Expand Up @@ -3421,7 +3428,7 @@ TclStringCat(
/* Efficiently concatenate string reps */
char *dst;

if (inPlace && !Tcl_IsShared(*objv)) {
if (inPlace) {
Tcl_Size start;

objResultPtr = *objv++; objc--;
Expand Down
4 changes: 2 additions & 2 deletions tests/string.test
Original file line number Diff line number Diff line change
Expand Up @@ -2433,11 +2433,11 @@ test string-29.11.$noComp {string cat, efficiency} -body {
test string-29.12.$noComp {string cat, efficiency} -body {
tcl::unsupported::representation \
[run {string cat [encoding convertto utf-8 {}] [list x]}]
} -match glob -result {*, string representation "x"}
} -match glob -result {*, no string representation}
test string-29.13.$noComp {string cat, efficiency} -body {
tcl::unsupported::representation [run {string cat \
[encoding convertto utf-8 {}] [encoding convertto utf-8 {}] [list x]}]
} -match glob -result {*, string representation "x"}
} -match glob -result {*, no string representation}
test string-29.14.$noComp {string cat, efficiency} -setup {
set e [encoding convertto utf-8 {}]
} -cleanup {
Expand Down

0 comments on commit 21039aa

Please sign in to comment.