From c92ec738559fe76a57be40ef67f53bf51f3947fe Mon Sep 17 00:00:00 2001 From: Eddie Kohler Date: Fri, 10 Dec 2021 08:20:48 -0500 Subject: [PATCH] Add Dbl::utf8_charset and convert_utf8. --- lib/dbl.php | 45 ++++++++++++++++++++++++++++++++---------- src/confinvariants.php | 5 +++-- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/lib/dbl.php b/lib/dbl.php index f077afa64..75b9fca44 100644 --- a/lib/dbl.php +++ b/lib/dbl.php @@ -835,23 +835,48 @@ static function shutdown() { self::$query_log = false; } - /** @return string */ - static function utf8(/* [$dblink,] $qstr */) { - $args = func_get_args(); - $dblink = count($args) > 1 ? $args[0] : self::$default_dblink; + /** @param \mysqli|string $dblink + * @param ?string $qstr + * @return string */ + static function utf8($dblink, $qstr = null) { + if (is_string($dblink)) { + $qstr = $dblink; + $dblink = self::$default_dblink; + } $utf8 = $dblink->server_version >= 50503 ? "utf8mb4" : "utf8"; - $qstr = count($args) > 1 ? $args[1] : $args[0]; return "_{$utf8}{$qstr}"; } - /** @return string */ - static function utf8ci(/* [$dblink,] $qstr */) { - $args = func_get_args(); - $dblink = count($args) > 1 ? $args[0] : self::$default_dblink; + /** @param \mysqli|string $dblink + * @param ?string $qstr + * @return string */ + static function utf8ci($dblink, $qstr = null) { + if (is_string($dblink)) { + $qstr = $dblink; + $dblink = self::$default_dblink; + } $utf8 = $dblink->server_version >= 50503 ? "utf8mb4" : "utf8"; - $qstr = count($args) > 1 ? $args[1] : $args[0]; return "_{$utf8}{$qstr} collate {$utf8}_general_ci"; } + + /** @param \mysqli|string $dblink + * @param ?string $qstr + * @return string */ + static function convert_utf8($dblink, $qstr = null) { + if (is_string($dblink)) { + $qstr = $dblink; + $dblink = self::$default_dblink; + } + $utf8 = $dblink->server_version >= 50503 ? "utf8mb4" : "utf8"; + return "convert($qstr using $utf8)"; + } + + /** @param ?\mysqli $dblink + * @return string */ + static function utf8_charset($dblink = null) { + $dblink = $dblink ?? self::$default_dblink; + return $dblink->server_version >= 50503 ? "utf8mb4" : "utf8"; + } } // quoting for SQL diff --git a/src/confinvariants.php b/src/confinvariants.php index c95e68308..82b2b2165 100644 --- a/src/confinvariants.php +++ b/src/confinvariants.php @@ -162,8 +162,9 @@ function exec_main() { Dbl::free($result); // whitespace is simplified - $regex = Dbl::utf8($this->conf->dblink, "'^ | \$| |[\\n\\r\\t]'"); - $any = $this->invariantq("select email from ContactInfo where convert(firstName using utf8mb4) regexp $regex or convert(lastName using utf8mb4) regexp $regex or convert(affiliation using utf8mb4) regexp $regex limit 1"); + $utf8cs = Dbl::utf8_charset($this->conf->dblink); + $regex = "_{$utf8cs}'^ | \$| |[\\n\\r\\t]'"; + $any = $this->invariantq("select email from ContactInfo where convert(firstName using $utf8cs) regexp $regex or convert(lastName using $utf8cs) regexp $regex or convert(affiliation using $utf8cs) regexp $regex limit 1"); if ($any) { $this->invariant_error("user_whitespace", "user whitespace is not simplified"); }