From a5db6e004ccc82630e10d3182bfff4a282392f8d Mon Sep 17 00:00:00 2001 From: Faizan Qazi Date: Fri, 12 Jul 2024 14:54:59 +0000 Subject: [PATCH] sqlliveness: detect and handle invalid SessionIDs Previously, the code for checking if sessions are alive supported non-RBR-encoded session IDs. However, in version 24.1, we removed this support without adding proper handling for invalid IDs, potentially leading to finalization failures during upgrades (if stale session IDs existed). This patch adds logic to treat invalid session IDs, which will allow upgrades to occur if stale session IDs exist.s Fixes: #127061 Release note: None --- .../tests/3node-tenant/generated_test.go | 7 ++++ .../local-read-committed/generated_test.go | 7 ++++ pkg/sql/catalog/lease/lease_internal_test.go | 4 +-- .../logictest/testdata/logic_test/sqlliveness | 36 +++++++++++++++++++ .../tests/fakedist-disk/generated_test.go | 7 ++++ .../tests/fakedist-vec-off/generated_test.go | 7 ++++ .../tests/fakedist/generated_test.go | 7 ++++ .../generated_test.go | 7 ++++ .../tests/local-mixed-23.2/generated_test.go | 7 ++++ .../tests/local-vec-off/generated_test.go | 7 ++++ .../logictest/tests/local/generated_test.go | 7 ++++ .../comparator_generated_test.go | 5 +++ pkg/sql/sqlliveness/slstorage/key_encoder.go | 5 +++ pkg/sql/sqlliveness/slstorage/sessionid.go | 34 ++++++++---------- pkg/sql/sqlliveness/slstorage/slstorage.go | 13 ++++++- 15 files changed, 138 insertions(+), 22 deletions(-) create mode 100644 pkg/sql/logictest/testdata/logic_test/sqlliveness diff --git a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go index 11be5fd5b63a..77cd5ff6ff2a 100644 --- a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go +++ b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go @@ -1936,6 +1936,13 @@ func TestTenantLogic_sqllite( runLogicTest(t, "sqllite") } +func TestTenantLogic_sqlliveness( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "sqlliveness") +} + func TestTenantLogic_sqlsmith( t *testing.T, ) { diff --git a/pkg/ccl/logictestccl/tests/local-read-committed/generated_test.go b/pkg/ccl/logictestccl/tests/local-read-committed/generated_test.go index b062fbf4b6b5..6f15c6b008a3 100644 --- a/pkg/ccl/logictestccl/tests/local-read-committed/generated_test.go +++ b/pkg/ccl/logictestccl/tests/local-read-committed/generated_test.go @@ -1948,6 +1948,13 @@ func TestReadCommittedLogic_sqllite( runLogicTest(t, "sqllite") } +func TestReadCommittedLogic_sqlliveness( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "sqlliveness") +} + func TestReadCommittedLogic_sqlsmith( t *testing.T, ) { diff --git a/pkg/sql/catalog/lease/lease_internal_test.go b/pkg/sql/catalog/lease/lease_internal_test.go index 7c0951e911d6..cffdbf89329a 100644 --- a/pkg/sql/catalog/lease/lease_internal_test.go +++ b/pkg/sql/catalog/lease/lease_internal_test.go @@ -1709,8 +1709,8 @@ func TestLeaseCountDetailSessionBased(t *testing.T) { version := 1 region := enum.One _, err := executor.Exec(ctx, "add-rows-for-test", nil, - fmt.Sprintf("INSERT INTO system.lease VALUES (%d, %d, %s, '%s', '\\x%x')", - descID, version, nodeID, session.ID(), region)) + fmt.Sprintf("INSERT INTO system.lease VALUES (%d, %d, %s, '\\x%x', '\\x%x')", + descID, version, nodeID, session.ID().UnsafeBytes(), region)) if err != nil { return err } diff --git a/pkg/sql/logictest/testdata/logic_test/sqlliveness b/pkg/sql/logictest/testdata/logic_test/sqlliveness new file mode 100644 index 000000000000..d9347aabdec8 --- /dev/null +++ b/pkg/sql/logictest/testdata/logic_test/sqlliveness @@ -0,0 +1,36 @@ +# Validate that invalid sessionID's are always +# considered dead. +subtest invalid_sessions + +# Legacy non-RBR format +query B +select crdb_internal.sql_liveness_is_alive(x'1f915e98f96145a5baa9f3a42c378eb6'); +---- +false + +# Wrong length +query B +select crdb_internal.sql_liveness_is_alive(x'deadbeef'); +---- +false + +subtest end + + +subtest valid_sessions + +# Sanity: All sessions are alive in sqlliveness. +query I +SELECT count(*) FROM system.sqlliveness WHERE crdb_internal.sql_liveness_is_alive(session_id) = false; +---- +0 + +query B +SELECT count(*) > 0 FROM system.sqlliveness WHERE crdb_internal.sql_liveness_is_alive(session_id) = true; +---- +true + +subtest end + + + diff --git a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go index 5074355e8844..7262bf83506a 100644 --- a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go @@ -1919,6 +1919,13 @@ func TestLogic_sqllite( runLogicTest(t, "sqllite") } +func TestLogic_sqlliveness( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "sqlliveness") +} + func TestLogic_sqlsmith( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go index a88349f4d32d..e0689ef9e41b 100644 --- a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go @@ -1919,6 +1919,13 @@ func TestLogic_sqllite( runLogicTest(t, "sqllite") } +func TestLogic_sqlliveness( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "sqlliveness") +} + func TestLogic_sqlsmith( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/fakedist/generated_test.go b/pkg/sql/logictest/tests/fakedist/generated_test.go index 1818a68527d9..06a99011c9bb 100644 --- a/pkg/sql/logictest/tests/fakedist/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist/generated_test.go @@ -1933,6 +1933,13 @@ func TestLogic_sqllite( runLogicTest(t, "sqllite") } +func TestLogic_sqlliveness( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "sqlliveness") +} + func TestLogic_sqlsmith( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go index 7945dd8a194b..d8d25ac294de 100644 --- a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go +++ b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go @@ -1912,6 +1912,13 @@ func TestLogic_sqllite( runLogicTest(t, "sqllite") } +func TestLogic_sqlliveness( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "sqlliveness") +} + func TestLogic_sqlsmith( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-mixed-23.2/generated_test.go b/pkg/sql/logictest/tests/local-mixed-23.2/generated_test.go index 148891993312..fb1d2f557101 100644 --- a/pkg/sql/logictest/tests/local-mixed-23.2/generated_test.go +++ b/pkg/sql/logictest/tests/local-mixed-23.2/generated_test.go @@ -1926,6 +1926,13 @@ func TestLogic_sqllite( runLogicTest(t, "sqllite") } +func TestLogic_sqlliveness( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "sqlliveness") +} + func TestLogic_sqlsmith( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-vec-off/generated_test.go b/pkg/sql/logictest/tests/local-vec-off/generated_test.go index 13ba0ea77d2b..3928d64b98ba 100644 --- a/pkg/sql/logictest/tests/local-vec-off/generated_test.go +++ b/pkg/sql/logictest/tests/local-vec-off/generated_test.go @@ -1940,6 +1940,13 @@ func TestLogic_sqllite( runLogicTest(t, "sqllite") } +func TestLogic_sqlliveness( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "sqlliveness") +} + func TestLogic_sqlsmith( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local/generated_test.go b/pkg/sql/logictest/tests/local/generated_test.go index ae5ee244776c..a645072b3217 100644 --- a/pkg/sql/logictest/tests/local/generated_test.go +++ b/pkg/sql/logictest/tests/local/generated_test.go @@ -2143,6 +2143,13 @@ func TestLogic_sqllite( runLogicTest(t, "sqllite") } +func TestLogic_sqlliveness( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "sqlliveness") +} + func TestLogic_sqlsmith( t *testing.T, ) { diff --git a/pkg/sql/schemachanger/comparator_generated_test.go b/pkg/sql/schemachanger/comparator_generated_test.go index 564cde1e2048..9fc91f9e2180 100644 --- a/pkg/sql/schemachanger/comparator_generated_test.go +++ b/pkg/sql/schemachanger/comparator_generated_test.go @@ -1653,6 +1653,11 @@ func TestSchemaChangeComparator_sqllite(t *testing.T) { var logicTestFile = "pkg/sql/logictest/testdata/logic_test/sqllite" runSchemaChangeComparatorTest(t, logicTestFile) } +func TestSchemaChangeComparator_sqlliveness(t *testing.T) { + defer leaktest.AfterTest(t)() + var logicTestFile = "pkg/sql/logictest/testdata/logic_test/sqlliveness" + runSchemaChangeComparatorTest(t, logicTestFile) +} func TestSchemaChangeComparator_sqlsmith(t *testing.T) { defer leaktest.AfterTest(t)() var logicTestFile = "pkg/sql/logictest/testdata/logic_test/sqlsmith" diff --git a/pkg/sql/sqlliveness/slstorage/key_encoder.go b/pkg/sql/sqlliveness/slstorage/key_encoder.go index 9981d1fa4747..d22f79638d42 100644 --- a/pkg/sql/sqlliveness/slstorage/key_encoder.go +++ b/pkg/sql/sqlliveness/slstorage/key_encoder.go @@ -24,6 +24,7 @@ import ( type keyCodec interface { encode(sid sqlliveness.SessionID) (roachpb.Key, string, error) decode(key roachpb.Key) (sqlliveness.SessionID, error) + validate(session sqlliveness.SessionID) error // indexPrefix returns the prefix for an encoded key. encode() will return // something with the prefix and decode will expect a key with this prefix. @@ -37,6 +38,10 @@ type rbrEncoder struct { rbrIndex roachpb.Key } +func (e *rbrEncoder) validate(session sqlliveness.SessionID) error { + return ValidateSessionID(session) +} + func (e *rbrEncoder) encode(session sqlliveness.SessionID) (roachpb.Key, string, error) { region, _, err := SafeDecodeSessionID(session) if err != nil { diff --git a/pkg/sql/sqlliveness/slstorage/sessionid.go b/pkg/sql/sqlliveness/slstorage/sessionid.go index b21f9d2fa841..976f20141d57 100644 --- a/pkg/sql/sqlliveness/slstorage/sessionid.go +++ b/pkg/sql/sqlliveness/slstorage/sessionid.go @@ -67,18 +67,8 @@ func MakeSessionID(region []byte, id uuid.UUID) (sqlliveness.SessionID, error) { // not be mutated. func UnsafeDecodeSessionID(session sqlliveness.SessionID) (region, id []byte, err error) { b := session.UnsafeBytes() - if len(b) == legacyLen { - return nil, nil, errors.Newf("unexpected legacy SessionID format") - } - if len(b) < minimumNonLegacyLen { - // The smallest valid v1 session id is a [version, 1, single_byte_region, uuid...], - // which is three bytes larger than a uuid. - return nil, nil, errors.New("session id is too short") - } - - // Decode the version. - if b[0] != sessionIDVersion { - return nil, nil, errors.Newf("invalid session id version: %d", b[0]) + if err = ValidateSessionID(sqlliveness.SessionID(b)); err != nil { + return nil, nil, err } regionLen := int(b[1]) rest := b[2:] @@ -91,24 +81,30 @@ func UnsafeDecodeSessionID(session sqlliveness.SessionID) (region, id []byte, er return rest[:regionLen], rest[regionLen:], nil } -// SafeDecodeSessionID decodes the region and id from the SessionID. -func SafeDecodeSessionID(session sqlliveness.SessionID) (region, id string, err error) { +// ValidateSessionID validates that the SessionID has the correct format. +func ValidateSessionID(session sqlliveness.SessionID) error { if len(session) == legacyLen { - return "", "", errors.Newf("unexpected legacy SessionID format") + return errors.Newf("unexpected legacy SessionID format") } if len(session) < minimumNonLegacyLen { // The smallest valid v1 session id is a [version, 1, single_byte_region, uuid...], // which is three bytes larger than a uuid. - return "", "", errors.New("session id is too short") + return errors.New("session id is too short") } - // Decode the version. if session[0] != sessionIDVersion { - return "", "", errors.Newf("invalid session id version: %d", session[0]) + return errors.Newf("invalid session id version: %d", session[0]) + } + return nil +} + +// SafeDecodeSessionID decodes the region and id from the SessionID. +func SafeDecodeSessionID(session sqlliveness.SessionID) (region, id string, err error) { + if err = ValidateSessionID(session); err != nil { + return "", "", err } regionLen := int(session[1]) rest := session[2:] - // Decode and validate the length of the region. if len(rest) != regionLen+uuid.Size { return "", "", errors.Newf("session id with length %d is the wrong size to include a region with length %d", len(session), regionLen) diff --git a/pkg/sql/sqlliveness/slstorage/slstorage.go b/pkg/sql/sqlliveness/slstorage/slstorage.go index 4cf51723f83c..2abe9088755f 100644 --- a/pkg/sql/sqlliveness/slstorage/slstorage.go +++ b/pkg/sql/sqlliveness/slstorage/slstorage.go @@ -200,7 +200,15 @@ const ( func (s *Storage) isAlive( ctx context.Context, sid sqlliveness.SessionID, syncOrAsync readType, ) (alive bool, _ error) { - + // Confirm the session ID has the correct format, and if it + // doesn't then we can consider it as dead without any extra + // work. + if err := s.keyCodec.validate(sid); err != nil { + // This SessionID may be invalid because of the wrong format + // so consider it as dead. + //nolint:returnerrcheck + return false, nil + } // If wait is false, alive is set and future is unset. // If wait is true, alive is unset and future is set. alive, wait, future, err := func() (bool, bool, singleflight.Future, error) { @@ -318,6 +326,9 @@ func (s *Storage) deleteOrFetchSession( ctx = multitenant.WithTenantCostControlExemption(ctx) livenessProber := regionliveness.NewLivenessProber(s.db, s.codec, nil, s.settings) k, regionPhysicalRep, err := s.keyCodec.encode(sid) + if err != nil { + return false, hlc.Timestamp{}, err + } if err := s.txn(ctx, func(ctx context.Context, txn *kv.Txn) error { // Reset captured variable in case of retry. deleted, expiration, prevExpiration = false, hlc.Timestamp{}, hlc.Timestamp{}