From c711dc95449330ec7d7d2345d3fcbb29e1cd945c Mon Sep 17 00:00:00 2001 From: Michael Primeaux Date: Sat, 2 Nov 2024 13:23:27 -0500 Subject: [PATCH] defect: Fixed minimum alphabet length logic and related tests and test functions for generating ASCII and Unicode alphabets (#17) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fixed minimum alphabet length logic and related tests. The `MinAlphabetLength` was incorrectly set to a value of `1`. * Fixed the ASCII and Unicode alphabet test functions to correctly generate the range requested. **NOTE:** The printable Unicode range is extensive and varies widely across different scripts and symbol sets, as Unicode was designed to represent characters from numerous languages, symbols, and emojis. Unlike ASCII, Unicode doesn’t have a simple, contiguous range for all printable characters. However, there are several primary ranges in Unicode where printable characters are defined: 1. Basic Multilingual Plane (BMP): The majority of commonly used printable characters are in the BMP, which spans 0x0020 to 0xFFFF (decimal 32 to 65,535). This plane includes: - Latin characters (including ASCII, starting from 0x0020 for space). - Greek, Cyrillic, Hebrew, Arabic, and other alphabets. - Mathematical symbols, punctuation, and various technical symbols. - Chinese, Japanese, and Korean (CJK) characters. - Emojis and other miscellaneous symbols. 2. Supplementary Multilingual Plane (SMP): Includes additional printable characters, such as: - Historic scripts. - Musical notation. - Extended emoji sets. - This plane spans 0x10000 to 0x1FFFF. 3. Supplementary Ideographic Plane (SIP): Contains additional Chinese, Japanese, and Korean ideographs from 0x20000 to 0x2FFFF. 4. Other Supplementary Planes: These include various specialized characters, symbols, and private-use areas. 
--- .github/workflows/ci.yaml | 22 +++++++-------- CHANGELOG/CHANGELOG-1.x.md | 14 +++++++++- README.md | 3 +- nanoid.go | 5 ++-- nanoid_benchmark_test.go | 55 ++++++++++++++++++++++++------------ nanoid_test.go | 57 ++++++++++++++++++++++++++++---------- 6 files changed, 108 insertions(+), 48 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6e693ab..b6c021a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -51,13 +51,14 @@ jobs: with: fetch-depth: 0 - - name: Functional Tests - run: | - make test + - name: Lint + uses: golangci/golangci-lint-action@v6 + with: + args: --timeout=30m --config=.golangci.yaml --issues-exit-code=0 - - name: Benchmark Tests + - name: Test run: | - make bench + make test - name: Cover run: | @@ -67,11 +68,6 @@ jobs: run: | make analyze - - name: Lint - uses: golangci/golangci-lint-action@v6 - with: - args: --timeout=30m --config=.golangci.yaml --issues-exit-code=0 - # Ref: https://github.com/actions/cache - name: Cache SonarCloud Packages uses: actions/cache@v4 @@ -88,4 +84,8 @@ jobs: env: SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} GITHUB_USER: ${{ github.actor }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Benchmark Tests + run: | + make bench diff --git a/CHANGELOG/CHANGELOG-1.x.md b/CHANGELOG/CHANGELOG-1.x.md index f83ebba..97c9b15 100644 --- a/CHANGELOG/CHANGELOG-1.x.md +++ b/CHANGELOG/CHANGELOG-1.x.md @@ -15,6 +15,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### Security +--- +## [1.10.1] - 2024-NOV-02 + +### Added +### Changed +### Deprecated +### Removed +### Fixed +- **DEFECT:** Fixed minimum alphabet length logic and related tests and test functions for generating ASCII and Unicode alphabets. 
+### Security + --- ## [1.10.0] - 2024-NOV-01 @@ -193,7 +204,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### Security -[Unreleased]: https://github.com/scriptures-social/platform/compare/v1.10.0...HEAD +[Unreleased]: https://github.com/sixafter/nanoid/compare/v1.10.1...HEAD +[1.10.1]: https://github.com/sixafter/nanoid/compare/v1.10.0...v1.10.1 [1.10.0]: https://github.com/sixafter/nanoid/compare/v1.9.0...v1.10.0 [1.9.0]: https://github.com/sixafter/nanoid/compare/v1.8.2...v1.9.0 [1.8.2]: https://github.com/sixafter/nanoid/compare/v1.8.1...v1.8.2 diff --git a/README.md b/README.md index fb03bbe..152e7fa 100644 --- a/README.md +++ b/README.md @@ -235,7 +235,8 @@ The nanoid module defines several error types to handle various failure scenario * `ErrInvalidLength`: Returned when a non-positive Nano ID length is specified. * `ErrInvalidAlphabet`: Returned when an alphabet is invalid; e.g. due to length constraints. * `ErrNonUTF8Alphabet`: Returned when an alphabet contains invalid UTF-8 characters. -* `ErrAlphabetTooLong`: Returned when an alphabet length exceeds 256 character. +* `ErrAlphabetTooShort`: Returned when an alphabet length is less than 2 characters. +* `ErrAlphabetTooLong`: Returned when an alphabet length exceeds 256 characters. ## Constants diff --git a/nanoid.go b/nanoid.go index bea4a93..bba88ce 100644 --- a/nanoid.go +++ b/nanoid.go @@ -61,6 +61,7 @@ var ( ErrInvalidLength = errors.New("invalid length") ErrInvalidAlphabet = errors.New("invalid alphabet") ErrNonUTF8Alphabet = errors.New("alphabet contains invalid UTF-8 characters") + ErrAlphabetTooShort = errors.New("alphabet length is less than 2") ErrAlphabetTooLong = errors.New("alphabet length exceeds 256") ) @@ -79,7 +80,7 @@ const ( bufferMultiplier = 128 // MinAlphabetLength defines the minimum allowed length for the alphabet. 
- MinAlphabetLength = 1 + MinAlphabetLength = 2 // MaxAlphabetLength defines the maximum allowed length for the alphabet. MaxAlphabetLength = 256 @@ -256,7 +257,7 @@ func buildRuntimeConfig(opts *ConfigOptions) (*runtimeConfig, error) { } if alphabetLen < MinAlphabetLength { - return nil, ErrInvalidAlphabet + return nil, ErrAlphabetTooShort } // Represents how many bits are required to generate an index for selecting a character from the alphabet. diff --git a/nanoid_benchmark_test.go b/nanoid_benchmark_test.go index 5326298..a46557b 100644 --- a/nanoid_benchmark_test.go +++ b/nanoid_benchmark_test.go @@ -11,19 +11,20 @@ import ( "testing" ) -// Helper function to create an ASCII-based alphabet of a specified length +// Helper function to create an ASCII-based alphabet of a specified length without duplicates func makeASCIIBasedAlphabet(length int) string { const ( - start = 33 // '!' - end = 126 // '~' + start = 33 // (!) + end = 126 // (~) ) - // Ensure the length is within the printable ASCII range - if length < 2 { - length = 2 - } - if length > end-start+1 { - length = end - start + 1 + // Calculate the number of unique printable ASCII characters in the range + rangeSize := end - start + 1 + + // Ensure the length does not exceed the number of unique characters + if length > rangeSize { + length = rangeSize } + alphabet := make([]byte, length) for i := 0; i < length; i++ { alphabet[i] = byte(start + i) @@ -31,19 +32,37 @@ func makeASCIIBasedAlphabet(length int) string { return string(alphabet) } -// Helper function to create a Unicode alphabet of a specified length +// Helper function to create a Unicode alphabet of a specified length without duplicates +// The printable Unicode range is extensive and varies widely across different scripts and symbol sets, as Unicode was designed to represent characters from numerous languages, symbols, and emojis. Unlike ASCII, Unicode doesn’t have a simple, contiguous range for all printable characters. 
However, there are several primary ranges in Unicode where printable characters are defined: +// 1. Basic Multilingual Plane (BMP): The majority of commonly used printable characters are in the BMP, which spans 0x0020 to 0xFFFF (decimal 32 to 65,535). This plane includes: +// - Latin characters (including ASCII, starting from 0x0020 for space). +// - Greek, Cyrillic, Hebrew, Arabic, and other alphabets. +// - Mathematical symbols, punctuation, and various technical symbols. +// - Chinese, Japanese, and Korean (CJK) characters. +// - Emojis and other miscellaneous symbols. +// +// 2. Supplementary Multilingual Plane (SMP): Includes additional printable characters, such as: +// - Historic scripts. +// - Musical notation. +// - Extended emoji sets. +// - This plane spans 0x10000 to 0x1FFFF. +// +// 3. Supplementary Ideographic Plane (SIP): Contains additional Chinese, Japanese, and Korean ideographs from 0x20000 to 0x2FFFF. +// 4. Other Supplementary Planes: These include various specialized characters, symbols, and private-use areas. 
func makeUnicodeAlphabet(length int) string { + // Greek and Coptic block through the first half of the Cyrillic block (U+0370 to U+047F) const ( - start = 0x0905 // 'अ' - end = 0x0939 // 'ह' + start = 0x0370 // (Ͱ) + end = 0x047F // (ѿ) ) - // Ensure the length is within the specified Unicode range - if length < 2 { - length = 2 - } - if length > end-start+1 { - length = end - start + 1 + // Calculate the number of unique runes in the range + rangeSize := end - start + 1 + + // Ensure the length does not exceed the number of unique characters + if length > rangeSize { + length = rangeSize } + var builder strings.Builder for i := 0; i < length; i++ { builder.WriteRune(rune(start + i)) diff --git a/nanoid_test.go b/nanoid_test.go index d0126cd..dd9140a 100644 --- a/nanoid_test.go +++ b/nanoid_test.go @@ -103,6 +103,47 @@ func TestGenerateWithDuplicateAlphabet(t *testing.T) { is.Equal(ErrDuplicateCharacters, err, "Expected ErrDuplicateCharacters") } +// TestNewGeneratorWithInvalidAlphabet tests that the generator returns an error with invalid alphabets. 
+func TestNewGeneratorWithInvalidAlphabet(t *testing.T) { + t.Parallel() + is := assert.New(t) + + lengths := []int{1, 2, 256, 257} + + // Define the alphabet types to test + alphabetTypes := []string{"ASCII", "Unicode"} + + for _, alphabetType := range alphabetTypes { + for _, length := range lengths { + // Create the appropriate alphabet + var alphabet string + if alphabetType == "ASCII" { + alphabet = makeASCIIBasedAlphabet(length) + } else { + alphabet = makeUnicodeAlphabet(length) + } + gen, err := NewGenerator( + WithAlphabet(alphabet), + ) + + alphabetRunes := []rune(alphabet) + l := len(alphabetRunes) + switch true { + case l < MinAlphabetLength: + is.Error(err, "NewGenerator() should return an error with an invalid alphabet length") + is.Nil(gen, "Generator should be nil when initialization fails") + is.Equal(ErrAlphabetTooShort, err, "Expected ErrAlphabetTooShort") + case l > MaxAlphabetLength: + is.Error(err, "NewGenerator() should return an error with an invalid alphabet length") + is.Nil(gen, "Generator should be nil when initialization fails") + is.Equal(ErrAlphabetTooLong, err, "Expected ErrAlphabetTooLong") + default: + is.NoError(err, "NewGenerator() should not return an error when initialization succeeds") + } + } + } +} + // TestGetConfig tests the Config() method of the generator. 
func TestGetConfig(t *testing.T) { t.Parallel() @@ -128,6 +169,7 @@ func TestGetConfig(t *testing.T) { is.Equal((runtimeConfig.AlphabetLen()&(runtimeConfig.AlphabetLen()-1)) == 0, runtimeConfig.IsPowerOfTwo(), "Config.IsPowerOfTwo should be correct") + is.Positive(runtimeConfig.BufferSize(), "Config.BufferSize should be a positive integer") is.Positive(runtimeConfig.BitsNeeded(), "Config.BitsNeeded should be a positive integer") is.Positive(runtimeConfig.BytesNeeded(), "Config.BytesNeeded should be a positive integer") is.Equal(rand.Reader, runtimeConfig.RandReader(), "Config.RandReader should be rand.Reader by default") @@ -194,21 +236,6 @@ func TestConcurrency(t *testing.T) { } } -// TestInvalidAlphabetLength tests that alphabets with invalid lengths are rejected. -func TestInvalidAlphabetLength(t *testing.T) { - t.Parallel() - is := assert.New(t) - - // Alphabet length less than 2 - shortAlphabet := "a" - gen, err := NewGenerator( - WithAlphabet(shortAlphabet), - ) - is.Error(err, "NewGenerator() should return an error for alphabets shorter than 2 characters") - is.Nil(gen, "Generator should be nil when initialization fails") - is.Equal(ErrInvalidAlphabet, err, "Expected ErrInvalidAlphabet") -} - // isValidID checks if all characters in the ID are within the specified alphabet. func isValidID(id string, alphabet string) bool { alphabetSet := make(map[rune]struct{}, len([]rune(alphabet)))