defect: Fixed minimum alphabet length logic and related tests and tes…

…t functions for generating ASCII and Unicode alphabets (#17) * Fixed minimum alphabet length logic and related tests. The `MinAlphabetLength` was incorrectly set to a value of `1`. * Fixed the ASCII and Unicode alphabet test functions to correctly generate the range requested. **NOTE:** The printable Unicode range is extensive and varies widely across different scripts and symbol sets, as Unicode was designed to represent characters from numerous languages, symbols, and emojis. Unlike ASCII, Unicode doesn’t have a simple, contiguous range for all printable characters. However, there are several primary ranges in Unicode where printable characters are defined: 1. Basic Multilingual Plane (BMP): The majority of commonly used printable characters are in the BMP, which spans 0x0020 to 0xFFFF (decimal 32 to 65,535). This plane includes: - Latin characters (including ASCII, starting from 0x0020 for space). - Greek, Cyrillic, Hebrew, Arabic, and other alphabets. - Mathematical symbols, punctuation, and various technical symbols. - Chinese, Japanese, and Korean (CJK) characters. - Emojis and other miscellaneous symbols. 2. Supplementary Multilingual Plane (SMP): Includes additional printable characters, such as: - Historic scripts. - Musical notation. - Extended emoji sets. - This plane spans 0x10000 to 0x1FFFF. 3. Supplementary Ideographic Plane (SIP): Contains additional Chinese, Japanese, and Korean ideographs from 0x20000 to 0x2FFFF. 4. Other Supplementary Planes: These include various specialized characters, symbols, and private-use areas.
sixafter · Nov 2, 2024 · c711dc9 · c711dc9
1 parent 8f68414
commit c711dc9
Show file tree

Hide file tree

Showing 6 changed files with 108 additions and 48 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -51,13 +51,14 @@ jobs:
         with:
           fetch-depth: 0
 
-      - name: Functional Tests
-        run: |
-          make test
+      - name: Lint
+        uses: golangci/golangci-lint-action@v6
+        with:
+          args: --timeout=30m --config=.golangci.yaml --issues-exit-code=0
 
-      - name: Benchmark Tests
+      - name: Test
         run: |
-          make bench
+          make test
 
       - name: Cover
         run: |
@@ -67,11 +68,6 @@ jobs:
         run: |
           make analyze          
 
-      - name: Lint
-        uses: golangci/golangci-lint-action@v6
-        with:
-          args: --timeout=30m --config=.golangci.yaml --issues-exit-code=0
-
       # Ref: https://github.com/actions/cache
       - name: Cache SonarCloud Packages
         uses: actions/cache@v4
@@ -88,4 +84,8 @@ jobs:
         env:
           SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
           GITHUB_USER: ${{ github.actor }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Benchmark Tests
+        run: |
+          make bench
diff --git a/CHANGELOG/CHANGELOG-1.x.md b/CHANGELOG/CHANGELOG-1.x.md
@@ -15,6 +15,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 ### Security
 
+---
+## [1.10.1] - 2024-NOV-02
+
+### Added
+### Changed
+### Deprecated
+### Removed
+### Fixed
+- **DEFECT:** Fixed minimum alphabet length logic and related tests and test functions for generating ASCII and Unicode alphabets.
+### Security
+
 ---
 ## [1.10.0] - 2024-NOV-01
 
@@ -193,7 +204,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 ### Security
 
-[Unreleased]: https://github.com/scriptures-social/platform/compare/v1.10.0...HEAD
+[Unreleased]: https://github.com/sixafter/nanoid/compare/v1.10.1...HEAD
+[1.10.1]: https://github.com/sixafter/nanoid/compare/v1.10.0...v1.10.1
 [1.10.0]: https://github.com/sixafter/nanoid/compare/v1.9.0...v1.10.0
 [1.9.0]: https://github.com/sixafter/nanoid/compare/v1.8.2...v1.9.0
 [1.8.2]: https://github.com/sixafter/nanoid/compare/v1.8.1...v1.8.2

diff --git a/README.md b/README.md
@@ -235,7 +235,8 @@ The nanoid module defines several error types to handle various failure scenario
 * `ErrInvalidLength`: Returned when a non-positive Nano ID length is specified. 
 * `ErrInvalidAlphabet`: Returned when an alphabet is invalid; e.g. due to length constraints.
 * `ErrNonUTF8Alphabet`: Returned when an alphabet contains invalid UTF-8 characters.
-* `ErrAlphabetTooLong`: Returned when an alphabet length exceeds 256 character.
+* `ErrAlphabetTooShort`: Returned when an alphabet length is less than 2 characters.
+* `ErrAlphabetTooLong`: Returned when an alphabet length exceeds 256 characters.
 
 ## Constants
 

diff --git a/nanoid.go b/nanoid.go
@@ -61,6 +61,7 @@ var (
 	ErrInvalidLength       = errors.New("invalid length")
 	ErrInvalidAlphabet     = errors.New("invalid alphabet")
 	ErrNonUTF8Alphabet     = errors.New("alphabet contains invalid UTF-8 characters")
+	ErrAlphabetTooShort    = errors.New("alphabet length is less than 2")
 	ErrAlphabetTooLong     = errors.New("alphabet length exceeds 256")
 )
 
@@ -79,7 +80,7 @@ const (
 	bufferMultiplier = 128
 
 	// MinAlphabetLength defines the minimum allowed length for the alphabet.
-	MinAlphabetLength = 1
+	MinAlphabetLength = 2
 
 	// MaxAlphabetLength defines the maximum allowed length for the alphabet.
 	MaxAlphabetLength = 256
@@ -256,7 +257,7 @@ func buildRuntimeConfig(opts *ConfigOptions) (*runtimeConfig, error) {
 	}
 
 	if alphabetLen < MinAlphabetLength {
-		return nil, ErrInvalidAlphabet
+		return nil, ErrAlphabetTooShort
 	}
 
 	// Represents how many bits are required to generate an index for selecting a character from the alphabet.

diff --git a/nanoid_benchmark_test.go b/nanoid_benchmark_test.go
@@ -11,39 +11,58 @@ import (
 	"testing"
 )
 
-// Helper function to create an ASCII-based alphabet of a specified length
+// makeASCIIBasedAlphabet returns the first length characters of the printable ASCII
+// range '!' (33) through '~' (126), in ascending order, so the result never contains
+// duplicates. Requests larger than the 94 characters in that range are capped at 94.
+// There is no lower clamp, so tests can build alphabets shorter than MinAlphabetLength;
+// a negative length is not guarded against and would panic in make.
 func makeASCIIBasedAlphabet(length int) string {
 	const (
-		start = 33  // '!'
-		end   = 126 // '~'
+		start = 33  // (!)
+		end   = 126 // (~)
 	)
-	// Ensure the length is within the printable ASCII range
-	if length < 2 {
-		length = 2
-	}
-	if length > end-start+1 {
-		length = end - start + 1
+	// Calculate the number of unique printable ASCII characters in the range (94)
+	rangeSize := end - start + 1
+
+	// Ensure the length does not exceed the number of unique characters
+	if length > rangeSize {
+		length = rangeSize
 	}
+
 	alphabet := make([]byte, length)
 	for i := 0; i < length; i++ {
 		alphabet[i] = byte(start + i)
 	}
 	return string(alphabet)
 }
 
-// Helper function to create a Unicode alphabet of a specified length
+// Helper function to create a Unicode alphabet of a specified length without duplicates
+// The printable Unicode range is extensive and varies widely across different scripts and symbol sets, as Unicode was designed to represent characters from numerous languages, symbols, and emojis. Unlike ASCII, Unicode doesn’t have a simple, contiguous range for all printable characters. However, there are several primary ranges in Unicode where printable characters are defined:
+// 1. Basic Multilingual Plane (BMP): The majority of commonly used printable characters are in the BMP, which spans 0x0020 to 0xFFFF (decimal 32 to 65,535). This plane includes:
+//   - Latin characters (including ASCII, starting from 0x0020 for space).
+//   - Greek, Cyrillic, Hebrew, Arabic, and other alphabets.
+//   - Mathematical symbols, punctuation, and various technical symbols.
+//   - Chinese, Japanese, and Korean (CJK) characters.
+//   - Emojis and other miscellaneous symbols.
+//
+// 2. Supplementary Multilingual Plane (SMP): Includes additional printable characters, such as:
+//   - Historic scripts.
+//   - Musical notation.
+//   - Extended emoji sets.
+//   - This plane spans 0x10000 to 0x1FFFF.
+//
+// 3. Supplementary Ideographic Plane (SIP): Contains additional Chinese, Japanese, and Korean ideographs from 0x20000 to 0x2FFFF.
+// 4. Other Supplementary Planes: These include various specialized characters, symbols, and private-use areas.
 func makeUnicodeAlphabet(length int) string {
+	// Greek and Coptic block (U+0370–U+03FF) plus the first half of the Cyrillic block (U+0400–U+047F)
 	const (
-		start = 0x0905 // 'अ'
-		end   = 0x0939 // 'ह'
+		start = 0x0370 // U+0370 (Ͱ)
+		end   = 0x047F // U+047F (ѿ)
 	)
-	// Ensure the length is within the specified Unicode range
-	if length < 2 {
-		length = 2
-	}
-	if length > end-start+1 {
-		length = end - start + 1
+	// Calculate the number of unique runes in the range (272, which is more than MaxAlphabetLength)
+	rangeSize := end - start + 1
+
+	// Ensure the length does not exceed the number of unique characters
+	if length > rangeSize {
+		length = rangeSize
 	}
+
 	var builder strings.Builder
 	for i := 0; i < length; i++ {
 		builder.WriteRune(rune(start + i))

diff --git a/nanoid_test.go b/nanoid_test.go
@@ -103,6 +103,47 @@ func TestGenerateWithDuplicateAlphabet(t *testing.T) {
 	is.Equal(ErrDuplicateCharacters, err, "Expected ErrDuplicateCharacters")
 }
 
+// TestNewGeneratorWithInvalidAlphabet checks NewGenerator against ASCII and Unicode alphabets
+// requested at the boundary lengths 1, 2, 256 and 257. Alphabets shorter than MinAlphabetLength
+// must fail with ErrAlphabetTooShort, alphabets longer than MaxAlphabetLength must fail with
+// ErrAlphabetTooLong, and every other alphabet must be accepted without error.
+func TestNewGeneratorWithInvalidAlphabet(t *testing.T) {
+	t.Parallel()
+	is := assert.New(t)
+
+	// Requested lengths just below, at, and just above the allowed bounds
+	lengths := []int{1, 2, 256, 257}
+
+	// Define the alphabet types to test
+	alphabetTypes := []string{"ASCII", "Unicode"}
+
+	for _, alphabetType := range alphabetTypes {
+		for _, length := range lengths {
+			// Build the alphabet for this type and requested length
+			var alphabet string
+			if alphabetType == "ASCII" {
+				alphabet = makeASCIIBasedAlphabet(length)
+			} else {
+				alphabet = makeUnicodeAlphabet(length)
+			}
+			gen, err := NewGenerator(
+				WithAlphabet(alphabet),
+			)
+
+			// Classify by the number of runes actually produced, not by the requested length:
+			// the helpers cap the request at the size of their range (94 for ASCII), so the
+			// produced alphabet can be shorter than requested.
+			alphabetRunes := []rune(alphabet)
+			l := len(alphabetRunes)
+			switch true {
+			case l < MinAlphabetLength:
+				is.Error(err, "NewGenerator() should return an error with an invalid alphabet length")
+				is.Nil(gen, "Generator should be nil when initialization fails")
+				is.Equal(ErrAlphabetTooShort, err, "Expected ErrAlphabetTooShort")
+			case l > MaxAlphabetLength:
+				is.Error(err, "NewGenerator() should return an error with an invalid alphabet length")
+				is.Nil(gen, "Generator should be nil when initialization fails")
+				is.Equal(ErrAlphabetTooLong, err, "Expected ErrAlphabetTooLong")
+			default:
+				is.NoError(err, "NewGenerator() should not return an error when initialization succeeds")
+			}
+		}
+	}
+}
+
 // TestGetConfig tests the Config() method of the generator.
 func TestGetConfig(t *testing.T) {
 	t.Parallel()
@@ -128,6 +169,7 @@ func TestGetConfig(t *testing.T) {
 
 	is.Equal((runtimeConfig.AlphabetLen()&(runtimeConfig.AlphabetLen()-1)) == 0, runtimeConfig.IsPowerOfTwo(), "Config.IsPowerOfTwo should be correct")
 
+	is.Positive(runtimeConfig.BufferSize(), "Config.BufferSize should be a positive integer")
 	is.Positive(runtimeConfig.BitsNeeded(), "Config.BitsNeeded should be a positive integer")
 	is.Positive(runtimeConfig.BytesNeeded(), "Config.BytesNeeded should be a positive integer")
 	is.Equal(rand.Reader, runtimeConfig.RandReader(), "Config.RandReader should be rand.Reader by default")
@@ -194,21 +236,6 @@ func TestConcurrency(t *testing.T) {
 	}
 }
 
-// TestInvalidAlphabetLength tests that alphabets with invalid lengths are rejected.
-func TestInvalidAlphabetLength(t *testing.T) {
-	t.Parallel()
-	is := assert.New(t)
-
-	// Alphabet length less than 2
-	shortAlphabet := "a"
-	gen, err := NewGenerator(
-		WithAlphabet(shortAlphabet),
-	)
-	is.Error(err, "NewGenerator() should return an error for alphabets shorter than 2 characters")
-	is.Nil(gen, "Generator should be nil when initialization fails")
-	is.Equal(ErrInvalidAlphabet, err, "Expected ErrInvalidAlphabet")
-}
-
 // isValidID checks if all characters in the ID are within the specified alphabet.
 func isValidID(id string, alphabet string) bool {
 	alphabetSet := make(map[rune]struct{}, len([]rune(alphabet)))