-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.go
456 lines (396 loc) · 17.2 KB
/
config.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
// Copyright (c) 2024 Six After, Inc
//
// This source code is licensed under the Apache 2.0 License found in the
// LICENSE file in the root directory of this source tree.
package nanoid
import (
"io"
"math"
"math/bits"
"unicode"
"unicode/utf8"
)
// ConfigOptions holds the user-configurable options for the Generator.
// It is populated by applying Option values (the functional options pattern)
// and is then passed to buildRuntimeConfig, which validates it and derives
// the runtime configuration.
type ConfigOptions struct {
// RandReader is the source of randomness used for generating IDs.
// By default, it uses x/crypto/prng/Reader, which provides cryptographically secure random bytes.
RandReader io.Reader
// Alphabet is the set of characters used to generate the Nano ID.
// It must be a non-empty, valid UTF-8 string containing between 2 and 256 unique characters;
// duplicate characters are rejected.
Alphabet string
// LengthHint specifies a typical or default length for generated IDs.
// It is only used to size the internal random-data buffer and should be non-zero.
LengthHint uint16
}
// Config exposes the runtime configuration of the Nano ID generator.
//
// All values are derived once from ConfigOptions when the configuration is
// built; the interface offers read-only accessors and no way to change them.
type Config interface {
// AlphabetLen returns the number of characters (runes) in the configured alphabet.
//
// This length determines the range of indices for selecting characters during ID generation.
// Duplicate characters are rejected when the configuration is built, so every counted character is unique.
AlphabetLen() uint16
// BaseMultiplier returns the foundational multiplier used in buffer size calculations.
//
// It is the base-2 logarithm of (LengthHint + 2), rounded up.
// This helps scale the buffer size appropriately with different ID lengths.
BaseMultiplier() int
// BitsNeeded returns the minimum number of bits required to represent all possible indices of the alphabet.
//
// This value is crucial for generating random numbers that map uniformly to the alphabet indices without bias.
BitsNeeded() uint
// BufferMultiplier returns the combined multiplier used in the buffer size calculation.
//
// It adds a fraction of the scaling factor to the base multiplier to fine-tune the buffer size,
// considering both the ID length and the alphabet size.
BufferMultiplier() int
// BufferSize returns the total size of the buffer (in bytes) used for generating random data.
//
// The buffer size is calculated to balance efficiency and performance,
// minimizing calls to the random number generator by reading larger chunks of random data at once.
BufferSize() int
// ByteAlphabet returns the slice of bytes representing the alphabet,
// used when the alphabet consists solely of ASCII characters.
//
// For non-ASCII alphabets, this returns nil, and RuneAlphabet is used instead.
ByteAlphabet() []byte
// BytesNeeded returns the number of whole bytes required to hold BitsNeeded bits,
// i.e. the amount of random data consumed per generated character.
//
// It rounds BitsNeeded up to the next multiple of 8 bits.
BytesNeeded() uint
// IsASCII returns true if the alphabet consists solely of ASCII characters.
//
// This allows for optimization in processing, using bytes instead of runes for ID generation.
IsASCII() bool
// IsPowerOfTwo returns true if the length of the alphabet is a power of two.
//
// When true, random index selection can be optimized using bitwise operations,
// such as bitwise AND with the mask, improving performance.
IsPowerOfTwo() bool
// LengthHint returns the intended length of the IDs to be generated.
//
// This hint is used in calculations to adjust buffer sizes and scaling factors accordingly.
LengthHint() uint16
// MaxBytesPerRune returns the maximum number of bytes required to encode
// any rune in the alphabet using UTF-8 encoding.
//
// This value is computed during
// configuration based on the provided alphabet and is used to preallocate the
// buffer size in the newUnicode function. By accurately estimating the buffer size,
// we ensure efficient string building without unnecessary memory allocations
// or buffer resizing.
//
// For example, if the alphabet includes only ASCII and Latin-1 characters, each rune
// requires at most 2 bytes. However, if the alphabet includes emojis or other
// multibyte characters, this value could be up to 4 bytes.
MaxBytesPerRune() int
// Mask returns the bitmask used to extract the necessary bits from randomly generated bytes.
//
// The mask has the lowest BitsNeeded bits set. It is essential for efficiently mapping
// random values to valid alphabet indices, ensuring uniform distribution and preventing bias.
Mask() uint
// RandReader returns the source of randomness used for generating IDs.
//
// It is typically a cryptographically secure random number generator (e.g., crypto/rand.Reader).
RandReader() io.Reader
// RuneAlphabet returns the slice of runes representing the alphabet.
//
// This is used for ID generation when the alphabet includes non-ASCII (multibyte) characters,
// allowing support for a wider range of characters.
RuneAlphabet() []rune
// ScalingFactor returns the scaling factor used to adjust the buffer size.
//
// It balances the influence of the alphabet size and the intended ID length,
// ensuring efficient random data generation without excessive memory usage.
// It is never smaller than 3.
ScalingFactor() int
}
// Configuration is implemented by types that can report the runtime
// configuration they were built with.
type Configuration interface {
// Config returns the runtime configuration of the generator.
Config() Config
}
// Option is a functional option: a function that mutates a ConfigOptions
// value. Options are applied to the ConfigOptions during Generator
// initialization, which keeps the constructor signature stable as new
// settings are added.
type Option func(*ConfigOptions)
// WithAlphabet returns an Option that replaces the alphabet from which
// Nano ID characters are drawn. Use it to restrict IDs to, for example,
// alphanumeric characters, or to allow symbols or non-ASCII characters.
//
// The option only stores the string in ConfigOptions.Alphabet; it performs
// no validation itself.
//
// Parameters:
//   - alphabet string: The set of characters to use for ID generation.
//
// Returns:
//   - Option: A configuration option that applies the custom alphabet to ConfigOptions.
//
// Usage:
//
//	generator, err := nanoid.NewGenerator(nanoid.WithAlphabet("abcdef123456"))
func WithAlphabet(alphabet string) Option {
	return func(opts *ConfigOptions) {
		opts.Alphabet = alphabet
	}
}
// WithRandReader returns an Option that replaces the Generator's source of
// random bytes. The default source is cryptographically secure; supplying a
// different reader is mainly useful for tests or for integrating another
// entropy source.
//
// The option only stores the reader in ConfigOptions.RandReader; it does not
// check it (a nil reader is stored as-is).
//
// Parameters:
//   - reader io.Reader: An implementation of io.Reader that supplies random data.
//
// Returns:
//   - Option: A configuration option that applies the custom random reader to ConfigOptions.
//
// Usage Example:
//
//	customReader := myCustomRandomReader()
//	generator, err := nanoid.NewGenerator(
//		nanoid.WithRandReader(customReader))
func WithRandReader(reader io.Reader) Option {
	return func(opts *ConfigOptions) {
		opts.RandReader = reader
	}
}
// WithLengthHint returns an Option that records the expected length of the
// IDs that will be generated. The hint feeds the buffer-size and
// scaling-factor calculations performed when the runtime configuration is
// built, which helps when many IDs of a similar length are generated.
//
// The option only stores the value in ConfigOptions.LengthHint.
//
// Parameters:
//   - hint uint16: A non-zero unsigned integer representing the anticipated length of the Nano IDs.
//
// Returns:
//   - Option: A configuration option that applies the length hint to ConfigOptions.
//
// Usage Example:
//
//	generator, err := nanoid.NewGenerator(nanoid.WithLengthHint(21))
func WithLengthHint(hint uint16) Option {
	return func(opts *ConfigOptions) {
		opts.LengthHint = hint
	}
}
// runtimeConfig holds the runtime configuration for the Nano ID generator.
// It is populated once by buildRuntimeConfig and only read afterwards through
// its accessor methods.
//
// Fields are ordered from largest to smallest to keep padding to a minimum;
// the sizes in the trailing comments are for 64-bit platforms.
type runtimeConfig struct {
randReader io.Reader // 16 bytes
byteAlphabet []byte // 24 bytes
runeAlphabet []rune // 24 bytes
mask uint // 8 bytes
bitsNeeded uint // 8 bytes
bytesNeeded uint // 8 bytes
bufferSize int // 8 bytes
bufferMultiplier int // 8 bytes
scalingFactor int // 8 bytes
baseMultiplier int // 8 bytes
maxBytesPerRune int // 8 bytes
alphabetLen uint16 // 2 bytes
lengthHint uint16 // 2 bytes
isASCII bool // 1 byte
isPowerOfTwo bool // 1 byte
}
// buildRuntimeConfig validates opts and derives the runtime configuration
// used by the generator.
//
// Validation is performed in this order; the first failure is returned:
//   - empty alphabet                          -> ErrInvalidAlphabet
//   - alphabet is not valid UTF-8             -> ErrNonUTF8Alphabet
//   - a rune has no UTF-8 encoding            -> ErrInvalidAlphabet
//   - a rune occurs more than once            -> ErrDuplicateCharacters
//   - more than MaxAlphabetLength characters  -> ErrAlphabetTooLong
//   - fewer than MinAlphabetLength characters -> ErrAlphabetTooShort
//
// The alphabet length is checked as an int before it is narrowed to uint16,
// so very long alphabets cannot wrap around and slip past the limits.
//
// opts.LengthHint is expected to be non-zero. A zero hint is treated as 1
// when computing the scaling factor so that the computation never divides by
// zero; the hint itself is stored unchanged.
//
// opts.RandReader is stored as-is and is not validated here.
func buildRuntimeConfig(opts *ConfigOptions) (*runtimeConfig, error) {
	if len(opts.Alphabet) == 0 {
		return nil, ErrInvalidAlphabet
	}

	// Check if the alphabet is valid UTF-8.
	if !utf8.ValidString(opts.Alphabet) {
		return nil, ErrNonUTF8Alphabet
	}

	alphabetRunes := []rune(opts.Alphabet)
	runeCount := len(alphabetRunes)

	// Build the byte view of the alphabet optimistically; it is discarded
	// below as soon as a single non-ASCII rune is found.
	isASCII := true
	byteAlphabet := make([]byte, runeCount)
	maxBytesPerRune := 1 // 1 is correct for a pure ASCII alphabet
	for i, r := range alphabetRunes {
		if r <= unicode.MaxASCII {
			byteAlphabet[i] = byte(r)
			continue
		}

		isASCII = false

		// Track the widest UTF-8 encoding among the alphabet's runes.
		runeBytes := utf8.RuneLen(r)
		if runeBytes < 0 {
			return nil, ErrInvalidAlphabet
		}
		if runeBytes > maxBytesPerRune {
			maxBytesPerRune = runeBytes
		}
	}
	if !isASCII {
		// The rune alphabet is used instead when non-ASCII characters are present.
		byteAlphabet = nil
	}

	// Check for duplicate characters.
	seenRunes := make(map[rune]struct{}, runeCount)
	for _, r := range alphabetRunes {
		if _, dup := seenRunes[r]; dup {
			return nil, ErrDuplicateCharacters
		}
		seenRunes[r] = struct{}{}
	}

	// Ensure the alphabet length adheres to predefined constraints. The
	// comparison is done on the int length: converting to uint16 first would
	// silently wrap for alphabets of 65,536 or more characters.
	if runeCount > int(MaxAlphabetLength) || runeCount > math.MaxUint16 {
		return nil, ErrAlphabetTooLong
	}
	if runeCount < int(MinAlphabetLength) {
		return nil, ErrAlphabetTooShort
	}

	// The length of the alphabet, representing the number of unique characters available for ID generation.
	alphabetLen := uint16(runeCount)

	// Calculate the minimum number of bits needed to represent all indices of the alphabet.
	// bits.Len yields the position of the highest set bit in alphabetLen - 1.
	bitsNeeded := uint(bits.Len(uint(alphabetLen - 1)))
	if bitsNeeded == 0 {
		return nil, ErrInvalidAlphabet
	}

	// Create a bitmask that isolates the bits needed to represent the alphabet indices.
	// The mask is used to efficiently extract valid bits from randomly generated bytes.
	mask := uint(1)<<bitsNeeded - 1

	// TODO: Scale bitsNeeded based on length hint (???)
	//adjustedBitsNeeded := bitsNeeded + uint(math.Log2(float64(opts.LengthHint)))

	// Determine the number of bytes required to store 'bitsNeeded' bits, rounding up to the nearest byte.
	bytesNeeded := (bitsNeeded + 7) / 8

	// Check if the alphabet length is a power of two, allowing optimization of modulus operations using bitwise AND.
	isPowerOfTwo := (alphabetLen & (alphabetLen - 1)) == 0

	hint := float64(opts.LengthHint)

	// Calculate a base multiplier for buffer size based on the length hint.
	// Using a logarithm makes the buffer grow slowly with the ID length.
	baseMultiplier := int(math.Ceil(math.Log2(hint + 2.0)))

	// Determine a scaling factor to adjust the buffer size, balancing fewer
	// random reads against memory usage. A zero hint would make the divisor
	// zero and the quotient +Inf, whose conversion to int is
	// implementation-dependent, so the hint is clamped to at least 1 here.
	hintDivisor := math.Pow(math.Max(1.0, hint), 0.6)
	scalingFactor := int(math.Max(3.0, float64(alphabetLen)/hintDivisor))

	// Compute the buffer multiplier by adding the base multiplier and a fraction of the scaling factor.
	bufferMultiplier := baseMultiplier + int(math.Ceil(float64(scalingFactor)/1.5))

	// Calculate the total buffer size in bytes for generating random data.
	// A larger buffer reduces the number of calls to the random number generator.
	bufferSize := bufferMultiplier * int(bytesNeeded) * int(math.Max(1.5, hint/10.0))

	return &runtimeConfig{
		randReader:       opts.RandReader,
		byteAlphabet:     byteAlphabet,
		runeAlphabet:     alphabetRunes,
		mask:             mask,
		bitsNeeded:       bitsNeeded,
		bytesNeeded:      bytesNeeded,
		bufferSize:       bufferSize,
		bufferMultiplier: bufferMultiplier,
		scalingFactor:    scalingFactor,
		baseMultiplier:   baseMultiplier,
		alphabetLen:      alphabetLen,
		isASCII:          isASCII,
		isPowerOfTwo:     isPowerOfTwo,
		lengthHint:       opts.LengthHint,
		maxBytesPerRune:  maxBytesPerRune,
	}, nil
}
// AlphabetLen returns the number of runes in the configured alphabet.
//
// buildRuntimeConfig rejects alphabets containing duplicate runes and alphabets
// outside the MinAlphabetLength..MaxAlphabetLength range, so this is also the
// number of distinct characters available during ID generation.
func (r *runtimeConfig) AlphabetLen() uint16 {
return r.alphabetLen
}
// BaseMultiplier returns the foundational multiplier used in buffer size calculations.
//
// It is computed once as ceil(log2(LengthHint + 2)), so it grows slowly as
// the intended ID length increases.
func (r *runtimeConfig) BaseMultiplier() int {
return r.baseMultiplier
}
// BitsNeeded returns the minimum number of bits required to represent every
// index of the alphabet, i.e. the bit length of AlphabetLen - 1.
//
// Random values are reduced to this many bits before being mapped to alphabet indices.
func (r *runtimeConfig) BitsNeeded() uint {
return r.bitsNeeded
}
// BufferMultiplier returns the combined multiplier used in the buffer size calculation.
//
// It is BaseMultiplier plus ceil(ScalingFactor / 1.5), so it reflects both
// the ID length and the alphabet size.
func (r *runtimeConfig) BufferMultiplier() int {
return r.bufferMultiplier
}
// BufferSize returns the total size of the buffer (in bytes) used for generating random data.
//
// It is BufferMultiplier * BytesNeeded * int(max(1.5, LengthHint/10)).
// Reading random data in larger chunks reduces the number of calls to the
// random number generator.
func (r *runtimeConfig) BufferSize() int {
return r.bufferSize
}
// ByteAlphabet returns the alphabet as one byte per character when the
// alphabet consists solely of ASCII characters.
//
// For non-ASCII alphabets, this returns nil, and RuneAlphabet is used instead.
// The returned slice is the configuration's own backing slice, not a copy;
// callers should treat it as read-only.
func (r *runtimeConfig) ByteAlphabet() []byte {
return r.byteAlphabet
}
// BytesNeeded returns the number of whole bytes required to hold BitsNeeded bits.
//
// It is computed as (BitsNeeded + 7) / 8, i.e. BitsNeeded rounded up to a full byte.
func (r *runtimeConfig) BytesNeeded() uint {
return r.bytesNeeded
}
// IsASCII returns true if every rune of the alphabet is an ASCII character.
//
// When true, ByteAlphabet is populated and IDs can be assembled from bytes
// instead of runes.
func (r *runtimeConfig) IsASCII() bool {
return r.isASCII
}
// IsPowerOfTwo returns true if the length of the alphabet is a power of two.
//
// When true, every value obtained by applying Mask is a valid alphabet index,
// so random index selection can rely on a bitwise AND alone.
func (r *runtimeConfig) IsPowerOfTwo() bool {
return r.isPowerOfTwo
}
// LengthHint returns the intended length of the IDs to be generated, exactly
// as supplied in ConfigOptions.LengthHint.
//
// The hint feeds the buffer size and scaling factor calculations.
func (r *runtimeConfig) LengthHint() uint16 {
return r.lengthHint
}
// Mask returns the bitmask used to extract the necessary bits from randomly generated bytes.
//
// The mask is (1 << BitsNeeded) - 1, i.e. the lowest BitsNeeded bits set.
// A masked value may still be >= AlphabetLen unless IsPowerOfTwo is true.
func (r *runtimeConfig) Mask() uint {
return r.mask
}
// RandReader returns the source of randomness used for generating IDs.
//
// It is the reader taken from ConfigOptions.RandReader, stored without any
// wrapping or validation; it is typically a cryptographically secure random
// number generator (e.g., crypto/rand.Reader).
func (r *runtimeConfig) RandReader() io.Reader {
return r.randReader
}
// RuneAlphabet returns the slice of runes representing the alphabet.
//
// It is populated for every alphabet, including pure ASCII ones, and is the
// only representation available when the alphabet includes non-ASCII
// (multibyte) characters. The returned slice is the configuration's own
// backing slice, not a copy; callers should treat it as read-only.
func (r *runtimeConfig) RuneAlphabet() []rune {
return r.runeAlphabet
}
// ScalingFactor returns the scaling factor used to adjust the buffer size.
//
// It is max(3, AlphabetLen / LengthHint^0.6) truncated to an int, so larger
// alphabets raise it and longer IDs lower it, with a floor of 3.
func (r *runtimeConfig) ScalingFactor() int {
return r.scalingFactor
}
// MaxBytesPerRune returns the maximum number of bytes required to encode
// any rune in the alphabet using UTF-8 encoding.
//
// It is 1 for a pure ASCII alphabet and otherwise the largest utf8.RuneLen
// among the alphabet's runes.
func (r *runtimeConfig) MaxBytesPerRune() int {
return r.maxBytesPerRune
}