Skip to content

Commit

Permalink
optimize parsing ulid
Browse files Browse the repository at this point in the history
  • Loading branch information
shogo82148 committed Mar 1, 2024
1 parent a1d104f commit 5f985ca
Showing 1 changed file with 75 additions and 74 deletions.
149 changes: 75 additions & 74 deletions ulid.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,35 +175,39 @@ func parse(v []byte, strict bool, id *ULID) error {
return ErrDataSize
}

// Use an optimized unrolled loop to decode a base32 ULID.
h := uint64(dec[v[0]])<<45 |
uint64(dec[v[1]])<<40 |
uint64(dec[v[2]])<<35 |
uint64(dec[v[3]])<<30 |
uint64(dec[v[4]])<<25 |
uint64(dec[v[5]])<<20 |
uint64(dec[v[6]])<<15 |
uint64(dec[v[7]])<<10 |
uint64(dec[v[8]])<<5 |
uint64(dec[v[9]])

m := uint64(dec[v[10]])<<35 |
uint64(dec[v[11]])<<30 |
uint64(dec[v[12]])<<25 |
uint64(dec[v[13]])<<20 |
uint64(dec[v[14]])<<15 |
uint64(dec[v[15]])<<10 |
uint64(dec[v[16]])<<5 |
uint64(dec[v[17]])

l := uint64(dec[v[18]])<<35 |
uint64(dec[v[19]])<<30 |
uint64(dec[v[20]])<<25 |
uint64(dec[v[21]])<<20 |
uint64(dec[v[22]])<<15 |
uint64(dec[v[23]])<<10 |
uint64(dec[v[24]])<<5 |
uint64(dec[v[25]])

// Check if all the characters in a base32 encoded ULID are part of the
// expected base32 character set.
if strict &&
(dec[v[0]] == 0xFF ||
dec[v[1]] == 0xFF ||
dec[v[2]] == 0xFF ||
dec[v[3]] == 0xFF ||
dec[v[4]] == 0xFF ||
dec[v[5]] == 0xFF ||
dec[v[6]] == 0xFF ||
dec[v[7]] == 0xFF ||
dec[v[8]] == 0xFF ||
dec[v[9]] == 0xFF ||
dec[v[10]] == 0xFF ||
dec[v[11]] == 0xFF ||
dec[v[12]] == 0xFF ||
dec[v[13]] == 0xFF ||
dec[v[14]] == 0xFF ||
dec[v[15]] == 0xFF ||
dec[v[16]] == 0xFF ||
dec[v[17]] == 0xFF ||
dec[v[18]] == 0xFF ||
dec[v[19]] == 0xFF ||
dec[v[20]] == 0xFF ||
dec[v[21]] == 0xFF ||
dec[v[22]] == 0xFF ||
dec[v[23]] == 0xFF ||
dec[v[24]] == 0xFF ||
dec[v[25]] == 0xFF) {
if strict && (h|m|l)&(1<<63) != 0 {
return ErrInvalidCharacters
}

Expand All @@ -216,28 +220,25 @@ func parse(v []byte, strict bool, id *ULID) error {
return ErrOverflow
}

// Use an optimized unrolled loop (from https://github.com/RobThree/NUlid)
// to decode a base32 ULID.

// 6 bytes timestamp (48 bits)
(*id)[0] = (dec[v[0]] << 5) | dec[v[1]]
(*id)[1] = (dec[v[2]] << 3) | (dec[v[3]] >> 2)
(*id)[2] = (dec[v[3]] << 6) | (dec[v[4]] << 1) | (dec[v[5]] >> 4)
(*id)[3] = (dec[v[5]] << 4) | (dec[v[6]] >> 1)
(*id)[4] = (dec[v[6]] << 7) | (dec[v[7]] << 2) | (dec[v[8]] >> 3)
(*id)[5] = (dec[v[8]] << 5) | dec[v[9]]
(*id)[0] = byte(h >> 40)
(*id)[1] = byte(h >> 32)
(*id)[2] = byte(h >> 24)
(*id)[3] = byte(h >> 16)
(*id)[4] = byte(h >> 8)
(*id)[5] = byte(h)

// 10 bytes of entropy (80 bits)
(*id)[6] = (dec[v[10]] << 3) | (dec[v[11]] >> 2)
(*id)[7] = (dec[v[11]] << 6) | (dec[v[12]] << 1) | (dec[v[13]] >> 4)
(*id)[8] = (dec[v[13]] << 4) | (dec[v[14]] >> 1)
(*id)[9] = (dec[v[14]] << 7) | (dec[v[15]] << 2) | (dec[v[16]] >> 3)
(*id)[10] = (dec[v[16]] << 5) | dec[v[17]]
(*id)[11] = (dec[v[18]] << 3) | dec[v[19]]>>2
(*id)[12] = (dec[v[19]] << 6) | (dec[v[20]] << 1) | (dec[v[21]] >> 4)
(*id)[13] = (dec[v[21]] << 4) | (dec[v[22]] >> 1)
(*id)[14] = (dec[v[22]] << 7) | (dec[v[23]] << 2) | (dec[v[24]] >> 3)
(*id)[15] = (dec[v[24]] << 5) | dec[v[25]]
(*id)[6] = byte(m >> 32)
(*id)[7] = byte(m >> 24)
(*id)[8] = byte(m >> 16)
(*id)[9] = byte(m >> 8)
(*id)[10] = byte(m)
(*id)[11] = byte(l >> 32)
(*id)[12] = byte(l >> 24)
(*id)[13] = byte(l >> 16)
(*id)[14] = byte(l >> 8)
(*id)[15] = byte(l)

return nil
}
Expand Down Expand Up @@ -360,34 +361,34 @@ func (id ULID) MarshalTextTo(dst []byte) error {
}

// Byte to index table for O(1) lookups when unmarshaling.
// We use 0xFF as sentinel value for invalid indexes.
var dec = [...]byte{
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01,
0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
0x0F, 0x10, 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14, 0x15, 0xFF,
0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C, 0x1D, 0x1E,
0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C,
0x0D, 0x0E, 0x0F, 0x10, 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14,
0x15, 0xFF, 0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C,
0x1D, 0x1E, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
// We use -1 (all bits are 1) as sentinel value for invalid indexes.
var dec = [...]int8{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, 0x00, 0x01,
0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, -1, -1,
-1, -1, -1, -1, -1, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
0x0F, 0x10, 0x11, -1, 0x12, 0x13, -1, 0x14, 0x15, -1,
0x16, 0x17, 0x18, 0x19, 0x1A, -1, 0x1B, 0x1C, 0x1D, 0x1E,
0x1F, -1, -1, -1, -1, -1, -1, 0x0A, 0x0B, 0x0C,
0x0D, 0x0E, 0x0F, 0x10, 0x11, -1, 0x12, 0x13, -1, 0x14,
0x15, -1, 0x16, 0x17, 0x18, 0x19, 0x1A, -1, 0x1B, 0x1C,
0x1D, 0x1E, 0x1F, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1,
}

// EncodedSize is the length of a text encoded ULID.
Expand Down

0 comments on commit 5f985ca

Please sign in to comment.