Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revised Compression Method #1

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 25 additions & 16 deletions humanhash.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import operator
import uuid as uuidlib
import math
import sys

if sys.version_info.major == 3:
Expand Down Expand Up @@ -141,29 +142,36 @@ def compress(bytes_, target):
>>> list(HumanHasher.compress(bytes_, 4))
[205, 128, 156, 96]

Attempting to compress a smaller number of bytes to a larger number is
an error:
If there are fewer input bytes than the target number, the input bytes are returned:

>>> HumanHasher.compress(bytes_, 15) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: Fewer input bytes than requested output
[96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151]
"""

bytes_list = list(bytes_)

length = len(bytes_list)
if target > length:
raise ValueError("Fewer input bytes than requested output")

# Split `bytes` into `target` segments.
seg_size = length // target
segments = [bytes_list[i * seg_size:(i + 1) * seg_size]
for i in range(target)]
# Catch any left-over bytes in the last segment.
segments[-1].extend(bytes_list[target * seg_size:])

return map(checksum, segments)
# If there are fewer input bytes than the target number, the input bytes will be returned
if target >= length:
return bytes

# Split `bytes` evenly into `target` segments
# Each segment covers about `seg_size` bytes; `seg_size` may be fractional, so some segments receive one byte fewer than others
seg_size = float(length) / float(target)
# Initialize `target` number of segments
segments = [0] * target
seg_num = 0

# Use a simple XOR checksum-like function for compression
for i, byte in enumerate(bytes_list):
# Divide the byte index by the segment size to determine which segment to place it in
# Floor to create a valid segment index
# Min to ensure the index is within `target`
seg_num = min(int(math.floor(i / seg_size)), target-1)
# Apply XOR to the existing segment and the byte
segments[seg_num] = operator.xor(segments[seg_num], byte)

return segments

def uuid(self, **params):

Expand All @@ -189,6 +197,7 @@ def uuid(self, **params):

DEFAULT_HASHER = HumanHasher()
uuid = DEFAULT_HASHER.uuid

humanize = DEFAULT_HASHER.humanize
humanize_list = DEFAULT_HASHER.humanize_list

Expand Down