diff --git a/humanhash.py b/humanhash.py
index de833b7..30d8abe 100644
--- a/humanhash.py
+++ b/humanhash.py
@@ -7,6 +7,7 @@
 
 import operator
 import uuid as uuidlib
+import math
 import sys
 
 if sys.version_info.major == 3:
@@ -141,29 +142,36 @@ def compress(bytes_, target):
             >>> list(HumanHasher.compress(bytes_, 4))
             [205, 128, 156, 96]
 
-        Attempting to compress a smaller number of bytes to a larger number is
-        an error:
+        If there are no more bytes than the target, they are returned as-is:
 
             >>> HumanHasher.compress(bytes_, 15)  # doctest: +ELLIPSIS
-            Traceback (most recent call last):
-            ...
-            ValueError: Fewer input bytes than requested output
+            [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151]
         """
 
         bytes_list = list(bytes_)
 
         length = len(bytes_list)
-        if target > length:
-            raise ValueError("Fewer input bytes than requested output")
-
-        # Split `bytes` into `target` segments.
-        seg_size = length // target
-        segments = [bytes_list[i * seg_size:(i + 1) * seg_size]
-                    for i in range(target)]
-        # Catch any left-over bytes in the last segment.
-        segments[-1].extend(bytes_list[target * seg_size:])
-
-        return map(checksum, segments)
+        # With no more bytes than `target`, return the input bytes unchanged
+        if target >= length:
+            return bytes_list
+
+        # Split `bytes` evenly into `target` segments
+        # Each segment will be composed of `seg_size` bytes, rounded down for some segments
+        seg_size = float(length) / float(target)
+        # Initialize `target` number of segments
+        segments = [0] * target
+        seg_num = 0
+
+        # Use a simple XOR checksum-like function for compression
+        for i, byte in enumerate(bytes_list):
+            # Divide the byte index by the segment size to determine which segment to place it in
+            # Floor to create a valid segment index
+            # Min to ensure the index is within `target`
+            seg_num = min(int(math.floor(i / seg_size)), target-1)
+            # Apply XOR to the existing segment and the byte
+            segments[seg_num] = operator.xor(segments[seg_num], byte)
+
+        return segments
 
     def uuid(self, **params):
 
@@ -189,6 +197,7 @@ def uuid(self, **params):
 
 DEFAULT_HASHER = HumanHasher()
 uuid = DEFAULT_HASHER.uuid
+
 humanize = DEFAULT_HASHER.humanize
 humanize_list = DEFAULT_HASHER.humanize_list
 