Skip to content

Commit

Permalink
Create deterministic GetHashCode method (microsoft#6860)
Browse files Browse the repository at this point in the history
  • Loading branch information
ceciliaavila authored Nov 11, 2024
1 parent 913f7c3 commit e449b3c
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 2 deletions.
34 changes: 33 additions & 1 deletion libraries/Microsoft.Bot.Builder.Azure/CosmosDBKeyEscape.cs
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,43 @@ private static string TruncateKeyIfNeeded(string key, bool truncateKeysForCompat

if (key.Length > MaxKeyLength)
{
var hash = key.GetHashCode().ToString("x", CultureInfo.InvariantCulture);
var hash = key.GetDeterministicHashCode().ToString("x", CultureInfo.InvariantCulture);
key = key.Substring(0, MaxKeyLength - hash.Length) + hash;
}

return key;
}

/// <summary>
/// Computes a 32-bit hash of a string that depends only on the string's characters.
/// Unlike <see cref="string.GetHashCode()"/>, whose result is not guaranteed to be stable across
/// processes or runtime versions, this value is the same on every run for the same input.
/// Characters at even indices feed one accumulator and characters at odd indices feed a second one;
/// the two accumulators are combined into the final result.
/// </summary>
/// <param name="str">The string to calculate the hash on.</param>
/// <returns>The hash code.</returns>
private static int GetDeterministicHashCode(this string str)
{
    // Both accumulators start from 5381 repeated in the high and low 16 bits.
    const int seed = (5381 << 16) + 5381;

    // Odd constant multiplier applied to the second accumulator when the two are merged.
    const int mixer = 1566083941;

    // All arithmetic below intentionally wraps around on 32-bit overflow.
    unchecked
    {
        int evenLane = seed;
        int oddLane = seed;
        int length = str.Length;
        int index = 0;

        while (index < length)
        {
            // Multiply by 33 (same as (x << 5) + x), then XOR in the character at the even position.
            evenLane = (evenLane * 33) ^ str[index];
            index++;

            // A string of odd length has no partner character for the last step.
            if (index < length)
            {
                oddLane = (oddLane * 33) ^ str[index];
                index++;
            }
        }

        return evenLane + (oddLane * mixer);
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT License.

using System;
using System.Reflection;
using Xunit;

namespace Microsoft.Bot.Builder.Azure.Tests
Expand Down Expand Up @@ -40,7 +41,8 @@ public void Long_Key_Should_Be_Truncated()
Assert.True(sanitizedKey.Length <= CosmosDbKeyEscape.MaxKeyLength, "Key too long");

// The resulting key should be:
var hash = tooLongKey.GetHashCode().ToString("x");
var getHashMethod = typeof(CosmosDbKeyEscape).GetMethod("GetDeterministicHashCode", BindingFlags.NonPublic | BindingFlags.Static);
var hash = ((int)getHashMethod.Invoke(null, new object[] { tooLongKey })).ToString("x");
var correctKey = sanitizedKey.Substring(0, CosmosDbKeyEscape.MaxKeyLength - hash.Length) + hash;

Assert.Equal(correctKey, sanitizedKey);
Expand Down

0 comments on commit e449b3c

Please sign in to comment.