Skip to content

Commit

Permalink
Merge pull request #345 from yashksaini-coder/yash/fix-344
Browse files Browse the repository at this point in the history
perf: ⚡️ Enhance the Hash code snippets + Test cases
  • Loading branch information
UTSAVS26 authored Nov 6, 2024
2 parents c79112e + 550f88f commit 8b50eb2
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 26 deletions.
7 changes: 6 additions & 1 deletion pysnippets/Hashing/Hashing.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@ A **Hash Function** is a function that takes input data (such as strings, files,
- **Efficient**: The computation of the hash value should be fast.
- **Uniform distribution**: The hash values should be uniformly distributed to minimize collisions.

![Hash Function Diagram](https://media.geeksforgeeks.org/wp-content/uploads/20240508162701/Components-of-Hashing.webp)

### 2. Hash Table
A **Hash Table** is a data structure that stores key-value pairs. The key is processed by a hash function, and its output determines where the value is stored in the table. Hash tables allow for average-case constant time complexity, O(1), for lookup, insertion, and deletion operations.

![Hash Table Structure](https://www.tutorialspoint.com/data_structures_algorithms/images/hash_function.jpg)

### 3. Collisions and Collision Resolution
In practice, different inputs might produce the same hash value; this is known as a **collision**. A good hashing algorithm minimizes collisions, but when they occur, they are handled using **collision resolution techniques**:

Expand All @@ -26,6 +30,8 @@ In practice, different inputs might produce the same hash value; this is known a
- **Quadratic Probing**: Searches for an empty slot with increasing intervals (e.g., 1, 4, 9).
- **Double Hashing**: Uses a second hash function to calculate the next available slot.

![Collision Resolution Techniques](https://www.gatevidyalay.com/wp-content/uploads/2018/06/Collision-Resolution-Techniques-1.png)

### 4. Load Factor
The **Load Factor** is defined as the ratio of the number of elements stored in the hash table to the total number of slots. A high load factor increases the chance of collisions.

Expand Down Expand Up @@ -83,6 +89,5 @@ The **Load Factor** is defined as the ratio of the number of elements stored in

# Example usage
print(check_collisions([1, 2, 12, 22, 32], 10)) # Output: [12, 22, 32]

```

58 changes: 33 additions & 25 deletions pysnippets/Hashing/Hashing.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
import hashlib
from typing import Any, List, Optional, Tuple
import logging
from typing import Any, List, Optional
from dataclasses import dataclass

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

@dataclass
class HashTableEntry:
    """A single key/value record stored in one bucket of the hash table.

    Entries are mutable so that the ``value`` of an existing key can be
    overwritten in place when that key is inserted again.
    """

    key: int  # integer key; the table derives the bucket index from it
    value: Any  # payload associated with the key

class HashTable:
def __init__(self, size: int = 10) -> None:
    """Create an empty hash table with ``size`` buckets.

    Args:
        size: Initial number of buckets. Must be at least 1, because the
            bucket index is computed as ``key % size``.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("Size must be a positive integer.")
    self.size = size
    # One independent list per bucket (separate chaining).
    self.table: List[List[HashTableEntry]] = [[] for _ in range(self.size)]
    logging.info(f"Initialized HashTable with size {self.size}")

def _hash_function(self, key: int) -> int:
    """Return the bucket index for ``key``: ``key`` modulo the table size.

    Python's ``%`` takes the sign of the divisor, so with a positive table
    size negative keys also map to a valid index.
    """
    return key % self.size

def _resize(self) -> None:
    """Double the number of buckets and re-insert every stored entry.

    The old bucket lists are kept aside, ``self.size`` is doubled, a fresh
    empty table is allocated, and each old entry is passed back through
    ``insert`` so it lands in the bucket matching the new size.
    """
    old_table = self.table
    self.size *= 2
    self.table = [[] for _ in range(self.size)]
    logging.info(f"Resized HashTable to new size {self.size}")

    # Entries are HashTableEntry objects, so read their attributes rather
    # than unpacking them like tuples.
    for bucket in old_table:
        for entry in bucket:
            self.insert(entry.key, entry.value)

def load_factor(self) -> float:
num_elements = sum(len(bucket) for bucket in self.table)
Expand All @@ -26,37 +36,43 @@ def load_factor(self) -> float:
def insert(self, key: int, value: Any) -> None:
    """Store ``value`` under ``key``, overwriting any existing value.

    Before inserting, the table is resized if its load factor exceeds 0.7.

    Args:
        key: Integer key to store.
        value: Value to associate with ``key``.

    Raises:
        TypeError: If ``key`` is not an ``int``.
    """
    if not isinstance(key, int):
        raise TypeError("Key must be an integer.")
    # Check once per insert; a second check would resize the table twice.
    if self.load_factor() > 0.7:
        self._resize()
    hash_key = self._hash_function(key)
    for entry in self.table[hash_key]:
        if entry.key == key:
            # Entries are mutable dataclass instances, so update in place.
            entry.value = value
            logging.info(f"Updated key {key} with new value {value}")
            return
    self.table[hash_key].append(HashTableEntry(key, value))
    logging.info(f"Inserted key {key} with value {value}")

def search(self, key: int) -> Optional[Any]:
    """Return the value stored under ``key``, or ``None`` if it is absent.

    Args:
        key: Integer key to look up.

    Returns:
        The stored value, or ``None`` when no entry has this key.

    Raises:
        TypeError: If ``key`` is not an ``int``.
    """
    if not isinstance(key, int):
        raise TypeError("Key must be an integer.")
    hash_key = self._hash_function(key)
    # Only the bucket selected by the hash needs to be scanned.
    for entry in self.table[hash_key]:
        if entry.key == key:
            logging.info(f"Found key {key} with value {entry.value}")
            return entry.value
    logging.warning(f"Key {key} not found")
    return None

def delete(self, key: int) -> bool:
    """Remove the entry stored under ``key``.

    Args:
        key: Integer key to remove.

    Returns:
        ``True`` if an entry was removed, ``False`` if the key was not found.

    Raises:
        TypeError: If ``key`` is not an ``int``.
    """
    if not isinstance(key, int):
        raise TypeError("Key must be an integer.")
    hash_key = self._hash_function(key)
    for i, entry in enumerate(self.table[hash_key]):
        if entry.key == key:
            # Deleting during iteration is safe here: we return immediately.
            del self.table[hash_key][i]
            logging.info(f"Deleted key {key}")
            return True
    logging.warning(f"Key {key} not found for deletion")
    return False

def display(self) -> None:
    """Log every bucket at INFO level, one record per bucket.

    Each record shows the bucket index, the number of entries it holds,
    and the entries as ``(key, value)`` tuples.
    """
    for index, bucket in enumerate(self.table):
        logging.info(f"Index {index} ({len(bucket)} entries): {[(entry.key, entry.value) for entry in bucket]}")

@staticmethod
def string_hash(s: str, table_size: int) -> int:
Expand All @@ -82,31 +98,23 @@ def sha256_hash(string: str) -> str:
return hashlib.sha256(string.encode()).hexdigest()


# Example usage:
if __name__ == "__main__":
    # Small demonstration of the HashTable API; runs only when the module
    # is executed as a script.
    ht = HashTable()

    # Inserting values
    ht.insert(10, 'Value1')
    ht.insert(20, 'Value2')
    ht.insert(30, 'Value3')

    # Display the hash table
    ht.display()

    # Search for a key
    print("Search key 20:", ht.search(20))

    # Delete a key
    ht.delete(20)
    print("After deleting key 20:")
    ht.display()

    # Hashing a string
    print("Hash of 'example':", ht.string_hash("example", 10))

    # Checking collisions
    print("Collisions in [1, 2, 12, 22, 32]:", ht.check_collisions([1, 2, 12, 22, 32], 10))

    # SHA-256 Hashing
    print("SHA-256 hash of 'Hello, World!':", ht.sha256_hash("Hello, World!"))
48 changes: 48 additions & 0 deletions pysnippets/Hashing/test_hashing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import hashlib
import unittest
from Hashing import HashTable

class TestHashTable(unittest.TestCase):
    """Unit tests for ``HashTable`` and its static hashing helpers."""

    def setUp(self):
        # Start small so the resize threshold is reached after a few inserts.
        self.ht = HashTable(size=5)

    def test_insert_and_search(self):
        """Inserted keys are found; an unknown key yields ``None``."""
        self.ht.insert(10, 'Value1')
        self.ht.insert(15, 'Value2')
        self.assertEqual(self.ht.search(10), 'Value1')
        self.assertEqual(self.ht.search(15), 'Value2')
        self.assertIsNone(self.ht.search(20))

    def test_update_value(self):
        """Inserting an existing key overwrites its value."""
        self.ht.insert(10, 'Value1')
        self.ht.insert(10, 'NewValue')
        self.assertEqual(self.ht.search(10), 'NewValue')

    def test_delete_key(self):
        """Deleting returns True once, then False for the missing key."""
        self.ht.insert(10, 'Value1')
        self.assertTrue(self.ht.delete(10))
        self.assertIsNone(self.ht.search(10))
        self.assertFalse(self.ht.delete(10))

    def test_resize_table(self):
        """Ten inserts into a 5-bucket table grow it to 20 buckets."""
        for i in range(10):
            self.ht.insert(i, f'Value{i}')
        # The table doubles twice (5 -> 10 -> 20) as the load factor
        # crosses 0.7, and entries must survive the rehash.
        self.assertEqual(self.ht.size, 20)
        self.assertEqual(self.ht.search(5), 'Value5')
        self.assertEqual(self.ht.search(9), 'Value9')

    def test_non_integer_key_raises(self):
        """insert, search and delete reject non-integer keys."""
        with self.assertRaises(TypeError):
            self.ht.insert('10', 'Value1')
        with self.assertRaises(TypeError):
            self.ht.search('10')
        with self.assertRaises(TypeError):
            self.ht.delete('10')

    def test_string_hash(self):
        """string_hash equals the sum of code points modulo the table size."""
        hash_value = HashTable.string_hash("example", 10)
        self.assertEqual(hash_value, sum(ord(char) for char in "example") % 10)

    def test_check_collisions(self):
        """Every key landing on an already-used slot is reported."""
        collisions = HashTable.check_collisions([1, 6, 11, 21], 5)
        self.assertEqual(collisions, [6, 11, 21])

    def test_sha256_hash(self):
        """sha256_hash matches hashlib's hex digest of the encoded string."""
        result = HashTable.sha256_hash("Hello, World!")
        expected = hashlib.sha256("Hello, World!".encode()).hexdigest()
        self.assertEqual(result, expected)

if __name__ == "__main__":
    # Run the test suite when this file is executed directly.
    unittest.main()

0 comments on commit 8b50eb2

Please sign in to comment.