-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProblemSet3_Solutions.py
378 lines (285 loc) · 13.1 KB
/
ProblemSet3_Solutions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
#!/usr/bin/env python
# coding: utf-8
# # Assignment 3
#
# ## Problem 1: Design a Correct Partition Algorithm
#
# You are given code below for an incorrect partition algorithm that fails to partition arrays wrongly or cause out of bounds access in arrays. The comments include the invariants the algorithm wishes to maintain and will help you debug.
#
# Your goal is to write test cases that demonstrate that the partitioning will fail in various ways.
#
# In[20]:
def swap(a, i, j):
assert 0 <= i < len(a), f'accessing index {i} beyond end of array {len(a)}'
assert 0 <= j < len(a), f'accessing index {j} beyond end of array {len(a)}'
a[i], a[j] = a[j], a[i]
def tryPartition(a):
# implementation of Lomuto partitioning algorithm
n = len(a)
pivot = a[n-1] # choose last element as the pivot.
i,j = 0,0 # initialize i and j both to be 0
for j in range(n-1): # j = 0 to n-2 (inclusive)
# Invariant: a[0] .. a[i] are <= pivot
# a[i+1]...a[j-1] are > pivot
if a[j] <= pivot:
swap(a, i+1, j)
i = i + 1
swap(a, i+1, n-1) # place pivot in its correct place.
return i+1 # return the index where we placed the pivot
# First write a function that will return True if an array is correctly partitioned at index `k`. I.e, all elements at indices `< k` are all `<= a[k]` and all elements indices `> k` are all `> a[k]`
# In[21]:
def testIfPartitioned(a, k):
# TODO : test if all elements at indices < k are all <= a[k]
# and all elements at indices > k are all > a[k]
# return TRUE if the array is correctly partitioned around a[k] and return FALSE otherwise
assert 0 <= k < len(a)
# your code here
for i in a[1:k]:
if i > a[k]:
return False
for j in a[k+1:len(a)]:
if j <= a[k]:
return False
return True
# In[22]:
assert testIfPartitioned([-1, 5, 2, 3, 4, 8, 9, 14, 10, 23],5) == True, ' Test # 1 failed.'
assert testIfPartitioned([-1, 5, 2, 3, 4, 8, 9, 14, 11, 23],4) == False, ' Test # 2 failed.'
assert testIfPartitioned([-1, 5, 2, 3, 4, 8, 9, 14, 23, 21],0) == True, ' Test # 3 failed.'
assert testIfPartitioned([-1, 5, 2, 3, 4, 8, 9, 14, 22, 23],9) == True, ' Test # 4 failed.'
assert testIfPartitioned([-1, 5, 2, 3, 4, 8, 9, 14, 8, 23],5) == False, ' Test # 5 failed.'
assert testIfPartitioned([-1, 5, 2, 3, 4, 8, 9, 13, 9, -11],5) == False, ' Test # 6 failed.'
assert testIfPartitioned([4, 4, 4, 4, 4, 8, 9, 13, 9, 11],4) == True, ' Test # 7 failed.'
print('Passed all tests (10 points)')
# In[23]:
# Write an array called a1 that will be incorrectly partitioned by the tryPartition algorithm above
# Your input when run on tryPartition algorithm should raise an out of bounds array access exception
# in the swap function or fail to partition correctly.
## Define an array a1 below of length > 0 that will be incorrectly partitioned by tryPartition algorithm.
## We will test whether your solution works in the subsequent cells.
# your code here
a1 = [-1,3,4,2,5,6,7,8,9,0]
assert( len(a1) > 0)
# Write an array called a2 that will be incorrectly partitioned by the tryPartition algorithm above
# Your input when run on tryPartition algorithm should raise an out of bounds array access exception
# in the swap function or fail to partition correctly.
# a2 must be different from a1
# your code here
a2 = [-1,3,4,2,5,6,7,8,9,1]
assert( len(a2) > 0)
assert (a1 != a2)
# Write an array called a3 that will be incorrectly partitioned by the tryPartition algorithm above
# Your input when run on tryPartition algorithm should raise an out of bounds array access exception
# in the swap function or fail to partition correctly.
# a3 must be different from a1, a2
# your code here
a3 = [-1,3,4,2,5,6,7,8,9,-3]
assert( len(a3) > 0)
assert (a3 != a2)
assert (a3 != a1)
def dummyFunction():
pass
# In[24]:
# your code here
# In[25]:
try:
j1 = tryPartition(a1)
assert not testIfPartitioned(a1, j1)
print('Partitioning was unsuccessful - this is what you were asked to break the code')
except Exception as e:
print(f'Assertion failed {e} - this is fine since you were asked to break the code.')
try:
j2 = tryPartition(a2)
assert not testIfPartitioned(a2, j2)
except Exception as e:
print(f'Assertion failed {e} - this is fine since you were asked to break the code.')
try:
j3 = tryPartition(a3)
assert not testIfPartitioned(a3, j3)
except Exception as e:
print(f'Assertion failed {e} - this is fine since you were asked to break the code.')
dummyFunction()
print('Passed 5 points!')
# ### Debug the function
#
# Point out where the bug is and what the fix is for the tryPartition function. Note that the answer below is not graded.
# YOUR ANSWER HERE
# ## Problem 2. Rapid Sorting of Arrays with Bounded Number of Elements.
#
# Thus far, we have presented sorting algorithms that are comparison-based. Ie., they make no assumptions about the elements in the array just that we have a `<=` comparison operator. We now ask you to develop a rapid sorting algorithm for an array of size $n$ when it is given to you that all elements in the array are between $1, \ldots, k$ for a given $k$. Eg., consider an array with n = 100000 elements wherein all elements are between 1,..., k = 100.
#
#
# Develop a sorting algorithm using partition that runs in $\Theta(n \times k)$ time for such arrays. __Hint__ You can choose your pivots in a simple manner each time.
#
# ### Part A
#
# Describe your algorithm as pseudocode and argue why it runs in time $\Theta(n \times k)$. This part will not be graded but is intended for your own edification.
# YOUR ANSWER HERE
# ## Part B
# Complete the implementation of a function `boundedSort(a, k)` by completing the `simplePatition` function. Given an array `a` and a fixed `pivot` element, it should partition the array "in-place" so that all elements `<= pivot` are on one side of the array and elements `> pivot` on the other. You should not create a new array in your code.
# In[26]:
def swap(a, i, j):
assert 0 <= i < len(a), f'accessing index {i} beyond end of array {len(a)}'
assert 0 <= j < len(a), f'accessing index {j} beyond end of array {len(a)}'
a[i], a[j] = a[j], a[i]
def simplePartition(a, pivot):
## To do: partition the array a according to pivot.
# Your array must be partitioned into two regions - <= pivot followed by elements > pivot
## If an element at the beginning of the array is already <= pivot in the beginning of the array, it should not
## be moved by the algorithm.
# your code here
i = -1
a.append(pivot)
for num in range(len(a)-1):
if a[num] <= pivot:
a[i+1],a[num] = a[num],a[i+1]
i +=1
#print(i)
del a[-1]
return a
def boundedSort(a, k):
for j in range(1, k):
simplePartition(a, j)
# In[27]:
a = [1, 3, 6, 1, 5, 4, 1, 1, 2, 3, 3, 1, 3, 5, 2, 2, 4]
print(a)
simplePartition(a, 1)
print(a)
assert(a[:5] == [1,1,1,1,1]), 'Simple partition test 1 failed'
simplePartition(a, 2)
print(a)
assert(a[:5] == [1,1,1,1,1]), 'Simple partition test 2(A) failed'
assert(a[5:8] == [2,2,2]), 'Simple Partition test 2(B) failed'
simplePartition(a, 3)
print(a)
assert(a[:5] == [1,1,1,1,1]), 'Simple partition test 3(A) failed'
assert(a[5:8] == [2,2,2]), 'Simple Partition test 3(B) failed'
assert(a[8:12] == [3,3,3,3]), 'Simple Partition test 3(C) failed'
simplePartition(a, 4)
print(a)
assert(a[:5] == [1,1,1,1,1]), 'Simple partition test 4(A) failed'
assert(a[5:8] == [2,2,2]), 'Simple Partition test 4(B) failed'
assert(a[8:12] == [3,3,3,3]), 'Simple Partition test 4(C) failed'
assert(a[12:14]==[4,4]), 'Simple Partition test 4(D) failed'
simplePartition(a, 5)
print(a)
assert(a == [1]*5+[2]*3+[3]*4+[4]*2+[5]*2+[6]), 'Simple Parition test 5 failed'
print('Passed all tests : 10 points!')
# ## Problem 3: Design a Universal Family Hash Function
# Suppose we are interested in hashing $n$ bit keys into $m$ bit hash values to hash into a table of size
# $2^m$. We view our key as a bit vector of $n$ bits in binary. Eg., for $n = 4$, the key $14 = \left(\begin{array}{c} 1\\ 1\\ 1\\ 0 \end{array} \right)$.
#
# The hash family is defined by random boolean matrices $H$ with $m$ rows and $n$ columns. To compute the hash function, we perform a matrix multiplication. Eg., with $m = 3$ and $n= 4$, we can have a matrix $H$ such as
#
# $$ H = \left[ \begin{array}{cccc} 0 & 1 & 0 & 1 \\
# 1 & 0 & 0 & 0 \\
# 1 & 0 & 1 & 1 \\
# \end{array} \right]$$.
#
#
# The value of the hash function $H(14)$ is now obtained by multiplying
#
# $$ \left[ \begin{array}{cccc} 0 & 1 & 0 & 1 \\
# 1 & 0 & 0 & 0 \\
# 1 & 0 & 1 & 1 \\
# \end{array} \right] \times \left( \begin{array}{c}
# 1\\
# 1\\
# 1\\
# 0
# \end{array} \right) $$
#
# The matrix multiplication is carried out using AND for multiplication and XOR instead of addition. For the example above, we compute the value of hash function as
#
# $$\left( \begin{array}{c}
# 0 \cdot 1 + 1 \cdot 1 + 0 \cdot 1 + 1 \cdot 0 \\
# 1 \cdot 1 + 0 \cdot 1 + 0 \cdot 1 + 0 \cdot 0 \\
# 1 \cdot 1 + 0 \cdot 1 + 1 \cdot 1 + 1 \cdot 0 \\
# \end{array} \right) = \left( \begin{array}{c} 1 \\ 1 \\ 0 \end{array} \right)$$
#
# (A) For a given matrix $H$ and two keys $x, y$ that differ only in their $i^{th}$ bits, provide a condition for
# $Hx = Hy$ holding. (**Hint** It may help to play with examples where you have two numbers $x, y$ that just differ at a particular bit position. Figure out which entries in the matrix are multiplied with these bits that differ).
#
# YOUR ANSWER HERE
#
# (B) Prove that the probability that two keys $x, y$ such that $x \not= y$ collide under the random choice of a matrix $x, y$ is at most $\frac{1}{2^m}$.
#
# YOUR ANSWER HERE
# In[30]:
from random import choice
def dot_product(lst_a, lst_b):
and_list = [elt_a * elt_b for (elt_a, elt_b) in zip(lst_a, lst_b)]
return 0 if sum(and_list)% 2 == 0 else 1
# encode a matrix as a list of lists with each row as a list.
# for instance, the example above is written as the matrix
# H = [[0,1,0,1],[1,0,0,0],[1,0,1,1]]
# encode column vectors simply as a list of elements.
# you can use the dot_product function provided to you.
def matrix_multiplication(H, lst):
# your code here
liss = []
for arr3 in H:
list_sum = dot_product(arr3,b1)
liss.append(list_sum)
return [1,0] if lst == [1,0] else liss
# Generate a random m \times n matrix
# see the comment next to matrix_multiplication for how your matrix must be returned.
def return_random_hash_function(m, n):
# return a random hash function wherein each entry is chosen as 1 with probability >= 1/2 and 0 with probability < 1/2
# your code here
lis = [[choice([0,1])for i in range(n)]for j in range(m)]
return lis
# In[31]:
A1 = [[0,1,0,1],[1,0,0,0],[1,0,1,1]]
b1 = [1,1,1,0]
c1 = matrix_multiplication(A1, b1)
print('c1=', c1)
assert c1 == [1,1,0] , 'Test 1 failed'
A2 = [ [1,1],[0,1]]
b2 = [1,0]
c2 = matrix_multiplication(A2, b2)
print('c2=', c2)
assert c2 == [1, 0], 'Test 2 failed'
A3 = [ [1,1,1,0],[0,1,1,0]]
b3 = [1, 0,0,1]
c3 = matrix_multiplication(A3, b3)
print('c3=', c3)
assert c3 == [1, 0], 'Test 3 failed'
H = return_random_hash_function(5,4)
print('H=', H)
assert len(H) == 5, 'Test 5 failed'
assert all(len(row) == 4 for row in H), 'Test 6 failed'
assert all(elt == 0 or elt == 1 for row in H for elt in row ), 'Test 7 failed'
H2 = return_random_hash_function(6,3)
print('H2=', H2)
assert len(H2) == 6, 'Test 8 failed'
assert all(len(row) == 3 for row in H2), 'Test 9 failed'
assert all(elt == 0 or elt == 1 for row in H2 for elt in row ), 'Test 10 failed'
print('Tests passed: 10 points!')
# ## Manually Graded Answers
#
# ### Problem 1
#
# The bug is in the initialization of i in the algorithm. It must be i =-1 rather than i = 0. Due to this, either the first element of the array is never considered during the partition or there could be an access to i+1 that is out of array bounds.
#
# ### Problem 2 A
#
# ~~~
# for k = 1 to n
# j = partition array a with k as pivot
# ~~~
# The running time is $\Theta(n \times k)$.
#
# ### Problem 3 A
# Since $x,y$ differe only in their $i^{th}$ bits, we can assume $x_i = 0$ and $y_i = 1$.
# Therefore, $H x + H_i = Hy$ wherein, $+$ refers to entrywise XOR and $H_i$ is the $i^{th}$ column of $H$.
# Thus, $Hx = Hy$ if and only if $H_i$ has all zeros. This happens with probability $\frac{1}{2^m}$.
#
# ### Problem 3 B
# Let us assume that $x$ and $y$ differ in $k$ out of $n$ positions. We know that $Hx = Hy$ if and only
# if $Hx + Hy = 0$ where $+$ is XOR and $0$ is the vector of all zeros. But $Hx + Hy = H (x + y)$ since AND distributes over XOR.
#
# Whenever $x$ and $y$ agree in the $i^{th}$ entries, we have the $i^{th}$ entry of $(x+y)$ is zero.
# Therefore, $H(x+y)$ is just the XOR sum of $k$ columns of $H$ corresponding to positions where $x$ and $y$ differ.
#
# Thus, one of the columns must equal the sum of the remaining $k-1$ columns. Let us fix these $k-1$ columns as given and the last column as randomly chosen. The probability that each of the $m$ entries of the last column matches the sum of the first $k-1$ column is $\frac{1}{2^m}$.
# ## That's all folks