From b3db44bb312999c73fb45bbc17eb8e528e3af779 Mon Sep 17 00:00:00 2001
From: Sayandip Dutta <sayandip199309@gmail.com>
Date: Wed, 2 Feb 2022 01:57:30 +0530
Subject: [PATCH 1/3] Improved logic for iterating over 'pairs.txt'

Instead of loading the entire file into memory, iterate directly over
the file object returned by the `open` function, which natively acts as
an iterator over its lines.

- This saves memory.
- This avoids a `__getitem__` call per line, improving performance.
---
 lfw_eval.py | 62 +++++++++++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 30 deletions(-)

diff --git a/lfw_eval.py b/lfw_eval.py
index 16d70bc..d4a41aa 100755
--- a/lfw_eval.py
+++ b/lfw_eval.py
@@ -85,36 +85,38 @@ def find_best_threshold(thresholds, predicts):
     landmark[l[0]] = [int(k) for k in l[1:]]
 
 with open('data/pairs.txt') as f:
-    pairs_lines = f.readlines()[1:]
-
-for i in range(6000):
-    p = pairs_lines[i].replace('\n','').split('\t')
-
-    if 3==len(p):
-        sameflag = 1
-        name1 = p[0]+'/'+p[0]+'_'+'{:04}.jpg'.format(int(p[1]))
-        name2 = p[0]+'/'+p[0]+'_'+'{:04}.jpg'.format(int(p[2]))
-    if 4==len(p):
-        sameflag = 0
-        name1 = p[0]+'/'+p[0]+'_'+'{:04}.jpg'.format(int(p[1]))
-        name2 = p[2]+'/'+p[2]+'_'+'{:04}.jpg'.format(int(p[3]))
-
-    img1 = alignment(cv2.imdecode(np.frombuffer(zfile.read(name1),np.uint8),1),landmark[name1])
-    img2 = alignment(cv2.imdecode(np.frombuffer(zfile.read(name2),np.uint8),1),landmark[name2])
-
-    imglist = [img1,cv2.flip(img1,1),img2,cv2.flip(img2,1)]
-    for i in range(len(imglist)):
-        imglist[i] = imglist[i].transpose(2, 0, 1).reshape((1,3,112,96))
-        imglist[i] = (imglist[i]-127.5)/128.0
-
-    img = np.vstack(imglist)
-    img = Variable(torch.from_numpy(img).float(),volatile=True).cuda()
-    output = net(img)
-    f = output.data
-    f1,f2 = f[0],f[2]
-    cosdistance = f1.dot(f2)/(f1.norm()*f2.norm()+1e-5)
-    predicts.append('{}\t{}\t{}\t{}\n'.format(name1,name2,cosdistance,sameflag))
-
+    _ = next(f, None)     # skip header
+
+    for line_no, line in enumerate(f, start=1):
+        p = line.replace('\n','').split('\t')
+
+        if 3==len(p):
+            sameflag = 1
+            name1 = p[0]+'/'+p[0]+'_'+'{:04}.jpg'.format(int(p[1]))
+            name2 = p[0]+'/'+p[0]+'_'+'{:04}.jpg'.format(int(p[2]))
+        if 4==len(p):
+            sameflag = 0
+            name1 = p[0]+'/'+p[0]+'_'+'{:04}.jpg'.format(int(p[1]))
+            name2 = p[2]+'/'+p[2]+'_'+'{:04}.jpg'.format(int(p[3]))
+
+        img1 = alignment(cv2.imdecode(np.frombuffer(zfile.read(name1),np.uint8),1),landmark[name1])
+        img2 = alignment(cv2.imdecode(np.frombuffer(zfile.read(name2),np.uint8),1),landmark[name2])
+
+        imglist = [img1,cv2.flip(img1,1),img2,cv2.flip(img2,1)]
+        for i in range(len(imglist)):
+            imglist[i] = imglist[i].transpose(2, 0, 1).reshape((1,3,112,96))
+            imglist[i] = (imglist[i]-127.5)/128.0
+
+        img = np.vstack(imglist)
+        img = Variable(torch.from_numpy(img).float(),volatile=True).cuda()
+        output = net(img)
+        f = output.data
+        f1,f2 = f[0],f[2]
+        cosdistance = f1.dot(f2)/(f1.norm()*f2.norm()+1e-5)
+        predicts.append('{}\t{}\t{}\t{}\n'.format(name1,name2,cosdistance,sameflag))
+
+        if line_no >= 6000:     # break as soon as 6000 lines have been processed.
+            break
 
 accuracy = []
 thd = []

From 3a4c230c898a4937eb9f903a15fb0811bcf0ad79 Mon Sep 17 00:00:00 2001
From: Sayandip Dutta <sayandip199309@gmail.com>
Date: Wed, 2 Feb 2022 02:10:05 +0530
Subject: [PATCH 2/3] Change inner loop to use enumerate

The inner loop used `range(len(imglist))`, which iterates over the entire
length twice. Using `enumerate` saves one complete iteration.

Furthermore, inside the inner loop, `__getitem__` and `__setitem__` were
each being called twice per iteration. This change reduces the
`__getitem__` calls to 0, as element access is already taken care of by
the for loop, and reduces the `__setitem__` calls to 1.
---
 lfw_eval.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lfw_eval.py b/lfw_eval.py
index d4a41aa..99f5aee 100755
--- a/lfw_eval.py
+++ b/lfw_eval.py
@@ -103,9 +103,9 @@ def find_best_threshold(thresholds, predicts):
         img2 = alignment(cv2.imdecode(np.frombuffer(zfile.read(name2),np.uint8),1),landmark[name2])
 
         imglist = [img1,cv2.flip(img1,1),img2,cv2.flip(img2,1)]
-        for i in range(len(imglist)):
-            imglist[i] = imglist[i].transpose(2, 0, 1).reshape((1,3,112,96))
-            imglist[i] = (imglist[i]-127.5)/128.0
+        for i, image in imglist:
+            image = image.transpose(2, 0, 1).reshape((1,3,112,96))
+            imglist[i] = (image - 127.5) / 128.0
 
         img = np.vstack(imglist)
         img = Variable(torch.from_numpy(img).float(),volatile=True).cuda()

From 762c458f9258a8d85210a92c2ae0548942e4e062 Mon Sep 17 00:00:00 2001
From: Sayandip Dutta <sayandip199309@gmail.com>
Date: Wed, 2 Feb 2022 02:26:09 +0530
Subject: [PATCH 3/3] Add enumerate in inner loop

---
 lfw_eval.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lfw_eval.py b/lfw_eval.py
index 99f5aee..def0b4b 100755
--- a/lfw_eval.py
+++ b/lfw_eval.py
@@ -103,7 +103,7 @@ def find_best_threshold(thresholds, predicts):
         img2 = alignment(cv2.imdecode(np.frombuffer(zfile.read(name2),np.uint8),1),landmark[name2])
 
         imglist = [img1,cv2.flip(img1,1),img2,cv2.flip(img2,1)]
-        for i, image in imglist:
+        for i, image in enumerate(imglist):
             image = image.transpose(2, 0, 1).reshape((1,3,112,96))
             imglist[i] = (image - 127.5) / 128.0