-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
passport_mrz_td3_decoder.py
87 lines (57 loc) · 3.56 KB
/
passport_mrz_td3_decoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from mrz.checker.td3 import TD3CodeChecker
import argparse
import datetime
import os
import pytesseract
import typing
# Number of characters in one line of a passport MRZ. The code below uses it
# to validate input lengths (a full passport MRZ is two such lines).
PASSPORT_MRZ_LINE_LENGTH = 44
def create_dict_from_result(mrz_result) -> dict:
    """Convert decoded MRZ fields into a plain dictionary.

    Args:
        mrz_result: Object exposing the decoded MRZ fields as attributes
            (``surname``, ``name``, ``country``, ``nationality``,
            ``birth_date``, ``expiry_date``, ``sex``, ``document_type``,
            ``document_number``, ``optional_data`` and the matching
            ``*_hash`` check-digit attributes), e.g. the value passed in by
            ``decode_passport_mrz``.

    Returns:
        A dict with the passport holder/document data. ``birth_date`` and
        ``expiration_date`` are ``datetime.datetime`` objects; the check
        digits are grouped under the ``"hashes"`` key.

    Raises:
        ValueError: If ``birth_date`` or ``expiry_date`` is not a valid
            ``YYMMDD`` string.
    """
    # MRZ dates use 2-digit years (no century) instead of a 4-digit year.
    passport_date_fmt = "%y%m%d"

    birth_date = datetime.datetime.strptime(
        mrz_result.birth_date, passport_date_fmt)
    # "%y" maps 00-68 to 2000-2068, so someone born in 1950 would be decoded
    # as born in 2050. A birth date cannot lie in the future, so roll such
    # values back one century. (Leap days are safe: any future year in
    # 2001-2068 that is a leap year is also a leap year 100 years earlier.)
    if birth_date > datetime.datetime.now():
        birth_date = birth_date.replace(year=birth_date.year - 100)

    expiration_date = datetime.datetime.strptime(
        mrz_result.expiry_date, passport_date_fmt)

    return {
        "surname": mrz_result.surname,
        "first_name": mrz_result.name,
        "country_issued": mrz_result.country,
        "owner_nationality": mrz_result.nationality,
        "birth_date": birth_date,
        "expiration_date": expiration_date,
        "owner_sex": mrz_result.sex,
        "document_type": mrz_result.document_type,
        "document_number": mrz_result.document_number,
        "optional_data": mrz_result.optional_data,
        # Check digits as reported by the MRZ checker, grouped separately
        # from the decoded data.
        "hashes": {
            "birth_date": mrz_result.birth_date_hash,
            "expiry_date": mrz_result.expiry_date_hash,
            "document_number": mrz_result.document_number_hash,
            "optional_data": mrz_result.optional_data_hash,
            "final": mrz_result.final_hash,
        },
    }
def extract_mrz_from_passport_image(passport_image_filename: str) -> typing.List[str]:
    """OCR a passport image and return its last two non-blank text lines.

    Args:
        passport_image_filename: Path of the image handed to
            ``pytesseract.image_to_string``.

    Returns:
        The last two non-blank lines of the recognised text, with
        surrounding whitespace removed. The list is shorter than two items
        if the OCR output contains fewer non-blank lines.
    """
    # See the `config/` dir for more info
    raw_passport_text = pytesseract.image_to_string(
        passport_image_filename, lang='eng', config="--tessdata-dir config")

    # OCR output can contain whitespace-only lines and, depending on the
    # Tesseract/pytesseract version, a trailing form-feed page separator.
    # Those are non-empty strings, so a plain truthiness filter would keep
    # them and they could be mistaken for an MRZ line. splitlines() breaks
    # on the form feed as well, and strip() discards the padding.
    stripped_lines = (line.strip() for line in raw_passport_text.splitlines())
    passport_text_lines = [line for line in stripped_lines if line]

    # remember, MRZ is always the bottom 2 lines of a passport
    return passport_text_lines[-2:]
def decode_passport_mrz(passport_mrz) -> dict:
    """Validate a passport MRZ string and return its fields as a dict.

    Args:
        passport_mrz: The MRZ text handed to ``TD3CodeChecker``; it must be
            at least two MRZ line lengths long.

    Returns:
        The dictionary built by ``create_dict_from_result`` from the
        checker's fields.

    Raises:
        ValueError: If the input is too short, or if the checker evaluates
            as falsy for the given MRZ.
    """
    minimum_length = 2 * PASSPORT_MRZ_LINE_LENGTH
    if not len(passport_mrz) >= minimum_length:
        raise ValueError(
            f"Passport MRZ received was incorrect length. Received={len(passport_mrz)}, Minimum required={PASSPORT_MRZ_LINE_LENGTH*2}")

    checker = TD3CodeChecker(passport_mrz)
    # A truthy checker is treated as a successful validation.
    if checker:
        return create_dict_from_result(checker.fields())

    raise ValueError("Passport was invalid -- either expired or it was not in the correct format.")
if __name__ == "__main__":
    # Command-line entry point: decode a passport MRZ supplied either as two
    # separate lines, as one block of text, or as an image to OCR, and print
    # the decoded dictionary.
    argparse_parser = argparse.ArgumentParser()
    argparse_parser.add_argument(
        "-1", "--line-1", type=str, help="The first line of the passport text")
    argparse_parser.add_argument(
        "-2", "--line-2", type=str, help="The second line of the passport text")
    argparse_parser.add_argument(
        "-t", "--text", type=str, help="The full text of the passport MRZ. Do not use this in conjunction with any other options.")
    argparse_parser.add_argument(
        "-i", "--image", type=str, help="The text image to parse passport MRZ from. Do not use this in conjunction with any other options")
    argparse_args = argparse_parser.parse_args()

    if argparse_args.line_1 and argparse_args.line_2:
        if (len(argparse_args.line_1) == PASSPORT_MRZ_LINE_LENGTH
                and len(argparse_args.line_2) == PASSPORT_MRZ_LINE_LENGTH):
            result = decode_passport_mrz(
                f"{argparse_args.line_1}\n{argparse_args.line_2}")
            print(result)
        else:
            print(
                f"[ERROR] Either line 1 or line 2 were of incorrect length -- check that both strings have the length of {PASSPORT_MRZ_LINE_LENGTH}")
    elif argparse_args.text:
        mrz_text = argparse_args.text.strip()
        # The other branches hand the decoder two lines joined by a newline.
        # If the text arrived as one unbroken run of exactly two line
        # lengths, split it the same way.
        if ("\n" not in mrz_text
                and len(mrz_text) == PASSPORT_MRZ_LINE_LENGTH * 2):
            mrz_text = (f"{mrz_text[:PASSPORT_MRZ_LINE_LENGTH]}\n"
                        f"{mrz_text[PASSPORT_MRZ_LINE_LENGTH:]}")
        print(decode_passport_mrz(mrz_text))
    elif argparse_args.image:
        # Check that --image was given before touching the filesystem:
        # os.path.exists(None) raises TypeError.
        if not os.path.exists(argparse_args.image):
            print(f"[ERROR] Image file not found: {argparse_args.image}")
        else:
            mrz_lines_from_img = extract_mrz_from_passport_image(
                argparse_args.image)
            if len(mrz_lines_from_img) < 2:
                # Avoid an IndexError when OCR found fewer than two lines.
                print("[ERROR] Could not find two lines of text in the image")
            else:
                result = decode_passport_mrz(
                    f"{mrz_lines_from_img[0]}\n{mrz_lines_from_img[1]}")
                print(result)
    else:
        # Nothing usable was supplied; show usage instead of a traceback.
        argparse_parser.error(
            "provide both --line-1 and --line-2, or --text, or --image")