From 4812ff4b13e2a8f530d260885dbd4eaa002b752c Mon Sep 17 00:00:00 2001 From: jaeyoonpark Date: Wed, 26 Jan 2022 13:24:51 +0100 Subject: [PATCH 1/4] update setup and others --- .pre-commit-config.yaml | 11 + pyproject.toml | 8 + setup.cfg | 4 + setup.py | 20 +- shipdataprocess/__init__.py | 10 +- shipdataprocess/collapse.py | 192 ++++++--- shipdataprocess/normalize.py | 325 ++++++++------- shipdataprocess/shiptype.py | 658 +++++++++++++++++++------------ shipdataprocess/standardize.py | 529 ++++++++++++++++++------- tests/test_normalize_shipname.py | 55 ++- 10 files changed, 1186 insertions(+), 626 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 pyproject.toml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ca8c002 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: +- repo: https://github.com/psf/black + rev: 21.11b0 + hooks: + - id: black + args: [--line-length=79] +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..eb1bdcb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel" +] + +[tool.black] +line-length = 79 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index a0183fb..c62e09e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,6 @@ [metadata] Obsoletes-Dist: ShipDataProcess + +[options.extras_require] +test = + pytest diff --git a/setup.py b/setup.py index 61eff23..58342bf 100644 --- a/setup.py +++ b/setup.py @@ -5,21 +5,15 @@ """ import codecs -import os from setuptools import find_packages from setuptools import setup -package = __import__('shipdataprocess') +package = __import__("shipdataprocess") -DEPENDENCIES = [ - "pytest", - "unidecode", - "roman", - "Django" -] +DEPENDENCIES = ["pytest", "unidecode", "roman"] -with codecs.open('README.md', encoding='utf-8') as f: +with codecs.open("README.md", encoding="utf-8") as f: readme = f.read().strip() setup( @@ -28,13 +22,13 @@ description=package.__doc__.strip(), include_package_data=True, install_requires=DEPENDENCIES, - keywords=['ship','vessel','fishing','normalization'], + python_requires=">=3.6", + keywords=["ship", "vessel", "fishing", "normalization"], license="Apache 2.0", long_description=readme, - name='shipdataprocess', - packages=find_packages(exclude=['test*.*', 'tests']), + name="shipdataprocess", + packages=find_packages(exclude=["test*.*", "tests"]), url=package.__source__, version=package.__version__, zip_safe=True, ) - diff --git a/shipdataprocess/__init__.py b/shipdataprocess/__init__.py index 7b322e5..de625df 100644 --- a/shipdataprocess/__init__.py +++ b/shipdataprocess/__init__.py @@ -3,15 +3,15 @@ """ -__version__ = '0.6.18' -__author__ = 'Jaeyoon Park' -__email__ = 'jaeyoon.park13@gmail.com' -__source__ = 'https://github.com/GlobalFishingWatch/shipdataprocess' +__version__ = "0.7.1" +__author__ = "Jaeyoon Park" +__email__ = "jaeyoon@globalfishingwatch.org" +__source__ = "https://github.com/GlobalFishingWatch/shipdataprocess" __license__ = """ Copyright 2017 Global Fishing Watch Inc. Authors: -Jaeyoon Park +Jaeyoon Park Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
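Illustrative sketch (editorial, not part of the patch): the updated setup.py still reads its metadata by importing the package itself, so the values bumped in shipdataprocess/__init__.py are what a build picks up. Assuming the package is importable from the working tree:

    # Sketch only: setup.py resolves metadata via __import__("shipdataprocess"),
    # so the bumped __init__.py values flow into the built distribution.
    package = __import__("shipdataprocess")
    print(package.__version__)  # "0.7.1" after this patch
    print(package.__source__)   # passed to setup() as the project url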
diff --git a/shipdataprocess/collapse.py b/shipdataprocess/collapse.py index b18a0ce..823a580 100644 --- a/shipdataprocess/collapse.py +++ b/shipdataprocess/collapse.py @@ -1,32 +1,51 @@ +""" +This file contains functions that help collapse (multiple) rows +for each vessel found in the process of producing Global Fishing Watch's +internal vessel database. + +Last updated: 2022-01-24 +Jaeyoon Park +""" + import pandas as pd import numpy as np import re from collections import Counter -### helper functions for collapsing rows by vessel - def non_zero_mean(x): try: - x = x[(x!=0)&(x!=None)] - if len(x)==0: return 0.0 - else: return x.mean() - except: + x = x[(x != 0) & (x is not None)] + if len(x) == 0: + return 0.0 + else: + return x.mean() + except AttributeError: return 0.0 - + + def non_zero_std(x): try: - x = x[(x!=0)&(x!=None)] - if len(x)<2: return 0.0 - else: return x.std() - except: + x = x[(x != 0) & (x is not None)] + if len(x) < 2: + return 0.0 + else: + return x.std() + except AttributeError: return 0.0 - -def most_common_value(x): ## remove if standard deviation is too big compared to mean value of all numbers - '''remove if standard deviation is too big compared to mean value of all numbers''' - if (type(x)==list)&(len(x)>0): + + +def most_common_value(x): + """ + Remove if standard deviation is too big compared to mean value of + all numbers. The standard deviation threshold is set to be 10%. + + x: Pandas Series or list, a list of numerical values + (for length, tonnage, engine power) + """ + if (type(x) == list) & (len(x) > 0): x = pd.Series(x) - if (type(x)==pd.core.series.Series)&(len(x.dropna())>0): + if (type(x) == pd.core.series.Series) & (len(x.dropna()) > 0): x_mean = non_zero_mean(x) x_std = non_zero_std(x) if x_std > x_mean * 0.1: @@ -36,95 +55,156 @@ def most_common_value(x): ## remove if standard deviation is too big compared to else: return np.nan + def most_common_value_with_confidence(cx): - '''same functionality as most_common_value() but with confidence level taken account''' - if (type(cx)==pd.core.series.Series)&(len(cx)>0): - if len(cx.dropna())==0: + """ + same functionality as most_common_value() but with confidence level + taken into account + + cx: Pandas Series or list, a list of numerical values + (for length, tonnage, engine power) + with a confidence level indicator attached with '-' in front of the value. + """ + if (type(cx) == pd.core.series.Series) & (len(cx) > 0): + if len(cx.dropna()) == 0: return np.nan else: cx = list(cx.values) - if (type(cx)==list)&(len(cx)>0): - clist = [int(elem.split('-')[0]) for elem in cx if (elem==elem)&(elem!=None)] - xlist = [elem for elem in cx if (elem==elem)&(elem!=None)] - if len(clist)>0: + if (type(cx) == list) & (len(cx) > 0): + clist = [ + int(elem.split("-")[0]) + for elem in cx + if (elem == elem) & (elem is not None) + ] + xlist = [elem for elem in cx if (elem == elem) & (elem is not None)] + if len(clist) > 0: max_c = max(clist) - x = [float(elem.split('-')[1]) for elem in xlist if int(elem.split('-')[0])==max_c] + x = [ + float(elem.split("-")[1]) + for elem in xlist + if int(elem.split("-")[0]) == max_c + ] + # Call the function to return the most common value return most_common_value(x) else: return np.nan else: return np.nan - -def most_common_num(x): ## mostly for imo collapsing + + +def most_common_num(x): + """ + Return the most common number (mostly for imo collapsing). 
+ + x: Pandas Series, a list of numbers + """ try: x = x.dropna() - if len(x)==0: + if len(x) == 0: return np.nan else: vals = x.values - vs = [v for v in vals if (v!=0)] - #vs = list(set(vs)) - if len(vs)==0: + vs = [v for v in vals if (v != 0)] + # vs = list(set(vs)) + if len(vs) == 0: return np.nan - else: + else: data = Counter(vs) return max(vs, key=data.get) - except: + except AttributeError: return np.nan - + + def most_common_str(x): + """ + Return the most common string. + + x: Pandas Series, a list of values in string + """ try: x = x.dropna() - if len(x)==0: + if len(x) == 0: return np.nan else: - vals = x.values - vs = [re.sub('\s+',' ',str(v)).strip().upper() for v in x.values] - vs = [v for v in vs if v!=''] - #vs = list(set(vs)) - if len(vs)==0: + vs = [ + re.sub(r"\s+", " ", str(v)).strip().upper() for v in x.values + ] + vs = [v for v in vs if v != ""] + # vs = list(set(vs)) + if len(vs) == 0: return np.nan else: data = Counter(vs) return max(vs, key=data.get) - #if len(vs)==1: - # return vs[0] - #else: - # return None - except: + except AttributeError: return np.nan - -def str_attached(x): ## join all strings + + +def str_attached(x): + """ + Return all strings joined. If the values are in numbers, convert them + to string and combined. + + :param x: Pandas Series or list + :return: A joined string + """ try: x = x.dropna() - if len(x)==0: + if len(x) == 0: return np.nan else: - x = x.apply(lambda v: str(int(v)) if (type(v)==float)|(type(v)==int)|(type(v)==long) else v) + x = x.apply( + lambda v: str(int(v)) + if (type(v) == float) | (type(v) == int) + else v + ) vals = x.values.tolist() - #vs = [str(v).strip() for v in vals if (v==v)&(v!=None)&(v!='')] - #vs = [v for v in vs if (v!='')] + # vs = [str(v).strip() for v in vals if (v==v)&(v!=None)&(v!='')] + # vs = [v for v in vs if (v!='')] vs = list(set(vals)) - return ', '.join(sorted(vs)) - except: + return ", ".join(sorted(vs)) + except AttributeError: return np.nan - + + def min_time(x): + """ + Return the minimum time + + :param x: Pandas Series + :return: Timestamp + """ vals = x.values - vs = [v for v in vals if (v==v)&(v!=None)&(v!='')] + vs = [v for v in vals if (v == v) & (v is not None) & (v != "")] vs = pd.Series(vs) + return vs.min() + def max_time(x): + """ + Return the maximum time + + :param x: Pandas Series + :return: Timestamp + """ vals = x.values - vs = [v for v in vals if (v==v)&(v!=None)&(v!='')] + vs = [v for v in vals if (v == v) & (v is not None) & (v != "")] vs = pd.Series(vs) + return vs.max() + def highest_confidence(x): + """ + Return the maximum confidence if none return 1 (the lowest). + + :param x: Pandas Series or list + :return: Integer + """ x = x.dropna() - if len(x)>0: + if len(x) > 0: return max(x.tolist()) else: return 1 diff --git a/shipdataprocess/normalize.py b/shipdataprocess/normalize.py index 29104a9..c95a2cd 100644 --- a/shipdataprocess/normalize.py +++ b/shipdataprocess/normalize.py @@ -1,205 +1,260 @@ +""" +This file provides functions that normalize ship name and call sign of a vessel +either recorded in registries or in vessel tracking data. The normalization, or +standardization of string, will ensure that the strings are comparable to other +strings despite various ways of recording names of the same vessel. +It also removes all non-essential characters or white spaces. 
+""" from unidecode import unidecode import roman import re -import sys def normalize_shipname(name): - - if (name is None)|(name != name)|(name == ''): - return None + """ + Return a normalized ship name by removing all non-essential characters, + prefix, and suffix, and standardizing roman numerals or other parts + of the vessel name. - # - # Turn to upper cases - name = name.upper() - + :param name: String, an original vessel name + :return: String, a normalized vessel name + """ + if (name is None) | (name != name) | (name == ""): + return None + print(name) # # Remove nasty characters and white spaces - if sys.version_info[0] < 3: + # try: + # name = unidecode(str(name.decode("utf-8"))) + # except UnicodeDecodeError: + # name = unidecode(str(name.decode("iso_8859-1"))) + + if issubclass(type(name), str): + name = unidecode(name) + elif isinstance(name, bytes): try: - name = unidecode(str(name)) # get rid of nasty characters, but sometimes this fails - except: - try: - name = unidecode(str(name.decode('utf8'))) - except: - name = unidecode(str(name.decode('iso_8859-1'))) + name = unidecode(str(name, "utf-8", "strict")) + except UnicodeDecodeError: + name = unidecode(str(name, "iso-8859-1", "strict")) + elif isinstance(name, int): + name = str(name) else: - name = unidecode(str(name)) + return None + print(name) + # + # Turn to upper cases + name = name.upper() - name = re.sub('\s+',' ',name) + name = re.sub(r"\s+", " ", name) name = name.strip() - name = name.replace('\n','').replace('\r','') - + name = name.replace("\n", "").replace("\r", "") + # # Remove fishing vessel code - name = re.sub('MFV[^\w]+', ' ', name) # fishing vessel code in English - name = re.sub('MPV[^\w]+', ' ', name) # fishing vessel code in English - name = re.sub('HMS[^\w]+', ' ', name) # fishing vessel code in English - name = re.sub('LPG[/|C]*[\W]*|LNG[/|C]*[\W]*', ' ', name) # LPG/LNG variations - - name = re.sub('(\s|^)F[^\w\s]*V[^\w]*', ' ', name) # fishing vessel code in English (F/V, F-V, F.V, FV: etc) - name = re.sub('^F[^\w\s]*B[^\w]+', ' ', name) # fishing vessel code in English - name = re.sub(' F[^\w\s]*B[^\w]*(\s|$)', ' ', name) - name = re.sub('^M[^\w\s]*P[^\w]+', ' ', name) # fishing vessel code in Italy/Spain - name = re.sub(' M[^\w\s]*P[^\w]*(\s|$)', ' ', name) - name = re.sub('^M[^\w\s]*B[^\w]+', ' ', name) # fishing vessel code in Italy/Spain - name = re.sub(' M[^\w\s]*B[^\w]*(\s|$)', ' ', name) - name = re.sub('^G[^\w\s]*V[^\w]+', ' ', name) # mostly in UK - name = re.sub('S+F+[^\w]+G[^\w\s]*V[^\w]*', ' ', name) - name = re.sub(' G[^\w\s]*V[^\w]*(\s|$)', ' ', name) - name = re.sub('^M[^\w\s]*V[^\w]+', ' ', name) # in English - name = re.sub(' M[^\w\s]*V[^\w]*(\s|$)', ' ', name) - name = re.sub('^M[^\w\s]+S[^\w]+', ' ', name) # Merchant Ship - name = re.sub(' M[^\w\s]+S[^\w]*(\s|$)', ' ', name) - name = re.sub('^M[^\w\s]*K[^\w]+', ' ', name) # mostly in northern europe - name = re.sub(' M[^\w\s]+K[^\w]*(\s|$)', ' ', name) - name = re.sub('^R[^\w\s]*V[^\w]+', ' ', name) # Research Vessel - name = re.sub(' R[^\w\s]*V[^\w]*(\s|$)', ' ', name) - - name = re.sub('^T[^\w\s]*T[^\w]+', ' ', name) # Tender To - name = re.sub(' T[^\w\s]*T[^\w]*($)', ' ', name) - name = re.sub('^S[^\w\s]*Y[^\w]+', ' ', name) # Steam Yacht - name = re.sub(' S[^\w\s]*Y[^\w]*($)', ' ', name) - name = re.sub('^M[^\w\s]*F[^\w]+', ' ', name) # Motor Ferry - name = re.sub(' M[^\w\s]*F[^\w]*($)', ' ', name) - name = re.sub('^S[^\w\s]*S[^\w]+', ' ', name) # Steam Ship - name = re.sub(' S[^\w\s]*S[^\w]*($)', ' ', name) - name = 
re.sub('^S[^\w\s]*V[^\w]+', ' ', name) # Sailing Vessel - name = re.sub(' S[^\w\s]*V[^\w]*($)', ' ', name) - name = re.sub('^M[^\w\s]*T[^\w]+', ' ', name) # Motor Tanker - name = re.sub(' M[^\w\s]*T[^\w]*($)', ' ', name) - name = re.sub('^M[^\w\s]+Y[^\w]+', ' ', name) # Motor Yacht - name = re.sub(' M[^\w\s]+Y[^\w]*($)', ' ', name) - name = re.sub('^[A-Z]/[A-Z][^\w]+', ' ', name) # All other types of X/X - name = re.sub(' [A-Z]/[A-Z]($)', ' ', name) - name = re.sub('^[A-Z]\\\\[A-Z][^\w]+', ' ', name) ## All other types of X\X - name = re.sub(' [A-Z]\\\\[A-Z]($)', ' ', name) - name = re.sub('^KM[^\w]+', ' ', name) # Indonesia K.M - name = re.sub('^E.B. ', ' ', name) # Dutch E.B. equivalent to NO. - - name = re.sub('\(.+\)', ' ', name) # All additional information in parentheses - name = re.sub('\[.+\]', ' ', name) - + name = re.sub(r"MFV[^\w]+", " ", name) # fishing vessel code in English + name = re.sub(r"MPV[^\w]+", " ", name) # fishing vessel code in English + name = re.sub(r"HMS[^\w]+", " ", name) # fishing vessel code in English + name = re.sub( + r"LPG[/|C]*[\W]*|LNG[/|C]*[\W]*", " ", name + ) # LPG/LNG variations + + name = re.sub( + r"(\s|^)F[^\w\s]*V[^\w]*", " ", name + ) # fishing vessel code in English (F/V, F-V, F.V, FV: etc) + name = re.sub( + r"^F[^\w\s]*B[^\w]+", " ", name + ) # fishing vessel code in English + name = re.sub(r" F[^\w\s]*B[^\w]*(\s|$)", " ", name) + name = re.sub( + r"^M[^\w\s]*P[^\w]+", " ", name + ) # fishing vessel code in Italy/Spain + name = re.sub(r" M[^\w\s]*P[^\w]*(\s|$)", " ", name) + name = re.sub( + r"^M[^\w\s]*B[^\w]+", " ", name + ) # fishing vessel code in Italy/Spain + name = re.sub(r" M[^\w\s]*B[^\w]*(\s|$)", " ", name) + name = re.sub(r"^G[^\w\s]*V[^\w]+", " ", name) # mostly in UK + name = re.sub(r"S+F+[^\w]+G[^\w\s]*V[^\w]*", " ", name) + name = re.sub(r" G[^\w\s]*V[^\w]*(\s|$)", " ", name) + name = re.sub(r"^M[^\w\s]*V[^\w]+", " ", name) # in English + name = re.sub(r" M[^\w\s]*V[^\w]*(\s|$)", " ", name) + name = re.sub(r"^M[^\w\s]+S[^\w]+", " ", name) # Merchant Ship + name = re.sub(r" M[^\w\s]+S[^\w]*(\s|$)", " ", name) + name = re.sub(r"^M[^\w\s]*K[^\w]+", " ", name) # mostly in northern europe + name = re.sub(r" M[^\w\s]+K[^\w]*(\s|$)", " ", name) + name = re.sub(r"^R[^\w\s]*V[^\w]+", " ", name) # Research Vessel + name = re.sub(r" R[^\w\s]*V[^\w]*(\s|$)", " ", name) + + name = re.sub(r"^T[^\w\s]*T[^\w]+", " ", name) # Tender To + name = re.sub(r" T[^\w\s]*T[^\w]*($)", " ", name) + name = re.sub(r"^S[^\w\s]*Y[^\w]+", " ", name) # Steam Yacht + name = re.sub(r" S[^\w\s]*Y[^\w]*($)", " ", name) + name = re.sub(r"^M[^\w\s]*F[^\w]+", " ", name) # Motor Ferry + name = re.sub(r" M[^\w\s]*F[^\w]*($)", " ", name) + name = re.sub(r"^S[^\w\s]*S[^\w]+", " ", name) # Steam Ship + name = re.sub(r" S[^\w\s]*S[^\w]*($)", " ", name) + name = re.sub(r"^S[^\w\s]*V[^\w]+", " ", name) # Sailing Vessel + name = re.sub(r" S[^\w\s]*V[^\w]*($)", " ", name) + name = re.sub(r"^M[^\w\s]*T[^\w]+", " ", name) # Motor Tanker + name = re.sub(r" M[^\w\s]*T[^\w]*($)", " ", name) + name = re.sub(r"^M[^\w\s]+Y[^\w]+", " ", name) # Motor Yacht + name = re.sub(r" M[^\w\s]+Y[^\w]*($)", " ", name) + name = re.sub(r"^[A-Z]/[A-Z][^\w]+", " ", name) # All other types of X/X + name = re.sub(r" [A-Z]/[A-Z]($)", " ", name) + name = re.sub( + r"^[A-Z]\\\\[A-Z][^\w]+", " ", name + ) # All other types of X\X + name = re.sub(r" [A-Z]\\\\[A-Z]($)", " ", name) + name = re.sub(r"^KM[^\w]+", " ", name) # Indonesia K.M + name = re.sub(r"^E.B. ", " ", name) # Dutch E.B. equivalent to NO. 
+ + name = re.sub( + r"\(.+\)", " ", name + ) # All additional information in parentheses + name = re.sub(r"\[.+\]", " ", name) + # # Numbers in letters - name = re.sub(' ONE($)| UNO($)| UN($)', ' 1', name) - name = re.sub(' TWO($)| DOS($)| DEUX($)', ' 2', name) - name = re.sub(' THREE($)| TRES($)| TROIS($)', ' 3', name) - name = re.sub(' FOUR($)| CUATRO($)| QUATRE($)', ' 4', name) - name = re.sub(' FIVE($)| CINCO($)| CINQ($)', ' 5', name) - name = re.sub(' SIX($)| SEIS($)| SIX($)', ' 6', name) - name = re.sub(' SEVEN($)| SIETE($)| SEPT($)', ' 7', name) - name = re.sub(' EIGHT($)| OCHO($)| HUIT($)', ' 8', name) - name = re.sub(' NINE($)| NUEVE($)| NEUF($)', ' 9', name) - name = re.sub(' TEN($)| DIEZ($)| DIX($)', ' 10', name) - name = re.sub(' ELEVEN($)| ONCE($)| ONZE($)', ' 11', name) - name = re.sub(' TWELVE($)| DOCE($)| DOUZE($)', ' 12', name) - name = re.sub(' THIRTEEN($)| TRECE($)| TREIZE($)', ' 13', name) - name = re.sub(' FOURTEEN($)| CATORCE($)| QUATORZE($)', ' 14', name) - name = re.sub(' FIFTEEN($)| QUINCE($)| QUINZE($)', ' 15', name) - - name = re.sub('1ST ', 'FIRST ', name) - name = re.sub('2ND ', 'SECOND ', name) - name = re.sub('3RD ', 'THIRD ', name) - name = re.sub('4TH ', 'FOURTH ', name) - name = re.sub('5TH ', 'FIFTH ', name) + name = re.sub(r" ONE($)| UNO($)| UN($)", " 1", name) + name = re.sub(r" TWO($)| DOS($)| DEUX($)", " 2", name) + name = re.sub(r" THREE($)| TRES($)| TROIS($)", " 3", name) + name = re.sub(r" FOUR($)| CUATRO($)| QUATRE($)", " 4", name) + name = re.sub(r" FIVE($)| CINCO($)| CINQ($)", " 5", name) + name = re.sub(r" SIX($)| SEIS($)", " 6", name) + name = re.sub(r" SEVEN($)| SIETE($)| SEPT($)", " 7", name) + name = re.sub(r" EIGHT($)| OCHO($)| HUIT($)", " 8", name) + name = re.sub(r" NINE($)| NUEVE($)| NEUF($)", " 9", name) + name = re.sub(r" TEN($)| DIEZ($)| DIX($)", " 10", name) + name = re.sub(r" ELEVEN($)| ONCE($)| ONZE($)", " 11", name) + name = re.sub(r" TWELVE($)| DOCE($)| DOUZE($)", " 12", name) + name = re.sub(r" THIRTEEN($)| TRECE($)| TREIZE($)", " 13", name) + name = re.sub(r" FOURTEEN($)| CATORCE($)| QUATORZE($)", " 14", name) + name = re.sub(r" FIFTEEN($)| QUINCE($)| QUINZE($)", " 15", name) + + name = re.sub("1ST ", "FIRST ", name) + name = re.sub("2ND ", "SECOND ", name) + name = re.sub("3RD ", "THIRD ", name) + name = re.sub("4TH ", "FOURTH ", name) + name = re.sub("5TH ", "FIFTH ", name) # # Country specific appendix (S. Korea and China) - name = re.sub('\d+\s*HO($)', ' ', name) - name = re.sub('\d+\s*HAO($)', ' ', name) + name = re.sub(r"\d+\s*HO($)", " ", name) + name = re.sub(r"\d+\s*HAO($)", " ", name) # # Remove NO.s such in NO.5, NO5, NO:5, NO. 
5, NO 5, N5, N-5 etc - name = re.sub('NO[^\w\s]*[\s]*(?=\d+)', '', name) - name = re.sub('[\s]+N[\W_0]*(?=\d+)', '', name) - name = re.sub('NO\.\s*(?=[^0-9]+)', '', name) - + name = re.sub(r"NO[^\w\s]*[\s]*(?=\d+)", "", name) + name = re.sub(r"[\s]+N[\W_0]*(?=\d+)", "", name) + name = re.sub(r"NO\.\s*(?=[^0-9]+)", "", name) + # # Turn '&' to 'AND' - name = re.sub('(?<=[A-Z])\s+&\s+(?=[A-Z])', ' AND ', name) # replace 'BLACK & WHITE' to 'BLACK AND WHITE' - + name = re.sub( + r"(?<=[A-Z])\s+&\s+(?=[A-Z])", " AND ", name + ) # replace 'BLACK & WHITE' to 'BLACK AND WHITE' + # # Deromanization - vs = re.split('\s+|-|(?<=[A-Z]{3})\.',name) + vs = re.split(r"\s+|-|(?<=[A-Z]{3})\.", name) try: # # If last word from the name text has L/C/D/M then do not deromanize - if re.search('[LCDM]', vs[-1]).group(0): pass - except: + if re.search(r"[LCDM]", vs[-1]).group(0): + pass + except AttributeError: # # Try to deromanize the last word from the name text try: vs[-1] = roman.fromRoman(vs[-1]) vs[-1] = str(int(vs[-1])) - except: + except roman.InvalidRomanNumeralError: + # + # No corresponding roman numeral found. Let's leave it as is. pass - + # # Attach the deromanized digits to the end - name = ''.join(vs) + name = "".join(vs) - # # Now, remove all special characters - name = re.sub('[\W_]', '', name) - + name = re.sub(r"[\W_]", "", name) + # # Check if the name starts with digits, if yes move it to the end - try: - first_digit = re.search('^\d+', name).group(0) - name = re.sub('^\d+', '', name) + str(first_digit) - except: - pass + obj = re.search(r"^\d+", name) + if obj: + first_digit = obj.group(0) + name = re.sub(r"^\d+", "", name) + str(first_digit) # # Remove 0s from the numbers starting with 0s - try: - last_digit = re.search('\d+$', name).group(0) - non_zeros = re.sub('^0+', '', last_digit) - name = re.sub('\d+$', '', name) + str(non_zeros) - except: - pass + obj = re.search(r"\d+$", name) + if obj: + last_digit = obj.group(0) + non_zeros = re.sub("^0+", "", last_digit) + name = re.sub(r"\d+$", "", name) + str(non_zeros) + + # + # Remove all excessive white spaces + name = re.sub(r"\s+", " ", name) - if name == '': + if name == "" or name == " ": return None - - return name + else: + return name def normalize_callsign(callsign): + """ + Return a normalized International Radio Call Sign by removing non-essential + characters and ignoring meaningless call sign including 'NONE', 'UNKNOWN' + + :param callsign: String, an original call sign + :return: String, a normalized call sign + """ - if (callsign is None) | (callsign != callsign) | (callsign == '') | \ - (callsign == "NONE") | (callsign == "UNKNOWN") | (callsign == "NIL") | (callsign == "NULL"): + if ( + (callsign is None) + | (callsign != callsign) + | (callsign == "") + | (callsign == "NONE") + | (callsign == "UNKNOWN") + | (callsign == "NIL") + | (callsign == "NULL") + ): return None # # Turn to upper cases callsign = callsign.upper() - + # # Remove nasty characters, white space try: - callsign = unidecode(str(callsign)) # get rid of nasty characters, but sometimes this fails - except: + # + # get rid of nasty characters, but sometimes this fails + callsign = unidecode(str(callsign)) + except UnicodeDecodeError: try: - callsign = unidecode(str(callsign.decode('utf8'))) - except: - callsign = unidecode(str(callsign.decode('iso_8859-1'))) + callsign = unidecode(str(callsign.decode("utf8"))) + except UnicodeDecodeError: + callsign = unidecode(str(callsign.decode("iso_8859-1"))) callsign = callsign.strip() - callsign = re.sub('\s+',' 
',callsign) + callsign = re.sub(r"\s+", " ", callsign) # # Get rid of all non-word characters - callsign = re.sub('[\W_]', '', callsign) - + callsign = re.sub(r"[\W_]", "", callsign) + # # Remove 0s from callsign starting with 0s - callsign = re.sub('^0+', '', callsign) - - if callsign == '': - return None + callsign = re.sub(r"^0+", "", callsign) - return callsign \ No newline at end of file + if callsign == "": + return None + else: + return callsign diff --git a/shipdataprocess/shiptype.py b/shipdataprocess/shiptype.py index 70eff72..8ee5773 100644 --- a/shipdataprocess/shiptype.py +++ b/shipdataprocess/shiptype.py @@ -1,226 +1,305 @@ -import pandas as pd +""" +This file provides functions that process operations with regard to vessel +types defined by Global Fishing Watch (There are about 40 ship types +pre-defined). See here +https://globalfishingwatch.org/datasets-and-code-vessel-identity/ + +Last updates: 2022-01-25 +Jaeyoon Park +""" import numpy as np - def determine_shiptype(gears, shiptype_dict): - ''' - determinte_shiptype module receives multiple types of ship and returns the most specific ship type. - - -------- - ARGUMENT - -------- - gears: SERIES, LIST, OR STR, single or multiple combination of ship types joined by '|' (OR) - (examples: fixed_gear|set_longlines, cargo) - -------- - - ------ - RETURN - ------ - STR or None, select the most detailed type among the ship types received if they are all in one category, + """ + This module receives multiple types of ship and returns the most specific + ship type in the pre-defined vessel classification hierarchy. + https://globalfishingwatch.org/datasets-and-code-vessel-identity/ + + :param gears: SERIES, LIST, OR STR, single or multiple combination of ship + type joined by '|' (OR) (examples: fixed_gear|set_longlines, cargo) + :param shiptype_dict: DICT, a geartype dictionary containing 'path' + information in the vessel class hierarchy + :return: STR or None, select the most detailed type among the ship types + received if they are all in one category, otherwise a combination of ship types. 
- (examples: fixed_gear|set_longlines -> set_longlines, trawler|fixed_gear|set_longlines -> trawler|set_longlines) - ------ - ''' - + (examples: fixed_gear|set_longlines -> set_longlines, + trawler|fixed_gear|set_longlines -> trawler|set_longlines) + """ - ## if there is no information on gears, then return None - if len(gears)==0: + # + # if there is no information on gears, then return None + if len(gears) == 0: return None - - ### make sure the entry is a list of strings - if type(gears)==str: + + # + # make sure the entry is a list of strings + if type(gears) == str: gears = [gears] - elif type(gears)==list: + elif type(gears) == list: pass - else: gears = gears.tolist() - - ### remove Nones - gears = [gear.replace(' ','').strip() for gear in gears if (gear!=None)&(gear==gear)&(gear!='')] - - ### take only specific ones if there are several possibly duplicated ones (example: trawlers, trawlers|purse_seines) + else: + gears = gears.tolist() + + # + # remove Nones + gears = [ + gear.replace(" ", "").strip() + for gear in gears + if (gear is not None) & (gear == gear) & (gear != "") + ] + + # + # take only specific ones if there are several possibly duplicated ones + # (example: trawlers, trawlers|purse_seines) gears = reduce_to_specifics_with_multiples(gears, shiptype_dict) - ### get rid of '|' and take all possible gears individually - gears_split=[] + # + # get rid of '|' and take all possible gears individually + gears_split = [] for g in gears: - if '|' in g: - gears_split += g.split('|') + if "|" in g: + gears_split += g.split("|") else: gears_split.append(g) - - ### map geartype_dict to compare categories (broader ones to be removed) + + # + # map geartype_dict to compare categories + # (broader/ ones to be removed) gears = reduce_to_specifics(gears_split, shiptype_dict) - ### remove redundant values and join together with '|' + # + # remove redundant values and join together with '|' gears = sorted(list(set(gears))) - final_value = '|'.join(gears) - if final_value=='': + final_value = "|".join(gears) + if final_value == "": return None else: return final_value - def determine_shiptype_simple(gears, shiptype_dict): - ''' - same as determinte_shiptype module but without reducing multiple gears to specific (this is for testing). - ''' + """ + same as determine_shiptype module but without reducing multiple gears + to specific (this is for testing). + + :param gears: SERIES, LIST, OR STR, single or multiple combination of + ship types joined by '|' (OR) (examples: fixed_gear|set_longlines, cargo) + :param shiptype_dict: DICT, ship type dictionary containing 'path' of + gear type in the hierarchy + :return: STR or None, select the most detailed type among the ship types + received if they are all in one category, + otherwise a combination of ship types. 
+ (examples: fixed_gear|set_longlines -> set_longlines, + trawler|fixed_gear|set_longlines -> trawler|set_longlines) + """ - ## if there is no information on gears, then return None - if len(gears)==0: + # + # if there is no information on gears, then return None + if len(gears) == 0: return None - - ### make sure the entry is a list of strings - if type(gears)==str: + + # + # make sure the entry is a list of strings + if type(gears) == str: gears = [gears] - elif type(gears)==list: + elif type(gears) == list: pass - else: gears = gears.tolist() - - ### remove Nones - gears = [gear.replace(' ','').strip() for gear in gears if (gear!=None)&(gear==gear)&(gear!='')] - - ### get rid of '|' and take all possible gears individually - gears_split=[] + else: + gears = gears.tolist() + + # + # remove Nones + gears = [ + gear.replace(" ", "").strip() + for gear in gears + if (gear is not None) & (gear == gear) & (gear != "") + ] + + # + # get rid of '|' and take all possible gears individually + gears_split = [] for g in gears: - if '|' in g: - gears_split += g.split('|') + if "|" in g: + gears_split += g.split("|") else: gears_split.append(g) - - ### map geartype_dict to compare categories (broader ones to be removed) + + # + # map geartype_dict to compare categories (broader ones to be removed) gears = reduce_to_specifics(gears_split, shiptype_dict) - ### remove redundant values and join together with '|' + # + # remove redundant values and join together with '|' gears = sorted(list(set(gears))) - final_value = '|'.join(gears) - if final_value=='': + final_value = "|".join(gears) + if final_value == "": return None else: return final_value def tag_confidence_level(x, c): - if (x==x)&(x!=None)&(x!=0)&(x!=''): - return str(c) + '-' + str(x) + """ + Helper function to add confidence level to geartype + + :param x: STRING, geartype + :param c: INT, confidence level (1 to 4) + :return: STRING, geartype attached with confidence level by a dash ('-') + """ + if (x == x) & (x is not None) & (x != 0) & (x != ""): + return str(c) + "-" + str(x) else: return np.nan def determine_shiptype_with_confidence(gears, shiptype_dict): - ''' - same as determine_shiptype but with confidence level taken into account - ''' - - ## if there is no information on gears, then return None - if len(gears)==0: + """ + same as the determine_shiptype module above + but with confidence level taken into account + """ + + # + # if there is no information on gears, then return None + if len(gears) == 0: return np.nan - - ### make sure the entry is a list of strings - if type(gears)==str: + + # + # make sure the entry is a list of strings + if type(gears) == str: gears = [gears] - elif type(gears)==list: + elif type(gears) == list: pass - else: gears = gears.tolist() - - ### remove NaN/None - gears = [gear.replace(' ','').strip() for gear in gears if (gear!=None)&(gear==gear)&(gear!='')] - if len(gears)==0: + else: + gears = gears.tolist() + + # + # remove NaN/None + gears = [ + gear.replace(" ", "").strip() + for gear in gears + if (gear is not None) & (gear == gear) & (gear != "") + ] + if len(gears) == 0: return np.nan - - ### remove all gear values from lists of less confidence level - levels = [int(gear.split('-')[0]) for gear in gears] - if len(levels)>0: + + # + # remove all gear values from lists of less confidence level + levels = [int(gear.split("-")[0]) for gear in gears] + if len(levels) > 0: highest_level = max(levels) - if (highest_level==3)&(2 in levels): - gears_3 = [gear.split('-')[1] for gear in gears if ('3' in 
gear)] - gears_2 = [gear.split('-')[1] for gear in gears if ('2' in gear)] - gears = [gear.split('-')[1] for gear in gears if ('2' in gear)|('3' in gear)] + if (highest_level == 3) & (2 in levels): + gears_3 = [gear.split("-")[1] for gear in gears if ("3" in gear)] + gears_2 = [gear.split("-")[1] for gear in gears if ("2" in gear)] + gears = [ + gear.split("-")[1] + for gear in gears + if ("2" in gear) | ("3" in gear) + ] else: - gears = [gear.split('-')[1] for gear in gears if str(highest_level) in gear] - - ### take only specific ones if there are several possibly duplicated ones (example: trawlers, trawlers|purse_seines) + gears = [ + gear.split("-")[1] + for gear in gears + if str(highest_level) in gear + ] + + # + # take only specific ones if there are several possibly duplicated ones + # (example: trawlers, trawlers|purse_seines) gears = reduce_to_specifics_with_multiples(gears, shiptype_dict) - ### get rid of '|' and take all possible gears individually - gears_split=[] + # + # get rid of '|' and take all possible gears individually + gears_split = [] for g in gears: - if '|' in g: - gears_split += g.split('|') + if "|" in g: + gears_split += g.split("|") else: gears_split.append(g) - - ### map geartype_dict to compare categories (broader ones to be removed) + + # + # map geartype_dict to compare categories (broader ones to be removed) gears = reduce_to_specifics(gears_split, shiptype_dict) - ### remove redundant values and join together with '|' + # + # remove redundant values and join together with '|' gears = sorted(list(set(gears))) - final_value = '|'.join(gears) - - ### check the case of combination of level 2 and 3 - if (highest_level==3)&(2 in levels): + final_value = "|".join(gears) + + # + # check the case of combination of level 2 and 3 + if (highest_level == 3) & (2 in levels): final_value_3 = determine_shiptype(gears_3, shiptype_dict) final_value_2 = determine_shiptype(gears_2, shiptype_dict) - if (not final_value in final_value_3)&(final_value in final_value_2): + if (final_value not in final_value_3) & (final_value in final_value_2): pass - else: + else: final_value = final_value_3 - - ### output - if final_value=='': + + # + # output + if final_value == "": return np.nan else: - final_value = str(highest_level) + '-' + final_value + final_value = str(highest_level) + "-" + final_value return final_value def select_high_confidence_geartype(x, y, shiptype_dict): - '''return a geartype that has higher confidence level''' - - if (x==x)&(x!=None)&(y==y)&(y!=None): - x_level = int(x.split('-')[0]) - x_value = x.split('-')[1] - y_level = int(y.split('-')[0]) - y_value = y.split('-')[1] - ## if x confidence level is higher, return x + """ + Return a geartype that has higher confidence level + + :param x: STRING, geartype attached with a confidence to compare + :param y: STRING, geartype attached with a confidence to compare + :param shiptype_dict: DICT, a geartype dictionary containing 'path' + info in the hierarchy + :return: STRING, geartype attached with a higher confidence between x and y + """ + + if (x == x) & (x is not None) & (y == y) & (y is not None): + x_level = int(x.split("-")[0]) + x_value = x.split("-")[1] + y_level = int(y.split("-")[0]) + y_value = y.split("-")[1] + # + # if x confidence level is higher, return x if x_level > y_level: return x - ## if confidence levels are the same, determine shiptype and return + # + # if confidence levels are the same, determine shiptype and return elif x_level == y_level: - return str(x_level) + '-' + 
determine_shiptype([x_value, y_value], shiptype_dict) - ## if y confidence level is higher, return y + return ( + str(x_level) + + "-" + + determine_shiptype([x_value, y_value], shiptype_dict) + ) + # + # if y confidence level is higher, return y else: return y - elif (x==x)&(x!=None): + elif (x == x) & (x is not None): return x - elif (y==y)&(y!=None): + elif (y == y) & (y is not None): return y else: return np.nan -### function that makes geartype dictionary from shiptypes yaml file def make_shiptype_dict(shiptypes): - ''' - This module returns a categorical dictionary of ship types from a ship type yml file received. - Values of the dictionary show where a specific ship type is situated in the ship type category tree. - - -------- - ARGUMENT - -------- - shiptypes: DICT, usually loaded from a .yml file that place categorically all possible ship types as a tree - -------- - - ------ - RETURN - ------ - shiptype_dict: DICT, shiptype categorical dictionary - (examples: (key, value) -> (set_longlines, (fishing, fixed_gear, set_longlines))) - ------ - ''' - - ### create a geartype dictionary where each gear has categorical information + """ + This module returns a categorical dictionary of ship types + from a ship type yml file received. Values of the dictionary show + where a specific ship type is situated in the ship type category tree. + + :param shiptypes: DICT, usually loaded from a .yml file that place + categorically all possible ship types as a tree + :return shiptype_dict: DICT, shiptype categorical dictionary + (examples: + (key, value) -> (set_longlines, (fishing, fixed_gear, set_longlines))) + """ + + # + # create a geartype dictionary where each gear has categorical information shiptype_dict = {} for stype in shiptypes: for l1 in shiptypes[stype]: @@ -233,143 +312,169 @@ def make_shiptype_dict(shiptypes): shiptype_dict[l3] = [stype, l1, l2, l3] if shiptypes[stype][l1][l2][l3] is not None: for l4 in shiptypes[stype][l1][l2][l3]: - shiptype_dict[l4] = [stype, l1, l2, l3, l4] - - ### other_fishing, other_not_fishing, unknown_fishing can be replaced by other more specific gears - shiptype_dict['fishing'] = ['fishing'] - shiptype_dict['non_fishing'] = ['non_fishing'] - shiptype_dict['unknown'] = None - shiptype_dict[''] = None - + shiptype_dict[l4] = [ + stype, + l1, + l2, + l3, + l4, + ] + + # + # other_fishing, other_not_fishing, unknown_fishing + # can be replaced by other more specific gears + shiptype_dict["fishing"] = ["fishing"] + shiptype_dict["non_fishing"] = ["non_fishing"] + shiptype_dict["unknown"] = None + shiptype_dict[""] = None + return shiptype_dict -### function to choose only specific gear values if broader level values exist with specific level values def reduce_to_specifics(gears, shiptype_dict): - ''' - this module reduces the list of gear values only to contain specific gear values if there are broader gear values together - - -------- - ARGUMENT - -------- - gears: LIST of strings that are gear types predefined - -------- - - ------ - RETURN - ------ - values: LIST of string that are gear types predefined - - ''' - if len(gears)==0: + """ + This module reduces the list of gear values only to contain specific + gear values if there are broader gear values together + + :param gears: LIST, list of strings that are gear types predefined + :param shiptype_dict: DICT, geartype dictionary containing 'path' + information in the hierarchy + :return: LIST of string that are gear types predefined + """ + if len(gears) == 0: return [] - - ### reduce only single gear 
values - singles = [gear for gear in gears if '|' not in gear] - multiples = [gear for gear in gears if '|' in gear] - - ### mapped to shiptype dictionary values - gears_mapped = [shiptype_dict[gear] for gear in singles if shiptype_dict[gear]!=None] - + + # + # reduce only single gear values + singles = [gear for gear in gears if "|" not in gear] + multiples = [gear for gear in gears if "|" in gear] + + # + # mapped to shiptype dictionary values + gears_mapped = [ + shiptype_dict[gear] + for gear in singles + if shiptype_dict[gear] is not None + ] + temp = list(gears_mapped) for gear in gears_mapped: - others = [g for g in gears_mapped if g!=gear] + others = [g for g in gears_mapped if g != gear] for other in others: - ### see if the gear in question is a subset of anyone of the others, if true, remove it from the list + # + # see if the gear in question is a subset of anyone of the others, + # if true, remove it from the list if set(gear).issubset(other): if gear in temp: temp.remove(gear) gears_mapped = temp - - ### return only end values as in a list + + # + # return only end values as in a list reduced = [] for gear in gears_mapped: val = gear[-1] reduced.append(val) reduced = list(set(reduced)) final = reduced + multiples - - return final + return final def reduce_to_specifics_with_multiples(gears, shiptype_dict): - if len(gears)==0: + """ + Same as the function above but accepting multiple gears attached with '|' + """ + if len(gears) == 0: return [] - - ### reduce singles to specifics if possible + + # + # reduce singles to specifics if possible gears = reduce_to_specifics(gears, shiptype_dict) - singles = [gear for gear in gears if '|' not in gear] - multiples = [gear for gear in gears if '|' in gear] - - if len(multiples)>0: + singles = [gear for gear in gears if "|" not in gear] + multiples = [gear for gear in gears if "|" in gear] + + if len(multiples) > 0: for multiple in multiples: - flags=[] - elems = multiple.split('|') - + flags = [] + elems = multiple.split("|") + for elem in elems: - ### look at elements of multiples if they can be reduced to specifics with single values - vals = [reduce_to_specifics([elem, single], shiptype_dict) for single in singles \ - if len(reduce_to_specifics([elem, single], shiptype_dict))==1] - if len(vals)==1: + # + # look at elements of multiples + # if they can be reduced to specifics + # with single values + vals = [ + reduce_to_specifics([elem, single], shiptype_dict) + for single in singles + if len(reduce_to_specifics([elem, single], shiptype_dict)) + == 1 + ] + if len(vals) == 1: flags.append(1) reduced = vals[0] else: flags.append(0) - ### if it can be reduced, then remove this multiple and put this reduced values - if sum(flags)==1: + # + # if it can be reduced, then remove this multiple + # and put this reduced values + if sum(flags) == 1: gears.remove(multiple) gears = gears + reduced - - ### final clearing-up + + # + # final clearing-up gears = reduce_to_specifics(gears, shiptype_dict) - + return gears def reduce_to_general(gears, shiptype_dict): - ''' - this module reduces the list of gear values only to contain general geartype values - - -------- - ARGUMENT - -------- - gears: LIST of strings that are gear types predefined - -------- - - ------ - RETURN - ------ - values: LIST of string that are gear types predefined + """ + This module reduces the list of gear values only to contain general + geartype values - ''' + :param gears: LIST, list of strings that are gear types predefined + :param shiptype_dict: DICT, geartype 
dictionary containing 'path' + information in the hierarchy + :return: LIST of string that are gear types predefined + """ - if len(gears)==0: + if len(gears) == 0: return [] - - ### reduce only single gear values - singles = [gear for gear in gears if '|' not in gear] - multiples = [gear for gear in gears if '|' in gear] - ### mapped to shiptype dictionary values - gears_mapped = [shiptype_dict[gear] for gear in singles if shiptype_dict[gear]!=None] + # + # reduce only single gear values + singles = [gear for gear in gears if "|" not in gear] + multiples = [gear for gear in gears if "|" in gear] + + # + # mapped to shiptype dictionary values + gears_mapped = [ + shiptype_dict[gear] + for gear in singles + if shiptype_dict[gear] is not None + ] temp = list(gears_mapped) for gear in gears_mapped: - others = [g for g in gears_mapped if g!=gear] + others = [g for g in gears_mapped if g != gear] for other in others: - ### see if anyone of the others is a subset of gear in question, if true, remove the gear (more detailed one) from the list + # + # see if anyone of the others is a subset of gear in question, + # if true, remove the gear (more detailed one) from the list if set(other).issubset(gear): if gear in temp: temp.remove(gear) - + gears_mapped = temp - - ### return only end values as in a list + + # + # return only end values as in a list reduced = [] for gear in gears_mapped: val = gear[-1] @@ -382,65 +487,94 @@ def reduce_to_general(gears, shiptype_dict): def reduce_to_general_with_multiples(gears, shiptype_dict): - ''' - returns general (less detailed) gear types only if gear values can be reduced according to shiptype yaml file - ''' - - if len(gears)==0: + """ + Returns general (less detailed) gear types + only if gear values can be reduced according to shiptype yaml file + + :param gears: LIST, list of strings that are gear types predefined + :param shiptype_dict: DICT, geartype dictionary containing 'path' + information in the hierarchy + :return: LIST of string that are gear types predefined + """ + + if len(gears) == 0: return [] - - ### reduce singles to specifics if possible + + # + # reduce singles to specifics if possible gears = reduce_to_general(gears, shiptype_dict) - singles = [gear for gear in gears if '|' not in gear] - multiples = [gear for gear in gears if '|' in gear] - - if len(multiples)>0: + singles = [gear for gear in gears if "|" not in gear] + multiples = [gear for gear in gears if "|" in gear] + + if len(multiples) > 0: for multiple in multiples: - flags=[] - elems = multiple.split('|') - + flags = [] + elems = multiple.split("|") + for elem in elems: - ### look at elements of multiples if they can be reduced to specifics with single values - vals = [reduce_to_general([elem, single], shiptype_dict) for single in singles \ - if len(reduce_to_general([elem, single], shiptype_dict))==1] - if len(vals)==1: + # + # look at elements of multiples if they can be reduced + # to specifics with single values + vals = [ + reduce_to_general([elem, single], shiptype_dict) + for single in singles + if len(reduce_to_general([elem, single], shiptype_dict)) + == 1 + ] + if len(vals) == 1: flags.append(1) reduced = vals[0] else: flags.append(0) - - ### if it can be reduced, then remove this multiple and put this reduced values - if sum(flags)>0: + + # + # if it can be reduced, then remove this multiple + # and put this reduced values + if sum(flags) > 0: gears.remove(multiple) gears = gears + reduced - ### final clearing-up + # + # final clearing-up gears = 
reduce_to_general(gears, shiptype_dict) - - return gears + return gears def is_fishing_vessel(gear, shiptype_dict): - if (gear=='')|(gear==None)|(gear!=gear): + """ + A function that determines if the given vessel class is a fishing vessel + + :param gear: LIST, list of strings that are gear types predefined + :param shiptype_dict: DICT, geartype dictionary containing 'path' + information in the hierarchy + :return: BOOL, whether the vessel is a fishing vessel + """ + if (gear == "") | (gear is None) | (gear != gear): return None else: - gear = gear.replace(' ','') - gear_mapped=[] - gears = gear.split('|') - - ## create a list of gears mapped to 0s (non-fishing gear) or 1s (fishing gear) + gear = gear.replace(" ", "") + gear_mapped = [] + gears = gear.split("|") + + # + # create a list of gears mapped to + # 0s (non-fishing gear) or 1s (fishing gear) for gear in gears: - if shiptype_dict[gear][0]=='fishing': + if shiptype_dict[gear][0] == "fishing": gear_mapped.append(1) else: gear_mapped.append(0) - if np.prod(gear_mapped)==1: ## if all mapped gears are 1s (therefore fishing vessel) + if ( + np.prod(gear_mapped) == 1 + ): # if all mapped gears are 1s (therefore fishing vessel) isfishingvessel = True - elif sum(gear_mapped)==0: ## if all mapped gears are 0s (therefore non-fishing vessel) + elif ( + sum(gear_mapped) == 0 + ): # if all mapped gears are 0s (therefore non-fishing vessel) isfishingvessel = False - else: ## not determinable, return None + else: # not determinable, return None return None - + return isfishingvessel diff --git a/shipdataprocess/standardize.py b/shipdataprocess/standardize.py index 67108c4..e2a41dc 100644 --- a/shipdataprocess/standardize.py +++ b/shipdataprocess/standardize.py @@ -5,10 +5,27 @@ import pandas as pd import numpy as np import re -from django.utils.encoding import smart_str from unidecode import unidecode +def smart_str(s): + """ + This module finds the right encoding of the given string + + :param s: STRING, a text in which we do not know the type of encoding + :return: STRING, standardized string + """ + if issubclass(type(s), str): + return s + if isinstance(s, bytes): + try: + str(s, "utf-8", "strict") + except UnicodeDecodeError: + str(s, "iso-8859-1", "strict") + else: + return str(s) + + def imo_checksum(n): """ This function for IMO numbers that are designed as 7-digit integer number @@ -32,12 +49,14 @@ def imo_checksum(n): # # IMO checksum formula - if ((n // 1000000 % 10) * 7 + - (n // 100000 % 10) * 6 + - (n // 10000 % 10) * 5 + - (n // 1000 % 10) * 4 + - (n // 100 % 10) * 3 + - (n // 10 % 10) * 2) % 10 == (n % 10): + if ( + (n // 1000000 % 10) * 7 + + (n // 100000 % 10) * 6 + + (n // 10000 % 10) * 5 + + (n // 1000 % 10) * 4 + + (n // 100 % 10) * 3 + + (n // 10 % 10) * 2 + ) % 10 == (n % 10): return True else: return False @@ -47,7 +66,7 @@ def standardize_imo(elem, check_field=True): """ Standardize IMO numbers (ignore all letters and characters but numbers) If it comes with pandas Series or DataFrame, make sure - it saves IMO numbers in STRING, as pandas Seires or DataFrame usually + it saves IMO numbers in STRING, as pandas Series or DataFrame usually turn INTEGER to FLOAT in the presence of NULL in the same column. 
:param elem: Pandas Series, Series that contains a string field @@ -58,69 +77,87 @@ def standardize_imo(elem, check_field=True): if check_field: if type(elem) == pd.core.series.Series: elem = elem.apply( - lambda x: re.sub(r'[^\d\.]', '', str(x)) - if (x == x) & (x is not None) & (x != '') & (x != 0) else None) + lambda x: re.sub(r"[^\d.]", "", str(x)) + if (x == x) & (x is not None) & (x != "") & (x != 0) + else None + ) elem = elem.apply( lambda x: str(int(float(x))) - if (x == x) & (x is not None) & (x != '') & (x != 0) else None) + if (x == x) & (x is not None) & (x != "") & (x != 0) + else None + ) elem = elem.apply(lambda x: x if imo_checksum(x) else None) return elem elif type(elem) == pd.core.frame.DataFrame: elem = elem[check_field].apply( - lambda x: re.sub(r'[^\d\.]', '', str(x)) - if (x == x) & (x is not None) & (x != '') & (x != 0) else None) + lambda x: re.sub(r"[^\d.]", "", str(x)) + if (x == x) & (x is not None) & (x != "") & (x != 0) + else None + ) elem = elem.apply( lambda x: str(int(float(x))) - if (x == x) & (x is not None) & (x != '') & (x != 0) else None) + if (x == x) & (x is not None) & (x != "") & (x != 0) + else None + ) elem = elem.apply(lambda x: x if imo_checksum(x) else None) return elem - elif (elem != elem) | (elem is None) | (elem == '') | (elem == 0): + elif (elem != elem) | (elem is None) | (elem == "") | (elem == 0): return None elif (type(elem) == str) | (type(elem) == int) | (type(elem) == float): - elem = re.sub(r'[^\d\.]', '', str(elem)) + elem = re.sub(r"[^\d.]", "", str(elem)) if elem == "": return None else: elem = str(int(float(elem))) - if checksum(elem): + if imo_checksum(elem): return elem else: return None else: - raise ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return None -# -# Standardize floating numbers. -# Make sure to remove all comma separators (,). -# def standardize_float(elem, check_field=True): + """ + This module standardizes floating numbers. + Make sure to remove all comma separators (,). 
+ + :param elem: Pandas Series, DataFrame, STR, FLOAT, INT, types + that contain a string field + :param check_field: Boolean, field that contains a float number + :return: Same type as the elem input + """ if check_field: if type(elem) == pd.core.series.Series: return elem.apply( - lambda x: float(str(x).replace(',', '')) - if (x == x) & (x is not None) & (x != '') & (x != 0) else np.nan) + lambda x: float(str(x).replace(",", "")) + if (x == x) & (x is not None) & (x != "") & (x != 0) + else np.nan + ) elif type(elem) == pd.core.frame.DataFrame: return elem[check_field].apply( - lambda x: float(str(x).replace(',', '')) - if (x == x) & (x is not None) & (x != '') & (x != 0) else np.nan) - elif (elem != elem) | (elem is None) | (elem == '') | (elem == 0): + lambda x: float(str(x).replace(",", "")) + if (x == x) & (x is not None) & (x != "") & (x != 0) + else np.nan + ) + elif (elem != elem) | (elem is None) | (elem == "") | (elem == 0): return np.nan elif (type(elem) == str) | (type(elem) == int) | (type(elem) == float): - return float(str(elem).replace(',', '')) + return float(str(elem).replace(",", "")) else: - raise ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return np.nan def smart_upper(text): """ - Selective upper sensitive to upper/lower cases - when it's related to URLs - Source: https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string + Selective upper sensitive to upper/lower cases, particularly + when it's related to URLs, do not turn the URL to upper cases + Source: "https://stackoverflow.com/questions/6038061/ + regular-expression-to-find-urls-within-a-string" :param text: String, giv en text :return: String, Upper cased text except the URL part @@ -129,7 +166,10 @@ def smart_upper(text): # # Find URLs in the given string and upper-case only the other texts # to preserve caps of URLs - regex_for_url = r"((http|ftp|https)\:\/\/)?([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?" + regex_for_url = ( + r"((http|ftp|https)\:\/\/)?([\w_-]+(?:(?:\.[\w_-]+)+))" + r"([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?" 
+ ) matched = re.finditer(regex_for_url, text) prev_end = 0 for m in matched: @@ -137,11 +177,9 @@ def smart_upper(text): start = m.start() end = m.end() - text = \ - text[:prev_end] + \ - text[prev_end:start].upper() + \ - url + \ - text[end:] + text = ( + text[:prev_end] + text[prev_end:start].upper() + url + text[end:] + ) prev_end = end text = text[:prev_end] + text[prev_end:].upper() @@ -163,158 +201,310 @@ def standardize_str(elem, check_field=True): if check_field: if type(elem) == pd.core.series.Series: elem = elem.apply( - lambda x: smart_upper(re.sub(r'\s+', ' ', smart_str(x)).strip()) - if (x == x) & (x is not None) & (x != '') else None) + lambda x: smart_upper( + re.sub(r"\s+", " ", smart_str(x)).strip() + ) + if (x == x) & (x is not None) & (x != "") + else None + ) return elem elif type(elem) == pd.core.frame.DataFrame: elem = elem[check_field].apply( - lambda x: smart_upper(re.sub(r'\s+', ' ', smart_str(x)).strip()) - if (x == x) & (x is not None) & (x != '') else None) + lambda x: smart_upper( + re.sub(r"\s+", " ", smart_str(x)).strip() + ) + if (x == x) & (x is not None) & (x != "") + else None + ) return elem - elif (elem != elem) | (elem is None) | (elem == '') | (elem == 0): + elif (elem != elem) | (elem is None) | (elem == "") | (elem == 0): return None elif type(elem) == str: - return smart_upper(re.sub(r'\s+', ' ', elem).strip()) + return smart_upper(re.sub(r"\s+", " ", elem).strip()) else: - raise ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return None -# -# Standardize owner's names. Remove all variations of CO. LTD or similar types of suffixes -# and unionize all "fishery' to "fisheries". -# def standardize_owner(elem, check_field=True): + """ + This module standardizes owner's names which removes all variations of + suffix such as CO. LTD or similar types + and unionize "fishery' to "fisheries". 
+ + :param elem: Pandas Series, DataFrame, STRING, a data type + that contains a string field + :param check_field: Boolean, field that contains the given strings + :return: + """ if check_field: elem = standardize_str(elem, check_field) - text_to_remove = \ - ['CO LTD', 'COLTD', 'COMPANY LTD', 'CO LIMITED', 'COMPANY LIMITED', 'CO LIMTED', 'CO LTTD', 'CV LIMITADA', - 'LTD SA($)', 'LTD S A($)', 'CO SA($)', 'CO S A($)', 'CO AB($)', 'CO A B($)', 'CO PTY LTD($)', 'CO LRD($)', - 'PTY LIMITED($)', 'PTY LTD($)', 'SA PTY LTD($)', 'CORP LTD($)', 'LTDA EPP($)', 'JOINT STOCK COMPANY($)', - 'JOINTSTOCK COMPANY($)', 'CORPORATION PTE LTD($)', 'CORPORATION PTE($)', 'CORP PTE($)', 'CORP SA($)', - 'CORP INC($)', 'CORPORATION($)', 'CORP($)', 'INCORPORATED($)', 'INC($)', 'AP PTE LTD', 'CO PTE LTD', - 'GMBH CO', 'GMBH($)', 'LTD($)', 'LTDA($)', 'LIMITED($)', 'PTE($)', 'LIMITADA($)', 'LDA($)', 'LLC($)', - 'COMPANY NV($)', 'COMPANY N V($)', 'COMPANY BV($)', 'COMPANY B V($)', 'CO BV($)', 'CO B V($)', 'CO NV($)', - 'CO N V($)', 'SA DE CV($)', 'S A DE C V($)', 'SCL DE CV($)', 'S C L DE C V($)', 'SCL($)', 'S C L($)', - 'S C DE R L($)', 'S R L DE C V($)', 'SAC($)', 'S A C($)', 'EIRL($)', 'E I R L($)', 'SRL($)', 'S R L($)', - ' CIA($)', 'EURL($)', '(^)EURL', 'SARL($)', '(^)SARL', 'SNC($)', '(^)SNC', 'SPC($)', '(^)SPC', 'SPA($)', - 'SAS($)', ' SA($)', ' S A($)', ' SL($)', ' S L($)', ' SC($)', ' S C($)', 'CO WLL($)', 'CO LIB($)', - ' AS($)', ' A S($)', 'PJSC($)', 'P JSC($)', 'OJSC($)', 'CJSC($)' 'JSC($)', ' EPP($)', ' CB($)', ' C B($)', - ' CA($)', ' C A($)', ' GIE($)', 'KABUSHIKI KAISHA($)', ' KK($)', 'K K($)', ' BV($)', ' B V($)', - 'YUGEN KAISHA', 'YUGEN', 'KAISHA', 'KAISYA', 'YUGEN KAISYA', 'GYOGYO', 'GYOGYOU', 'GAISHA', ' JU($)', - 'OOO($)', '(^)OOO', 'CO PVT($)', 'COMPANY PVT($)', ' PT($)', ' P T($)', '(^)PT', ' CC($)', - ' CO($)', 'COMPANY($)', ' NV($)', ' N V($)', '^NA($)', '^N A($)', 'RPTD SOLD.*', 'OWNER UNKNOWN*', - 'CO LT', 'EHF($)', '(^)EHF'] - text_to_remove = '|'.join(text_to_remove) + text_to_remove = [ + "CO LTD", + "COLTD", + "COMPANY LTD", + "CO LIMITED", + "COMPANY LIMITED", + "CO LIMTED", + "CO LTTD", + "CV LIMITADA", + "LTD SA($)", + "LTD S A($)", + "CO SA($)", + "CO S A($)", + "CO AB($)", + "CO A B($)", + "CO PTY LTD($)", + "CO LRD($)", + "PTY LIMITED($)", + "PTY LTD($)", + "SA PTY LTD($)", + "CORP LTD($)", + "LTDA EPP($)", + "JOINT STOCK COMPANY($)", + "JOINTSTOCK COMPANY($)", + "CORPORATION PTE LTD($)", + "CORPORATION PTE($)", + "CORP PTE($)", + "CORP SA($)", + "CORP INC($)", + "CORPORATION($)", + "CORP($)", + "INCORPORATED($)", + "INC($)", + "AP PTE LTD", + "CO PTE LTD", + "GMBH CO", + "GMBH($)", + "LTD($)", + "LTDA($)", + "LIMITED($)", + "PTE($)", + "LIMITADA($)", + "LDA($)", + "LLC($)", + "COMPANY NV($)", + "COMPANY N V($)", + "COMPANY BV($)", + "COMPANY B V($)", + "CO BV($)", + "CO B V($)", + "CO NV($)", + "CO N V($)", + "SA DE CV($)", + "S A DE C V($)", + "SCL DE CV($)", + "S C L DE C V($)", + "SCL($)", + "S C L($)", + "S C DE R L($)", + "S R L DE C V($)", + "SAC($)", + "S A C($)", + "EIRL($)", + "E I R L($)", + "SRL($)", + "S R L($)", + " CIA($)", + "EURL($)", + "(^)EURL", + "SARL($)", + "(^)SARL", + "SNC($)", + "(^)SNC", + "SPC($)", + "(^)SPC", + "SPA($)", + "SAS($)", + " SA($)", + " S A($)", + " SL($)", + " S L($)", + " SC($)", + " S C($)", + "CO WLL($)", + "CO LIB($)", + " AS($)", + " A S($)", + "PJSC($)", + "P JSC($)", + "OJSC($)", + "CJSC($)" "JSC($)", + " EPP($)", + " CB($)", + " C B($)", + " CA($)", + " C A($)", + " GIE($)", + "KABUSHIKI KAISHA($)", + " KK($)", + "K K($)", + " 
BV($)", + " B V($)", + "YUGEN KAISHA", + "YUGEN", + "KAISHA", + "KAISYA", + "YUGEN KAISYA", + "GYOGYO", + "GYOGYOU", + "GAISHA", + " JU($)", + "OOO($)", + "(^)OOO", + "CO PVT($)", + "COMPANY PVT($)", + " PT($)", + " P T($)", + "(^)PT", + " CC($)", + " CO($)", + "COMPANY($)", + " NV($)", + " N V($)", + "^NA($)", + "^N A($)", + "RPTD SOLD.*", + "OWNER UNKNOWN*", + "CO LT", + "EHF($)", + "(^)EHF", + ] + text_to_remove = "|".join(text_to_remove) if type(elem) == pd.core.series.Series: elem = elem.apply( - lambda x: unidecode(re.sub(r'\(.+\)', ' ', x)).strip() if (x == x) & (x != None) & (x != '') else None) + lambda x: unidecode(re.sub(r"\(.+\)", " ", x)).strip() + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem.apply( - lambda x: unidecode(re.sub(r'[^\w]+', ' ', x)).strip() if (x == x) & (x != None) & (x != '') else None) + lambda x: unidecode(re.sub(r"[^\w]+", " ", x)).strip() + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem.apply( - lambda x: re.sub(text_to_remove, ' ', x) if (x == x) & (x != None) * (x != '') else None) + lambda x: re.sub(text_to_remove, " ", x) + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem.apply( - lambda x: re.sub(r'\s+', ' ', x).strip() if (x == x) & (x != None) * (x != '') else None) + lambda x: re.sub(r"\s+", " ", x).strip() + if (x == x) & (x is not None) & (x != "") + else None + ) return elem.apply( - lambda x: re.sub('FISHERY', 'FISHERIES', x) if (x == x) & (x != None) * (x != '') else None) + lambda x: re.sub("FISHERY", "FISHERIES", x) + if (x == x) & (x is not None) & (x != "") + else None + ) elif type(elem) == pd.core.frame.DataFrame: elem = elem[check_field].apply( - lambda x: unidecode(re.sub(r'\(.+\)', ' ', x)).strip() if (x == x) & (x != None) & (x != '') else None) + lambda x: unidecode(re.sub(r"\(.+\)", " ", x)).strip() + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem[check_field].apply( - lambda x: unidecode(re.sub(r'[^\w]+', ' ', x)).strip() if (x == x) & (x != None) & (x != '') else None) + lambda x: unidecode(re.sub(r"[^\w]+", " ", x)).strip() + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem[check_field].apply( - lambda x: re.sub(text_to_remove, ' ', x) if (x == x) & (x != None) * (x != '') else None) + lambda x: re.sub(text_to_remove, " ", x) + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem[check_field].apply( - lambda x: re.sub(r'\s+', ' ', x).strip() if (x == x) & (x != None) * (x != '') else None) + lambda x: re.sub(r"\s+", " ", x).strip() + if (x == x) & (x is not None) * (x != "") + else None + ) return elem[check_field].apply( - lambda x: re.sub('FISHERY', 'FISHERIES', x) if (x == x) & (x != None) * (x != '') else None) - elif (elem != elem) | (elem == None) | (elem == '') | (elem == 0): + lambda x: re.sub("FISHERY", "FISHERIES", x) + if (x == x) & (x is not None) * (x != "") + else None + ) + elif (elem != elem) | (elem is None) | (elem == "") | (elem == 0): return np.nan elif type(elem) == str: - elem = unidecode(re.sub(r'\(.+\)', ' ', elem)).strip() - elem = unidecode(re.sub(r'[^\w]+', ' ', elem)).strip() - elem = re.sub(text_to_remove, ' ', elem) - elem = re.sub(r'\s+', ' ', elem).strip() - return re.sub('FISHERY', 'FISHERIES', elem) + elem = unidecode(re.sub(r"\(.+\)", " ", elem)).strip() + elem = unidecode(re.sub(r"[^\w]+", " ", elem)).strip() + elem = re.sub(text_to_remove, " ", elem) + elem = re.sub(r"\s+", " ", elem).strip() + return re.sub("FISHERY", "FISHERIES", elem) else: - raise 
ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return None -# -# Standardize Integer in a form of string -# because Pandas Series or DataFrame considers -# a column of integers with Nulls as a column of float -# Save it as a string column so that it can be uploaded -# as integer columns when uploading to BigQuery. -# def standardize_int_str(elem, check_field=True): + """ + This module standardizes an integer in the form of string + because Pandas Series or DataFrame considers a column of integers + with Nulls as a column of float. Save it as a string column so that + it can be uploaded as integer columns when uploading to BigQuery. + + :param elem: Pandas Series, DataFrame, STRING, INT, FLOAT, a data type + that contains a string field + :param check_field: Boolean, field that contains the given strings + :return: Same as the input elem type + """ if check_field: if type(elem) == pd.core.series.Series: return elem.apply( - lambda x: str(int(float(re.sub('[^\d\.]', '', str(x))))) - if (x == x) & (x is not None) & (x != '') else None) + lambda x: str(int(float(re.sub(r"[^\d.]", "", str(x))))) + if (x == x) & (x is not None) & (x != "") + else None + ) elif type(elem) == pd.core.frame.DataFrame: return elem[check_field].apply( - lambda x: str(int(float(re.sub('[^\d\.]', '', str(x))))) - if (x == x) & (x is not None) & (x != '') else None) - elif (elem != elem) | (elem is None) | (elem == ''): + lambda x: str(int(float(re.sub(r"[^\d.]", "", str(x))))) + if (x == x) & (x is not None) & (x != "") + else None + ) + elif (elem != elem) | (elem is None) | (elem == ""): return None elif (type(elem) == str) | (type(elem) == int) | (type(elem) == float): - return str(int(float(re.sub(r'[^\d\.]', '', str(elem))))) + return str(int(float(re.sub(r"[^\d.]", "", str(elem))))) else: - raise ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return None -# -# Standardize timestamp -# def standardize_time(elem, check_field=True): - if check_field: - if type(elem)==pd.core.series.Series: - return elem.apply(lambda x: pd.to_datetime(x, errors='coerce') if (x==x)&(x!=None)&(x!='') else None) - elif type(elem)==pd.core.frame.DataFrame: - return elem[check_field].apply(lambda x: pd.to_datetime(x, errors='coerce') if (x==x)&(x!=None)&(x!='') else None) - elif (elem!=elem)|(elem==None)|(elem=='')|(elem==0): - return np.nan - elif (type(elem)==str)|(type(elem)==pd.Timestamp): - return pd.to_datetime(elem, errors='coerce') - else: - raise ValueError('Unknown type received') - else: - return None - - -def clean_uvi(x): - if (type(x)==float)|(type(x)==int): - if (not np.isnan(x))&(x==x)&(x!=None): - return str(int(x)) - else: - return np.nan - else: - return re.sub('\s+', ' ', x).strip().upper() + """ + This modules standardizes a timestamp + :param elem: Pandas DATAFRAME, SERIES, STRING, a data type containing + time stamp information + :param check_field: Boolean, whether the field that contains + the timestamp information + :return: Same type as the elem input + """ -def standardize_uvi(elem, check_field=True): if check_field: - if type(elem)==pd.core.series.Series: - return elem.apply(lambda x: clean_uvi(x)) - elif type(elem)==pd.core.frame.DataFrame: - return elem[check_field].apply(lambda x: clean_uvi(x)) - elif (elem!=elem)|(elem==None)|(elem=='')|(elem==0): - return None - elif (type(elem)==int)|(type(elem)==float): - return str(int(elem)) - elif type(elem)==str: - return re.sub('\s+',' ',elem).strip().upper() + if type(elem) == 
pd.core.series.Series: + return elem.apply( + lambda x: pd.to_datetime(x, errors="coerce") + if (x == x) & (x is not None) & (x != "") + else None + ) + elif type(elem) == pd.core.frame.DataFrame: + return elem[check_field].apply( + lambda x: pd.to_datetime(x, errors="coerce") + if (x == x) & (x is not None) & (x != "") + else None + ) + elif (elem != elem) | (elem is None) | (elem == "") | (elem == 0): + return np.nan + elif (type(elem) == str) | (type(elem) == pd.Timestamp): + return pd.to_datetime(elem, errors="coerce") else: - raise ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return None @@ -323,10 +513,10 @@ def standardize_flag(df, field, rules): """ Flag mapping based on YAML mapping file per registry - :param df: - :param field: - :param rules: - :return: + :param df: Pandas DataFrame, a dataframe containing flag information field + :param field: STRING, the name of the field containing flag information + :param rules: DICT, the YAML mapping rule + :return: Pandas Series or STRING """ if field: if rules: @@ -334,17 +524,19 @@ def standardize_flag(df, field, rules): # In case it's explicitly "ALL" as an option, # returns the preset value if "ALL" in rules: - return rules['ALL'] + return rules["ALL"] # # If it's "SAME" option, use the values in the flag field - elif 'SAME' in rules: + elif "SAME" in rules: return df[field] # # iso3 country code - note that all is turned to upper cases else: return df[field].apply( lambda x: rules[unidecode(str(x)).strip().upper()] - if (x == x) & (x is not None) & (x != '') else None) + if (x == x) & (x is not None) & (x != "") + else None + ) else: return None else: @@ -355,16 +547,17 @@ def standardize_geartype(df, field, rules): """ Geartype mapping based on YAML mapping file per registry - :param df: - :param field: - :param rules: - :return: + :param df: Pandas DataFrame, a DataFrame containing geartype + information field + :param field: STRING, the name of the field containing geartype information + :param rules: DICT, the YAML mapping rule + :return: Pandas Series or STRING """ if field: if rules: - if 'ALL' in rules: - return rules['ALL'] - elif 'SAME' in rules: + if "ALL" in rules: + return rules["ALL"] + elif "SAME" in rules: return df[field] # # note that when mapping geartype, @@ -372,8 +565,40 @@ def standardize_geartype(df, field, rules): else: return df[field].apply( lambda x: rules[unidecode(str(x)).strip().lower()] - if (x == x) & (x is not None) & (x != '') else None) + if (x == x) & (x is not None) & (x != "") + else None + ) else: return None else: return None + + +# +# Below is not used. 
+# def clean_uvi(x): +# if (type(x) == float) | (type(x) == int): +# if (not np.isnan(x)) & (x == x) & (x is not None): +# return str(int(x)) +# else: +# return np.nan +# else: +# return re.sub("\s+", " ", x).strip().upper() +# +# +# def standardize_uvi(elem, check_field=True): +# if check_field: +# if type(elem) == pd.core.series.Series: +# return elem.apply(lambda x: clean_uvi(x)) +# elif type(elem) == pd.core.frame.DataFrame: +# return elem[check_field].apply(lambda x: clean_uvi(x)) +# elif (elem != elem) | (elem == None) | (elem == "") | (elem == 0): +# return None +# elif (type(elem) == int) | (type(elem) == float): +# return str(int(elem)) +# elif type(elem) == str: +# return re.sub("\s+", " ", elem).strip().upper() +# else: +# raise ValueError("Unknown type received") +# else: +# return None diff --git a/tests/test_normalize_shipname.py b/tests/test_normalize_shipname.py index e7779f5..497fa15 100644 --- a/tests/test_normalize_shipname.py +++ b/tests/test_normalize_shipname.py @@ -1,49 +1,98 @@ from shipdataprocess.normalize import normalize_shipname + def test_normalize_shipname_none(): result = normalize_shipname(None) - assert result == None + assert result is None + def test_normalize_shipname_upcase(): result = normalize_shipname("MixEd") assert result == "MIXED" + +def test_normalize_shipname_num(): + result = normalize_shipname(123456) + assert result == "123456" + + +def test_normalize_shipname_float(): + result = normalize_shipname(123.456) + assert result is None + + def test_normalize_shipname_symbols(): result = normalize_shipname("weird -+%()<>$;!&'`\\.#/") assert result == "WEIRD" + def test_normalize_shipname_spaces(): result = normalize_shipname(" \tspaced \nname ") assert result == "SPACEDNAME" + def test_normalize_shipname_FB(): result = normalize_shipname("f/b boat f/v othername") assert result == "BOATOTHERNAME" + def test_normalize_shipname_RV(): result = normalize_shipname("r/v boat othername") assert result == "BOATOTHERNAME" + def test_normalize_shipname_nodot(): result = normalize_shipname("no. 
boat") assert result == "BOAT" + def test_normalize_shipname_nonumber(): result = normalize_shipname("no537 boat") - assert result == 'BOAT537' + assert result == "BOAT537" + def test_normalize_shipname_romans(): result = normalize_shipname("boat IX") assert result == "BOAT9" + def test_normalize_shipname_empty(): result = normalize_shipname("") - assert result == None + assert result is None + + +def test_normalize_shipname_empty_space(): + result = normalize_shipname(" ") + assert result is None + def test_normalize_shipname_1c(): result = normalize_shipname("a") assert result == "A" + def test_normalize_shipname_no(): result = normalize_shipname("no") assert result == "NO" + + +# +# Below are added in Jan 2022 for encoding tests +def test_normalize_shipname_utf8(): + result = normalize_shipname("ÆØÅæøå") + assert result == "AEOAAEOA" + + +def test_normalize_shipname_utf8_b(): + result = normalize_shipname("ÇÊÎŞÛ") + assert result == "CEISU" + + +def test_normalize_shipname_utf8_encoded(): + result = normalize_shipname(b"pyth\xc3\xb6n!") + assert result == "PYTHON" + + +def test_normalize_shipname_latin_encoded(): + result = normalize_shipname(b"\xe1") + assert result == "A" From 29a70d0152719fae42c9d7596b90936f0349f295 Mon Sep 17 00:00:00 2001 From: jaeyoonpark Date: Wed, 26 Jan 2022 13:30:35 +0100 Subject: [PATCH 2/4] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5c63ab8..355a3ff 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ sdist/ var/ wheels/ *.egg-info/ +/*.egg-info/* .installed.cfg *.egg MANIFEST From 3bfc2bbfcc814ba25fba9a681ced179ee5e343cc Mon Sep 17 00:00:00 2001 From: jaeyoonpark Date: Wed, 26 Jan 2022 14:05:25 +0100 Subject: [PATCH 3/4] update 0.7.0 --- CHANGES.md | 3 +- build/lib/shipdataprocess/__init__.py | 10 +- build/lib/shipdataprocess/collapse.py | 192 +++++-- build/lib/shipdataprocess/normalize.py | 325 ++++++----- build/lib/shipdataprocess/shiptype.py | 658 ++++++++++++++--------- build/lib/shipdataprocess/standardize.py | 529 ++++++++++++------ shipdataprocess/__init__.py | 2 +- 7 files changed, 1106 insertions(+), 613 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e6711a1..2129f6f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -38,5 +38,4 @@ v0.6.15, 2020-11-06 -- Fix a bug in normalize_shipname() and normalize_callsign( v0.6.16, 2020-11-26 -- Make smart_upper() to capture multiple URLs not to capitalize them v0.6.17, 2021-07-30 -- Add Indonesian prefix and Chinese HAO v0.6.18, 2021-08-04 -- Fix a bug in normalize_callsign() regarding NULL/NONE - - +v0.7.0, 2022-01-26 -- Fix it to work only in Python 3.6 or above, codes are compliant with PEP8, dependencies are clearer (Django removed) diff --git a/build/lib/shipdataprocess/__init__.py b/build/lib/shipdataprocess/__init__.py index 7b322e5..444767d 100644 --- a/build/lib/shipdataprocess/__init__.py +++ b/build/lib/shipdataprocess/__init__.py @@ -3,15 +3,15 @@ """ -__version__ = '0.6.18' -__author__ = 'Jaeyoon Park' -__email__ = 'jaeyoon.park13@gmail.com' -__source__ = 'https://github.com/GlobalFishingWatch/shipdataprocess' +__version__ = "0.7.0" +__author__ = "Jaeyoon Park" +__email__ = "jaeyoon@globalfishingwatch.org" +__source__ = "https://github.com/GlobalFishingWatch/shipdataprocess" __license__ = """ Copyright 2017 Global Fishing Watch Inc. Authors: -Jaeyoon Park +Jaeyoon Park Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
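For reference, a minimal usage sketch of the normalization behavior exercised by the tests above; it is illustrative only (not part of the patch itself) and assumes the updated package is importable as shipdataprocess, simply mirroring the expected outputs from those test cases.
# Illustrative sketch only; mirrors the test cases added above.
from shipdataprocess.normalize import normalize_shipname

assert normalize_shipname("MixEd") == "MIXED"  # upper-casing
assert normalize_shipname("boat IX") == "BOAT9"  # roman numeral converted to digit
assert normalize_shipname("ÆØÅæøå") == "AEOAAEOA"  # accented characters transliterated
assert normalize_shipname(b"pyth\xc3\xb6n!") == "PYTHON"  # utf-8 encoded bytes input
assert normalize_shipname(b"\xe1") == "A"  # iso-8859-1 fallback for non-utf-8 bytes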
diff --git a/build/lib/shipdataprocess/collapse.py b/build/lib/shipdataprocess/collapse.py index b18a0ce..823a580 100644 --- a/build/lib/shipdataprocess/collapse.py +++ b/build/lib/shipdataprocess/collapse.py @@ -1,32 +1,51 @@ +""" +This file contains functions that help collapse (multiple) rows +for each vessel found in the process of producing Global Fishing Watch's +internal vessel database. + +Last updated: 2022-01-24 +Jaeyoon Park +""" + import pandas as pd import numpy as np import re from collections import Counter -### helper functions for collapsing rows by vessel - def non_zero_mean(x): try: - x = x[(x!=0)&(x!=None)] - if len(x)==0: return 0.0 - else: return x.mean() - except: + x = x[(x != 0) & (x is not None)] + if len(x) == 0: + return 0.0 + else: + return x.mean() + except AttributeError: return 0.0 - + + def non_zero_std(x): try: - x = x[(x!=0)&(x!=None)] - if len(x)<2: return 0.0 - else: return x.std() - except: + x = x[(x != 0) & (x is not None)] + if len(x) < 2: + return 0.0 + else: + return x.std() + except AttributeError: return 0.0 - -def most_common_value(x): ## remove if standard deviation is too big compared to mean value of all numbers - '''remove if standard deviation is too big compared to mean value of all numbers''' - if (type(x)==list)&(len(x)>0): + + +def most_common_value(x): + """ + Remove if standard deviation is too big compared to mean value of + all numbers. The standard deviation threshold is set to be 10%. + + x: Pandas Series or list, a list of numerical values + (for length, tonnage, engine power) + """ + if (type(x) == list) & (len(x) > 0): x = pd.Series(x) - if (type(x)==pd.core.series.Series)&(len(x.dropna())>0): + if (type(x) == pd.core.series.Series) & (len(x.dropna()) > 0): x_mean = non_zero_mean(x) x_std = non_zero_std(x) if x_std > x_mean * 0.1: @@ -36,95 +55,156 @@ def most_common_value(x): ## remove if standard deviation is too big compared to else: return np.nan + def most_common_value_with_confidence(cx): - '''same functionality as most_common_value() but with confidence level taken account''' - if (type(cx)==pd.core.series.Series)&(len(cx)>0): - if len(cx.dropna())==0: + """ + same functionality as most_common_value() but with confidence level + taken into account + + cx: Pandas Series or list, a list of numerical values + (for length, tonnage, engine power) + with a confidence level indicator attached with '-' in front of the value. + """ + if (type(cx) == pd.core.series.Series) & (len(cx) > 0): + if len(cx.dropna()) == 0: return np.nan else: cx = list(cx.values) - if (type(cx)==list)&(len(cx)>0): - clist = [int(elem.split('-')[0]) for elem in cx if (elem==elem)&(elem!=None)] - xlist = [elem for elem in cx if (elem==elem)&(elem!=None)] - if len(clist)>0: + if (type(cx) == list) & (len(cx) > 0): + clist = [ + int(elem.split("-")[0]) + for elem in cx + if (elem == elem) & (elem is not None) + ] + xlist = [elem for elem in cx if (elem == elem) & (elem is not None)] + if len(clist) > 0: max_c = max(clist) - x = [float(elem.split('-')[1]) for elem in xlist if int(elem.split('-')[0])==max_c] + x = [ + float(elem.split("-")[1]) + for elem in xlist + if int(elem.split("-")[0]) == max_c + ] + # Call the function to return the most common value return most_common_value(x) else: return np.nan else: return np.nan - -def most_common_num(x): ## mostly for imo collapsing + + +def most_common_num(x): + """ + Return the most common number (mostly for imo collapsing). 
+ + x: Pandas Series, a list of numbers + """ try: x = x.dropna() - if len(x)==0: + if len(x) == 0: return np.nan else: vals = x.values - vs = [v for v in vals if (v!=0)] - #vs = list(set(vs)) - if len(vs)==0: + vs = [v for v in vals if (v != 0)] + # vs = list(set(vs)) + if len(vs) == 0: return np.nan - else: + else: data = Counter(vs) return max(vs, key=data.get) - except: + except AttributeError: return np.nan - + + def most_common_str(x): + """ + Return the most common string. + + x: Pandas Series, a list of values in string + """ try: x = x.dropna() - if len(x)==0: + if len(x) == 0: return np.nan else: - vals = x.values - vs = [re.sub('\s+',' ',str(v)).strip().upper() for v in x.values] - vs = [v for v in vs if v!=''] - #vs = list(set(vs)) - if len(vs)==0: + vs = [ + re.sub(r"\s+", " ", str(v)).strip().upper() for v in x.values + ] + vs = [v for v in vs if v != ""] + # vs = list(set(vs)) + if len(vs) == 0: return np.nan else: data = Counter(vs) return max(vs, key=data.get) - #if len(vs)==1: - # return vs[0] - #else: - # return None - except: + except AttributeError: return np.nan - -def str_attached(x): ## join all strings + + +def str_attached(x): + """ + Return all strings joined. If the values are in numbers, convert them + to string and combined. + + :param x: Pandas Series or list + :return: A joined string + """ try: x = x.dropna() - if len(x)==0: + if len(x) == 0: return np.nan else: - x = x.apply(lambda v: str(int(v)) if (type(v)==float)|(type(v)==int)|(type(v)==long) else v) + x = x.apply( + lambda v: str(int(v)) + if (type(v) == float) | (type(v) == int) + else v + ) vals = x.values.tolist() - #vs = [str(v).strip() for v in vals if (v==v)&(v!=None)&(v!='')] - #vs = [v for v in vs if (v!='')] + # vs = [str(v).strip() for v in vals if (v==v)&(v!=None)&(v!='')] + # vs = [v for v in vs if (v!='')] vs = list(set(vals)) - return ', '.join(sorted(vs)) - except: + return ", ".join(sorted(vs)) + except AttributeError: return np.nan - + + def min_time(x): + """ + Return the minimum time + + :param x: Pandas Series + :return: Timestamp + """ vals = x.values - vs = [v for v in vals if (v==v)&(v!=None)&(v!='')] + vs = [v for v in vals if (v == v) & (v is not None) & (v != "")] vs = pd.Series(vs) + return vs.min() + def max_time(x): + """ + Return the maximum time + + :param x: Pandas Series + :return: Timestamp + """ vals = x.values - vs = [v for v in vals if (v==v)&(v!=None)&(v!='')] + vs = [v for v in vals if (v == v) & (v is not None) & (v != "")] vs = pd.Series(vs) + return vs.max() + def highest_confidence(x): + """ + Return the maximum confidence if none return 1 (the lowest). + + :param x: Pandas Series or list + :return: Integer + """ x = x.dropna() - if len(x)>0: + if len(x) > 0: return max(x.tolist()) else: return 1 diff --git a/build/lib/shipdataprocess/normalize.py b/build/lib/shipdataprocess/normalize.py index 29104a9..c95a2cd 100644 --- a/build/lib/shipdataprocess/normalize.py +++ b/build/lib/shipdataprocess/normalize.py @@ -1,205 +1,260 @@ +""" +This file provides functions that normalize ship name and call sign of a vessel +either recorded in registries or in vessel tracking data. The normalization, or +standardization of string, will ensure that the strings are comparable to other +strings despite various ways of recording names of the same vessel. +It also removes all non-essential characters or white spaces. 
+""" from unidecode import unidecode import roman import re -import sys def normalize_shipname(name): - - if (name is None)|(name != name)|(name == ''): - return None + """ + Return a normalized ship name by removing all non-essential characters, + prefix, and suffix, and standardizing roman numerals or other parts + of the vessel name. - # - # Turn to upper cases - name = name.upper() - + :param name: String, an original vessel name + :return: String, a normalized vessel name + """ + if (name is None) | (name != name) | (name == ""): + return None + print(name) # # Remove nasty characters and white spaces - if sys.version_info[0] < 3: + # try: + # name = unidecode(str(name.decode("utf-8"))) + # except UnicodeDecodeError: + # name = unidecode(str(name.decode("iso_8859-1"))) + + if issubclass(type(name), str): + name = unidecode(name) + elif isinstance(name, bytes): try: - name = unidecode(str(name)) # get rid of nasty characters, but sometimes this fails - except: - try: - name = unidecode(str(name.decode('utf8'))) - except: - name = unidecode(str(name.decode('iso_8859-1'))) + name = unidecode(str(name, "utf-8", "strict")) + except UnicodeDecodeError: + name = unidecode(str(name, "iso-8859-1", "strict")) + elif isinstance(name, int): + name = str(name) else: - name = unidecode(str(name)) + return None + print(name) + # + # Turn to upper cases + name = name.upper() - name = re.sub('\s+',' ',name) + name = re.sub(r"\s+", " ", name) name = name.strip() - name = name.replace('\n','').replace('\r','') - + name = name.replace("\n", "").replace("\r", "") + # # Remove fishing vessel code - name = re.sub('MFV[^\w]+', ' ', name) # fishing vessel code in English - name = re.sub('MPV[^\w]+', ' ', name) # fishing vessel code in English - name = re.sub('HMS[^\w]+', ' ', name) # fishing vessel code in English - name = re.sub('LPG[/|C]*[\W]*|LNG[/|C]*[\W]*', ' ', name) # LPG/LNG variations - - name = re.sub('(\s|^)F[^\w\s]*V[^\w]*', ' ', name) # fishing vessel code in English (F/V, F-V, F.V, FV: etc) - name = re.sub('^F[^\w\s]*B[^\w]+', ' ', name) # fishing vessel code in English - name = re.sub(' F[^\w\s]*B[^\w]*(\s|$)', ' ', name) - name = re.sub('^M[^\w\s]*P[^\w]+', ' ', name) # fishing vessel code in Italy/Spain - name = re.sub(' M[^\w\s]*P[^\w]*(\s|$)', ' ', name) - name = re.sub('^M[^\w\s]*B[^\w]+', ' ', name) # fishing vessel code in Italy/Spain - name = re.sub(' M[^\w\s]*B[^\w]*(\s|$)', ' ', name) - name = re.sub('^G[^\w\s]*V[^\w]+', ' ', name) # mostly in UK - name = re.sub('S+F+[^\w]+G[^\w\s]*V[^\w]*', ' ', name) - name = re.sub(' G[^\w\s]*V[^\w]*(\s|$)', ' ', name) - name = re.sub('^M[^\w\s]*V[^\w]+', ' ', name) # in English - name = re.sub(' M[^\w\s]*V[^\w]*(\s|$)', ' ', name) - name = re.sub('^M[^\w\s]+S[^\w]+', ' ', name) # Merchant Ship - name = re.sub(' M[^\w\s]+S[^\w]*(\s|$)', ' ', name) - name = re.sub('^M[^\w\s]*K[^\w]+', ' ', name) # mostly in northern europe - name = re.sub(' M[^\w\s]+K[^\w]*(\s|$)', ' ', name) - name = re.sub('^R[^\w\s]*V[^\w]+', ' ', name) # Research Vessel - name = re.sub(' R[^\w\s]*V[^\w]*(\s|$)', ' ', name) - - name = re.sub('^T[^\w\s]*T[^\w]+', ' ', name) # Tender To - name = re.sub(' T[^\w\s]*T[^\w]*($)', ' ', name) - name = re.sub('^S[^\w\s]*Y[^\w]+', ' ', name) # Steam Yacht - name = re.sub(' S[^\w\s]*Y[^\w]*($)', ' ', name) - name = re.sub('^M[^\w\s]*F[^\w]+', ' ', name) # Motor Ferry - name = re.sub(' M[^\w\s]*F[^\w]*($)', ' ', name) - name = re.sub('^S[^\w\s]*S[^\w]+', ' ', name) # Steam Ship - name = re.sub(' S[^\w\s]*S[^\w]*($)', ' ', name) - name = 
re.sub('^S[^\w\s]*V[^\w]+', ' ', name) # Sailing Vessel - name = re.sub(' S[^\w\s]*V[^\w]*($)', ' ', name) - name = re.sub('^M[^\w\s]*T[^\w]+', ' ', name) # Motor Tanker - name = re.sub(' M[^\w\s]*T[^\w]*($)', ' ', name) - name = re.sub('^M[^\w\s]+Y[^\w]+', ' ', name) # Motor Yacht - name = re.sub(' M[^\w\s]+Y[^\w]*($)', ' ', name) - name = re.sub('^[A-Z]/[A-Z][^\w]+', ' ', name) # All other types of X/X - name = re.sub(' [A-Z]/[A-Z]($)', ' ', name) - name = re.sub('^[A-Z]\\\\[A-Z][^\w]+', ' ', name) ## All other types of X\X - name = re.sub(' [A-Z]\\\\[A-Z]($)', ' ', name) - name = re.sub('^KM[^\w]+', ' ', name) # Indonesia K.M - name = re.sub('^E.B. ', ' ', name) # Dutch E.B. equivalent to NO. - - name = re.sub('\(.+\)', ' ', name) # All additional information in parentheses - name = re.sub('\[.+\]', ' ', name) - + name = re.sub(r"MFV[^\w]+", " ", name) # fishing vessel code in English + name = re.sub(r"MPV[^\w]+", " ", name) # fishing vessel code in English + name = re.sub(r"HMS[^\w]+", " ", name) # fishing vessel code in English + name = re.sub( + r"LPG[/|C]*[\W]*|LNG[/|C]*[\W]*", " ", name + ) # LPG/LNG variations + + name = re.sub( + r"(\s|^)F[^\w\s]*V[^\w]*", " ", name + ) # fishing vessel code in English (F/V, F-V, F.V, FV: etc) + name = re.sub( + r"^F[^\w\s]*B[^\w]+", " ", name + ) # fishing vessel code in English + name = re.sub(r" F[^\w\s]*B[^\w]*(\s|$)", " ", name) + name = re.sub( + r"^M[^\w\s]*P[^\w]+", " ", name + ) # fishing vessel code in Italy/Spain + name = re.sub(r" M[^\w\s]*P[^\w]*(\s|$)", " ", name) + name = re.sub( + r"^M[^\w\s]*B[^\w]+", " ", name + ) # fishing vessel code in Italy/Spain + name = re.sub(r" M[^\w\s]*B[^\w]*(\s|$)", " ", name) + name = re.sub(r"^G[^\w\s]*V[^\w]+", " ", name) # mostly in UK + name = re.sub(r"S+F+[^\w]+G[^\w\s]*V[^\w]*", " ", name) + name = re.sub(r" G[^\w\s]*V[^\w]*(\s|$)", " ", name) + name = re.sub(r"^M[^\w\s]*V[^\w]+", " ", name) # in English + name = re.sub(r" M[^\w\s]*V[^\w]*(\s|$)", " ", name) + name = re.sub(r"^M[^\w\s]+S[^\w]+", " ", name) # Merchant Ship + name = re.sub(r" M[^\w\s]+S[^\w]*(\s|$)", " ", name) + name = re.sub(r"^M[^\w\s]*K[^\w]+", " ", name) # mostly in northern europe + name = re.sub(r" M[^\w\s]+K[^\w]*(\s|$)", " ", name) + name = re.sub(r"^R[^\w\s]*V[^\w]+", " ", name) # Research Vessel + name = re.sub(r" R[^\w\s]*V[^\w]*(\s|$)", " ", name) + + name = re.sub(r"^T[^\w\s]*T[^\w]+", " ", name) # Tender To + name = re.sub(r" T[^\w\s]*T[^\w]*($)", " ", name) + name = re.sub(r"^S[^\w\s]*Y[^\w]+", " ", name) # Steam Yacht + name = re.sub(r" S[^\w\s]*Y[^\w]*($)", " ", name) + name = re.sub(r"^M[^\w\s]*F[^\w]+", " ", name) # Motor Ferry + name = re.sub(r" M[^\w\s]*F[^\w]*($)", " ", name) + name = re.sub(r"^S[^\w\s]*S[^\w]+", " ", name) # Steam Ship + name = re.sub(r" S[^\w\s]*S[^\w]*($)", " ", name) + name = re.sub(r"^S[^\w\s]*V[^\w]+", " ", name) # Sailing Vessel + name = re.sub(r" S[^\w\s]*V[^\w]*($)", " ", name) + name = re.sub(r"^M[^\w\s]*T[^\w]+", " ", name) # Motor Tanker + name = re.sub(r" M[^\w\s]*T[^\w]*($)", " ", name) + name = re.sub(r"^M[^\w\s]+Y[^\w]+", " ", name) # Motor Yacht + name = re.sub(r" M[^\w\s]+Y[^\w]*($)", " ", name) + name = re.sub(r"^[A-Z]/[A-Z][^\w]+", " ", name) # All other types of X/X + name = re.sub(r" [A-Z]/[A-Z]($)", " ", name) + name = re.sub( + r"^[A-Z]\\\\[A-Z][^\w]+", " ", name + ) # All other types of X\X + name = re.sub(r" [A-Z]\\\\[A-Z]($)", " ", name) + name = re.sub(r"^KM[^\w]+", " ", name) # Indonesia K.M + name = re.sub(r"^E.B. ", " ", name) # Dutch E.B. equivalent to NO. 
+ + name = re.sub( + r"\(.+\)", " ", name + ) # All additional information in parentheses + name = re.sub(r"\[.+\]", " ", name) + # # Numbers in letters - name = re.sub(' ONE($)| UNO($)| UN($)', ' 1', name) - name = re.sub(' TWO($)| DOS($)| DEUX($)', ' 2', name) - name = re.sub(' THREE($)| TRES($)| TROIS($)', ' 3', name) - name = re.sub(' FOUR($)| CUATRO($)| QUATRE($)', ' 4', name) - name = re.sub(' FIVE($)| CINCO($)| CINQ($)', ' 5', name) - name = re.sub(' SIX($)| SEIS($)| SIX($)', ' 6', name) - name = re.sub(' SEVEN($)| SIETE($)| SEPT($)', ' 7', name) - name = re.sub(' EIGHT($)| OCHO($)| HUIT($)', ' 8', name) - name = re.sub(' NINE($)| NUEVE($)| NEUF($)', ' 9', name) - name = re.sub(' TEN($)| DIEZ($)| DIX($)', ' 10', name) - name = re.sub(' ELEVEN($)| ONCE($)| ONZE($)', ' 11', name) - name = re.sub(' TWELVE($)| DOCE($)| DOUZE($)', ' 12', name) - name = re.sub(' THIRTEEN($)| TRECE($)| TREIZE($)', ' 13', name) - name = re.sub(' FOURTEEN($)| CATORCE($)| QUATORZE($)', ' 14', name) - name = re.sub(' FIFTEEN($)| QUINCE($)| QUINZE($)', ' 15', name) - - name = re.sub('1ST ', 'FIRST ', name) - name = re.sub('2ND ', 'SECOND ', name) - name = re.sub('3RD ', 'THIRD ', name) - name = re.sub('4TH ', 'FOURTH ', name) - name = re.sub('5TH ', 'FIFTH ', name) + name = re.sub(r" ONE($)| UNO($)| UN($)", " 1", name) + name = re.sub(r" TWO($)| DOS($)| DEUX($)", " 2", name) + name = re.sub(r" THREE($)| TRES($)| TROIS($)", " 3", name) + name = re.sub(r" FOUR($)| CUATRO($)| QUATRE($)", " 4", name) + name = re.sub(r" FIVE($)| CINCO($)| CINQ($)", " 5", name) + name = re.sub(r" SIX($)| SEIS($)", " 6", name) + name = re.sub(r" SEVEN($)| SIETE($)| SEPT($)", " 7", name) + name = re.sub(r" EIGHT($)| OCHO($)| HUIT($)", " 8", name) + name = re.sub(r" NINE($)| NUEVE($)| NEUF($)", " 9", name) + name = re.sub(r" TEN($)| DIEZ($)| DIX($)", " 10", name) + name = re.sub(r" ELEVEN($)| ONCE($)| ONZE($)", " 11", name) + name = re.sub(r" TWELVE($)| DOCE($)| DOUZE($)", " 12", name) + name = re.sub(r" THIRTEEN($)| TRECE($)| TREIZE($)", " 13", name) + name = re.sub(r" FOURTEEN($)| CATORCE($)| QUATORZE($)", " 14", name) + name = re.sub(r" FIFTEEN($)| QUINCE($)| QUINZE($)", " 15", name) + + name = re.sub("1ST ", "FIRST ", name) + name = re.sub("2ND ", "SECOND ", name) + name = re.sub("3RD ", "THIRD ", name) + name = re.sub("4TH ", "FOURTH ", name) + name = re.sub("5TH ", "FIFTH ", name) # # Country specific appendix (S. Korea and China) - name = re.sub('\d+\s*HO($)', ' ', name) - name = re.sub('\d+\s*HAO($)', ' ', name) + name = re.sub(r"\d+\s*HO($)", " ", name) + name = re.sub(r"\d+\s*HAO($)", " ", name) # # Remove NO.s such in NO.5, NO5, NO:5, NO. 
5, NO 5, N5, N-5 etc - name = re.sub('NO[^\w\s]*[\s]*(?=\d+)', '', name) - name = re.sub('[\s]+N[\W_0]*(?=\d+)', '', name) - name = re.sub('NO\.\s*(?=[^0-9]+)', '', name) - + name = re.sub(r"NO[^\w\s]*[\s]*(?=\d+)", "", name) + name = re.sub(r"[\s]+N[\W_0]*(?=\d+)", "", name) + name = re.sub(r"NO\.\s*(?=[^0-9]+)", "", name) + # # Turn '&' to 'AND' - name = re.sub('(?<=[A-Z])\s+&\s+(?=[A-Z])', ' AND ', name) # replace 'BLACK & WHITE' to 'BLACK AND WHITE' - + name = re.sub( + r"(?<=[A-Z])\s+&\s+(?=[A-Z])", " AND ", name + ) # replace 'BLACK & WHITE' to 'BLACK AND WHITE' + # # Deromanization - vs = re.split('\s+|-|(?<=[A-Z]{3})\.',name) + vs = re.split(r"\s+|-|(?<=[A-Z]{3})\.", name) try: # # If last word from the name text has L/C/D/M then do not deromanize - if re.search('[LCDM]', vs[-1]).group(0): pass - except: + if re.search(r"[LCDM]", vs[-1]).group(0): + pass + except AttributeError: # # Try to deromanize the last word from the name text try: vs[-1] = roman.fromRoman(vs[-1]) vs[-1] = str(int(vs[-1])) - except: + except roman.InvalidRomanNumeralError: + # + # No corresponding roman numeral found. Let's leave it as is. pass - + # # Attach the deromanized digits to the end - name = ''.join(vs) + name = "".join(vs) - # # Now, remove all special characters - name = re.sub('[\W_]', '', name) - + name = re.sub(r"[\W_]", "", name) + # # Check if the name starts with digits, if yes move it to the end - try: - first_digit = re.search('^\d+', name).group(0) - name = re.sub('^\d+', '', name) + str(first_digit) - except: - pass + obj = re.search(r"^\d+", name) + if obj: + first_digit = obj.group(0) + name = re.sub(r"^\d+", "", name) + str(first_digit) # # Remove 0s from the numbers starting with 0s - try: - last_digit = re.search('\d+$', name).group(0) - non_zeros = re.sub('^0+', '', last_digit) - name = re.sub('\d+$', '', name) + str(non_zeros) - except: - pass + obj = re.search(r"\d+$", name) + if obj: + last_digit = obj.group(0) + non_zeros = re.sub("^0+", "", last_digit) + name = re.sub(r"\d+$", "", name) + str(non_zeros) + + # + # Remove all excessive white spaces + name = re.sub(r"\s+", " ", name) - if name == '': + if name == "" or name == " ": return None - - return name + else: + return name def normalize_callsign(callsign): + """ + Return a normalized International Radio Call Sign by removing non-essential + characters and ignoring meaningless call sign including 'NONE', 'UNKNOWN' + + :param callsign: String, an original call sign + :return: String, a normalized call sign + """ - if (callsign is None) | (callsign != callsign) | (callsign == '') | \ - (callsign == "NONE") | (callsign == "UNKNOWN") | (callsign == "NIL") | (callsign == "NULL"): + if ( + (callsign is None) + | (callsign != callsign) + | (callsign == "") + | (callsign == "NONE") + | (callsign == "UNKNOWN") + | (callsign == "NIL") + | (callsign == "NULL") + ): return None # # Turn to upper cases callsign = callsign.upper() - + # # Remove nasty characters, white space try: - callsign = unidecode(str(callsign)) # get rid of nasty characters, but sometimes this fails - except: + # + # get rid of nasty characters, but sometimes this fails + callsign = unidecode(str(callsign)) + except UnicodeDecodeError: try: - callsign = unidecode(str(callsign.decode('utf8'))) - except: - callsign = unidecode(str(callsign.decode('iso_8859-1'))) + callsign = unidecode(str(callsign.decode("utf8"))) + except UnicodeDecodeError: + callsign = unidecode(str(callsign.decode("iso_8859-1"))) callsign = callsign.strip() - callsign = re.sub('\s+',' 
',callsign) + callsign = re.sub(r"\s+", " ", callsign) # # Get rid of all non-word characters - callsign = re.sub('[\W_]', '', callsign) - + callsign = re.sub(r"[\W_]", "", callsign) + # # Remove 0s from callsign starting with 0s - callsign = re.sub('^0+', '', callsign) - - if callsign == '': - return None + callsign = re.sub(r"^0+", "", callsign) - return callsign \ No newline at end of file + if callsign == "": + return None + else: + return callsign diff --git a/build/lib/shipdataprocess/shiptype.py b/build/lib/shipdataprocess/shiptype.py index 70eff72..8ee5773 100644 --- a/build/lib/shipdataprocess/shiptype.py +++ b/build/lib/shipdataprocess/shiptype.py @@ -1,226 +1,305 @@ -import pandas as pd +""" +This file provides functions that process operations with regard to vessel +types defined by Global Fishing Watch (There are about 40 ship types +pre-defined). See here +https://globalfishingwatch.org/datasets-and-code-vessel-identity/ + +Last updates: 2022-01-25 +Jaeyoon Park +""" import numpy as np - def determine_shiptype(gears, shiptype_dict): - ''' - determinte_shiptype module receives multiple types of ship and returns the most specific ship type. - - -------- - ARGUMENT - -------- - gears: SERIES, LIST, OR STR, single or multiple combination of ship types joined by '|' (OR) - (examples: fixed_gear|set_longlines, cargo) - -------- - - ------ - RETURN - ------ - STR or None, select the most detailed type among the ship types received if they are all in one category, + """ + This module receives multiple types of ship and returns the most specific + ship type in the pre-defined vessel classification hierarchy. + https://globalfishingwatch.org/datasets-and-code-vessel-identity/ + + :param gears: SERIES, LIST, OR STR, single or multiple combination of ship + type joined by '|' (OR) (examples: fixed_gear|set_longlines, cargo) + :param shiptype_dict: DICT, a geartype dictionary containing 'path' + information in the vessel class hierarchy + :return: STR or None, select the most detailed type among the ship types + received if they are all in one category, otherwise a combination of ship types. 
- (examples: fixed_gear|set_longlines -> set_longlines, trawler|fixed_gear|set_longlines -> trawler|set_longlines) - ------ - ''' - + (examples: fixed_gear|set_longlines -> set_longlines, + trawler|fixed_gear|set_longlines -> trawler|set_longlines) + """ - ## if there is no information on gears, then return None - if len(gears)==0: + # + # if there is no information on gears, then return None + if len(gears) == 0: return None - - ### make sure the entry is a list of strings - if type(gears)==str: + + # + # make sure the entry is a list of strings + if type(gears) == str: gears = [gears] - elif type(gears)==list: + elif type(gears) == list: pass - else: gears = gears.tolist() - - ### remove Nones - gears = [gear.replace(' ','').strip() for gear in gears if (gear!=None)&(gear==gear)&(gear!='')] - - ### take only specific ones if there are several possibly duplicated ones (example: trawlers, trawlers|purse_seines) + else: + gears = gears.tolist() + + # + # remove Nones + gears = [ + gear.replace(" ", "").strip() + for gear in gears + if (gear is not None) & (gear == gear) & (gear != "") + ] + + # + # take only specific ones if there are several possibly duplicated ones + # (example: trawlers, trawlers|purse_seines) gears = reduce_to_specifics_with_multiples(gears, shiptype_dict) - ### get rid of '|' and take all possible gears individually - gears_split=[] + # + # get rid of '|' and take all possible gears individually + gears_split = [] for g in gears: - if '|' in g: - gears_split += g.split('|') + if "|" in g: + gears_split += g.split("|") else: gears_split.append(g) - - ### map geartype_dict to compare categories (broader ones to be removed) + + # + # map geartype_dict to compare categories + # (broader/ ones to be removed) gears = reduce_to_specifics(gears_split, shiptype_dict) - ### remove redundant values and join together with '|' + # + # remove redundant values and join together with '|' gears = sorted(list(set(gears))) - final_value = '|'.join(gears) - if final_value=='': + final_value = "|".join(gears) + if final_value == "": return None else: return final_value - def determine_shiptype_simple(gears, shiptype_dict): - ''' - same as determinte_shiptype module but without reducing multiple gears to specific (this is for testing). - ''' + """ + same as determine_shiptype module but without reducing multiple gears + to specific (this is for testing). + + :param gears: SERIES, LIST, OR STR, single or multiple combination of + ship types joined by '|' (OR) (examples: fixed_gear|set_longlines, cargo) + :param shiptype_dict: DICT, ship type dictionary containing 'path' of + gear type in the hierarchy + :return: STR or None, select the most detailed type among the ship types + received if they are all in one category, + otherwise a combination of ship types. 
+ (examples: fixed_gear|set_longlines -> set_longlines, + trawler|fixed_gear|set_longlines -> trawler|set_longlines) + """ - ## if there is no information on gears, then return None - if len(gears)==0: + # + # if there is no information on gears, then return None + if len(gears) == 0: return None - - ### make sure the entry is a list of strings - if type(gears)==str: + + # + # make sure the entry is a list of strings + if type(gears) == str: gears = [gears] - elif type(gears)==list: + elif type(gears) == list: pass - else: gears = gears.tolist() - - ### remove Nones - gears = [gear.replace(' ','').strip() for gear in gears if (gear!=None)&(gear==gear)&(gear!='')] - - ### get rid of '|' and take all possible gears individually - gears_split=[] + else: + gears = gears.tolist() + + # + # remove Nones + gears = [ + gear.replace(" ", "").strip() + for gear in gears + if (gear is not None) & (gear == gear) & (gear != "") + ] + + # + # get rid of '|' and take all possible gears individually + gears_split = [] for g in gears: - if '|' in g: - gears_split += g.split('|') + if "|" in g: + gears_split += g.split("|") else: gears_split.append(g) - - ### map geartype_dict to compare categories (broader ones to be removed) + + # + # map geartype_dict to compare categories (broader ones to be removed) gears = reduce_to_specifics(gears_split, shiptype_dict) - ### remove redundant values and join together with '|' + # + # remove redundant values and join together with '|' gears = sorted(list(set(gears))) - final_value = '|'.join(gears) - if final_value=='': + final_value = "|".join(gears) + if final_value == "": return None else: return final_value def tag_confidence_level(x, c): - if (x==x)&(x!=None)&(x!=0)&(x!=''): - return str(c) + '-' + str(x) + """ + Helper function to add confidence level to geartype + + :param x: STRING, geartype + :param c: INT, confidence level (1 to 4) + :return: STRING, geartype attached with confidence level by a dash ('-') + """ + if (x == x) & (x is not None) & (x != 0) & (x != ""): + return str(c) + "-" + str(x) else: return np.nan def determine_shiptype_with_confidence(gears, shiptype_dict): - ''' - same as determine_shiptype but with confidence level taken into account - ''' - - ## if there is no information on gears, then return None - if len(gears)==0: + """ + same as the determine_shiptype module above + but with confidence level taken into account + """ + + # + # if there is no information on gears, then return None + if len(gears) == 0: return np.nan - - ### make sure the entry is a list of strings - if type(gears)==str: + + # + # make sure the entry is a list of strings + if type(gears) == str: gears = [gears] - elif type(gears)==list: + elif type(gears) == list: pass - else: gears = gears.tolist() - - ### remove NaN/None - gears = [gear.replace(' ','').strip() for gear in gears if (gear!=None)&(gear==gear)&(gear!='')] - if len(gears)==0: + else: + gears = gears.tolist() + + # + # remove NaN/None + gears = [ + gear.replace(" ", "").strip() + for gear in gears + if (gear is not None) & (gear == gear) & (gear != "") + ] + if len(gears) == 0: return np.nan - - ### remove all gear values from lists of less confidence level - levels = [int(gear.split('-')[0]) for gear in gears] - if len(levels)>0: + + # + # remove all gear values from lists of less confidence level + levels = [int(gear.split("-")[0]) for gear in gears] + if len(levels) > 0: highest_level = max(levels) - if (highest_level==3)&(2 in levels): - gears_3 = [gear.split('-')[1] for gear in gears if ('3' in 
gear)] - gears_2 = [gear.split('-')[1] for gear in gears if ('2' in gear)] - gears = [gear.split('-')[1] for gear in gears if ('2' in gear)|('3' in gear)] + if (highest_level == 3) & (2 in levels): + gears_3 = [gear.split("-")[1] for gear in gears if ("3" in gear)] + gears_2 = [gear.split("-")[1] for gear in gears if ("2" in gear)] + gears = [ + gear.split("-")[1] + for gear in gears + if ("2" in gear) | ("3" in gear) + ] else: - gears = [gear.split('-')[1] for gear in gears if str(highest_level) in gear] - - ### take only specific ones if there are several possibly duplicated ones (example: trawlers, trawlers|purse_seines) + gears = [ + gear.split("-")[1] + for gear in gears + if str(highest_level) in gear + ] + + # + # take only specific ones if there are several possibly duplicated ones + # (example: trawlers, trawlers|purse_seines) gears = reduce_to_specifics_with_multiples(gears, shiptype_dict) - ### get rid of '|' and take all possible gears individually - gears_split=[] + # + # get rid of '|' and take all possible gears individually + gears_split = [] for g in gears: - if '|' in g: - gears_split += g.split('|') + if "|" in g: + gears_split += g.split("|") else: gears_split.append(g) - - ### map geartype_dict to compare categories (broader ones to be removed) + + # + # map geartype_dict to compare categories (broader ones to be removed) gears = reduce_to_specifics(gears_split, shiptype_dict) - ### remove redundant values and join together with '|' + # + # remove redundant values and join together with '|' gears = sorted(list(set(gears))) - final_value = '|'.join(gears) - - ### check the case of combination of level 2 and 3 - if (highest_level==3)&(2 in levels): + final_value = "|".join(gears) + + # + # check the case of combination of level 2 and 3 + if (highest_level == 3) & (2 in levels): final_value_3 = determine_shiptype(gears_3, shiptype_dict) final_value_2 = determine_shiptype(gears_2, shiptype_dict) - if (not final_value in final_value_3)&(final_value in final_value_2): + if (final_value not in final_value_3) & (final_value in final_value_2): pass - else: + else: final_value = final_value_3 - - ### output - if final_value=='': + + # + # output + if final_value == "": return np.nan else: - final_value = str(highest_level) + '-' + final_value + final_value = str(highest_level) + "-" + final_value return final_value def select_high_confidence_geartype(x, y, shiptype_dict): - '''return a geartype that has higher confidence level''' - - if (x==x)&(x!=None)&(y==y)&(y!=None): - x_level = int(x.split('-')[0]) - x_value = x.split('-')[1] - y_level = int(y.split('-')[0]) - y_value = y.split('-')[1] - ## if x confidence level is higher, return x + """ + Return a geartype that has higher confidence level + + :param x: STRING, geartype attached with a confidence to compare + :param y: STRING, geartype attached with a confidence to compare + :param shiptype_dict: DICT, a geartype dictionary containing 'path' + info in the hierarchy + :return: STRING, geartype attached with a higher confidence between x and y + """ + + if (x == x) & (x is not None) & (y == y) & (y is not None): + x_level = int(x.split("-")[0]) + x_value = x.split("-")[1] + y_level = int(y.split("-")[0]) + y_value = y.split("-")[1] + # + # if x confidence level is higher, return x if x_level > y_level: return x - ## if confidence levels are the same, determine shiptype and return + # + # if confidence levels are the same, determine shiptype and return elif x_level == y_level: - return str(x_level) + '-' + 
determine_shiptype([x_value, y_value], shiptype_dict) - ## if y confidence level is higher, return y + return ( + str(x_level) + + "-" + + determine_shiptype([x_value, y_value], shiptype_dict) + ) + # + # if y confidence level is higher, return y else: return y - elif (x==x)&(x!=None): + elif (x == x) & (x is not None): return x - elif (y==y)&(y!=None): + elif (y == y) & (y is not None): return y else: return np.nan -### function that makes geartype dictionary from shiptypes yaml file def make_shiptype_dict(shiptypes): - ''' - This module returns a categorical dictionary of ship types from a ship type yml file received. - Values of the dictionary show where a specific ship type is situated in the ship type category tree. - - -------- - ARGUMENT - -------- - shiptypes: DICT, usually loaded from a .yml file that place categorically all possible ship types as a tree - -------- - - ------ - RETURN - ------ - shiptype_dict: DICT, shiptype categorical dictionary - (examples: (key, value) -> (set_longlines, (fishing, fixed_gear, set_longlines))) - ------ - ''' - - ### create a geartype dictionary where each gear has categorical information + """ + This module returns a categorical dictionary of ship types + from a ship type yml file received. Values of the dictionary show + where a specific ship type is situated in the ship type category tree. + + :param shiptypes: DICT, usually loaded from a .yml file that place + categorically all possible ship types as a tree + :return shiptype_dict: DICT, shiptype categorical dictionary + (examples: + (key, value) -> (set_longlines, (fishing, fixed_gear, set_longlines))) + """ + + # + # create a geartype dictionary where each gear has categorical information shiptype_dict = {} for stype in shiptypes: for l1 in shiptypes[stype]: @@ -233,143 +312,169 @@ def make_shiptype_dict(shiptypes): shiptype_dict[l3] = [stype, l1, l2, l3] if shiptypes[stype][l1][l2][l3] is not None: for l4 in shiptypes[stype][l1][l2][l3]: - shiptype_dict[l4] = [stype, l1, l2, l3, l4] - - ### other_fishing, other_not_fishing, unknown_fishing can be replaced by other more specific gears - shiptype_dict['fishing'] = ['fishing'] - shiptype_dict['non_fishing'] = ['non_fishing'] - shiptype_dict['unknown'] = None - shiptype_dict[''] = None - + shiptype_dict[l4] = [ + stype, + l1, + l2, + l3, + l4, + ] + + # + # other_fishing, other_not_fishing, unknown_fishing + # can be replaced by other more specific gears + shiptype_dict["fishing"] = ["fishing"] + shiptype_dict["non_fishing"] = ["non_fishing"] + shiptype_dict["unknown"] = None + shiptype_dict[""] = None + return shiptype_dict -### function to choose only specific gear values if broader level values exist with specific level values def reduce_to_specifics(gears, shiptype_dict): - ''' - this module reduces the list of gear values only to contain specific gear values if there are broader gear values together - - -------- - ARGUMENT - -------- - gears: LIST of strings that are gear types predefined - -------- - - ------ - RETURN - ------ - values: LIST of string that are gear types predefined - - ''' - if len(gears)==0: + """ + This module reduces the list of gear values only to contain specific + gear values if there are broader gear values together + + :param gears: LIST, list of strings that are gear types predefined + :param shiptype_dict: DICT, geartype dictionary containing 'path' + information in the hierarchy + :return: LIST of string that are gear types predefined + """ + if len(gears) == 0: return [] - - ### reduce only single gear 
values - singles = [gear for gear in gears if '|' not in gear] - multiples = [gear for gear in gears if '|' in gear] - - ### mapped to shiptype dictionary values - gears_mapped = [shiptype_dict[gear] for gear in singles if shiptype_dict[gear]!=None] - + + # + # reduce only single gear values + singles = [gear for gear in gears if "|" not in gear] + multiples = [gear for gear in gears if "|" in gear] + + # + # mapped to shiptype dictionary values + gears_mapped = [ + shiptype_dict[gear] + for gear in singles + if shiptype_dict[gear] is not None + ] + temp = list(gears_mapped) for gear in gears_mapped: - others = [g for g in gears_mapped if g!=gear] + others = [g for g in gears_mapped if g != gear] for other in others: - ### see if the gear in question is a subset of anyone of the others, if true, remove it from the list + # + # see if the gear in question is a subset of anyone of the others, + # if true, remove it from the list if set(gear).issubset(other): if gear in temp: temp.remove(gear) gears_mapped = temp - - ### return only end values as in a list + + # + # return only end values as in a list reduced = [] for gear in gears_mapped: val = gear[-1] reduced.append(val) reduced = list(set(reduced)) final = reduced + multiples - - return final + return final def reduce_to_specifics_with_multiples(gears, shiptype_dict): - if len(gears)==0: + """ + Same as the function above but accepting multiple gears attached with '|' + """ + if len(gears) == 0: return [] - - ### reduce singles to specifics if possible + + # + # reduce singles to specifics if possible gears = reduce_to_specifics(gears, shiptype_dict) - singles = [gear for gear in gears if '|' not in gear] - multiples = [gear for gear in gears if '|' in gear] - - if len(multiples)>0: + singles = [gear for gear in gears if "|" not in gear] + multiples = [gear for gear in gears if "|" in gear] + + if len(multiples) > 0: for multiple in multiples: - flags=[] - elems = multiple.split('|') - + flags = [] + elems = multiple.split("|") + for elem in elems: - ### look at elements of multiples if they can be reduced to specifics with single values - vals = [reduce_to_specifics([elem, single], shiptype_dict) for single in singles \ - if len(reduce_to_specifics([elem, single], shiptype_dict))==1] - if len(vals)==1: + # + # look at elements of multiples + # if they can be reduced to specifics + # with single values + vals = [ + reduce_to_specifics([elem, single], shiptype_dict) + for single in singles + if len(reduce_to_specifics([elem, single], shiptype_dict)) + == 1 + ] + if len(vals) == 1: flags.append(1) reduced = vals[0] else: flags.append(0) - ### if it can be reduced, then remove this multiple and put this reduced values - if sum(flags)==1: + # + # if it can be reduced, then remove this multiple + # and put this reduced values + if sum(flags) == 1: gears.remove(multiple) gears = gears + reduced - - ### final clearing-up + + # + # final clearing-up gears = reduce_to_specifics(gears, shiptype_dict) - + return gears def reduce_to_general(gears, shiptype_dict): - ''' - this module reduces the list of gear values only to contain general geartype values - - -------- - ARGUMENT - -------- - gears: LIST of strings that are gear types predefined - -------- - - ------ - RETURN - ------ - values: LIST of string that are gear types predefined + """ + This module reduces the list of gear values only to contain general + geartype values - ''' + :param gears: LIST, list of strings that are gear types predefined + :param shiptype_dict: DICT, geartype 
dictionary containing 'path' + information in the hierarchy + :return: LIST of string that are gear types predefined + """ - if len(gears)==0: + if len(gears) == 0: return [] - - ### reduce only single gear values - singles = [gear for gear in gears if '|' not in gear] - multiples = [gear for gear in gears if '|' in gear] - ### mapped to shiptype dictionary values - gears_mapped = [shiptype_dict[gear] for gear in singles if shiptype_dict[gear]!=None] + # + # reduce only single gear values + singles = [gear for gear in gears if "|" not in gear] + multiples = [gear for gear in gears if "|" in gear] + + # + # mapped to shiptype dictionary values + gears_mapped = [ + shiptype_dict[gear] + for gear in singles + if shiptype_dict[gear] is not None + ] temp = list(gears_mapped) for gear in gears_mapped: - others = [g for g in gears_mapped if g!=gear] + others = [g for g in gears_mapped if g != gear] for other in others: - ### see if anyone of the others is a subset of gear in question, if true, remove the gear (more detailed one) from the list + # + # see if anyone of the others is a subset of gear in question, + # if true, remove the gear (more detailed one) from the list if set(other).issubset(gear): if gear in temp: temp.remove(gear) - + gears_mapped = temp - - ### return only end values as in a list + + # + # return only end values as in a list reduced = [] for gear in gears_mapped: val = gear[-1] @@ -382,65 +487,94 @@ def reduce_to_general(gears, shiptype_dict): def reduce_to_general_with_multiples(gears, shiptype_dict): - ''' - returns general (less detailed) gear types only if gear values can be reduced according to shiptype yaml file - ''' - - if len(gears)==0: + """ + Returns general (less detailed) gear types + only if gear values can be reduced according to shiptype yaml file + + :param gears: LIST, list of strings that are gear types predefined + :param shiptype_dict: DICT, geartype dictionary containing 'path' + information in the hierarchy + :return: LIST of string that are gear types predefined + """ + + if len(gears) == 0: return [] - - ### reduce singles to specifics if possible + + # + # reduce singles to specifics if possible gears = reduce_to_general(gears, shiptype_dict) - singles = [gear for gear in gears if '|' not in gear] - multiples = [gear for gear in gears if '|' in gear] - - if len(multiples)>0: + singles = [gear for gear in gears if "|" not in gear] + multiples = [gear for gear in gears if "|" in gear] + + if len(multiples) > 0: for multiple in multiples: - flags=[] - elems = multiple.split('|') - + flags = [] + elems = multiple.split("|") + for elem in elems: - ### look at elements of multiples if they can be reduced to specifics with single values - vals = [reduce_to_general([elem, single], shiptype_dict) for single in singles \ - if len(reduce_to_general([elem, single], shiptype_dict))==1] - if len(vals)==1: + # + # look at elements of multiples if they can be reduced + # to specifics with single values + vals = [ + reduce_to_general([elem, single], shiptype_dict) + for single in singles + if len(reduce_to_general([elem, single], shiptype_dict)) + == 1 + ] + if len(vals) == 1: flags.append(1) reduced = vals[0] else: flags.append(0) - - ### if it can be reduced, then remove this multiple and put this reduced values - if sum(flags)>0: + + # + # if it can be reduced, then remove this multiple + # and put this reduced values + if sum(flags) > 0: gears.remove(multiple) gears = gears + reduced - ### final clearing-up + # + # final clearing-up gears = 
reduce_to_general(gears, shiptype_dict)
-
-    return gears
+    return gears
 
 
 def is_fishing_vessel(gear, shiptype_dict):
-    if (gear=='')|(gear==None)|(gear!=gear):
+    """
+    A function that determines if the given vessel class is a fishing vessel
+
+    :param gear: STRING, gear type value(s), multiple values joined with '|'
+    :param shiptype_dict: DICT, geartype dictionary containing 'path'
+    information in the hierarchy
+    :return: BOOL, whether the vessel is a fishing vessel
+    """
+    if (gear == "") | (gear is None) | (gear != gear):
         return None
     else:
-        gear = gear.replace(' ','')
-        gear_mapped=[]
-        gears = gear.split('|')
-
-        ## create a list of gears mapped to 0s (non-fishing gear) or 1s (fishing gear)
+        gear = gear.replace(" ", "")
+        gear_mapped = []
+        gears = gear.split("|")
+
+        #
+        # create a list of gears mapped to
+        # 0s (non-fishing gear) or 1s (fishing gear)
         for gear in gears:
-            if shiptype_dict[gear][0]=='fishing':
+            if shiptype_dict[gear][0] == "fishing":
                 gear_mapped.append(1)
             else:
                 gear_mapped.append(0)
 
-        if np.prod(gear_mapped)==1: ## if all mapped gears are 1s (therefore fishing vessel)
+        if (
+            np.prod(gear_mapped) == 1
+        ):  # if all mapped gears are 1s (therefore fishing vessel)
             isfishingvessel = True
-        elif sum(gear_mapped)==0: ## if all mapped gears are 0s (therefore non-fishing vessel)
+        elif (
+            sum(gear_mapped) == 0
+        ):  # if all mapped gears are 0s (therefore non-fishing vessel)
             isfishingvessel = False
-        else: ## not determinable, return None
+        else:  # not determinable, return None
             return None
-
+
         return isfishingvessel
diff --git a/build/lib/shipdataprocess/standardize.py b/build/lib/shipdataprocess/standardize.py
index 67108c4..e2a41dc 100644
--- a/build/lib/shipdataprocess/standardize.py
+++ b/build/lib/shipdataprocess/standardize.py
@@ -5,10 +5,27 @@
 import pandas as pd
 import numpy as np
 import re
-from django.utils.encoding import smart_str
 from unidecode import unidecode
 
 
+def smart_str(s):
+    """
+    This module finds the right encoding of the given string
+
+    :param s: STRING, a text in which we do not know the type of encoding
+    :return: STRING, standardized string
+    """
+    if issubclass(type(s), str):
+        return s
+    if isinstance(s, bytes):
+        try:
+            return str(s, "utf-8", "strict")
+        except UnicodeDecodeError:
+            return str(s, "iso-8859-1", "strict")
+    else:
+        return str(s)
+
+
 def imo_checksum(n):
     """
     This function for IMO numbers that are designed as 7-digit integer number
@@ -32,12 +49,14 @@ def imo_checksum(n):
 
     #
     # IMO checksum formula
-    if ((n // 1000000 % 10) * 7 +
-        (n // 100000 % 10) * 6 +
-        (n // 10000 % 10) * 5 +
-        (n // 1000 % 10) * 4 +
-        (n // 100 % 10) * 3 +
-        (n // 10 % 10) * 2) % 10 == (n % 10):
+    if (
+        (n // 1000000 % 10) * 7
+        + (n // 100000 % 10) * 6
+        + (n // 10000 % 10) * 5
+        + (n // 1000 % 10) * 4
+        + (n // 100 % 10) * 3
+        + (n // 10 % 10) * 2
+    ) % 10 == (n % 10):
         return True
     else:
         return False
@@ -47,7 +66,7 @@ def imo_checksum(n):
     """
     Standardize IMO numbers (ignore all letters and characters but numbers)
     If it comes with pandas Series or DataFrame, make sure
-    it saves IMO numbers in STRING, as pandas Seires or DataFrame usually
+    it saves IMO numbers in STRING, as pandas Series or DataFrame usually
     turn INTEGER to FLOAT in the presence of NULL in the same column.
:param elem: Pandas Series, Series that contains a string field @@ -58,69 +77,87 @@ def standardize_imo(elem, check_field=True): if check_field: if type(elem) == pd.core.series.Series: elem = elem.apply( - lambda x: re.sub(r'[^\d\.]', '', str(x)) - if (x == x) & (x is not None) & (x != '') & (x != 0) else None) + lambda x: re.sub(r"[^\d.]", "", str(x)) + if (x == x) & (x is not None) & (x != "") & (x != 0) + else None + ) elem = elem.apply( lambda x: str(int(float(x))) - if (x == x) & (x is not None) & (x != '') & (x != 0) else None) + if (x == x) & (x is not None) & (x != "") & (x != 0) + else None + ) elem = elem.apply(lambda x: x if imo_checksum(x) else None) return elem elif type(elem) == pd.core.frame.DataFrame: elem = elem[check_field].apply( - lambda x: re.sub(r'[^\d\.]', '', str(x)) - if (x == x) & (x is not None) & (x != '') & (x != 0) else None) + lambda x: re.sub(r"[^\d.]", "", str(x)) + if (x == x) & (x is not None) & (x != "") & (x != 0) + else None + ) elem = elem.apply( lambda x: str(int(float(x))) - if (x == x) & (x is not None) & (x != '') & (x != 0) else None) + if (x == x) & (x is not None) & (x != "") & (x != 0) + else None + ) elem = elem.apply(lambda x: x if imo_checksum(x) else None) return elem - elif (elem != elem) | (elem is None) | (elem == '') | (elem == 0): + elif (elem != elem) | (elem is None) | (elem == "") | (elem == 0): return None elif (type(elem) == str) | (type(elem) == int) | (type(elem) == float): - elem = re.sub(r'[^\d\.]', '', str(elem)) + elem = re.sub(r"[^\d.]", "", str(elem)) if elem == "": return None else: elem = str(int(float(elem))) - if checksum(elem): + if imo_checksum(elem): return elem else: return None else: - raise ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return None -# -# Standardize floating numbers. -# Make sure to remove all comma separators (,). -# def standardize_float(elem, check_field=True): + """ + This module standardizes floating numbers. + Make sure to remove all comma separators (,). 
+
+    :param elem: Pandas Series, DataFrame, STR, FLOAT, INT, types
+    that contain a float value
+    :param check_field: Boolean, field that contains a float number
+    :return: Same type as the elem input
+    """
     if check_field:
         if type(elem) == pd.core.series.Series:
             return elem.apply(
-                lambda x: float(str(x).replace(',', ''))
-                if (x == x) & (x is not None) & (x != '') & (x != 0) else np.nan)
+                lambda x: float(str(x).replace(",", ""))
+                if (x == x) & (x is not None) & (x != "") & (x != 0)
+                else np.nan
+            )
         elif type(elem) == pd.core.frame.DataFrame:
             return elem[check_field].apply(
-                lambda x: float(str(x).replace(',', ''))
-                if (x == x) & (x is not None) & (x != '') & (x != 0) else np.nan)
-        elif (elem != elem) | (elem is None) | (elem == '') | (elem == 0):
+                lambda x: float(str(x).replace(",", ""))
+                if (x == x) & (x is not None) & (x != "") & (x != 0)
+                else np.nan
+            )
+        elif (elem != elem) | (elem is None) | (elem == "") | (elem == 0):
             return np.nan
         elif (type(elem) == str) | (type(elem) == int) | (type(elem) == float):
-            return float(str(elem).replace(',', ''))
+            return float(str(elem).replace(",", ""))
         else:
-            raise ValueError('Unknown type received')
+            raise ValueError("Unknown type received")
     else:
         return np.nan
 
 
 def smart_upper(text):
     """
-    Selective upper sensitive to upper/lower cases
-    when it's related to URLs
-    Source: https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string
+    Selective upper-casing sensitive to URLs: the given text is turned to
+    upper case, but any URL found in the text keeps its original case.
+    Source: "https://stackoverflow.com/questions/6038061/
+    regular-expression-to-find-urls-within-a-string"
 
     :param text: String, giv en text
     :return: String, Upper cased text except the URL part
@@ -129,7 +166,10 @@ def smart_upper(text):
     #
     # Find URLs in the given string and upper-case only the other texts
     # to preserve caps of URLs
-    regex_for_url = r"((http|ftp|https)\:\/\/)?([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?"
+    regex_for_url = (
+        r"((http|ftp|https)\:\/\/)?([\w_-]+(?:(?:\.[\w_-]+)+))"
+        r"([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?"
+ ) matched = re.finditer(regex_for_url, text) prev_end = 0 for m in matched: @@ -137,11 +177,9 @@ def smart_upper(text): start = m.start() end = m.end() - text = \ - text[:prev_end] + \ - text[prev_end:start].upper() + \ - url + \ - text[end:] + text = ( + text[:prev_end] + text[prev_end:start].upper() + url + text[end:] + ) prev_end = end text = text[:prev_end] + text[prev_end:].upper() @@ -163,158 +201,310 @@ def standardize_str(elem, check_field=True): if check_field: if type(elem) == pd.core.series.Series: elem = elem.apply( - lambda x: smart_upper(re.sub(r'\s+', ' ', smart_str(x)).strip()) - if (x == x) & (x is not None) & (x != '') else None) + lambda x: smart_upper( + re.sub(r"\s+", " ", smart_str(x)).strip() + ) + if (x == x) & (x is not None) & (x != "") + else None + ) return elem elif type(elem) == pd.core.frame.DataFrame: elem = elem[check_field].apply( - lambda x: smart_upper(re.sub(r'\s+', ' ', smart_str(x)).strip()) - if (x == x) & (x is not None) & (x != '') else None) + lambda x: smart_upper( + re.sub(r"\s+", " ", smart_str(x)).strip() + ) + if (x == x) & (x is not None) & (x != "") + else None + ) return elem - elif (elem != elem) | (elem is None) | (elem == '') | (elem == 0): + elif (elem != elem) | (elem is None) | (elem == "") | (elem == 0): return None elif type(elem) == str: - return smart_upper(re.sub(r'\s+', ' ', elem).strip()) + return smart_upper(re.sub(r"\s+", " ", elem).strip()) else: - raise ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return None -# -# Standardize owner's names. Remove all variations of CO. LTD or similar types of suffixes -# and unionize all "fishery' to "fisheries". -# def standardize_owner(elem, check_field=True): + """ + This module standardizes owner's names which removes all variations of + suffix such as CO. LTD or similar types + and unionize "fishery' to "fisheries". 
+ + :param elem: Pandas Series, DataFrame, STRING, a data type + that contains a string field + :param check_field: Boolean, field that contains the given strings + :return: + """ if check_field: elem = standardize_str(elem, check_field) - text_to_remove = \ - ['CO LTD', 'COLTD', 'COMPANY LTD', 'CO LIMITED', 'COMPANY LIMITED', 'CO LIMTED', 'CO LTTD', 'CV LIMITADA', - 'LTD SA($)', 'LTD S A($)', 'CO SA($)', 'CO S A($)', 'CO AB($)', 'CO A B($)', 'CO PTY LTD($)', 'CO LRD($)', - 'PTY LIMITED($)', 'PTY LTD($)', 'SA PTY LTD($)', 'CORP LTD($)', 'LTDA EPP($)', 'JOINT STOCK COMPANY($)', - 'JOINTSTOCK COMPANY($)', 'CORPORATION PTE LTD($)', 'CORPORATION PTE($)', 'CORP PTE($)', 'CORP SA($)', - 'CORP INC($)', 'CORPORATION($)', 'CORP($)', 'INCORPORATED($)', 'INC($)', 'AP PTE LTD', 'CO PTE LTD', - 'GMBH CO', 'GMBH($)', 'LTD($)', 'LTDA($)', 'LIMITED($)', 'PTE($)', 'LIMITADA($)', 'LDA($)', 'LLC($)', - 'COMPANY NV($)', 'COMPANY N V($)', 'COMPANY BV($)', 'COMPANY B V($)', 'CO BV($)', 'CO B V($)', 'CO NV($)', - 'CO N V($)', 'SA DE CV($)', 'S A DE C V($)', 'SCL DE CV($)', 'S C L DE C V($)', 'SCL($)', 'S C L($)', - 'S C DE R L($)', 'S R L DE C V($)', 'SAC($)', 'S A C($)', 'EIRL($)', 'E I R L($)', 'SRL($)', 'S R L($)', - ' CIA($)', 'EURL($)', '(^)EURL', 'SARL($)', '(^)SARL', 'SNC($)', '(^)SNC', 'SPC($)', '(^)SPC', 'SPA($)', - 'SAS($)', ' SA($)', ' S A($)', ' SL($)', ' S L($)', ' SC($)', ' S C($)', 'CO WLL($)', 'CO LIB($)', - ' AS($)', ' A S($)', 'PJSC($)', 'P JSC($)', 'OJSC($)', 'CJSC($)' 'JSC($)', ' EPP($)', ' CB($)', ' C B($)', - ' CA($)', ' C A($)', ' GIE($)', 'KABUSHIKI KAISHA($)', ' KK($)', 'K K($)', ' BV($)', ' B V($)', - 'YUGEN KAISHA', 'YUGEN', 'KAISHA', 'KAISYA', 'YUGEN KAISYA', 'GYOGYO', 'GYOGYOU', 'GAISHA', ' JU($)', - 'OOO($)', '(^)OOO', 'CO PVT($)', 'COMPANY PVT($)', ' PT($)', ' P T($)', '(^)PT', ' CC($)', - ' CO($)', 'COMPANY($)', ' NV($)', ' N V($)', '^NA($)', '^N A($)', 'RPTD SOLD.*', 'OWNER UNKNOWN*', - 'CO LT', 'EHF($)', '(^)EHF'] - text_to_remove = '|'.join(text_to_remove) + text_to_remove = [ + "CO LTD", + "COLTD", + "COMPANY LTD", + "CO LIMITED", + "COMPANY LIMITED", + "CO LIMTED", + "CO LTTD", + "CV LIMITADA", + "LTD SA($)", + "LTD S A($)", + "CO SA($)", + "CO S A($)", + "CO AB($)", + "CO A B($)", + "CO PTY LTD($)", + "CO LRD($)", + "PTY LIMITED($)", + "PTY LTD($)", + "SA PTY LTD($)", + "CORP LTD($)", + "LTDA EPP($)", + "JOINT STOCK COMPANY($)", + "JOINTSTOCK COMPANY($)", + "CORPORATION PTE LTD($)", + "CORPORATION PTE($)", + "CORP PTE($)", + "CORP SA($)", + "CORP INC($)", + "CORPORATION($)", + "CORP($)", + "INCORPORATED($)", + "INC($)", + "AP PTE LTD", + "CO PTE LTD", + "GMBH CO", + "GMBH($)", + "LTD($)", + "LTDA($)", + "LIMITED($)", + "PTE($)", + "LIMITADA($)", + "LDA($)", + "LLC($)", + "COMPANY NV($)", + "COMPANY N V($)", + "COMPANY BV($)", + "COMPANY B V($)", + "CO BV($)", + "CO B V($)", + "CO NV($)", + "CO N V($)", + "SA DE CV($)", + "S A DE C V($)", + "SCL DE CV($)", + "S C L DE C V($)", + "SCL($)", + "S C L($)", + "S C DE R L($)", + "S R L DE C V($)", + "SAC($)", + "S A C($)", + "EIRL($)", + "E I R L($)", + "SRL($)", + "S R L($)", + " CIA($)", + "EURL($)", + "(^)EURL", + "SARL($)", + "(^)SARL", + "SNC($)", + "(^)SNC", + "SPC($)", + "(^)SPC", + "SPA($)", + "SAS($)", + " SA($)", + " S A($)", + " SL($)", + " S L($)", + " SC($)", + " S C($)", + "CO WLL($)", + "CO LIB($)", + " AS($)", + " A S($)", + "PJSC($)", + "P JSC($)", + "OJSC($)", + "CJSC($)" "JSC($)", + " EPP($)", + " CB($)", + " C B($)", + " CA($)", + " C A($)", + " GIE($)", + "KABUSHIKI KAISHA($)", + " KK($)", + "K K($)", + " 
BV($)", + " B V($)", + "YUGEN KAISHA", + "YUGEN", + "KAISHA", + "KAISYA", + "YUGEN KAISYA", + "GYOGYO", + "GYOGYOU", + "GAISHA", + " JU($)", + "OOO($)", + "(^)OOO", + "CO PVT($)", + "COMPANY PVT($)", + " PT($)", + " P T($)", + "(^)PT", + " CC($)", + " CO($)", + "COMPANY($)", + " NV($)", + " N V($)", + "^NA($)", + "^N A($)", + "RPTD SOLD.*", + "OWNER UNKNOWN*", + "CO LT", + "EHF($)", + "(^)EHF", + ] + text_to_remove = "|".join(text_to_remove) if type(elem) == pd.core.series.Series: elem = elem.apply( - lambda x: unidecode(re.sub(r'\(.+\)', ' ', x)).strip() if (x == x) & (x != None) & (x != '') else None) + lambda x: unidecode(re.sub(r"\(.+\)", " ", x)).strip() + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem.apply( - lambda x: unidecode(re.sub(r'[^\w]+', ' ', x)).strip() if (x == x) & (x != None) & (x != '') else None) + lambda x: unidecode(re.sub(r"[^\w]+", " ", x)).strip() + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem.apply( - lambda x: re.sub(text_to_remove, ' ', x) if (x == x) & (x != None) * (x != '') else None) + lambda x: re.sub(text_to_remove, " ", x) + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem.apply( - lambda x: re.sub(r'\s+', ' ', x).strip() if (x == x) & (x != None) * (x != '') else None) + lambda x: re.sub(r"\s+", " ", x).strip() + if (x == x) & (x is not None) & (x != "") + else None + ) return elem.apply( - lambda x: re.sub('FISHERY', 'FISHERIES', x) if (x == x) & (x != None) * (x != '') else None) + lambda x: re.sub("FISHERY", "FISHERIES", x) + if (x == x) & (x is not None) & (x != "") + else None + ) elif type(elem) == pd.core.frame.DataFrame: elem = elem[check_field].apply( - lambda x: unidecode(re.sub(r'\(.+\)', ' ', x)).strip() if (x == x) & (x != None) & (x != '') else None) + lambda x: unidecode(re.sub(r"\(.+\)", " ", x)).strip() + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem[check_field].apply( - lambda x: unidecode(re.sub(r'[^\w]+', ' ', x)).strip() if (x == x) & (x != None) & (x != '') else None) + lambda x: unidecode(re.sub(r"[^\w]+", " ", x)).strip() + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem[check_field].apply( - lambda x: re.sub(text_to_remove, ' ', x) if (x == x) & (x != None) * (x != '') else None) + lambda x: re.sub(text_to_remove, " ", x) + if (x == x) & (x is not None) & (x != "") + else None + ) elem = elem[check_field].apply( - lambda x: re.sub(r'\s+', ' ', x).strip() if (x == x) & (x != None) * (x != '') else None) + lambda x: re.sub(r"\s+", " ", x).strip() + if (x == x) & (x is not None) * (x != "") + else None + ) return elem[check_field].apply( - lambda x: re.sub('FISHERY', 'FISHERIES', x) if (x == x) & (x != None) * (x != '') else None) - elif (elem != elem) | (elem == None) | (elem == '') | (elem == 0): + lambda x: re.sub("FISHERY", "FISHERIES", x) + if (x == x) & (x is not None) * (x != "") + else None + ) + elif (elem != elem) | (elem is None) | (elem == "") | (elem == 0): return np.nan elif type(elem) == str: - elem = unidecode(re.sub(r'\(.+\)', ' ', elem)).strip() - elem = unidecode(re.sub(r'[^\w]+', ' ', elem)).strip() - elem = re.sub(text_to_remove, ' ', elem) - elem = re.sub(r'\s+', ' ', elem).strip() - return re.sub('FISHERY', 'FISHERIES', elem) + elem = unidecode(re.sub(r"\(.+\)", " ", elem)).strip() + elem = unidecode(re.sub(r"[^\w]+", " ", elem)).strip() + elem = re.sub(text_to_remove, " ", elem) + elem = re.sub(r"\s+", " ", elem).strip() + return re.sub("FISHERY", "FISHERIES", elem) else: - raise 
ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return None -# -# Standardize Integer in a form of string -# because Pandas Series or DataFrame considers -# a column of integers with Nulls as a column of float -# Save it as a string column so that it can be uploaded -# as integer columns when uploading to BigQuery. -# def standardize_int_str(elem, check_field=True): + """ + This module standardizes an integer in the form of string + because Pandas Series or DataFrame considers a column of integers + with Nulls as a column of float. Save it as a string column so that + it can be uploaded as integer columns when uploading to BigQuery. + + :param elem: Pandas Series, DataFrame, STRING, INT, FLOAT, a data type + that contains a string field + :param check_field: Boolean, field that contains the given strings + :return: Same as the input elem type + """ if check_field: if type(elem) == pd.core.series.Series: return elem.apply( - lambda x: str(int(float(re.sub('[^\d\.]', '', str(x))))) - if (x == x) & (x is not None) & (x != '') else None) + lambda x: str(int(float(re.sub(r"[^\d.]", "", str(x))))) + if (x == x) & (x is not None) & (x != "") + else None + ) elif type(elem) == pd.core.frame.DataFrame: return elem[check_field].apply( - lambda x: str(int(float(re.sub('[^\d\.]', '', str(x))))) - if (x == x) & (x is not None) & (x != '') else None) - elif (elem != elem) | (elem is None) | (elem == ''): + lambda x: str(int(float(re.sub(r"[^\d.]", "", str(x))))) + if (x == x) & (x is not None) & (x != "") + else None + ) + elif (elem != elem) | (elem is None) | (elem == ""): return None elif (type(elem) == str) | (type(elem) == int) | (type(elem) == float): - return str(int(float(re.sub(r'[^\d\.]', '', str(elem))))) + return str(int(float(re.sub(r"[^\d.]", "", str(elem))))) else: - raise ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return None -# -# Standardize timestamp -# def standardize_time(elem, check_field=True): - if check_field: - if type(elem)==pd.core.series.Series: - return elem.apply(lambda x: pd.to_datetime(x, errors='coerce') if (x==x)&(x!=None)&(x!='') else None) - elif type(elem)==pd.core.frame.DataFrame: - return elem[check_field].apply(lambda x: pd.to_datetime(x, errors='coerce') if (x==x)&(x!=None)&(x!='') else None) - elif (elem!=elem)|(elem==None)|(elem=='')|(elem==0): - return np.nan - elif (type(elem)==str)|(type(elem)==pd.Timestamp): - return pd.to_datetime(elem, errors='coerce') - else: - raise ValueError('Unknown type received') - else: - return None - - -def clean_uvi(x): - if (type(x)==float)|(type(x)==int): - if (not np.isnan(x))&(x==x)&(x!=None): - return str(int(x)) - else: - return np.nan - else: - return re.sub('\s+', ' ', x).strip().upper() + """ + This modules standardizes a timestamp + :param elem: Pandas DATAFRAME, SERIES, STRING, a data type containing + time stamp information + :param check_field: Boolean, whether the field that contains + the timestamp information + :return: Same type as the elem input + """ -def standardize_uvi(elem, check_field=True): if check_field: - if type(elem)==pd.core.series.Series: - return elem.apply(lambda x: clean_uvi(x)) - elif type(elem)==pd.core.frame.DataFrame: - return elem[check_field].apply(lambda x: clean_uvi(x)) - elif (elem!=elem)|(elem==None)|(elem=='')|(elem==0): - return None - elif (type(elem)==int)|(type(elem)==float): - return str(int(elem)) - elif type(elem)==str: - return re.sub('\s+',' ',elem).strip().upper() + if type(elem) == 
pd.core.series.Series: + return elem.apply( + lambda x: pd.to_datetime(x, errors="coerce") + if (x == x) & (x is not None) & (x != "") + else None + ) + elif type(elem) == pd.core.frame.DataFrame: + return elem[check_field].apply( + lambda x: pd.to_datetime(x, errors="coerce") + if (x == x) & (x is not None) & (x != "") + else None + ) + elif (elem != elem) | (elem is None) | (elem == "") | (elem == 0): + return np.nan + elif (type(elem) == str) | (type(elem) == pd.Timestamp): + return pd.to_datetime(elem, errors="coerce") else: - raise ValueError('Unknown type received') + raise ValueError("Unknown type received") else: return None @@ -323,10 +513,10 @@ def standardize_flag(df, field, rules): """ Flag mapping based on YAML mapping file per registry - :param df: - :param field: - :param rules: - :return: + :param df: Pandas DataFrame, a dataframe containing flag information field + :param field: STRING, the name of the field containing flag information + :param rules: DICT, the YAML mapping rule + :return: Pandas Series or STRING """ if field: if rules: @@ -334,17 +524,19 @@ def standardize_flag(df, field, rules): # In case it's explicitly "ALL" as an option, # returns the preset value if "ALL" in rules: - return rules['ALL'] + return rules["ALL"] # # If it's "SAME" option, use the values in the flag field - elif 'SAME' in rules: + elif "SAME" in rules: return df[field] # # iso3 country code - note that all is turned to upper cases else: return df[field].apply( lambda x: rules[unidecode(str(x)).strip().upper()] - if (x == x) & (x is not None) & (x != '') else None) + if (x == x) & (x is not None) & (x != "") + else None + ) else: return None else: @@ -355,16 +547,17 @@ def standardize_geartype(df, field, rules): """ Geartype mapping based on YAML mapping file per registry - :param df: - :param field: - :param rules: - :return: + :param df: Pandas DataFrame, a DataFrame containing geartype + information field + :param field: STRING, the name of the field containing geartype information + :param rules: DICT, the YAML mapping rule + :return: Pandas Series or STRING """ if field: if rules: - if 'ALL' in rules: - return rules['ALL'] - elif 'SAME' in rules: + if "ALL" in rules: + return rules["ALL"] + elif "SAME" in rules: return df[field] # # note that when mapping geartype, @@ -372,8 +565,40 @@ def standardize_geartype(df, field, rules): else: return df[field].apply( lambda x: rules[unidecode(str(x)).strip().lower()] - if (x == x) & (x is not None) & (x != '') else None) + if (x == x) & (x is not None) & (x != "") + else None + ) else: return None else: return None + + +# +# Below is not used. 
+# def clean_uvi(x): +# if (type(x) == float) | (type(x) == int): +# if (not np.isnan(x)) & (x == x) & (x is not None): +# return str(int(x)) +# else: +# return np.nan +# else: +# return re.sub("\s+", " ", x).strip().upper() +# +# +# def standardize_uvi(elem, check_field=True): +# if check_field: +# if type(elem) == pd.core.series.Series: +# return elem.apply(lambda x: clean_uvi(x)) +# elif type(elem) == pd.core.frame.DataFrame: +# return elem[check_field].apply(lambda x: clean_uvi(x)) +# elif (elem != elem) | (elem == None) | (elem == "") | (elem == 0): +# return None +# elif (type(elem) == int) | (type(elem) == float): +# return str(int(elem)) +# elif type(elem) == str: +# return re.sub("\s+", " ", elem).strip().upper() +# else: +# raise ValueError("Unknown type received") +# else: +# return None diff --git a/shipdataprocess/__init__.py b/shipdataprocess/__init__.py index de625df..444767d 100644 --- a/shipdataprocess/__init__.py +++ b/shipdataprocess/__init__.py @@ -3,7 +3,7 @@ """ -__version__ = "0.7.1" +__version__ = "0.7.0" __author__ = "Jaeyoon Park" __email__ = "jaeyoon@globalfishingwatch.org" __source__ = "https://github.com/GlobalFishingWatch/shipdataprocess" From 869fc834dbf05ac12248265b7cd9d07af1b49d7d Mon Sep 17 00:00:00 2001 From: jaeyoonpark Date: Fri, 28 Jan 2022 00:22:45 +0100 Subject: [PATCH 4/4] print bug fixed --- CHANGES.md | 1 + build/lib/shipdataprocess/__init__.py | 2 +- build/lib/shipdataprocess/normalize.py | 9 ++------- shipdataprocess/__init__.py | 2 +- shipdataprocess/normalize.py | 9 ++------- 5 files changed, 7 insertions(+), 16 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2129f6f..e43ea06 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -39,3 +39,4 @@ v0.6.16, 2020-11-26 -- Make smart_upper() to capture multiple URLs not to capita v0.6.17, 2021-07-30 -- Add Indonesian prefix and Chinese HAO v0.6.18, 2021-08-04 -- Fix a bug in normalize_callsign() regarding NULL/NONE v0.7.0, 2022-01-26 -- Fix it to work only in Python 3.6 or above, codes are compliant with PEP8, dependencies are clearer (Django removed) +v0.7.1, 2022-01-27 -- Bug fixed diff --git a/build/lib/shipdataprocess/__init__.py b/build/lib/shipdataprocess/__init__.py index 444767d..de625df 100644 --- a/build/lib/shipdataprocess/__init__.py +++ b/build/lib/shipdataprocess/__init__.py @@ -3,7 +3,7 @@ """ -__version__ = "0.7.0" +__version__ = "0.7.1" __author__ = "Jaeyoon Park" __email__ = "jaeyoon@globalfishingwatch.org" __source__ = "https://github.com/GlobalFishingWatch/shipdataprocess" diff --git a/build/lib/shipdataprocess/normalize.py b/build/lib/shipdataprocess/normalize.py index c95a2cd..a374ba7 100644 --- a/build/lib/shipdataprocess/normalize.py +++ b/build/lib/shipdataprocess/normalize.py @@ -22,14 +22,9 @@ def normalize_shipname(name): if (name is None) | (name != name) | (name == ""): return None - print(name) + # # Remove nasty characters and white spaces - # try: - # name = unidecode(str(name.decode("utf-8"))) - # except UnicodeDecodeError: - # name = unidecode(str(name.decode("iso_8859-1"))) - if issubclass(type(name), str): name = unidecode(name) elif isinstance(name, bytes): @@ -41,7 +36,7 @@ def normalize_shipname(name): name = str(name) else: return None - print(name) + # # Turn to upper cases name = name.upper() diff --git a/shipdataprocess/__init__.py b/shipdataprocess/__init__.py index 444767d..de625df 100644 --- a/shipdataprocess/__init__.py +++ b/shipdataprocess/__init__.py @@ -3,7 +3,7 @@ """ -__version__ = "0.7.0" +__version__ = "0.7.1" __author__ = "Jaeyoon Park" __email__ 
= "jaeyoon@globalfishingwatch.org" __source__ = "https://github.com/GlobalFishingWatch/shipdataprocess" diff --git a/shipdataprocess/normalize.py b/shipdataprocess/normalize.py index c95a2cd..a374ba7 100644 --- a/shipdataprocess/normalize.py +++ b/shipdataprocess/normalize.py @@ -22,14 +22,9 @@ def normalize_shipname(name): if (name is None) | (name != name) | (name == ""): return None - print(name) + # # Remove nasty characters and white spaces - # try: - # name = unidecode(str(name.decode("utf-8"))) - # except UnicodeDecodeError: - # name = unidecode(str(name.decode("iso_8859-1"))) - if issubclass(type(name), str): name = unidecode(name) elif isinstance(name, bytes): @@ -41,7 +36,7 @@ def normalize_shipname(name): name = str(name) else: return None - print(name) + # # Turn to upper cases name = name.upper()