From 86fcdc1b49e3fe7fe492cdd56b6a3aeb0cca907a Mon Sep 17 00:00:00 2001
From: Marius Pahl <mpahl@diw.de>
Date: Thu, 12 Sep 2019 15:59:25 +0200
Subject: [PATCH] Add type annotations and fix docstrings

---
 collect_stata/dataset.py    |  6 ++++--
 collect_stata/read_stata.py |  8 ++++----
 collect_stata/write_json.py | 34 ++++++++++++++++++----------------
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/collect_stata/dataset.py b/collect_stata/dataset.py
index 3a47eda..6b5576e 100644
--- a/collect_stata/dataset.py
+++ b/collect_stata/dataset.py
@@ -1,6 +1,8 @@
 """Read stata files and write out json files.
 
-The class Dataset contains two method from external files.
+Both read_stata and write_json are imported.
+The class variables dataset and metadata are filled with the data
+read in by read_stata and are used to write out the json file.
 
 Example:
     dataset = Dataset()
@@ -17,7 +19,7 @@ class Dataset:
     """
     Dataset allows the user to read, test and export data in different formats.
 
-    Args:
+    Attributes:
         dataset: Contains the data in tabular format.
         metadata: Contains the metadata in json format.
     """
diff --git a/collect_stata/read_stata.py b/collect_stata/read_stata.py
index f7ca439..6722595 100644
--- a/collect_stata/read_stata.py
+++ b/collect_stata/read_stata.py
@@ -11,7 +11,7 @@
 import pandas as pd
 
 
-def cat_values(varscale: dict, data) -> list:
+def cat_values(varscale: dict, data: pd.io.stata.StataReader) -> list:
     """Extract categorical metadata from stata files.
 
     Args:
@@ -35,7 +35,7 @@ def cat_values(varscale: dict, data) -> list:
     return cat_list
 
 
-def scale_var(varname: str, varscale: dict, datatable) -> str:
+def scale_var(varname: str, varscale: dict, datatable: pd.DataFrame) -> str:
     """Rename types of variables to cat, number and string.
 
     Args:
@@ -59,7 +59,7 @@ def scale_var(varname: str, varscale: dict, datatable) -> str:
     return var_type
 
 
-def generate_tdp(data, stata_name: str):
+def generate_tdp(data: pd.io.stata.StataReader, stata_name: str) -> (pd.DataFrame, dict):
     """Generate tabular data package file.
 
     Args:
@@ -96,7 +96,7 @@ def generate_tdp(data, stata_name: str):
     return datatable, metadata
 
 
-def read_stata(stata_name):
+def read_stata(stata_name: str) -> (pd.DataFrame, dict):
     """Logging and reading stata files.
 
     Args:
diff --git a/collect_stata/write_json.py b/collect_stata/write_json.py
index 32c5f12..16e81a9 100644
--- a/collect_stata/write_json.py
+++ b/collect_stata/write_json.py
@@ -9,7 +9,9 @@
 import pandas as pd
 
 
-def sorting_dataframe(values, labels, missings, frequencies) -> dict:
+def sorting_dataframe(
+    values: list, labels: list, missings: list, frequencies: list
+) -> dict:
     """Function to sort values and labels and return sorted dict.
 
     Args:
@@ -37,7 +39,7 @@ def sorting_dataframe(values, labels, missings, frequencies) -> dict:
     return dataframe.to_dict("list")
 
 
-def uni_cat(elem, data):
+def uni_cat(elem: dict, data: pd.DataFrame) -> dict:
     """Generate dict with frequencies and labels for categorical variables.
 
     Args:
@@ -73,7 +75,7 @@ def uni_cat(elem, data):
     return sorting_dataframe(values, labels, missings, frequencies)
 
 
-def uni_string():
+def uni_string() -> OrderedDict:
     """Generate dict with frequencies for nominal variables.
 
     Returns:
@@ -84,7 +86,7 @@ def uni_string():
     return OrderedDict(frequencies=[], labels=[], labels_de=[], missings=[], values=[])
 
 
-def uni_number():
+def uni_number() -> OrderedDict:
     """Generate dict with frequencies for numerical variables.
 
     Returns:
@@ -95,7 +97,7 @@ def uni_number():
     return OrderedDict(frequencies=[], labels=[], labels_de=[], missings=[], values=[])
 
 
-def stats_cat(elem, data):
+def stats_cat(elem: dict, data: pd.DataFrame) -> dict:
     """Generate dict with statistics for categorical variables.
 
     Args:
@@ -115,7 +117,7 @@ def stats_cat(elem, data):
     return {"valid": valid, "invalid": invalid}
 
 
-def stats_string(elem, data):
+def stats_string(elem: dict, data: pd.DataFrame) -> dict:
     """Generate dict with statistics for nominal variables.
 
     Args:
@@ -134,7 +136,7 @@ def stats_string(elem, data):
     return {"valid": int(valid), "invalid": int(invalid)}
 
 
-def stats_number(elem, data):
+def stats_number(elem: dict, data: pd.DataFrame) -> dict:
     """Generate dict with statistics for numerical variables
 
     Args:
@@ -142,7 +144,7 @@ def stats_number(elem, data):
         data (pandas.DataFrame): Datatable of imported data.
 
     Returns:
-        {...} (OrderedDict): Calculations for numerical variables.
+        {...} (dict): Calculations for numerical variables.
     """
 
     data_withoutmissings = data[data[elem["name"]] >= 0][elem["name"]]
@@ -166,7 +168,7 @@ def stats_number(elem, data):
     }
 
 
-def uni_statistics(elem, data):
+def uni_statistics(elem: dict, data: pd.DataFrame) -> dict:
     """Call function to generate statistics depending on the variable type.
 
     Args:
@@ -174,7 +176,7 @@ def uni_statistics(elem, data):
         data (pandas.DataFrame): Datatable of imported data.
 
     Returns:
-        statistics (OrderedDict):
+        statistics (dict):
         Statistics for either categorical, nominal or numerical variables.
     """
 
@@ -196,7 +198,7 @@ def uni_statistics(elem, data):
     return statistics
 
 
-def uni(elem, data):
+def uni(elem: dict, data: pd.DataFrame) -> OrderedDict:
     """Call function to generate frequencies depending on the variable type.
 
     Args:
@@ -221,7 +223,7 @@ def uni(elem, data):
     return statistics
 
 
-def stat_dict(elem, data, metadata, study: str):
+def stat_dict(elem: dict, data: pd.DataFrame, metadata: dict, study: str) -> OrderedDict:
     """Fill variables with metadata of the dataset.
 
     Args:
@@ -231,7 +233,7 @@ def stat_dict(elem, data, metadata, study: str):
         study (str): Name of the study.
 
     Returns:
-        meta_dict (OrderedDict): Combine calculations and meta information.
+        meta_dict (OrderedDict): Combined calculations and meta information.
     """
 
     scale = elem["type"][0:3]
@@ -257,7 +259,7 @@ def stat_dict(elem, data, metadata, study: str):
     return meta_dict
 
 
-def generate_stat(data, metadata, study: str):
+def generate_stat(data: pd.DataFrame, metadata: dict, study: str) -> list:
     """Prepare statistics for every variable.
 
     Args:
@@ -266,7 +268,7 @@ def generate_stat(data, metadata, study: str):
         study (str): Name of the study.
 
     Returns:
-        stat (OrderedDict): Combine calculations and meta information.
+        stat (list): Combined calculations and meta information.
     """
 
     stat = list()
@@ -282,7 +284,7 @@ def generate_stat(data, metadata, study: str):
     return stat
 
 
-def write_json(data, metadata, filename, study=""):
+def write_json(data: pd.DataFrame, metadata: dict, filename: str, study: str):
     """Main function to write json.
 
     Args: