From 86fcdc1b49e3fe7fe492cdd56b6a3aeb0cca907a Mon Sep 17 00:00:00 2001 From: Marius Pahl Date: Thu, 12 Sep 2019 15:59:25 +0200 Subject: [PATCH] fix issues --- collect_stata/dataset.py | 6 ++++-- collect_stata/read_stata.py | 8 ++++---- collect_stata/write_json.py | 34 ++++++++++++++++++---------------- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/collect_stata/dataset.py b/collect_stata/dataset.py index 3a47eda..6b5576e 100644 --- a/collect_stata/dataset.py +++ b/collect_stata/dataset.py @@ -1,6 +1,8 @@ """Read stata files and write out json files. -The class Dataset contains two method from external files. +Read_stata and write_json are imported. +The class variables dataset and metadata are filled with the +data read in by read_stata and are used to write out the json file. Example: dataset = Dataset() @@ -17,7 +19,7 @@ class Dataset: """ Dataset allows the user to read, test and export data in different formats. - Args: + Attributes: dataset: Contains the data in tabular format. metadata: Contains the metadata in json format. """ diff --git a/collect_stata/read_stata.py b/collect_stata/read_stata.py index f7ca439..6722595 100644 --- a/collect_stata/read_stata.py +++ b/collect_stata/read_stata.py @@ -11,7 +11,7 @@ import pandas as pd -def cat_values(varscale: dict, data) -> list: +def cat_values(varscale: dict, data: pd.io.stata.StataReader) -> list: """Extract categorical metadata from stata files. Args: @@ -35,7 +35,7 @@ def cat_values(varscale: dict, data) -> list: return cat_list -def scale_var(varname: str, varscale: dict, datatable) -> str: +def scale_var(varname: str, varscale: dict, datatable: pd.DataFrame) -> str: """Rename types of variables to cat, number and string. 
Args: @@ -59,7 +59,7 @@ def scale_var(varname: str, varscale: dict, datatable) -> str: return var_type -def generate_tdp(data, stata_name: str): +def generate_tdp(data: pd.io.stata.StataReader, stata_name: str) -> (pd.DataFrame, dict): """Generate tabular data package file. Args: @@ -96,7 +96,7 @@ def generate_tdp(data, stata_name: str): return datatable, metadata -def read_stata(stata_name): +def read_stata(stata_name: str) -> (pd.DataFrame, dict): """Logging and reading stata files. Args: diff --git a/collect_stata/write_json.py b/collect_stata/write_json.py index 32c5f12..16e81a9 100644 --- a/collect_stata/write_json.py +++ b/collect_stata/write_json.py @@ -9,7 +9,9 @@ import pandas as pd -def sorting_dataframe(values, labels, missings, frequencies) -> dict: +def sorting_dataframe( + values: list, labels: list, missings: list, frequencies: list +) -> dict: """Function to sort values and labels and return sorted dict. Args: @@ -37,7 +39,7 @@ def sorting_dataframe(values, labels, missings, frequencies) -> dict: return dataframe.to_dict("list") -def uni_cat(elem, data): +def uni_cat(elem: dict, data: pd.DataFrame) -> dict: """Generate dict with frequencies and labels for categorical variables. Args: @@ -73,7 +75,7 @@ def uni_cat(elem, data): return sorting_dataframe(values, labels, missings, frequencies) -def uni_string(): +def uni_string() -> OrderedDict: """Generate dict with frequencies for nominal variables. Returns: @@ -84,7 +86,7 @@ def uni_string(): return OrderedDict(frequencies=[], labels=[], labels_de=[], missings=[], values=[]) -def uni_number(): +def uni_number() -> OrderedDict: """Generate dict with frequencies for numerical variables. Returns: @@ -95,7 +97,7 @@ def uni_number(): return OrderedDict(frequencies=[], labels=[], labels_de=[], missings=[], values=[]) -def stats_cat(elem, data): +def stats_cat(elem: dict, data: pd.DataFrame) -> dict: """Generate dict with statistics for categorical variables. 
Args: @@ -115,7 +117,7 @@ def stats_cat(elem, data): return {"valid": valid, "invalid": invalid} -def stats_string(elem, data): +def stats_string(elem: dict, data: pd.DataFrame) -> dict: """Generate dict with statistics for nominal variables. Args: @@ -134,7 +136,7 @@ def stats_string(elem, data): return {"valid": int(valid), "invalid": int(invalid)} -def stats_number(elem, data): +def stats_number(elem: dict, data: pd.DataFrame) -> dict: """Generate dict with statistics for numerical variables Args: @@ -142,7 +144,7 @@ def stats_number(elem, data): data (pandas.DataFrame): Datatable of imported data. Returns: - {...} (OrderedDict): Calculations for numerical variables. + {...} (dict): Calculations for numerical variables. """ data_withoutmissings = data[data[elem["name"]] >= 0][elem["name"]] @@ -166,7 +168,7 @@ def stats_number(elem, data): } -def uni_statistics(elem, data): +def uni_statistics(elem: dict, data: pd.DataFrame) -> dict: """Call function to generate statistics depending on the variable type. Args: @@ -174,7 +176,7 @@ def uni_statistics(elem, data): data (pandas.DataFrame): Datatable of imported data. Returns: - statistics (OrderedDict): + statistics (dict): Statistics for either categorical, nominal or numerical variables. """ @@ -196,7 +198,7 @@ def uni_statistics(elem, data): return statistics -def uni(elem, data): +def uni(elem: dict, data: pd.DataFrame) -> OrderedDict: """Call function to generate frequencies depending on the variable type. Args: @@ -221,7 +223,7 @@ def uni(elem, data): return statistics -def stat_dict(elem, data, metadata, study: str): +def stat_dict(elem: dict, data: pd.DataFrame, metadata: dict, study: str) -> OrderedDict: """Fill variables with metadata of the dataset. Args: @@ -231,7 +233,7 @@ def stat_dict(elem, data, metadata, study: str): study (str): Name of the study. Returns: - meta_dict (OrderedDict): Combine calculations and meta information. + meta_dict (OrderedDict): Combined calculations and meta information. 
""" scale = elem["type"][0:3] @@ -257,7 +259,7 @@ def stat_dict(elem, data, metadata, study: str): return meta_dict -def generate_stat(data, metadata, study: str): +def generate_stat(data: pd.DataFrame, metadata: dict, study: str) -> list: """Prepare statistics for every variable. Args: @@ -266,7 +268,7 @@ def generate_stat(data, metadata, study: str): study (str): Name of the study. Returns: - stat (OrderedDict): Combine calculations and meta information. + stat (list): Combined calculations and meta information. """ stat = list() @@ -282,7 +284,7 @@ def generate_stat(data, metadata, study: str): return stat -def write_json(data, metadata, filename, study=""): +def write_json(data: pd.DataFrame, metadata: dict, filename: str, study: str): """Main function to write json. Args: