Skip to content

Commit

Permalink
fix issues
Browse files Browse the repository at this point in the history
  • Loading branch information
mpahl committed Sep 12, 2019
1 parent b2c0905 commit 86fcdc1
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 22 deletions.
6 changes: 4 additions & 2 deletions collect_stata/dataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Read stata files and write out json files.
The class Dataset contains two methods from external files.
The functions read_stata and write_json are imported.
The class variables dataset and metadata are filled with the
data read in by read_stata and are used to write out the json file.
Example:
dataset = Dataset()
Expand All @@ -17,7 +19,7 @@ class Dataset:
"""
Dataset allows the user to read, test and export data in different formats.
Args:
Attributes:
dataset: Contains the data in tabular format.
metadata: Contains the metadata in json format.
"""
Expand Down
8 changes: 4 additions & 4 deletions collect_stata/read_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import pandas as pd


def cat_values(varscale: dict, data) -> list:
def cat_values(varscale: dict, data: pd.io.stata.StataReader) -> list:
"""Extract categorical metadata from stata files.
Args:
Expand All @@ -35,7 +35,7 @@ def cat_values(varscale: dict, data) -> list:
return cat_list


def scale_var(varname: str, varscale: dict, datatable) -> str:
def scale_var(varname: str, varscale: dict, datatable: pd.DataFrame) -> str:
"""Rename types of variables to cat, number and string.
Args:
Expand All @@ -59,7 +59,7 @@ def scale_var(varname: str, varscale: dict, datatable) -> str:
return var_type


def generate_tdp(data, stata_name: str):
def generate_tdp(data: pd.io.stata.StataReader, stata_name: str) -> (pd.DataFrame, dict):
"""Generate tabular data package file.
Args:
Expand Down Expand Up @@ -96,7 +96,7 @@ def generate_tdp(data, stata_name: str):
return datatable, metadata


def read_stata(stata_name):
def read_stata(stata_name: str) -> (pd.DataFrame, dict):
"""Logging and reading stata files.
Args:
Expand Down
34 changes: 18 additions & 16 deletions collect_stata/write_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import pandas as pd


def sorting_dataframe(values, labels, missings, frequencies) -> dict:
def sorting_dataframe(
values: list, labels: list, missings: list, frequencies: list
) -> dict:
"""Function to sort values and labels and return sorted dict.
Args:
Expand Down Expand Up @@ -37,7 +39,7 @@ def sorting_dataframe(values, labels, missings, frequencies) -> dict:
return dataframe.to_dict("list")


def uni_cat(elem, data):
def uni_cat(elem: dict, data: pd.DataFrame) -> dict:
"""Generate dict with frequencies and labels for categorical variables.
Args:
Expand Down Expand Up @@ -73,7 +75,7 @@ def uni_cat(elem, data):
return sorting_dataframe(values, labels, missings, frequencies)


def uni_string():
def uni_string() -> OrderedDict:
"""Generate dict with frequencies for nominal variables.
Returns:
Expand All @@ -84,7 +86,7 @@ def uni_string():
return OrderedDict(frequencies=[], labels=[], labels_de=[], missings=[], values=[])


def uni_number():
def uni_number() -> OrderedDict:
"""Generate dict with frequencies for numerical variables.
Returns:
Expand All @@ -95,7 +97,7 @@ def uni_number():
return OrderedDict(frequencies=[], labels=[], labels_de=[], missings=[], values=[])


def stats_cat(elem, data):
def stats_cat(elem: dict, data: pd.DataFrame) -> dict:
"""Generate dict with statistics for categorical variables.
Args:
Expand All @@ -115,7 +117,7 @@ def stats_cat(elem, data):
return {"valid": valid, "invalid": invalid}


def stats_string(elem, data):
def stats_string(elem: dict, data: pd.DataFrame) -> dict:
"""Generate dict with statistics for nominal variables.
Args:
Expand All @@ -134,15 +136,15 @@ def stats_string(elem, data):
return {"valid": int(valid), "invalid": int(invalid)}


def stats_number(elem, data):
def stats_number(elem: dict, data: pd.DataFrame) -> dict:
"""Generate dict with statistics for numerical variables.
Args:
elem (dict): Name, label and type of numerical variables.
data (pandas.DataFrame): Datatable of imported data.
Returns:
{...} (OrderedDict): Calculations for numerical variables.
{...} (dict): Calculations for numerical variables.
"""

data_withoutmissings = data[data[elem["name"]] >= 0][elem["name"]]
Expand All @@ -166,15 +168,15 @@ def stats_number(elem, data):
}


def uni_statistics(elem, data):
def uni_statistics(elem: dict, data: pd.DataFrame) -> dict:
"""Call function to generate statistics depending on the variable type.
Args:
elem (dict): Contains information of one variable.
data (pandas.DataFrame): Datatable of imported data.
Returns:
statistics (OrderedDict):
statistics (dict):
Statistics for either categorical, nominal or numerical variables.
"""

Expand All @@ -196,7 +198,7 @@ def uni_statistics(elem, data):
return statistics


def uni(elem, data):
def uni(elem: dict, data: pd.DataFrame) -> OrderedDict:
"""Call function to generate frequencies depending on the variable type.
Args:
Expand All @@ -221,7 +223,7 @@ def uni(elem, data):
return statistics


def stat_dict(elem, data, metadata, study: str):
def stat_dict(elem: dict, data: pd.DataFrame, metadata: dict, study: str) -> OrderedDict:
"""Fill variables with metadata of the dataset.
Args:
Expand All @@ -231,7 +233,7 @@ def stat_dict(elem, data, metadata, study: str):
study (str): Name of the study.
Returns:
meta_dict (OrderedDict): Combine calculations and meta information.
meta_dict (OrderedDict): Combined calculations and meta information.
"""

scale = elem["type"][0:3]
Expand All @@ -257,7 +259,7 @@ def stat_dict(elem, data, metadata, study: str):
return meta_dict


def generate_stat(data, metadata, study: str):
def generate_stat(data: pd.DataFrame, metadata: dict, study: str) -> list:
"""Prepare statistics for every variable.
Args:
Expand All @@ -266,7 +268,7 @@ def generate_stat(data, metadata, study: str):
study (str): Name of the study.
Returns:
stat (OrderedDict): Combine calculations and meta information.
stat (list): Combined calculations and meta information.
"""

stat = list()
Expand All @@ -282,7 +284,7 @@ def generate_stat(data, metadata, study: str):
return stat


def write_json(data, metadata, filename, study=""):
def write_json(data: pd.DataFrame, metadata: dict, filename: str, study: str):
"""Main function to write json.
Args:
Expand Down

0 comments on commit 86fcdc1

Please sign in to comment.