From ea2a73e03930d2c070402a592fc24436403b0ba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Wed, 8 Nov 2023 12:39:54 +0100 Subject: [PATCH] fix: fixed pandas typing --- src/quantifiedme/load/eeg.py | 6 +++--- src/quantifiedme/load/google_activity.py | 12 ++++++------ src/quantifiedme/load/location.py | 11 ++++++----- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/quantifiedme/load/eeg.py b/src/quantifiedme/load/eeg.py index e8fc65d..4f16a71 100644 --- a/src/quantifiedme/load/eeg.py +++ b/src/quantifiedme/load/eeg.py @@ -6,8 +6,8 @@ We will also load signal quality data, to help us filter out bad data. """ -from pathlib import Path from collections import defaultdict +from pathlib import Path import pandas as pd @@ -33,7 +33,7 @@ def load_data(): for timestamp, files in filesets.items(): result = load_session(files) # pprint(result) - df = df.append(result, ignore_index=True) + df = pd.concat([df, pd.DataFrame(result)], ignore_index=True) return df @@ -58,7 +58,7 @@ def load_session(files: dict) -> dict: # we have to deal with the channels, such as CP3_alpha, CP3_beta, etc. # for now, we will just average them all together channels, bands = zip(*[c.split("_") for c in df_pbb.columns]) - channels, bands = list(set(channels)), list(set(bands)) + channels, bands = tuple(set(channels)), tuple(set(bands)) df = pd.DataFrame(index=df_pbb.index) for band in bands: diff --git a/src/quantifiedme/load/google_activity.py b/src/quantifiedme/load/google_activity.py index 9ab0a65..e0df510 100644 --- a/src/quantifiedme/load/google_activity.py +++ b/src/quantifiedme/load/google_activity.py @@ -1,6 +1,8 @@ import json -import pandas as pd +from pathlib import Path +import matplotlib.pyplot as plt +import pandas as pd from quantifiedme.config import load_config @@ -11,11 +13,11 @@ def load_activity_history() -> pd.DataFrame: Specifically the search history, for now. """ config = load_config() - activity_file = config["data"]["google_takeout"]["activity"] + activity_file = Path(config["data"]["google_takeout"]["activity"]).expanduser() with open(activity_file) as f: activity = pd.DataFrame(json.load(f)) - activity["time"] = pd.to_datetime(activity["time"]) + activity["time"] = pd.to_datetime(activity["time"], format="ISO8601") # set the index to the time activity = activity.set_index("time") return activity @@ -28,8 +30,6 @@ def load_activity_history() -> pd.DataFrame: print(activity[:10][["title", "titleUrl"]]) print(f"Length: {len(activity)}") - import matplotlib.pyplot as plt - # plot a histogram with count of serches by hour of day # activity["hour"] = activity.index.hour # activity["hour"].hist(bins=24) @@ -41,6 +41,6 @@ def load_activity_history() -> pd.DataFrame: # plt.show() # now plot by year-month - activity["year-month"] = activity.index.strftime("%Y-%m") + activity["year-month"] = activity.index.map(lambda dt: dt.strftime("%Y-%m")) activity["year-month"].hist(bins=12 * 11) plt.show() diff --git a/src/quantifiedme/load/location.py b/src/quantifiedme/load/location.py index 8e7f012..1b7f28e 100644 --- a/src/quantifiedme/load/location.py +++ b/src/quantifiedme/load/location.py @@ -4,6 +4,7 @@ from pathlib import Path import click +import numpy as np import pandas as pd from matplotlib import pyplot as plt from tqdm import tqdm @@ -126,12 +127,12 @@ def _proximity_to_location( lat, lon = loc dist = ((df["lat"] - lat) ** 2 + (df["long"] - lon) ** 2) ** 0.5 dist = dist[dist < threshold_radius] - dist = pd.DataFrame(dist, columns=["dist"]) - dist["duration"] = 10 / 60 - dist = dist.resample("24H").apply({"duration": "sum"}) + df_dist = pd.DataFrame(dist, columns=["dist"]) + df_dist["duration"] = 10 / 60 + df_dist = df_dist.resample("24H").apply({"duration": np.sum}) # type: ignore if verbose: - print(dist) - return dist["duration"] + print(df_dist) + return df_dist["duration"] def plot_df_duration(df, title, save: str | None = None) -> None: