From 3de86772fe23d2dceca46e92da8616af1be37269 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?=
Date: Tue, 23 May 2023 19:12:23 +0200
Subject: [PATCH] fix: more progress on `load_all_df`, added `derived.sleep`,
 finished Whoop sleep loading, fixed location loading, and many other fixes

---
 config.example.toml                    |  13 ++-
 src/quantifiedme/derived/all_df.py     | 137 +++++++++++++++++++++++--
 src/quantifiedme/derived/heartrate.py  |  38 ++++---
 src/quantifiedme/derived/screentime.py |  27 ++++-
 src/quantifiedme/derived/sleep.py      |  58 +++++++++++
 src/quantifiedme/load/fitbit.py        |   4 +
 src/quantifiedme/load/location.py      |  85 +++++++--------
 src/quantifiedme/load/oura.py          |  14 ++-
 src/quantifiedme/load/qslang.py        |  22 ++--
 src/quantifiedme/load/whoop.py         |  43 +++++++-
 tests/test_load.py                     |  34 ++++--
 11 files changed, 380 insertions(+), 95 deletions(-)
 create mode 100644 src/quantifiedme/derived/sleep.py

diff --git a/config.example.toml b/config.example.toml
index dad1e4d..8d3e5aa 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -8,18 +8,17 @@ name = "john"
 date_offset_hours = 5
 
 [data]
-categories= "categories.example.toml"
-habitbull = "~/Downloads/HabitBullData.csv"
-location = "~/location"
-oura = "~/Downloads/oura_2020-02-27T09-07-47.json"
+categories = "~/work/quantifiedme/quantifiedme/categories.example.toml"
+#habitbull = "~/Downloads/HabitBullData.csv"
+#location = "~/location"
+#oura = "~/Downloads/oura_2020-02-27T09-07-47.json"
 
 [data.activitywatch]
 port = 5666
 hostnames = ["fakedata"]
 
-[data.smartertime_buckets]
-example-hostname = '~/data/smartertime/smartertime_export_example-hostname_2020-01-01_bb7f26aa.awbucket.json'
-
+#[data.smartertime_buckets]
+#example-hostname = '~/data/smartertime/smartertime_export_example-hostname_2020-01-01_bb7f26aa.awbucket.json'
 
 [locations]
 [locations.gym]
diff --git a/src/quantifiedme/derived/all_df.py b/src/quantifiedme/derived/all_df.py
index 5be8e3f..a697e84 100644
--- a/src/quantifiedme/derived/all_df.py
+++ b/src/quantifiedme/derived/all_df.py
@@ -1,13 +1,136 @@
+import os
+import logging
+from typing import Literal, TypeAlias
+from datetime import date, datetime, timedelta, timezone
+
+import pandas as pd
+
 from aw_core import Event
-from typing import Literal
 
-from .heartrate import load_heartrate_daily_df
-from .screentime import load_category_df
+from ..load.location import load_daily_df as load_location_daily_df
+from ..load.qslang import load_daily_df as load_drugs_df
+
+from .heartrate import load_heartrate_summary_df
+from .screentime import load_screentime_cached, load_category_df
+from .sleep import load_sleep_df
+
+Sources = Literal["screentime", "heartrate", "drugs", "location", "sleep"]
 
-Sources = Literal["activitywatch", "heartrate"]
 
-def load_all_df(events: list[Event], ignore: list[Sources] = []):
-    df = load_category_df(events)
+def load_all_df(
+    fast=True, screentime_events: list[Event] | None = None, ignore: list[Sources] = []
+) -> pd.DataFrame:
+    """
+    Loads data from all sources into a single dataframe with one row per day.
+    Serves as a useful starting point for further analysis.
+    """
+    df = pd.DataFrame()
+    since = datetime.now(tz=timezone.utc) - timedelta(days=30 if fast else 2 * 365)
+
+    if "screentime" not in ignore:
+        print("Adding screentime")
+        if screentime_events is None:
+            screentime_events = load_screentime_cached(fast=fast, since=since)
+        df_time = load_category_df(screentime_events)
+        df_time = df_time[["Work", "Media", "ActivityWatch"]]
+        df = join(df, df_time.add_prefix("time:"))
+
     if "heartrate" not in ignore:
-        df = df.join(load_heartrate_daily_df(events))
+        print("Adding heartrate")
+        df_hr = load_heartrate_summary_df(freq="D")
+        # translate daily datetime column to a date column
+        df_hr.index = df_hr.index.date  # type: ignore
+        df = join(df, df_hr)
+
+    if "drugs" not in ignore:
+        print("Adding drugs")
+        # keep only columns starting with "tag"
+        df_drugs = load_drugs_df()
+        df_drugs = df_drugs[df_drugs.columns[df_drugs.columns.str.startswith("tag")]]
+        df = join(df, df_drugs)
+
+    if "location" not in ignore:
+        print("Adding location")
+        # TODO: add boolean for if sleeping together
+        df_location = load_location_daily_df()
+        df_location.index = df_location.index.date  # type: ignore
+        df = join(df, df_location.add_prefix("loc:"))
+
+    if "sleep" not in ignore:
+        print("Adding sleep")
+        df_sleep = load_sleep_df()
+        df = join(df, df_sleep.add_prefix("sleep:"))
+
+    # look for all-NA columns, emit a warning, and drop them
+    na_cols = df.columns[df.isna().all()]
+    if len(na_cols) > 0:
+        print(f"Warning: dropping all-NA columns: {list(na_cols)}")
+        df = df.drop(columns=na_cols)
     return df
+
+
+def join(df_target: pd.DataFrame, df_source: pd.DataFrame) -> pd.DataFrame:
+    if not df_target.empty:
+        check_new_data_in_range(df_source, df_target)
+        print(
+            f"Adding new columns: {list(df_source.columns.difference(df_target.columns))}"
+        )
+    return df_target.join(df_source) if not df_target.empty else df_source
+
+
+DateLike: TypeAlias = datetime | date | pd.Timestamp
+
+
+def datelike_to_date(d: DateLike) -> date:
+    if isinstance(d, (datetime, pd.Timestamp)):
+        return d.date()
+    elif isinstance(d, date):
+        return d
+    else:
+        raise ValueError(f"Invalid type for datelike: {type(d)}")
+
+
+def check_new_data_in_range(df_source: pd.DataFrame, df_target: pd.DataFrame) -> None:
+    # check that source data covers target data, or emit a warning
+    source_start = datelike_to_date(df_source.index.min())
+    source_end = datelike_to_date(df_source.index.max())
+    target_start = datelike_to_date(df_target.index.min())
+    target_end = datelike_to_date(df_target.index.max())
+
+    # check the worst case first: no overlap at all
+    if source_start > target_end or source_end < target_start:
+        print(
+            f"Warning: source data does not cover ANY of target data: ({source_start}/{source_end}) not in ({target_start}/{target_end})"
+        )
+    elif source_start > target_start:
+        print(
+            f"Warning: source data starts after target data (partial): {source_start} > {target_start}"
+        )
+    elif source_end < target_end:
+        print(
+            f"Warning: source data ends before target data (partial): {source_end} < {target_end}"
+        )
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    # print a summary of all data
+    df = load_all_df(fast=os.environ.get("FAST", "1") == "1")
+    print(df)
+    print(df.describe())
+
+    # check for missing data
+    df_days_na = df.isna().sum()
+    df_days_na = df_days_na[df_days_na > 0]
+    if len(df_days_na) > 0:
+        print(f"Missing data for {len(df_days_na)} out of {len(df.columns)} columns")
+        print(df_days_na)
+    print("Total days: ", len(df))
+
+    # keep days with full coverage
+    df = df.dropna()
+    print("Total days with full coverage: ", len(df))
+
+    print("Final dataframe:")
+    print(df)
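To make the join semantics above concrete, here is a minimal standalone sketch of what `join()` does when sources only partially overlap. The index values and column names are illustrative only, not from the repo:

import pandas as pd

df_a = pd.DataFrame(
    {"time:Work": [1.0, 2.0]},
    index=pd.to_datetime(["2023-05-01", "2023-05-02"]).date,
)
df_b = pd.DataFrame(
    {"hr_mean": [60.0]},
    index=pd.to_datetime(["2023-05-02"]).date,
)

# left join on the date index, as join() does when df_target is non-empty
df = df_a.join(df_b)
print(df)
# 2023-05-01 gets NaN for hr_mean; check_new_data_in_range() would warn
# that the source starts after the target (partial coverage).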
print("Total days with full coverage: ", len(df)) + + print("Final dataframe:") + print(df) diff --git a/src/quantifiedme/derived/heartrate.py b/src/quantifiedme/derived/heartrate.py index c1cb17a..64187a3 100644 --- a/src/quantifiedme/derived/heartrate.py +++ b/src/quantifiedme/derived/heartrate.py @@ -26,27 +26,37 @@ def load_heartrate_df() -> pd.DataFrame: return df -def load_heartrate_daily_df( - zones={"low": 100, "med": 140, "high": 160}, freq="D" +def load_heartrate_minutes_df(): + """We consider using minute-resolution a decent starting point for summary heartrate data. + + NOTE: ignores source, combines all sources into a single point per freq. + """ + df = load_heartrate_df().drop(columns=["source"]) + df = df.resample("1min").mean() + return df + + +def load_heartrate_summary_df( + zones={"resting": 0, "low": 100, "med": 140, "high": 160}, freq="D" ) -> pd.DataFrame: """ - Load heartrates, group into day, bin by zone, and return a dataframe. - - NOTE: Ignores source, combines all sources into a single point per freq. + Load heartrates, group into freq, bin by zone, and return a dataframe. """ - source_df = load_heartrate_df().drop(columns=["source"]) + source_df = load_heartrate_minutes_df() df = pd.DataFrame() - df["hr"] = source_df["hr"].groupby(pd.Grouper(freq=freq)).mean() - df["zone"] = pd.cut( - df["hr"], bins=[0, *zones.values(), 300], labels=["resting", *zones.keys()] + df["hr_mean"] = source_df["hr"].groupby(pd.Grouper(freq=freq)).mean() + + # compute time spent in each zone + df_zones = pd.cut( + source_df["hr"], bins=[*zones.values(), 300], labels=[*zones.keys()] ) + for zone in zones.keys(): + df[f"hr_duration_{zone}"] = df_zones[df_zones == zone].groupby( + pd.Grouper(freq=freq) + ).count() * pd.Timedelta(minutes=1) return df if __name__ == "__main__": - df = load_heartrate_df() - print(df) - print(df.describe()) - - df = load_heartrate_daily_df() + df = load_heartrate_summary_df() print(df) diff --git a/src/quantifiedme/derived/screentime.py b/src/quantifiedme/derived/screentime.py index 3c67048..ec5f5ea 100644 --- a/src/quantifiedme/derived/screentime.py +++ b/src/quantifiedme/derived/screentime.py @@ -1,3 +1,4 @@ +import pickle import logging from datetime import datetime, timezone, timedelta from pathlib import Path @@ -37,10 +38,10 @@ def _get_aw_client(testing: bool) -> ActivityWatchClient: def load_screentime( - since: datetime | None, - datasources: list[DatasourceType] | None, - hostnames: list[str] | None, - personal: bool, + since: datetime | None = None, + datasources: list[DatasourceType] | None = None, + hostnames: list[str] | None = None, + personal: bool = True, cache: bool = True, awc: ActivityWatchClient | None = None, ) -> list[Event]: @@ -122,6 +123,24 @@ def load_screentime( return events +def load_screentime_cached(*args, since: datetime | None = None, fast = False, **kwargs) -> list[Event]: + # returns screentime from picked cache produced by Dashboard.ipynb (or here) + path = Path(__file__).parent.parent.parent.parent / "notebooks" / ("events_fast.pickle" if fast else "events.pickle") + if path.exists(): + print(f"Loading from cache: {path}") + with open(path, "rb") as f: + events = pickle.load(f) + # if fast didn't get us enough data to satisfy the query, we need to load the rest + if fast and since and events[-1].timestamp < since: + print("Fast couldn't satisfy since, trying again without fast") + events = load_screentime_cached(fast=False, **kwargs) + # trim according to since + if since: + events = [e for e in events if e.timestamp >= 
diff --git a/src/quantifiedme/derived/screentime.py b/src/quantifiedme/derived/screentime.py
index 3c67048..ec5f5ea 100644
--- a/src/quantifiedme/derived/screentime.py
+++ b/src/quantifiedme/derived/screentime.py
@@ -1,3 +1,4 @@
+import pickle
 import logging
 from datetime import datetime, timezone, timedelta
 from pathlib import Path
@@ -37,10 +38,10 @@ def _get_aw_client(testing: bool) -> ActivityWatchClient:
 
 
 def load_screentime(
-    since: datetime | None,
-    datasources: list[DatasourceType] | None,
-    hostnames: list[str] | None,
-    personal: bool,
+    since: datetime | None = None,
+    datasources: list[DatasourceType] | None = None,
+    hostnames: list[str] | None = None,
+    personal: bool = True,
     cache: bool = True,
     awc: ActivityWatchClient | None = None,
 ) -> list[Event]:
@@ -122,6 +123,24 @@ def load_screentime(
     return events
 
 
+def load_screentime_cached(*args, since: datetime | None = None, fast=False, **kwargs) -> list[Event]:
+    # returns screentime from pickled cache produced by Dashboard.ipynb (or here)
+    path = Path(__file__).parent.parent.parent.parent / "notebooks" / ("events_fast.pickle" if fast else "events.pickle")
+    if path.exists():
+        print(f"Loading from cache: {path}")
+        with open(path, "rb") as f:
+            events = pickle.load(f)
+        # if the fast cache doesn't reach back far enough to satisfy `since`, load the full cache
+        if fast and since and events[0].timestamp > since:
+            print("Fast cache couldn't satisfy since, trying again without fast")
+            events = load_screentime_cached(since=since, fast=False, **kwargs)
+        # trim according to since
+        if since:
+            events = [e for e in events if e.timestamp >= since]
+        return events
+    else:
+        return load_screentime(*args, **kwargs)
+
 
 def _join_events(
     old_events: list[Event], new_events: list[Event], source: str
diff --git a/src/quantifiedme/derived/sleep.py b/src/quantifiedme/derived/sleep.py
new file mode 100644
index 0000000..5f7bafa
--- /dev/null
+++ b/src/quantifiedme/derived/sleep.py
@@ -0,0 +1,58 @@
+"""
+Aggregates sleep data from Fitbit, Oura, and Whoop into a single dataframe.
+"""
+
+from datetime import datetime, timedelta, timezone
+
+import pandas as pd
+
+from ..load.fitbit import load_sleep_df as load_fitbit_sleep_df
+from ..load.oura import load_sleep_df as load_oura_sleep_df
+from ..load.whoop import load_sleep_df as load_whoop_sleep_df
+
+
+def load_sleep_df(ignore: list[str] = []) -> pd.DataFrame:
+    """
+    Loads sleep data from Fitbit, Oura, and Whoop into a single dataframe.
+    """
+    df = pd.DataFrame()
+
+    # Fitbit (not yet implemented in load.fitbit)
+    #df = join(df, load_fitbit_sleep_df(), rsuffix="_fitbit")
+
+    # Oura
+    if "oura" not in ignore:
+        df_oura = load_oura_sleep_df()
+        df = join(df, df_oura.add_suffix("_oura"))
+
+    # Whoop
+    if "whoop" not in ignore:
+        df_whoop = load_whoop_sleep_df()
+        df = join(df, df_whoop.add_suffix("_whoop"))
+
+    # average the per-source columns into one column per key
+    keys = list(set(col.split("_")[0] for col in df.columns) & {"duration", "score"})
+    for key in keys:
+        subkeys = df.columns[df.columns.str.startswith(key)]
+        df[key] = df[subkeys].mean(axis=1)
+    df = df[keys]
+
+    return df
+
+
+def join(df_target, df_source, **kwargs) -> pd.DataFrame:
+    if df_target.empty:
+        return df_source
+    else:
+        return df_target.join(df_source, **kwargs)
+
+
+if __name__ == "__main__":
+    df = load_sleep_df()
+    print(df)
+    """
+    df["duration_whoop"].plot()
+    import matplotlib.pyplot as plt
+
+    plt.show()
+    """
\ No newline at end of file
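The aggregation step at the end of `load_sleep_df` averages the per-source columns into one column per key. A small sketch of the same pattern with made-up values (hours as floats for brevity; the real code works on Timedeltas, which `mean` also handles):

import pandas as pd

df = pd.DataFrame(
    {
        "duration_oura": [7.5, 8.0],
        "duration_whoop": [7.0, 8.5],
        "score_oura": [80, 90],
    }
)

keys = list(set(col.split("_")[0] for col in df.columns) & {"duration", "score"})
for key in keys:
    subkeys = df.columns[df.columns.str.startswith(key)]
    df[key] = df[subkeys].mean(axis=1)
print(df[keys])  # duration is the mean of both sources; score falls back to Oura alone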
diff --git a/src/quantifiedme/load/fitbit.py b/src/quantifiedme/load/fitbit.py
index c5a79d7..cf1e400 100644
--- a/src/quantifiedme/load/fitbit.py
+++ b/src/quantifiedme/load/fitbit.py
@@ -6,6 +6,10 @@
 import pandas as pd
 
 
+def load_sleep_df() -> pd.DataFrame:
+    raise NotImplementedError
+
+
 def _load_heartrate_file(filepath):
     # print(f"Loading {filepath}...")
     # json format is {"dateTime": "2020-01-01", "value": {"bpm": 60, "confidence": 0}}
diff --git a/src/quantifiedme/load/location.py b/src/quantifiedme/load/location.py
index 76e14bf..2d6a568 100644
--- a/src/quantifiedme/load/location.py
+++ b/src/quantifiedme/load/location.py
@@ -15,6 +15,42 @@
 memory = joblib.Memory(".cache")
 
 
+@memory.cache
+def load_all_dfs() -> dict[str, pd.DataFrame]:
+    dfs = {}
+    path = str(Path(load_config()["data"]["location"]).expanduser())
+    for filepath in glob.glob(path + "/*.json"):
+        name = Path(filepath).name.replace(".json", "")
+        df = location_history_to_df(filepath)
+        dfs[name] = df
+    return dfs
+
+
+def load_daily_df(whitelist: list[str] | None = None) -> pd.DataFrame:
+    """Returns a daily dataframe with how many hours were spent at each location or with each person."""
+    config = load_config()
+    me = config["me"]["name"]
+    locations = config["locations"]
+
+    df = pd.DataFrame(index=pd.DatetimeIndex([]))
+    dfs = load_all_dfs()
+
+    for location in (whitelist or [*locations.keys(), *dfs.keys()]):
+        if location == me:
+            continue
+        if location in locations:
+            loc = locations[location]
+            df[location] = _proximity_to_location(
+                dfs[me], (loc["lat"], loc["long"]), threshold_radius=loc["accuracy"]
+            )
+        elif location in dfs:
+            df[location] = colocate(dfs[me], dfs[location])
+        else:
+            raise ValueError(f"Unknown location {location}")
+
+    return df
+
+
 def location_history_to_df(fn, use_inferred_loc=False) -> pd.DataFrame:
     print(f"Loading location data from {fn}")
     with open(fn) as f:
@@ -70,18 +106,7 @@ def location_history_to_df(fn, use_inferred_loc=False) -> pd.DataFrame:
     return df
 
 
-@memory.cache
-def load_all_dfs() -> dict[str, pd.DataFrame]:
-    dfs = {}
-    path = str(Path(load_config()["data"]["location"]).expanduser())
-    for filepath in glob.glob(path + "/*.json"):
-        name = Path(filepath).name.replace(".json", "")
-        df = location_history_to_df(filepath)
-        dfs[name] = df
-    return dfs
-
-
-def colocate(df_person1, df_person2, verbose=False):
+def colocate(df_person1, df_person2, verbose=False) -> pd.DataFrame:
     df = df_person1.join(df_person2, lsuffix="_a", rsuffix="_b")
     df["dist"] = (
         (df["lat_a"] - df["lat_b"]) ** 2 + (df["long_a"] - df["long_b"]) ** 2
@@ -131,48 +156,24 @@ def plot_df_duration(df, title, save: str | None = None) -> None:
         plt.show()
 
 
-def main_plot(dfs, me, other, save=None, invert=False):
-    coords = load_config()["locations"]
-
-    df = dfs[me]
-
-    if other in coords:
-        loc = coords[other]
-        df = _proximity_to_location(
-            df, (loc["lat"], loc["long"]), threshold_radius=loc["accuracy"]
-        )
-    else:
-        # df = colocate(dfs[me], dfs[args.other], start=args.start)
-        df_other = dfs[other]
-        df = colocate(df, df_other)
-
-    if invert:
-        df = 24 - df
-
-    # print(df)
-    plot_df_duration(df, other, save)
-
-
 @click.command()
 @click.argument("name")
 @click.option("--start", default=None, type=click.DateTime(), help="query from date")
 @click.option("--save", is_flag=True)
-@click.option("--me", default=None)
 @click.option("--invert", is_flag=True)
-def locate(
-    name: str, start: datetime, save: bool, me: str | None, invert: bool
-) -> None:
+def locate(name: str, start: datetime, save: bool, invert: bool) -> None:
     """Plot of when your location was proximate to some location NAME"""
-    if me is None:
-        me = load_config()["me"]["name"]
-
-    dfs = load_all_dfs()
-    df = dfs[me]
-
+    df = load_daily_df()
     if start:
         df = df[start < df.index]
 
-    main_plot(dfs, me, name, invert=invert)
+    if invert:
+        df = 24 - df
+
+    # print(df)
+    plot_df_duration(df, name, "location.png" if save else None)
 
 
 if __name__ == "__main__":
diff --git a/src/quantifiedme/load/oura.py b/src/quantifiedme/load/oura.py
index c58d1f7..bc7bf06 100644
--- a/src/quantifiedme/load/oura.py
+++ b/src/quantifiedme/load/oura.py
@@ -22,8 +22,18 @@ def load_sleep_df() -> pd.DataFrame:
     data = load_data()
     df = pd.DataFrame(data["sleep"])
     df["summary_date"] = pd.to_datetime(df["summary_date"])
-    df = df.set_index("summary_date")
-    return df
+    df = df.rename(columns={"summary_date": "timestamp"})
+    df = df.set_index("timestamp")
+    df["bedtime_start"] = pd.to_datetime(df["bedtime_start"])
+    df["bedtime_end"] = pd.to_datetime(df["bedtime_end"])
+    df = df.rename(
+        columns={
+            "bedtime_start": "start",
+            "bedtime_end": "end",
+        }
+    )
+    df["duration"] = df["end"] - df["start"]
+    return df[["start", "end", "duration", "score"]]
 
 
 def load_readiness_df() -> pd.DataFrame:
diff --git a/src/quantifiedme/load/qslang.py b/src/quantifiedme/load/qslang.py
index d687aa1..13890b7 100644
--- a/src/quantifiedme/load/qslang.py
+++ b/src/quantifiedme/load/qslang.py
@@ -139,19 +139,27 @@ def to_series(
     return series
 
 
-def to_df_daily(events: list[Event]):
+def load_daily_df(events: list[Event] | None = None) -> pd.DataFrame:
     """Returns a daily dataframe"""
+    if events is None:
+        events = load_events()
     df_src = load_df(events)
     df = pd.DataFrame()
+
     tags = {tag for e in events for tag in e.data.get("tags", [])}
-    print(tags)
-    for tag in tags:
-        df[f"tag:{tag}"] = to_series(df_src, tag=tag)
+    series_tags = {
+        f"tag:{tag}": to_series(df_src, tag=tag).replace(np.nan, 0)
+        for tag in tags
+    }
 
     substances = {s for s in df_src["substance"] if s}
-    for subst in substances:
-        colname = subst.lower().replace("-", "").replace(" ", "")
-        df[colname] = to_series(df_src, substance=subst)
+    series_subst = {
+        subst.lower().replace("-", "").replace(" ", ""): to_series(df_src, substance=subst)
+        for subst in substances
+    }
+    df = pd.concat([df, pd.DataFrame(series_tags), pd.DataFrame(series_subst)], axis=1)
+
+    return df
 
 
 def _missing_dates():
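The rewritten `load_daily_df` builds dicts of series and concatenates once instead of inserting columns one at a time. A toy example of the pattern, with synthetic series and names:

import numpy as np
import pandas as pd

idx = pd.to_datetime(["2023-05-01", "2023-05-02"])
series_tags = {"tag:stimulant": pd.Series([1, 0], index=idx)}
series_subst = {"caffeine": pd.Series([100e-3, np.nan], index=idx)}

# build all columns first, concatenate once
df = pd.concat([pd.DataFrame(series_tags), pd.DataFrame(series_subst)], axis=1)
print(df)

Concatenating once avoids the PerformanceWarning pandas emits about highly fragmented frames when many columns are inserted individually.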
diff --git a/src/quantifiedme/load/whoop.py b/src/quantifiedme/load/whoop.py
index a10a5a0..9f15b8d 100644
--- a/src/quantifiedme/load/whoop.py
+++ b/src/quantifiedme/load/whoop.py
@@ -5,6 +5,7 @@
 """
 
 from pathlib import Path
+from datetime import timedelta
 
 import pandas as pd
 
@@ -37,10 +38,48 @@ def load_heartrate_df() -> pd.DataFrame:
     return df
 
 
-def test_load_whoop():
+def load_sleep_df() -> pd.DataFrame:
+    whoop_export_dir = load_config()["data"]["whoop"]
+    filename = Path(whoop_export_dir) / "Health" / "sleeps.csv"
+    df = pd.read_csv(filename.expanduser(), parse_dates=True)
+    import json
+
+    # df columns are: "created_at","updated_at","activity_id","score","quality_duration","latency","max_heart_rate","average_heart_rate","debt_pre","debt_post","need_from_strain","sleep_need","habitual_sleep_need","disturbances","time_in_bed","light_sleep_duration","slow_wave_sleep_duration","rem_sleep_duration","cycles_count","wake_duration","arousal_time","no_data_duration","in_sleep_efficiency","credit_from_naps","hr_baseline","respiratory_rate","sleep_consistency","algo_version","projected_score","projected_sleep","optimal_sleep_times","kilojoules","user_id","during","timezone_offset","survey_response_id","percent_recorded","auto_detected","state","responded","team_act_id","source","is_significant","is_normal","is_nap"
+    # we are interested in the "during" column, which is a JSON string of a 2-tuple with isoformat timestamps
+    def parse_during(x):
+        try:
+            # "during" is a half-open interval; replacing ")" with "]" yields valid JSON
+            return json.loads(x.replace(")", "]"))
+        except Exception:
+            print(x)
+            raise
+
+    df["start"] = pd.to_datetime(df["during"].apply(lambda x: parse_during(x)[0]))
+    df["end"] = pd.to_datetime(df["during"].apply(lambda x: parse_during(x)[1]))
+    df["duration"] = df["end"] - df["start"]
+
+    # keep only the columns we want
+    df = df[["start", "end", "duration", "score"]]
+
+    # set index and sort; shift start times back so sleeps beginning after
+    # midnight are attributed to the previous day
+    offset = timedelta(hours=8)
+    df = df.set_index(pd.DatetimeIndex(df["start"] - offset).date)  # type: ignore
+    df = df.sort_index()
+
+    # rename index to timestamp
+    df.index.name = "timestamp"
+
+    return df
+
+
+def test_load_whoop_heartrate():
     df = load_heartrate_df()
     print(df.head())
 
 
+def test_load_whoop_sleep():
+    df = load_sleep_df()
+    print(df.head())
+
+
 if __name__ == "__main__":
-    test_load_whoop()
+    test_load_whoop_sleep()
+    test_load_whoop_heartrate()
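The `during` parsing above relies on the column being a JSON-style half-open interval, as the in-code comment documents. A minimal sketch with a made-up sample value:

import json
import pandas as pd

during = '["2023-05-01T23:10:00+00:00","2023-05-02T07:05:00+00:00")'
start_s, end_s = json.loads(during.replace(")", "]"))
start, end = pd.to_datetime(start_s), pd.to_datetime(end_s)
print(end - start)  # 0 days 07:55:00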
diff --git a/tests/test_load.py b/tests/test_load.py
index a619219..45dd492 100644
--- a/tests/test_load.py
+++ b/tests/test_load.py
@@ -22,13 +22,17 @@
 from quantifiedme.derived.all_df import load_all_df
 from quantifiedme.derived.screentime import classify
 
+from qslang import Event as QSEvent
+
 now = datetime.now(tz=timezone.utc)
 
+
 @pytest.fixture(scope="session", autouse=True)
 def setup():
-    pd.set_option('display.max_colwidth', None)
-    pd.set_option('display.max_columns', None)
-    #pd.set_option('display.max_rows', None)
+    pd.set_option("display.max_colwidth", None)
+    pd.set_option("display.max_columns", None)
+    # pd.set_option('display.max_rows', None)
+
 
 def load_example_events() -> list[Event]:
     events_cached_fast = Path("notebooks/events_fast.pickle")
@@ -51,11 +55,10 @@ def test_load_example_events():
 
 def test_load_all_df():
     events = load_example_events()
-    df = load_all_df(events, ignore=["heartrate"])
+    df = load_all_df(screentime_events=events, ignore=["heartrate", "location", "sleep"])
     print(df)
 
-
 @pytest.mark.skipif(not has_config(), reason="no config available for test data")
 def test_load_qslang():
     df = load_df()
@@ -76,7 +79,9 @@ def test_load_qslang():
     assert (10e-6 <= series_nonzero).all()
 
     # Less than 500mg
-    assert (series_nonzero <= 500e-6).all(), series_nonzero[series_nonzero >= 500e-6]
+    assert (series_nonzero <= 500e-6).all(), series_nonzero[
+        series_nonzero >= 500e-6
+    ]
 
     for subst in ["Phenibut"]:
         series = to_series(df, substance=subst)
@@ -95,11 +100,20 @@ def test_load_qslang():
 
 
 def test_qslang_unknown_dose():
-    from qslang import Event as QSEvent
     events = [
-        QSEvent(timestamp=now, type="dose", data={"substance": "Caffeine", "amount": "?g"}),
-        QSEvent(timestamp=now, type="dose", data={"substance": "Caffeine", "amount": "100mg"}),
-        QSEvent(timestamp=now, type="dose", data={"substance": "Caffeine", "amount": "200mg"}),
+        QSEvent(
+            timestamp=now, type="dose", data={"substance": "Caffeine", "amount": "?g"}
+        ),
+        QSEvent(
+            timestamp=now,
+            type="dose",
+            data={"substance": "Caffeine", "amount": "100mg"},
+        ),
+        QSEvent(
+            timestamp=now,
+            type="dose",
+            data={"substance": "Caffeine", "amount": "200mg"},
+        ),
     ]
     df = load_df(events)
     assert 0.00015 == df.iloc[0]["dose"]