From 4942264c0bc14a0b5655cc60f785b7c74ed5924c Mon Sep 17 00:00:00 2001
From: Torben <59419684+entorb@users.noreply.github.com>
Date: Sun, 15 Dec 2024 11:42:58 +0100
Subject: [PATCH] Streamlit improved

---
 README.md                                 |   4 +-
 cspell-words.txt                          |   1 +
 cspell.config.yaml                        |   3 +-
 src/app.py                                |  89 +++++++++---
 src/{api-v1 => deprecated}/1fetch_v1.py   |   0
 src/{api-v1 => deprecated}/2analyze_v1.py |   0
 src/{fetch_v2.py => fetch.py}             |  10 +-
 src/{analyze_v2.py => report.py}          | 159 +++++++++++++---------
 8 files changed, 172 insertions(+), 94 deletions(-)
 rename src/{api-v1 => deprecated}/1fetch_v1.py (100%)
 rename src/{api-v1 => deprecated}/2analyze_v1.py (100%)
 rename src/{fetch_v2.py => fetch.py} (81%)
 rename src/{analyze_v2.py => report.py} (71%)

diff --git a/README.md b/README.md
index 188251c..d4a02cf 100644
--- a/README.md
+++ b/README.md
@@ -8,8 +8,8 @@
 ## Run
 
 * set the start date in [config.toml](src/config.toml)
-* [fetch_v2.py](src/fetch_v2.py): download your Oura data to [data/](data/)
-* [analyze_v2.py](src/analyze_v2.py): analyze your Oura data
+* [fetch.py](src/fetch.py): download your Oura data to [data/](data/)
+* [report.py](src/report.py): analyze your Oura data
 * [app.py](src/app.py): Streamlit visualization
 
 ## Results
diff --git a/cspell-words.txt b/cspell-words.txt
index 6add028..10afd4c 100644
--- a/cspell-words.txt
+++ b/cspell-words.txt
@@ -2,6 +2,7 @@
 analize
 autoflake
 autoupdate
+DataFrame
 dayofweek
 figsize
 hypnogram
diff --git a/cspell.config.yaml b/cspell.config.yaml
index 2bd4d5e..ef1fb54 100644
--- a/cspell.config.yaml
+++ b/cspell.config.yaml
@@ -13,6 +13,5 @@ ignorePaths:
   - ".*"
   - "dist"
   - "build"
-  - "node_modules"
   - "requirements.txt"
-  - "api-v1"
+  - "src/deprecated"
diff --git a/src/app.py b/src/app.py
index c441dbc..c62a812 100644
--- a/src/app.py
+++ b/src/app.py
@@ -7,48 +7,70 @@
 from pathlib import Path
 
 import altair as alt
+import pandas as pd
 import streamlit as st
 
-from analyze_v2 import prep_data_sleep
+from report import prep_data_sleep
 
 st.set_page_config(page_title="Oura Sleep Report", page_icon=None, layout="wide")
 st.title("Oura Sleep Report")
 
 with (Path("src/config.toml")).open("rb") as f:
     config = tomllib.load(f)
-date_start_default = (dt.datetime.now(tz=dt.UTC) - dt.timedelta(days=21)).date()
+date_start_default = (dt.datetime.now(tz=dt.UTC) - dt.timedelta(weeks=4)).date()
 
 
 df = prep_data_sleep()
 df = df.reset_index()
-df = df.drop(columns=["id"])
+df = df.drop(columns=["bedtime_end", "bedtime_start", "sleep_phase_5_min"])
 df = df.sort_values("day", ascending=False)
 
 
-col1, col2, col3 = st.columns(3)
+col1, col2, col3, col4 = st.columns(4)
 
 sel_start_date = col1.date_input(
     "Start", value=date_start_default, format=config["date_format"]
 )
 if sel_start_date:
-    df = df.query(f"day >= '{sel_start_date}'")
+    df = df.query("day >= @sel_start_date")
+
+st.columns(1)
+
+d = {}
+for prop in ("score", "start of sleep", "end of sleep", "HR average", "HRV average"):
+    d[prop] = {}
+    d[prop]["week even"] = df.query("week_even == True")[prop].mean().round(1)  # type: ignore
+    d[prop]["week uneven"] = df.query("week_even == False")[prop].mean().round(1)  # type: ignore
+    lst = [0, 1, 2, 3, 4]
+    d[prop]["weekdays"] = df.query("dayofweek == @lst")[prop].mean().round(1)  # type: ignore
+    lst = [5, 6]
+    d[prop]["weekend"] = df.query("dayofweek == @lst")[prop].mean().round(1)  # type: ignore
+# st.write(d)
+df2 = pd.DataFrame.from_dict(d)
+st.write(df2.transpose())
 
-sel_weekend = col2.selectbox("Week or Weekend", options=("Su-Th", "Fr-Sa"), index=None)
+
+col1, col2, col3 = st.columns(3)
+sel_week_even = col1.selectbox("Week even.", options=("Even", "Uneven"), index=None)
+if sel_week_even:
+    if sel_week_even == "Even":
+        df = df.query("week_even == True")
+    elif sel_week_even == "Uneven":
+        df = df.query("week_even == False")
+
+sel_weekend = col2.selectbox("Weekend", options=("Weekday", "Weekend"), index=None)
 if sel_weekend:
-    if sel_weekend == "Su-Th":
+    if sel_weekend == "Weekday":  # Su-Th
         lst = [0, 1, 2, 3, 4]
-    elif sel_weekend == "Fr-Sa":
+    elif sel_weekend == "Weekend":  # Fr-Sa
         lst = [5, 6]
-
     df = df.query("dayofweek == @lst")
 
 
-# 0 -> Sunday
-sel_weekday = col3.selectbox(
-    "Weekday", options=("0", "1", "2", "3", "4", "5", "6"), index=None
-)
+day_map = {"Su": 0, "Mo": 1, "Tu": 2, "Wed": 3, "Th": 4, "Fr": 5, "Sa": 6}
+sel_weekday = col3.selectbox("Weekday", options=day_map.keys(), index=None)
 if sel_weekday:
-    df = df.query(f"dayofweek == '{sel_weekday}'")
+    df = df.query(f"dayofweek == {day_map[sel_weekday]}")
 
 
 # c = st.line_chart(data=df, x="day", y="duration of sleep", x_label=None)
@@ -65,17 +87,20 @@
 base = alt.Chart(df).encode(alt.X("day", title=None))
 
 
-for prop in ("duration of sleep", "HR average", "HRV average"):
-    c = base.mark_line().encode(
-        y=alt.Y(prop),
+for prop in ("score", "start of sleep", "sleep total h", "HR average", "HRV average"):
+    ymin = df[prop].min()
+    ymax = df[prop].max()
+    c = base.mark_point(size=100).encode(
+        y=alt.Y(prop, scale=alt.Scale(domain=[ymin, ymax])),
     )
     cr = c.transform_regression("day", prop).mark_line(color="grey", strokeDash=[4, 4])
-    layers = alt.layer(c, cr)  # .resolve_scale(y="independent")
+    cl = c.mark_line()
+    layers = alt.layer(c, cr, cl)  # .resolve_scale(y="independent")
     st.altair_chart(layers, use_container_width=True)  # type: ignore
 
 
 c1 = base.mark_line().encode(
-    y=alt.Y("duration of sleep"),
+    y=alt.Y("sleep total h"),
 )
 c2 = base.mark_line(color="red").encode(
     y=alt.Y("HRV average"),
@@ -84,7 +109,7 @@
 st.altair_chart(layers, use_container_width=True)  # type: ignore
 
 c1 = base.mark_line().encode(
-    y=alt.Y("duration of sleep"),
+    y=alt.Y("sleep total h"),
 )
 c2 = base.mark_line(color="red").encode(
     y=alt.Y("HR average"),
@@ -94,6 +119,30 @@
 
 
 st.columns(1)
+
+st.subheader("relevant data")
+st.dataframe(
+    data=df,
+    hide_index=True,
+    column_config={"day": st.column_config.DateColumn(format=config["date_format"])},
+    column_order=[
+        "day",
+        "start of sleep",
+        "end of sleep",
+        "score",
+        "HR mini",
+        "HR average",
+        "HRV average",
+        "time in bed h",
+        "sleep total h",
+        "sleep rem h",
+        "sleep deep h",
+        "temperature_deviation",
+    ],
+)
+
+
+st.subheader("all data")
 st.dataframe(
     data=df,
     hide_index=True,
diff --git a/src/api-v1/1fetch_v1.py b/src/deprecated/1fetch_v1.py
similarity index 100%
rename from src/api-v1/1fetch_v1.py
rename to src/deprecated/1fetch_v1.py
diff --git a/src/api-v1/2analyze_v1.py b/src/deprecated/2analyze_v1.py
similarity index 100%
rename from src/api-v1/2analyze_v1.py
rename to src/deprecated/2analyze_v1.py
diff --git a/src/fetch_v2.py b/src/fetch.py
similarity index 81%
rename from src/fetch_v2.py
rename to src/fetch.py
index 14fb8de..ab7f7b8 100644
--- a/src/fetch_v2.py
+++ b/src/fetch.py
@@ -33,8 +33,6 @@ def fetch_data_summaries() -> None:
     """
     for data_summary_set in ("sleep",):  # , "activity", "readiness"
         print(f"fetching {data_summary_set} data")
-        # url = "https://api.ouraring.com/v1/sleep"
-        # -> last week
         url = f"https://api.ouraring.com/v2/usercollection/{data_summary_set}?start_date={config['date_start']}"
         # start=YYYY-MM-DD
         # end=YYYY-MM-DD
@@ -51,13 +49,7 @@ def fetch_data_summaries() -> None:
             print(f"Error fetching {data_summary_set} data: {e}")
             continue
 
-        # Write raw data to file
-        raw_data_path = Path(f"data/data_raw_{data_summary_set}.json")
-        with raw_data_path.open(mode="w", encoding="utf-8", newline="\n") as fh:
-            fh.write(cont)
-
-        # Write formatted data to file
-        formatted_data_path = Path(f"data/data_formatted_{data_summary_set}.json")
+        formatted_data_path = Path(f"data/data_{data_summary_set}.json")
         with formatted_data_path.open(mode="w", encoding="utf-8", newline="\n") as fh:
             d = json.loads(cont)
             json.dump(d, fh, ensure_ascii=False, sort_keys=False, indent=True)
diff --git a/src/analyze_v2.py b/src/report.py
similarity index 71%
rename from src/analyze_v2.py
rename to src/report.py
index aeacc03..f4e799c 100644
--- a/src/analyze_v2.py
+++ b/src/report.py
@@ -39,96 +39,130 @@
 # '4' = awake
 
 
+def get_readiness_data(d: dict | None, key: str) -> int | float | None:
+    """Return readiness `key`: score as int, temperature_deviation rounded to 0.1."""
+    value = d.get(key) if isinstance(d, dict) else None  # tolerate None/NaN cells
+    # Test against None, not truthiness: 0 and 0.0 are valid readings.
+    if value is None:
+        return None
+    if key == "score":
+        return int(value)
+    if key == "temperature_deviation":
+        return round(value, 1)
+    return None  # any other key is not handled here
+
+
 def prep_data_sleep() -> pd.DataFrame:
     """
     Prepare sleep data.
     """
-    with Path("data/data_raw_sleep.json").open(encoding="utf-8") as fh:
-        d_json = json.load(fh)
-    d_json = d_json["data"]  # drop first level
+    with Path("data/data_sleep.json").open(encoding="utf-8") as fh:
+        d1 = json.load(fh)
+    d1 = d1["data"]  # drop first level
 
-    df = pd.DataFrame.from_dict(d_json)
+    df = pd.DataFrame.from_dict(d1)
 
     # filter on sleep period=0
     # df = df[df["period"] == 0]
     # better:
     # filter on >4h sleep
-    df = df[df["time_in_bed"] > 4 * 3600]
-
-    # remove 5min-interval time series
-    df = df.drop(columns=["heart_rate", "hrv", "movement_30_sec"])
-
-    # DateTime parsing
-    df["day"] = pd.to_datetime(df["day"])  # , format="ISO8601"
+    df = df.query(f"time_in_bed > {4 * 3600}")
+    # to prevent SettingWithCopyWarning
+    df = df.copy()
 
     # converting "bedtime_start": "2021-12-30T23:38:05+01:00"
     #  to datetime without timezone (=localtime)
     for col in ("bedtime_end", "bedtime_start"):
-        # # V1: proper approach using tz_convert(None)
-        # df[col] = pd.to_datetime(df[col], format="%Y-%m-%dT%H:%M:%S%z")
-        # df[col] = df[col].dt.tz_convert(None)
-        # throws: AttributeError: Can only use .dt accessor with datetime-like values.
-        #  Did you mean: 'at'?
-
-        # V2: simple removing the timezone offset
-        # Remove the timezone information by replacing the "+01:00", "+02:00", "-02:00",
-        #  etc. with an empty string
-        df[col] = df[col].str.replace(r"[+\-]\d{2}:\d{2}.*$", "", regex=True)
+        df[col] = pd.to_datetime(df[col], format="ISO8601", utc=True)
+        df[col] = df[col].dt.tz_convert(tz="Europe/Berlin")
+        df[col] = df[col].dt.tz_localize(None)
 
-        # Parse the datetime column without timezone information
-        df[col] = pd.to_datetime(df[col], format="%Y-%m-%dT%H:%M:%S")
-        # note: now without the timezone %z info: format="%Y-%m-%dT%H:%M:%S%z"
-
-    df["dayofweek"] = df["day"].dt.dayofweek
-
-    # set date as index
-    df = df.set_index(["day"])
+    # remove 5min-interval time series
+    df = df.drop(columns=["heart_rate", "hrv", "movement_30_sec"])
 
+    # export original data as TSV (tab-separated)
     df.to_csv(
         path_or_buf="data/data_sleep_orig.tsv",
         sep="\t",
         lineterminator="\n",
     )
 
-    # Adding/calculating some data fields
-
-    df["REM sleep %"] = df["rem_sleep_duration"] / df["total_sleep_duration"] * 100
-    df["deep sleep %"] = df["deep_sleep_duration"] / df["total_sleep_duration"] * 100
-    df["light sleep %"] = df["light_sleep_duration"] / df["total_sleep_duration"] * 100
+    # flatten readiness sub section
+    df["score"] = df["readiness"].apply(lambda x: get_readiness_data(x, key="score"))
+    df["temperature_deviation"] = df["readiness"].apply(
+        lambda x: get_readiness_data(x, key="temperature_deviation")
+    )
 
-    # calc start of sleep as seconds since start of day -> decimal hours
-    df["start of sleep"] = (
-        df["bedtime_start"]
-        - df.index
-        + pd.Timedelta(days=1)  # 1 day offset, since bedtime starts on the prev day
-    ).dt.total_seconds() / 3600
+    # drop more columns
+    df = df.drop(columns=["id", "sleep_algorithm_version", "readiness"])
 
-    df["duration of sleep"] = df["total_sleep_duration"] / 3600
+    # DateTime parsing
+    df["day"] = pd.to_datetime(df["day"], format="%Y-%m-%d")  # , format="ISO8601"
+    df["dayofweek"] = df["day"].dt.dayofweek
+    df["week_no"] = df["day"].apply(lambda x: x.isocalendar()[1])
+    df["week_even"] = df["week_no"].apply(lambda x: x % 2 == 0)
 
-    df["efficiency %"] = df["efficiency"] * 100
+    #
+    # Adding/calculating some data fields
+    #
+    df["REM sleep %"] = (
+        df["rem_sleep_duration"] / df["total_sleep_duration"] * 100
+    ).round(1)
+    df["deep sleep %"] = (
+        df["deep_sleep_duration"] / df["total_sleep_duration"] * 100
+    ).round(1)
+    df["light sleep %"] = (
+        df["light_sleep_duration"] / df["total_sleep_duration"] * 100
+    ).round(1)
 
-    df["time to fall asleep"] = df["latency"] / 60
+    # calc start of sleep as seconds since start of day -> decimal hours
+    df["start of sleep"] = (
+        (
+            df["bedtime_start"]
+            - df["day"]
+            + pd.Timedelta(days=1)  # 1 day offset, since bedtime starts on the prev day
+        ).dt.total_seconds()
+        / 3600
+    ).round(1)
+    df["end of sleep"] = (
+        df["bedtime_end"].dt.hour + df["bedtime_end"].dt.minute / 60
+    ).round(1)
 
     # df["time to fall asleep"].where(df["time to fall asleep"]
     #                                 > 100, 100, inplace=True)
 
-    df["time awake"] = df["awake_time"] / 60
-
-    df = df.drop(
-        columns=[
-            "total_sleep_duration",
-            "efficiency",
-            "latency",
-            "awake_time",
-        ],
-    )
+    # round to 1 digit
+    for col in ("average_heart_rate", "average_breath"):
+        df[col] = df[col].round(1)
+
+    # sec to min
+    for col in ("latency",):
+        df[col] = (df[col] / 60).round(1)
+
+    # sec to hour
+    for col in (
+        "deep_sleep_duration",
+        "light_sleep_duration",
+        "rem_sleep_duration",
+        "total_sleep_duration",
+        "time_in_bed",
+        "awake_time",
+    ):
+        df[col] = (df[col] / 3600).round(1)
 
     # rename some columns
     df = df.rename(
         columns={
             "average_hrv": "HRV average",
             "average_heart_rate": "HR average",
-            "lowest_heart_rate": "HR min",
+            "lowest_heart_rate": "HR mini",
+            "deep_sleep_duration": "sleep deep h",
+            "light_sleep_duration": "sleep light h",
+            "rem_sleep_duration": "sleep rem h",
+            "total_sleep_duration": "sleep total h",
+            "time_in_bed": "time in bed h",
+            "awake_time": "time awake h",
+            "latency": "latency min",
         },
     )
 
@@ -137,6 +171,9 @@ def prep_data_sleep() -> pd.DataFrame:
         sep="\t",
         lineterminator="\n",
     )
+    # set date as index
+    df["day"] = df["day"].dt.date
+    df = df.set_index(["day"])
     return df
 
 
@@ -307,13 +344,13 @@ def plot_it(
     df = prep_data_sleep()
 
     interesting_properties = (
-        "duration of sleep",
+        "sleep total h",
         "HR average",
-        "HR min",
+        "HR mini",
         "HRV average",
-        "time to fall asleep",
-        "time awake",
-        "efficiency %",
+        "latency min",
+        "time awake h",
+        "efficiency",
         "REM sleep %",
         "deep sleep %",
         "light sleep %",
@@ -342,7 +379,7 @@ def plot_it(
 
     # 2. analize influence of sleep duration
 
-    was = "duration of sleep"
+    was = "sleep total h"
 
     d_results, l_corr_pos, l_corr_neg = correlation_tester(
         df=df,
@@ -357,7 +394,7 @@ def plot_it(
     fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(8, 6))
     axes = df.plot.scatter(
         x="start of sleep",
-        y="HR min",
+        y="HR mini",
         c="dayofweek",
         colormap="viridis",
     )