From bd8148360c618a24f60f770caa2509e2dd40697c Mon Sep 17 00:00:00 2001 From: "houhan@gmail.com" Date: Wed, 3 Apr 2024 07:09:56 +0000 Subject: [PATCH 1/5] feat: add PCA on task and perf --- code/pages/3_Playground.py | 143 +++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 code/pages/3_Playground.py diff --git a/code/pages/3_Playground.py b/code/pages/3_Playground.py new file mode 100644 index 0000000..e25c9e2 --- /dev/null +++ b/code/pages/3_Playground.py @@ -0,0 +1,143 @@ +import s3fs +import streamlit as st +from sklearn.decomposition import PCA +from sklearn.preprocessing import StandardScaler +import pandas as pd +import numpy as np +import plotly.graph_objects as go +from plotly.subplots import make_subplots +from streamlit_plotly_events import plotly_events + +from util.streamlit import add_session_filter, data_selector + +ss = st.session_state + +fs = s3fs.S3FileSystem(anon=False) +cache_folder = 'aind-behavior-data/foraging_nwb_bonsai_processed/' + +@st.cache_data(ttl=24*3600) +def load_data(tables=['sessions']): + df = {} + for table in tables: + file_name = cache_folder + f'df_{table}.pkl' + if st.session_state.use_s3: + with fs.open(file_name) as f: + df[table + '_bonsai'] = pd.read_pickle(f) + else: + df[table + '_bonsai'] = pd.read_pickle(file_name) + return df + +def app(): + + with st.sidebar: + add_session_filter(if_bonsai=True) + data_selector() + + if not hasattr(ss, 'df'): + st.write('##### Data not loaded yet, start from Home:') + st.page_link('Home.py', label='Home', icon="🏠") + return + + df = load_data()['sessions_bonsai'] + + # -- get cols -- + col_task = [s for s in df.metadata.columns + if not any(ss in s for ss in ['lickspout', 'weight', 'water', 'time', 'rig', + 'user_name', 'experiment', 'task', 'notes'] + ) + ] + + col_perf = [s for s in df.session_stats.columns + if not any(ss in s for ss in ['performance'] + ) + ] + + do_pca(ss.df_session_filtered.loc[:, ['subject_id', 'session'] + col_perf], 
'performance') + do_pca(ss.df_session_filtered.loc[:, ['subject_id', 'session'] + col_task], 'task') + + +def do_pca(df, name): + df = df.dropna(axis=0, how='any') + + # Standardize the features + x = StandardScaler().fit_transform(df.drop(columns=['subject_id', 'session'])) + + # Apply PCA + pca = PCA(n_components=10) # Keep the first 10 components; only the first 3 are plotted in 3D below + principalComponents = pca.fit_transform(x) + + # Create a new DataFrame with the principal components + principalDf = pd.DataFrame(data=principalComponents) + principalDf.index = df.set_index(['subject_id', 'session']).index + + principalDf.reset_index(inplace=True) + + # -- trajectory -- + fig = go.Figure() + + for mouse_id in principalDf['subject_id'].unique(): + subset = principalDf[principalDf['subject_id'] == mouse_id] + + # Add a 3D scatter plot for the current group + fig.add_trace(go.Scatter3d( + x=subset[0], + y=subset[1], + z=subset[2], + mode='lines+markers', + marker=dict(size=subset['session'].apply( + lambda x: 5 + 15*(x/20))), + name=f'{mouse_id}', # Name the trace for the legend + )) + + fig.update_layout(title=name, + scene=dict( + xaxis_title='Dim1', + yaxis_title='Dim2', + zaxis_title='Dim3' + ), + width=1300, + height=1000, + font_size=15, + ) + st.plotly_chart(fig) + + # -- variance explained -- + var_explained = pca.explained_variance_ratio_ + fig = go.Figure() + fig.add_trace(go.Scatter( + x=np.arange(1, len(var_explained)+1), + y=np.cumsum(var_explained), + ) + ) + fig.update_layout(title='Variance Explained', + yaxis=dict(range=[0, 1]), + width=300, + height=400, + font_size=15, + ) + st.plotly_chart(fig) + + # -- pca components -- + pca_components = pd.DataFrame(pca.components_, + columns=df.drop(columns=['subject_id', 'session']).columns) + pca_components + fig = make_subplots(rows=3, cols=1) + + # In vertical subplots, each subplot shows the feature weights (loadings) of one principal component + for i in range(3): + fig.add_trace(go.Bar( + x=pca_components.columns, + y=pca_components.loc[i], 
name=f'PC{i+1}', + ), row=i+1, col=1) + + fig.update_xaxes(showticklabels=i==2, row=i+1, col=1) + + fig.update_layout(title='PCA weights', + width=1000, + height=800, + font_size=20, + ) + st.plotly_chart(fig) + +app() \ No newline at end of file From 88d5f230d158fb7efe064e44c21a2616673cc765 Mon Sep 17 00:00:00 2001 From: "houhan@gmail.com" Date: Wed, 3 Apr 2024 07:21:47 +0000 Subject: [PATCH 2/5] build: rollback streamlit and aggrid version to resolve an issue --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index cc1a790..99671d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -72,8 +72,8 @@ six==1.16.0 smmap==5.0.0 statannotations==0.5.0 statsmodels==0.13.5 -streamlit==1.32.2 -streamlit-aggrid==1.0.1 +streamlit==1.31.0 +streamlit-aggrid==0.3.5 streamlit-nested-layout==0.1.1 streamlit-plotly-events==0.0.6 streamlit-profiler==0.2.4 From 9359322edd5b53051da76fb5e2de152057676227 Mon Sep 17 00:00:00 2001 From: "houhan@gmail.com" Date: Wed, 3 Apr 2024 07:21:56 +0000 Subject: [PATCH 3/5] build: update CO Docker --- environment/Dockerfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/environment/Dockerfile b/environment/Dockerfile index 63e109d..555eb6a 100644 --- a/environment/Dockerfile +++ b/environment/Dockerfile @@ -1,5 +1,6 @@ # hash:sha256:51bda5f40316acb89ad85a82e996448f5a31d6f40b5b443e817e9b346eee2f67 -FROM registry.codeocean.allenneuraldynamics.org/codeocean/jupyterlab:3.0.9-miniconda4.9.2-python3.8-ubuntu20.04 +ARG REGISTRY_HOST +FROM $REGISTRY_HOST/codeocean/jupyterlab:3.6.1-miniconda4.12.0-python3.9-ubuntu20.04 ARG DEBIAN_FRONTEND=noninteractive @@ -65,8 +66,8 @@ RUN pip install -U --no-cache-dir \ semver==2.13.0 \ six==1.16.0 \ smmap==5.0.0 \ - streamlit==1.30.0 \ - streamlit-aggrid==0.3.3 \ + streamlit==1.31.0 \ + streamlit-aggrid==0.3.5 \ streamlit-nested-layout==0.1.1 \ streamlit-plotly-events==0.0.6 \ tenacity==8.1.0 \ @@ -87,7 +88,8 @@ 
RUN pip install -U --no-cache-dir \ statannotations \ seaborn \ pynwb --ignore-installed ruamel.yaml\ - git+https://github.com/AllenNeuralDynamics/aind-foraging-behavior-bonsai-automatic-training.git@main + git+https://github.com/AllenNeuralDynamics/aind-foraging-behavior-bonsai-automatic-training.git@main\ + pygwalker ADD "https://github.com/coder/code-server/releases/download/v4.9.0/code-server-4.9.0-linux-amd64.tar.gz" /.code-server/code-server.tar.gz From ce8f0e0227f3b918850cbf8d257daf845394eafb Mon Sep 17 00:00:00 2001 From: "houhan@gmail.com" Date: Wed, 3 Apr 2024 07:38:12 +0000 Subject: [PATCH 4/5] build: add scikit-learn --- environment/Dockerfile | 3 ++- requirements.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/environment/Dockerfile b/environment/Dockerfile index 555eb6a..5f1fd11 100644 --- a/environment/Dockerfile +++ b/environment/Dockerfile @@ -89,7 +89,8 @@ RUN pip install -U --no-cache-dir \ seaborn \ pynwb --ignore-installed ruamel.yaml\ git+https://github.com/AllenNeuralDynamics/aind-foraging-behavior-bonsai-automatic-training.git@main\ - pygwalker + pygwalker \ + scikit-learn==1.4.1 ADD "https://github.com/coder/code-server/releases/download/v4.9.0/code-server-4.9.0-linux-amd64.tar.gz" /.code-server/code-server.tar.gz diff --git a/requirements.txt b/requirements.txt index 99671d4..0f5330d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -66,6 +66,7 @@ ruamel.yaml==0.17.21 ruamel.yaml.clib==0.2.7 s3fs==2022.11.0 scipy==1.10.0 +scikit-learn==1.4.1 seaborn==0.11.2 semver==2.13.0 six==1.16.0 From 1b55d28255601d1663caf9922517646ebb2af254 Mon Sep 17 00:00:00 2001 From: hh_itx_win10 Date: Mon, 8 Apr 2024 00:41:04 -0700 Subject: [PATCH 5/5] build: decrease scikit-learn version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0f5330d..3bbe467 100644 --- a/requirements.txt +++ b/requirements.txt @@ -66,7 +66,7 @@ ruamel.yaml==0.17.21 
ruamel.yaml.clib==0.2.7 s3fs==2022.11.0 scipy==1.10.0 -scikit-learn==1.4.1 +scikit-learn==1.3.2 seaborn==0.11.2 semver==2.13.0 six==1.16.0