Merge pull request #54 from sensein/alistair/refactor_constants
tie together the various constants used for parsing the redcap CSV
ibevers authored Jun 12, 2024
2 parents c271df1 + 6c863b7 commit b6351f0
Showing 6 changed files with 485 additions and 450 deletions.
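The only diff rendered below is the tutorial notebook; the constants consolidation itself happens in the package modules, whose diffs are collapsed on this page. Purely as a hypothetical sketch of the kind of refactor the commit message describes (the module, enum, and constant names below are illustrative, not taken from this commit), tying the REDCap CSV parsing constants together might look like:

# constants.py (hypothetical module, for illustration only)
# Collect the REDCap CSV parsing constants in one place so the dataset
# loaders and the tutorial share a single source of truth.
from enum import Enum

class RepeatInstrument(Enum):
    # schema names as they appear in the tutorial cells below; the enum itself is illustrative
    SESSION = "sessionschema"
    ACOUSTIC_TASK = "acoustictaskschema"
    GENERIC_DEMOGRAPHICS = "qgenericdemographicsschema"
    GENERIC_CONFOUNDERS = "qgenericconfoundersschema"

# standard REDCap export columns referenced while parsing the CSV (illustrative constants)
RECORD_ID_COLUMN = "record_id"
REPEAT_INSTRUMENT_COLUMN = "redcap_repeat_instrument"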
118 changes: 103 additions & 15 deletions docs/tutorial.ipynb
@@ -67,14 +67,16 @@
"# every user has a sessionschema which we can get info for the users from\n",
"qs = dataset.load_questionnaires('sessionschema')\n",
"q_dfs = []\n",
"for subject_id, questionnaire in qs.items():\n",
"for i, questionnaire in enumerate(qs):\n",
" # get the dataframe for this questionnaire\n",
" df = dataset.questionnaire_to_dataframe(questionnaire)\n",
" df['dataframe_number'] = i\n",
" q_dfs.append(df)\n",
" i += 1\n",
"\n",
"# concatenate all the dataframes\n",
"sessionschema_df = pd.concat(q_dfs)\n",
"sessionschema_df = pd.pivot(sessionschema_df, index='record_id', columns='linkId', values='valueString')\n",
"sessionschema_df = pd.pivot(sessionschema_df, index='dataframe_number', columns='linkId', values='valueString')\n",
"sessionschema_df"
]
},
@@ -108,7 +110,7 @@
"metadata": {},
"outputs": [],
"source": [
"demographics_df = dataset.load_and_pivot_questionnaire('demographics')\n",
"demographics_df = dataset.load_and_pivot_questionnaire('qgenericdemographicsschema')\n",
"demographics_df.head()"
]
},
@@ -231,14 +233,18 @@
"outputs": [],
"source": [
"session_schema = dataset.load_questionnaires('sessionschema')\n",
"# show the first item\n",
"record_id = list(session_schema.keys())[0]\n",
"\n",
"# Each element is a QuestionnaireResponse, a pydantic object\n",
"# you can serialize it to a python dictionary with .dict()\n",
"# and to a json with .json()\n",
"# otherwise attributes are accessible like any other python object\n",
"print(session_schema[record_id].json(indent=2))"
"questionnaire = session_schema[0]\n",
"\n",
"print(f'FHIR ID: {questionnaire.id}')\n",
"print(f'First item response: {questionnaire.item[0]}')\n",
"print('\\nAbridged questionnaire as JSON:')\n",
"# only print the first 600 characters of the JSON for brevity\n",
"print(questionnaire.json(indent=2)[:660], end='\\n...\\n')"
]
},
{
@@ -247,7 +253,7 @@
"metadata": {},
"outputs": [],
"source": [
"# helper function which loads in the above as a dataframe\n",
"# helper function which loads in questionnaires ending with a specific schema name as a dataframe\n",
"session_df = dataset.load_and_pivot_questionnaire('sessionschema')\n",
"session_df.head()"
]
Expand All @@ -265,15 +271,14 @@
"metadata": {},
"outputs": [],
"source": [
"session_confounders = dataset.load_questionnaires('confounders')\n",
"# show the first item\n",
"record_id = list(session_confounders.keys())[0]\n",
"session_confounders = dataset.load_questionnaires('qgenericconfoundersschema')\n",
"questionnaire = session_confounders[0]\n",
"\n",
"# Each element is a QuestionnaireResponse, a pydantic object\n",
"# you can serialize it to a python dictionary with .dict()\n",
"# and to a json with .json()\n",
"# otherwise attributes are accessible like any other python object\n",
"print(session_confounders[record_id].json(indent=2))"
"print(questionnaire.json(indent=2))"
]
},
{
@@ -310,10 +315,7 @@
"outputs": [],
"source": [
"acoustic_tasks = dataset.load_questionnaires('acoustictaskschema')\n",
"acoustic_tasks\n",
"# show the first item\n",
"record_id = list(acoustic_tasks.keys())[0]\n",
"print(acoustic_tasks[record_id].json(indent=2))"
"print(acoustic_tasks[0].json(indent=2))"
]
},
{
@@ -348,6 +350,92 @@
"source": [
"acoustic_tasks_df['acoustic_task_name'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Audio"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import numpy as np\n",
"from b2aiprep.process import Audio, specgram\n",
"import IPython.display as Ipd\n",
"\n",
"# configuration options\n",
"win_length = 512\n",
"hop_length = 256\n",
"\n",
"base_path = Path('/Users/alistairewj/git/b2aiprep/output/')\n",
"audio_file = base_path.joinpath(\n",
" 'sub-1f9475bb-f13b-4f68-969b-28f20455b3e7',\n",
" 'ses-CB8A74EE-0C8C-4B15-B322-D93A79ADB40A',\n",
" 'audio',\n",
" 'sub-1f9475bb-f13b-4f68-969b-28f20455b3e7_ses-CB8A74EE-0C8C-4B15-B322-D93A79ADB40A_Audio-Check_rec-Audio-Check-1.wav'\n",
")\n",
"audio = Audio.from_file(str(audio_file))\n",
"audio = audio.to_16khz()\n",
"\n",
"# convert to uint32 - probably should use the bits_per_sample from the original metadata!\n",
"signal = audio.signal.squeeze()\n",
"signal = (np.iinfo(np.uint32).max * (signal - signal.min())) / (signal.max() - signal.min())\n",
"# display a widget to play the audio file\n",
"Ipd.display(Ipd.Audio(data=signal, rate=audio.sample_rate))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also calculate a spectrogram of the data to visualize the frequency components over time. A spectrogram is essentially a frequency spectrum repeated `N` times spaced out throughout the original audio."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"win_length = 30\n",
"hop_length = 10\n",
"nfft = 512\n",
"features_specgram = specgram(audio, win_length=win_length, hop_length=hop_length, n_fft=nfft)\n",
"features_specgram = features_specgram.numpy()\n",
"# convert to db\n",
"log_spec = 10.0 * np.log10(np.maximum(features_specgram, 1e-10)).T\n",
"fig, ax = plt.subplots(1, 1)\n",
"ax.set_ylabel('Frequency (Hz)')\n",
"ax.matshow(log_spec, origin=\"lower\", aspect=\"auto\")\n",
"\n",
"xlim = ax.get_xlim()\n",
"xticks = ax.get_xticks()\n",
"xticklabels = [f\"{int(t * hop_length / 1000)}\" for t in xticks]\n",
"ax.set_xticks(xticks)\n",
"ax.set_xticklabels(xticklabels)\n",
"# reset the xlim, which may have been modified by setting the xticks\n",
"ax.set_xlim(xlim)\n",
"ax.set_xlabel('Time (s)')\n",
"\n",
"# y-axis is frequency\n",
"ylim = ax.get_ylim()\n",
"yticks = ax.get_yticks()\n",
"# convert yticks into frequencies\n",
"frequencies = yticks / nfft * audio.sample_rate\n",
"frequencies = [f\"{int(f)}\" for f in frequencies]\n",
"ax.set_yticks(yticks)\n",
"ax.set_yticklabels(frequencies)\n",
"ax.set_ylim(ylim)\n",
"\n",
"# Display the image\n",
"plt.show()\n"
]
}
],
"metadata": {
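For quick reference, the access pattern the updated tutorial cells rely on condenses to a few lines. This is a minimal sketch assuming `dataset` is the tutorial's already-constructed dataset object (its construction is not part of this diff); the method and attribute names are the ones used in the cells above.

import pandas as pd

# dataset = ...  # construction of the tutorial's dataset object is assumed, not shown in this diff
qs = dataset.load_questionnaires('sessionschema')  # list of QuestionnaireResponse (pydantic) objects
first = qs[0]

print(first.id)                    # FHIR id of the response
first_as_dict = first.dict()       # serialize to a python dictionary
first_as_json = first.json(indent=2)

# flatten every response into one table, one row per questionnaire
q_dfs = []
for i, questionnaire in enumerate(qs):
    df = dataset.questionnaire_to_dataframe(questionnaire)
    df['dataframe_number'] = i
    q_dfs.append(df)
sessionschema_df = pd.pivot(
    pd.concat(q_dfs), index='dataframe_number', columns='linkId', values='valueString'
)

# or use the helper that performs the load and pivot in one call
session_df = dataset.load_and_pivot_questionnaire('sessionschema')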