Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added example using field selection #228

Merged
merged 1 commit into from
Oct 30, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 212 additions & 0 deletions examples/notebooks/compound_type_example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compound type field selection\n",
"\n",
"Create a dataset whose compound type has 260 fields, then compare two ways of reading a random subset of those fields:\n",
"\n",
"1. read every field with `dset[:]` and select the subset from the returned array;\n",
"2. request only the subset with `dset.fields(names)[:]`.\n",
"\n",
"Set `USE_H5PY` in the first code cell to choose between `h5py` and `h5pyd`."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"\n",
"import numpy as np\n",
"\n",
"# Selects which package provides the `h5py` name used by the rest of the\n",
"# notebook, and the matching path for the file to create.\n",
"USE_H5PY = False\n",
"\n",
"if not USE_H5PY:\n",
"    import h5pyd as h5py\n",
"    filepath = \"/home/test_user1/test/compound.h5\"\n",
"else:\n",
"    import h5py\n",
"    filepath = \"./compound.h5\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# open filepath in write mode to create a new domain/file\n",
"f = h5py.File(filepath, mode=\"w\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# create a numpy dtype with 260 fields:\n",
"# A0, A1, A2, ..., Z7, Z8, Z9\n",
"fields = []\n",
"for i in range(26):\n",
"    ch1 = chr(ord('A') + i)\n",
"    for j in range(10):\n",
"        ch2 = chr(ord('0') + j)\n",
"        # every field is a fixed-length 6-byte string\n",
"        fields.append((ch1 + ch2, \"S6\"))\n",
"dt = np.dtype(fields)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<HDF5 dataset \"dset\": shape (10000,), type \"|V1560\">"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# create a one-dimensional dataset of NUM_ROWS elements using the dtype\n",
"NUM_ROWS = 10_000\n",
"dset = f.create_dataset(\"dset\", shape=(NUM_ROWS,), dtype=dt)\n",
"dset"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# write some values into the dataset\n",
"# each value is the zero-padded row index followed by the field name,\n",
"# e.g. b'0042C4': 4 digits + a 2-character name is exactly 6 bytes, so it\n",
"# fits the S6 fields for every row index below 10000 (a longer value would be\n",
"# silently truncated and lose the end of the field name)\n",
"arr = np.zeros((NUM_ROWS,), dtype=dt)\n",
"for i in range(NUM_ROWS):\n",
"    row = arr[i]\n",
"    for name in dt.names:\n",
"        row[name] = f\"{i:04d}{name}\".encode()\n",
"dset[:] = arr"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['C4', 'P6', 'V0', 'S8', 'P4', 'B5', 'L1', 'E7']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# get a random set of field names.\n",
"# random.choices samples with replacement, so duplicates are removed with set();\n",
"# k is therefore only the max number of names returned\n",
"names = list(set(random.choices(dt.names, k=10)))\n",
"names"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 27.1 ms, sys: 16.1 ms, total: 43.2 ms\n",
"Wall time: 93.8 ms\n"
]
},
{
"data": {
"text/plain": [
"array([(b'000_C4', b'000_P6', b'000_V0', b'000_S8', b'000_P4', b'000_B5', b'000_L1', b'000_E7'),\n",
" (b'001_C4', b'001_P6', b'001_V0', b'001_S8', b'001_P4', b'001_B5', b'001_L1', b'001_E7'),\n",
" (b'002_C4', b'002_P6', b'002_V0', b'002_S8', b'002_P4', b'002_B5', b'002_L1', b'002_E7'),\n",
" ...,\n",
" (b'9997_C', b'9997_P', b'9997_V', b'9997_S', b'9997_P', b'9997_B', b'9997_L', b'9997_E'),\n",
" (b'9998_C', b'9998_P', b'9998_V', b'9998_S', b'9998_P', b'9998_B', b'9998_L', b'9998_E'),\n",
" (b'9999_C', b'9999_P', b'9999_V', b'9999_S', b'9999_P', b'9999_B', b'9999_L', b'9999_E')],\n",
" dtype={'names': ['C4', 'P6', 'V0', 'S8', 'P4', 'B5', 'L1', 'E7'], 'formats': ['S6', 'S6', 'S6', 'S6', 'S6', 'S6', 'S6', 'S6'], 'offsets': [144, 936, 1260, 1128, 924, 90, 666, 282], 'itemsize': 1560})"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read every field of every row from the dataset, then select the chosen fields from the returned array\n",
"%time dset[:][names]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.92 ms, sys: 0 ns, total: 3.92 ms\n",
"Wall time: 20.7 ms\n"
]
},
{
"data": {
"text/plain": [
"array([(b'000_C4', b'000_P6', b'000_V0', b'000_S8', b'000_P4', b'000_B5', b'000_L1', b'000_E7'),\n",
" (b'001_C4', b'001_P6', b'001_V0', b'001_S8', b'001_P4', b'001_B5', b'001_L1', b'001_E7'),\n",
" (b'002_C4', b'002_P6', b'002_V0', b'002_S8', b'002_P4', b'002_B5', b'002_L1', b'002_E7'),\n",
" ...,\n",
" (b'9997_C', b'9997_P', b'9997_V', b'9997_S', b'9997_P', b'9997_B', b'9997_L', b'9997_E'),\n",
" (b'9998_C', b'9998_P', b'9998_V', b'9998_S', b'9998_P', b'9998_B', b'9998_L', b'9998_E'),\n",
" (b'9999_C', b'9999_P', b'9999_V', b'9999_S', b'9999_P', b'9999_B', b'9999_L', b'9999_E')],\n",
" dtype=[('C4', 'S6'), ('P6', 'S6'), ('V0', 'S6'), ('S8', 'S6'), ('P4', 'S6'), ('B5', 'S6'), ('L1', 'S6'), ('E7', 'S6')])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Have HSDS (or HDF5 lib) return just the values for the given set of field names.\n",
"# Returns the same values as the cell above, but should be faster because less data\n",
"# needs to be transferred.\n",
"%time dset.fields(names)[:]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "hs",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}