correctly serve data for ndarrays #9

Open
wants to merge 10 commits into master
Changes from all commits
9 changes: 9 additions & 0 deletions .gitignore
@@ -168,3 +168,12 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+# macos
+.DS_Store
+
+# IDEs
+.vscode/
+
+# calley
+testing_home_csv/
219 changes: 152 additions & 67 deletions csv_hapireader.py
@@ -1,81 +1,166 @@
""" CSV ingest program from original hapi-server.py code

Good for CSV files in a file tree
"""
CSV ingest program from original hapi-server.py code.

Part of the HAPI Python Server. The code and documentation resides at:
Part of the HAPI Python Server. The code and documentation resides at:
https://github.com/hapi-server/server-python

See accompanying 'csv_config.py' file for site-specific details.
Assumes data is flat files in a directory hierarchy of
"data/[id]/YYYY/[id].YYYYMMDD.csv
See accompanying 'csv_config.py' file for site-specific details.

Assumes data is flat files in a directory hierarchy of:
"data/[id]/YYYY/[id].YYYYMMDD.csv
"""

import os
import json
import math
import os
from pathlib import Path

import dateutil


-def do_parameters_map( id, floc, parameters ):
-    ff= floc['dir'] + '/info/' + id + '.json'
-    fin=open(ff,'r')
-    jset = json.loads(fin.read())
-    fin.close()
-    pp = [item['name'] for item in jset['parameters']]
-    result= list (map( pp.index, parameters ))
-    if ( result[0]!=0 ):
-        result.insert(0,0)
-    return result
+def do_parameters_map(id: str, floc: dict, parameters: list[str]) -> dict:
+    """Map requested parameters to their respective column indices in a CSV file.
+
+    Reads metadata from a JSON info file to determine CSV column mappings for each
+    parameter. Returns a dict in which each requested parameter is mapped to its
+    corresponding column indices. Multi-column parameters (e.g., arrays) are represented
+    by consecutive indices. Ensures that time (index 0) is always included in the output
+    as the first entry.
+
+    Parameters
+    ----------
+    id : str
+        Dataset identifier for locating the JSON info file.
+    floc : dict
+        Dictionary with a 'dir' key pointing to the base directory of the info file.
+    parameters : list of str
+        List of parameter names to include in the mapping.
+
+    Returns
+    -------
+    dict
+        A dictionary whose keys are indices of requested parameters and whose values are
+        lists of their respective column indices in the CSV.
+    """
+    ff = Path(floc["dir"]) / "info" / f"{id}.json"
+    with open(ff, "r", encoding="utf-8") as fin:
+        jset = json.loads(fin.read())
+    pp = [item["name"] for item in jset["parameters"]]
+
+    curr_col = 0
+    param_dict = {}
+    for idx, param_name in enumerate(pp):
+        # Calculate the number of columns used for this parameter
+        size = jset["parameters"][idx].get("size")
+        if size:
+            ncols = math.prod(size)
+            col_indices = list(range(curr_col, curr_col + ncols))
+            curr_col += ncols
+        else:
+            col_indices = [curr_col]
+            curr_col += 1
+
+        # Filter requested parameters
+        if param_name in parameters:
+            param_dict[idx] = col_indices
+
+    # Ensure time (index 0) is always present and first
+    if param_dict.get(0) != [0]:
+        param_dict = {0: [0]} | param_dict
+
+    return param_dict
 
 
-def do_data_csv( id, timemin, timemax, parameters, catalog, floc,
-                 stream_flag, stream):
-    import dateutil
-    ff= floc['dir'] + '/data/' + id + '/'
-    filemin= dateutil.parser.parse( timemin ).strftime('%Y%m%d')
-    filemax= dateutil.parser.parse( timemax ).strftime('%Y%m%d')
-    timemin= dateutil.parser.parse( timemin ).strftime('%Y-%m-%dT%H:%M:%S')
-    timemax= dateutil.parser.parse( timemax ).strftime('%Y-%m-%dT%H:%M:%S')
-    yrmin= int( timemin[0:4] )
-    yrmax= int( timemax[0:4] )
-    if ( parameters!=None ):
-        mm= do_parameters_map( id, floc, parameters )
-    else:
-        mm= None
+def do_data_csv(
+    id: str,
+    timemin: str,
+    timemax: str,
+    parameters: list[str],
+    catalog,
+    floc: dict,
+    stream_flag: bool,
+    stream,
+) -> tuple[int, str]:
+    """Retrieve and filter CSV time-series data for a date range and parameter list.
+
+    Parses CSV files located within a dataset directory to extract data for the specified
+    parameters and time range. If streaming is enabled, writes the data incrementally to
+    the stream object. Returns a tuple with a status code and the final data string.
+
+    Parameters
+    ----------
+    id : str
+        Dataset identifier for locating data files.
+    timemin : str
+        Start time for filtering records.
+    timemax : str
+        End time for filtering records.
+    parameters : list[str]
+        List of parameter names to retrieve from the data files.
+    catalog : object
+        Not used.
+    floc : dict
+        Dictionary with a 'dir' key pointing to the base directory of the data files.
+    stream_flag : bool
+        If True, enables data streaming to `stream`.
+    stream : object
+        Stream object for data output.
+
+    Returns
+    -------
+    tuple[int, str]
+        Status code (1200 if data was found, 1201 if no data for the time range) and the
+        collected data string.
+    """
+    ff = floc["dir"] + "/data/" + id + "/"
+    filemin = dateutil.parser.parse(timemin).strftime("%Y%m%d")
+    filemax = dateutil.parser.parse(timemax).strftime("%Y%m%d")
+    timemin = dateutil.parser.parse(timemin).strftime("%Y-%m-%dT%H:%M:%S")
+    timemax = dateutil.parser.parse(timemax).strftime("%Y-%m-%dT%H:%M:%S")
+    yrmin = int(timemin[0:4])
+    yrmax = int(timemax[0:4])
+
+    if parameters is not None:
+        mm = do_parameters_map(id, floc, parameters)
+    else:
+        mm = None
 
     datastr = ""
     status = 0
 
-    for yr in range(yrmin,yrmax+1):
-        ffyr= ff + '%04d' % yr
-        if ( not os.path.exists(ffyr) ): continue
-        files= sorted( os.listdir( ffyr ) )
+    for yr in range(yrmin, yrmax + 1):
+        ffyr = ff + f"{yr:04d}"
+        if not os.path.exists(ffyr):
+            continue
+        files = sorted(os.listdir(ffyr))
         for file in files:
-            ymd= file[-12:-4]
-            if ( filemin<=ymd and ymd<=filemax ):
-                for rec in open( ffyr + '/' + file ):
-                    ydmhms= rec[0:19]
-                    if ( timemin<=ydmhms and ydmhms<timemax ):
-                        if ( mm!=None ):
-                            ss= rec.split(',')
-                            comma= False
-                            for i in mm:
-                                if comma:
-                                    datastr += ','
-                                datastr += ss[i]
-                                comma=True
-                            if mm[-1]<(len(ss)-1):
-                                datastr += '\n'
-                        else:
-                            datastr += rec
-
-        if len(datastr) > 0: status=1200
-        if stream_flag:
-            # write then flush
-            stream.wfile.write(bytes(datastr,"utf-8"))
-            datastr = ""
-
-
-            #s.wfile.write(bytes(datastr,"utf-8"))
+            ymd = file[-12:-4]
+            if filemin <= ymd <= filemax:
+                with open(Path(ffyr) / file, "r", encoding="utf-8") as f:
+                    for rec in f:
+                        ydmhms = rec[0:19]
+                        if timemin <= ydmhms < timemax:
+                            if mm is not None:
+                                ss = rec.split(",")
+                                comma = False
+                                # Emit every column of every requested parameter
+                                for i in mm:
+                                    for li in mm[i]:
+                                        if comma:
+                                            datastr += ","
+                                        datastr += ss[li]
+                                        comma = True
+                                # Restore the newline lost when the last CSV column is dropped
+                                if list(mm.values())[-1][-1] < (len(ss) - 1):
+                                    datastr += "\n"
+                            else:
+                                datastr += rec
+
+        if len(datastr) > 0:
+            status = 1200
+        if stream_flag:
+            # Write then flush
+            stream.wfile.write(bytes(datastr, "utf-8"))
+            datastr = ""
 
     if status != 1200 and len(datastr) == 0:
-        status=1201 # status 1200 is HAPI "OK- no data for time range"
+        status = 1201  # HAPI status 1201 is "OK - no data for time range"
 
-    #print("Debug: datastr is ",datastr);
-    return(status,datastr)
+    return status, datastr
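
To make the new mapping concrete, here is a minimal sketch (not part of the diff; the dataset metadata and parameter names below are invented) of what do_parameters_map now returns when an info file declares a "size" for an ndarray parameter:

import math

# Hypothetical info metadata: Time (1 column), a scalar, and a 3-vector.
jset = {
    "parameters": [
        {"name": "Time"},
        {"name": "density"},
        {"name": "b_gse", "size": [3]},  # ndarray parameter -> 3 CSV columns
    ]
}
requested = ["b_gse"]

# Same column-assignment logic as the new do_parameters_map above.
curr_col = 0
param_dict = {}
for idx, param in enumerate(jset["parameters"]):
    size = param.get("size")
    ncols = math.prod(size) if size else 1
    col_indices = list(range(curr_col, curr_col + ncols))
    curr_col += ncols
    if param["name"] in requested:
        param_dict[idx] = col_indices

# Time (index 0) is always prepended, as in the diff.
if param_dict.get(0) != [0]:
    param_dict = {0: [0]} | param_dict

print(param_dict)  # {0: [0], 2: [2, 3, 4]}

A record "2024-01-01T00:00:00,1.2,0.1,0.2,0.3" is therefore served as "2024-01-01T00:00:00,0.1,0.2,0.3": the whole vector travels with its parameter, which the old one-index-per-parameter mapping could not express.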
4 changes: 4 additions & 0 deletions home_csv/catalog.json
@@ -8,6 +8,10 @@
     {
       "id": "cputemp",
       "title": "Temperature of CPU and GPU of Raspberry PI"
+    },
+    {
+      "id": "spacecraft-spk-ck",
+      "title": "Parker Solar Probe Ephemeris and Orientation Data"
     }
   ],
   "status": {