Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 67 - Fix the char as short as int issue. #68

Merged
merged 3 commits into from
Oct 29, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions extensions/dataframes/RHipoDS.cxx
Original file line number Diff line number Diff line change
@@ -413,6 +413,9 @@ void RHipoDS::ClearData(int slot){
return;
}

for(auto &vecvec: fVecShortData){
vecvec[slot].clear();
}
for(auto &vecvec: fVecIntData){
vecvec[slot].clear();
}
@@ -479,8 +482,10 @@ bool RHipoDS::SetEntry(unsigned int slot, ULong64_t entry){
if( fColumnTypeIsVector[col_index] ){
for(int irow=0; irow < nrows; ++irow) {
switch (fColumnType.at(col_index)) {
case 1: // vector<char>
case 1: // vector<char> -- is upcast to short.
case 2: // vector<short>
fVecShortData.at(data_index).at(slot).push_back( (short) fBanks[bank_index].getShort(fColumnItem[col_index], irow));
break;
case 3: // vector<int>
fVecIntData.at(data_index).at(slot).push_back(fBanks[bank_index].getInt(fColumnItem[col_index], irow));
break;
@@ -503,6 +508,11 @@ bool RHipoDS::SetEntry(unsigned int slot, ULong64_t entry){
switch (fColumnType.at(col_index)) {
case 1: // char
case 2: // short
if(nrows>0){
fShortData.at(data_index).at(slot) = fBanks[bank_index].getShort(fColumnItem[col_index], 0);
} else
fShortData.at(data_index).at(slot) = 0;
break;
case 3: // int
if(nrows>0){
fIntData.at(data_index).at(slot) = fBanks[bank_index].getInt(fColumnItem[col_index], 0);
@@ -569,8 +579,16 @@ std::vector<void *> RHipoDS::GetColumnReadersImpl(std::string_view col_name, con
fActiveColumns.push_back(col_index);
if( fColumnTypeIsVector.at(col_index)) { // VECTORS
switch (fColumnType.at(col_index)) {
case 1: // vector<char>
case 1: // vector<char> is up-cast to vector<short>
case 2: // vector<short>
fVecShortData.emplace_back( fNSlots, std::vector<short>(fNColumnDepth));
fIndexToData.push_back((int)fVecShortData.size()-1);
fColumnPointers.emplace_back(fNSlots);
for(int i=0; i<fNSlots; ++i){
fColumnPointers.back()[i] = (void *)&fVecShortData.back()[i];
ret[i] = &fColumnPointers.back()[i];
}
break;
case 3: // vector<int>
fVecIntData.emplace_back( fNSlots, std::vector<int>(fNColumnDepth));
fIndexToData.push_back((int)fVecIntData.size()-1);
@@ -616,6 +634,14 @@ std::vector<void *> RHipoDS::GetColumnReadersImpl(std::string_view col_name, con
switch (fColumnType.at(col_index)) {
case 1: // char
case 2: // short
fShortData.emplace_back(fNSlots);
fIndexToData.push_back((int)fShortData.size()-1);
fColumnPointers.emplace_back(fNSlots);
for(int i=0; i<fNSlots; ++i){
fColumnPointers.back()[i] = (void *)&fShortData.back()[i];
ret[i] = &fColumnPointers.back()[i];
}
break;
case 3: // int
fIntData.emplace_back(fNSlots);
fIndexToData.push_back((int)fIntData.size()-1);
11 changes: 10 additions & 1 deletion extensions/dataframes/RHipoDS.hxx
Original file line number Diff line number Diff line change
@@ -34,7 +34,12 @@ public:
// };

bool fHipoReadOnlyPhysicsEvents = true;

//
// The following list of type names is actually very important for the proper functioning of the RDataFrame.
// The data itself is stored in a memory block with a type-less pointer to it (void *). This block is interpreted
// according to the types in the list below.
// Note that we are up-casting the char type to short. (In HIPO every getByte, getShort or getInt actually
// returns an int!)
// Type-name lookup table, presumably indexed by the numeric column type code (the codes the
// reader code switches on: 1 = char, 2 = short, 3 = int, ...) -- TODO confirm against the callers.
// ORDER is important here: the position of each name in the list is what selects it.
// Index 0 ("zero") looks like a placeholder so that code 1 lands on index 1, since C++ indexing
// starts at 0 -- NOTE(review): confirm this is what the original "go +1" remark meant.
// Both index 1 and index 2 hold "short", so a char column is reported as short.
const std::vector<std::string> fgCollTypeNumToString{
"zero", "short", "short", "int", "float", "double", "long", "None1", "long"};
std::vector<std::string> fHeaders;
@@ -79,11 +84,15 @@ public:
std::vector< std::vector<void *> > fColumnPointers; // [active_index][slot] - The anonymous store of the pointers to the data.

// The data, one of each type, per slot. data_index = fIndexToData[active_index]
//std::vector< std::vector<char> > fCharData;
std::vector< std::vector<short> > fShortData;
std::vector< std::vector<int> > fIntData; // [data_index][slot] - to integer.
std::vector< std::vector<long> > fLongData;
std::vector< std::vector<float> > fFloatData;
std::vector< std::vector<double> > fDoubleData;

//std::vector< std::vector< std::vector<char> > > fVecCharData;
std::vector< std::vector< std::vector<short> > > fVecShortData;
std::vector< std::vector< std::vector<int> > > fVecIntData; // [data_index][slot] to vector.
std::vector< std::vector< std::vector<long> > > fVecLongData;
std::vector< std::vector< std::vector<float> > > fVecFloatData;
95 changes: 13 additions & 82 deletions extensions/dataframes/test_hipo_ds.cxx
Original file line number Diff line number Diff line change
@@ -22,7 +22,7 @@ std::vector<float> v_abs(std::vector<float> &x, std::vector<float> &y, std::vec
int main(int argc, char **argv) {
// Very simple test of the Hipo DataFrame.
// ROOT::EnableImplicitMT();
int N_open = 100000;
int N_open = 100;
std::chrono::nanoseconds delta_t;

if(argc < 2){
@@ -37,95 +37,26 @@ int main(int argc, char **argv) {
auto cols_ds = ds->GetColumnNames();
bool translated = ds->fColumnNameTranslation;
auto stop = std::chrono::high_resolution_clock::now();
auto total_events = ds->GetEntries();
delta_t = std::chrono::duration_cast<std::chrono::nanoseconds>(stop-start);
printf("Open file in %6.5f ms for %6d events = %6.5f ns/event\n",
delta_t.count()*1.e-6, N_open, double(delta_t.count())/N_open );
printf("Open file in %6.5f ms for %6lu events = %6.5f ns/event\n",
delta_t.count()*1.e-6, total_events, double(delta_t.count())/total_events );

//("/data/CLAS12/data/hipo/rec_clas_016321.evio.00001.hipo");
// auto all_columns = ds->GetColumnNames();
// for(int i=0; i< all_columns.size(); ++i){
// printf("%40s bank id: %4d %s \n", all_columns[i].c_str(), i, ds->fColumnTypeIsVector[i] ? "vector":"scaler" );
// }
auto total_events = ds->GetEntries();

ds->fDebug = 5;
auto df = RDataFrame(std::move(ds));
auto cols_df = df.GetColumnNames();
RInterface<Detail::RDF::RLoopManager, void> df2 = df;
std::string run_config_event = "RUN::config.event";
if(translated){
df2 = df.Alias("px", "REC_Particle_px").Alias("py", "REC_Particle_py").Alias("pz", "REC_Particle_pz").Alias("pid", "REC_Particle_pid").Alias("status","REC_Particle_status");
run_config_event = "RUN_config_event";
}else{
df2 = df.Alias("px", "REC::Particle.px").Alias("py", "REC::Particle.py").Alias("pz", "REC::Particle.pz").Alias("pid", "REC::Particle.pid").Alias("status","REC::Particle.status");
}

auto h_pid=df2.Histo1D({"h_pid","Particle ID",4601,-2300,2300},"pid");
auto h_evt = df2.Histo1D({"h_evt", "Event number", 1000001, 0, 1000000}, run_config_event);
auto h_px = df2.Histo1D({"h_px", "P_x", 1000, 0., 12.},"px");
auto h_py = df2.Histo1D({"h_py", "P_y", 1000, 0., 12.},"py");
auto h_pz = df2.Histo1D({"h_pz", "P_z", 1000, 0., 12.},"pz");

// Lambda function for the absolute of a vector component set.
auto v_abs_l = [](
std::vector<float> &x, std::vector<float> &y, std::vector<float> &z)
{ RVec<double> out;
for(int i=0; i< x.size(); ++i){
out.push_back(sqrt(x[i]*x[i]+y[i]*y[i]+z[i]*z[i]));
};
return out;
};

auto h_p = df2.Define("p",v_abs,{"px","py","pz"}).Histo1D({"h_p","P (Momentum)", 1000, 0., 12.}, "p");
//
// Note that for the following style of DataFrame definitions, you *must* use aliases. The original names
// of columns in HIPO are incompatible with C++ (or Python or anything really) code that accesses these variables directly.
//
// auto h_p = df2.Define("p","vector<float> out;for(int i=0; i< px.size(); ++i){out.push_back(sqrt(px[i]*px[i]+py[i]*py[i]+pz[i]*pz[i]));}; return out;").Histo1D({"h_p","P (Momentum)", 1000, 0., 12.}, "p");

TCanvas* c = new TCanvas("c", "Test RHipoDS", 0, 0, 2000, 1000);
c->Divide(2, 1);
c->cd(1);
// First pass through the data
start = std::chrono::high_resolution_clock::now();
h_pid->DrawClone();
stop = std::chrono::high_resolution_clock::now();
delta_t = std::chrono::duration_cast<std::chrono::nanoseconds>(stop-start);
double time_ns = double(delta_t.count());
printf("processed events = %7lu in %6.5f s, or %10.3f ns/event. \n", total_events, time_ns*1.e-9,
time_ns/total_events);

c->cd(2);
start = std::chrono::high_resolution_clock::now();
h_evt->DrawClone();
stop = std::chrono::high_resolution_clock::now();
delta_t = std::chrono::duration_cast<std::chrono::nanoseconds>(stop-start);
time_ns = double(delta_t.count());
// printf("processed events = %7lu in %6.5f s, or %10.3f ns/event. \n", total_events, time_ns*1.e-9, time_ns/total_events);

c->Print("demo1.pdf");

c->Clear();
c->Divide(2, 2);
auto disp =
df.Filter("ATOF_tdc_layer.size() > 0")
//.Display({"s","l","c","o","tdc","tot"},50,48);
.Display({"ATOF_tdc_sector", "ATOF_tdc_layer", "ATOF_tdc_component",
"ATOF_tdc_order", "ATOF_tdc_TDC", "ATOF_tdc_ToT"},
5, 48);
disp->Print();

start = std::chrono::high_resolution_clock::now();
auto p1 = c->cd(1);
p1->SetLogy();
h_px->DrawClone();

auto p2 = c->cd(2);
p2->SetLogy();
h_py->DrawClone();

auto p3 = c->cd(3);
p3->SetLogy();
h_pz->DrawClone();

auto p4 = c->cd(4);
p4->SetLogy();
h_p->DrawClone();

stop = std::chrono::high_resolution_clock::now();
delta_t = std::chrono::duration_cast<std::chrono::nanoseconds>(stop-start);
time_ns = double(delta_t.count());
// printf("processed events = %7lu in %6.5f s, or %10.3f ns/event. \n", total_events, time_ns*1.e-9, time_ns/total_events);
c->Print("demo2.pdf");
}