Skip to content

Commit

Permalink
Merge pull request #68 from mholtrop/maurik_iss_67
Browse files Browse the repository at this point in the history
Issue 67 - Fix the char as short as int issue.
  • Loading branch information
gavalian authored Oct 29, 2024
2 parents a98b98f + cff2ca2 commit 61ecdd8
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 85 deletions.
30 changes: 28 additions & 2 deletions extensions/dataframes/RHipoDS.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,9 @@ void RHipoDS::ClearData(int slot){
return;
}

for(auto &vecvec: fVecShortData){
vecvec[slot].clear();
}
for(auto &vecvec: fVecIntData){
vecvec[slot].clear();
}
Expand Down Expand Up @@ -479,8 +482,10 @@ bool RHipoDS::SetEntry(unsigned int slot, ULong64_t entry){
if( fColumnTypeIsVector[col_index] ){
for(int irow=0; irow < nrows; ++irow) {
switch (fColumnType.at(col_index)) {
case 1: // vector<char>
case 1: // vector<char> -- is upcast to short.
case 2: // vector<short>
fVecShortData.at(data_index).at(slot).push_back( (short) fBanks[bank_index].getShort(fColumnItem[col_index], irow));
break;
case 3: // vector<int>
fVecIntData.at(data_index).at(slot).push_back(fBanks[bank_index].getInt(fColumnItem[col_index], irow));
break;
Expand All @@ -503,6 +508,11 @@ bool RHipoDS::SetEntry(unsigned int slot, ULong64_t entry){
switch (fColumnType.at(col_index)) {
case 1: // char
case 2: // short
if(nrows>0){
fShortData.at(data_index).at(slot) = fBanks[bank_index].getShort(fColumnItem[col_index], 0);
} else
fShortData.at(data_index).at(slot) = 0;
break;
case 3: // int
if(nrows>0){
fIntData.at(data_index).at(slot) = fBanks[bank_index].getInt(fColumnItem[col_index], 0);
Expand Down Expand Up @@ -569,8 +579,16 @@ std::vector<void *> RHipoDS::GetColumnReadersImpl(std::string_view col_name, con
fActiveColumns.push_back(col_index);
if( fColumnTypeIsVector.at(col_index)) { // VECTORS
switch (fColumnType.at(col_index)) {
case 1: // vector<char>
case 1: // vector<char> is up-cast to vector<short>
case 2: // vector<short>
fVecShortData.emplace_back( fNSlots, std::vector<short>(fNColumnDepth));
fIndexToData.push_back((int)fVecShortData.size()-1);
fColumnPointers.emplace_back(fNSlots);
for(int i=0; i<fNSlots; ++i){
fColumnPointers.back()[i] = (void *)&fVecShortData.back()[i];
ret[i] = &fColumnPointers.back()[i];
}
break;
case 3: // vector<int>
fVecIntData.emplace_back( fNSlots, std::vector<int>(fNColumnDepth));
fIndexToData.push_back((int)fVecIntData.size()-1);
Expand Down Expand Up @@ -616,6 +634,14 @@ std::vector<void *> RHipoDS::GetColumnReadersImpl(std::string_view col_name, con
switch (fColumnType.at(col_index)) {
case 1: // char
case 2: // short
fShortData.emplace_back(fNSlots);
fIndexToData.push_back((int)fShortData.size()-1);
fColumnPointers.emplace_back(fNSlots);
for(int i=0; i<fNSlots; ++i){
fColumnPointers.back()[i] = (void *)&fShortData.back()[i];
ret[i] = &fColumnPointers.back()[i];
}
break;
case 3: // int
fIntData.emplace_back(fNSlots);
fIndexToData.push_back((int)fIntData.size()-1);
Expand Down
11 changes: 10 additions & 1 deletion extensions/dataframes/RHipoDS.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,12 @@ public:
// };

bool fHipoReadOnlyPhysicsEvents = true;

//
// The following list of type names is actually very important for the proper functioning of the RDataFrame.
// The data itself is stored in a memory block with a type-less pointer to it (void *). This block is interpreted
// according to the types in the list below.
// Note that we are up-casting the char type to short. (In HIPO every getByte, getShort or getInt actually
// returns an int!)
const std::vector<std::string> fgCollTypeNumToString{ // ORDER is important here. C++ so go +1
"zero", "short", "short", "int", "float", "double", "long", "None1", "long"};
std::vector<std::string> fHeaders;
Expand Down Expand Up @@ -79,11 +84,15 @@ public:
std::vector< std::vector<void *> > fColumnPointers; // [active_index][slot] - The anonymous store of the pointers to the data.

// The data, one of each type, per slot. data_index = fIndexToData[active_index]
//std::vector< std::vector<char> > fCharData;
std::vector< std::vector<short> > fShortData;
std::vector< std::vector<int> > fIntData; // [data_index][slot] - to integer.
std::vector< std::vector<long> > fLongData;
std::vector< std::vector<float> > fFloatData;
std::vector< std::vector<double> > fDoubleData;

//std::vector< std::vector< std::vector<char> > > fVecCharData;
std::vector< std::vector< std::vector<short> > > fVecShortData;
std::vector< std::vector< std::vector<int> > > fVecIntData; // [data_index][slot] to vector.
std::vector< std::vector< std::vector<long> > > fVecLongData;
std::vector< std::vector< std::vector<float> > > fVecFloatData;
Expand Down
95 changes: 13 additions & 82 deletions extensions/dataframes/test_hipo_ds.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ std::vector<float> v_abs(std::vector<float> &x, std::vector<float> &y, std::vec
int main(int argc, char **argv) {
// Very simple test of the Hipo DataFrame.
// ROOT::EnableImplicitMT();
int N_open = 100000;
int N_open = 100;
std::chrono::nanoseconds delta_t;

if(argc < 2){
Expand All @@ -37,95 +37,26 @@ int main(int argc, char **argv) {
auto cols_ds = ds->GetColumnNames();
bool translated = ds->fColumnNameTranslation;
auto stop = std::chrono::high_resolution_clock::now();
auto total_events = ds->GetEntries();
delta_t = std::chrono::duration_cast<std::chrono::nanoseconds>(stop-start);
printf("Open file in %6.5f ms for %6d events = %6.5f ns/event\n",
delta_t.count()*1.e-6, N_open, double(delta_t.count())/N_open );
printf("Open file in %6.5f ms for %6lu events = %6.5f ns/event\n",
delta_t.count()*1.e-6, total_events, double(delta_t.count())/total_events );

//("/data/CLAS12/data/hipo/rec_clas_016321.evio.00001.hipo");
// auto all_columns = ds->GetColumnNames();
// for(int i=0; i< all_columns.size(); ++i){
// printf("%40s bank id: %4d %s \n", all_columns[i].c_str(), i, ds->fColumnTypeIsVector[i] ? "vector":"scaler" );
// }
auto total_events = ds->GetEntries();

ds->fDebug = 5;
auto df = RDataFrame(std::move(ds));
auto cols_df = df.GetColumnNames();
RInterface<Detail::RDF::RLoopManager, void> df2 = df;
std::string run_config_event = "RUN::config.event";
if(translated){
df2 = df.Alias("px", "REC_Particle_px").Alias("py", "REC_Particle_py").Alias("pz", "REC_Particle_pz").Alias("pid", "REC_Particle_pid").Alias("status","REC_Particle_status");
run_config_event = "RUN_config_event";
}else{
df2 = df.Alias("px", "REC::Particle.px").Alias("py", "REC::Particle.py").Alias("pz", "REC::Particle.pz").Alias("pid", "REC::Particle.pid").Alias("status","REC::Particle.status");
}

auto h_pid=df2.Histo1D({"h_pid","Particle ID",4601,-2300,2300},"pid");
auto h_evt = df2.Histo1D({"h_evt", "Event number", 1000001, 0, 1000000}, run_config_event);
auto h_px = df2.Histo1D({"h_px", "P_x", 1000, 0., 12.},"px");
auto h_py = df2.Histo1D({"h_py", "P_y", 1000, 0., 12.},"py");
auto h_pz = df2.Histo1D({"h_pz", "P_z", 1000, 0., 12.},"pz");

// Lambda function for the absolute of a vector component set.
auto v_abs_l = [](
std::vector<float> &x, std::vector<float> &y, std::vector<float> &z)
{ RVec<double> out;
for(int i=0; i< x.size(); ++i){
out.push_back(sqrt(x[i]*x[i]+y[i]*y[i]+z[i]*z[i]));
};
return out;
};

auto h_p = df2.Define("p",v_abs,{"px","py","pz"}).Histo1D({"h_p","P (Momentum)", 1000, 0., 12.}, "p");
//
// Note that for the following style of DataFrame definitions, you *must* use aliases. The original HIPO
// column names (e.g. "REC::Particle.px") are not valid identifiers in C++ (or Python, or really any language), so code cannot access them directly by name.
//
// auto h_p = df2.Define("p","vector<float> out;for(int i=0; i< px.size(); ++i){out.push_back(sqrt(px[i]*px[i]+py[i]*py[i]+pz[i]*pz[i]));}; return out;").Histo1D({"h_p","P (Momentum)", 1000, 0., 12.}, "p");

TCanvas* c = new TCanvas("c", "Test RHipoDS", 0, 0, 2000, 1000);
c->Divide(2, 1);
c->cd(1);
// First pass through the data
start = std::chrono::high_resolution_clock::now();
h_pid->DrawClone();
stop = std::chrono::high_resolution_clock::now();
delta_t = std::chrono::duration_cast<std::chrono::nanoseconds>(stop-start);
double time_ns = double(delta_t.count());
printf("processed events = %7lu in %6.5f s, or %10.3f ns/event. \n", total_events, time_ns*1.e-9,
time_ns/total_events);

c->cd(2);
start = std::chrono::high_resolution_clock::now();
h_evt->DrawClone();
stop = std::chrono::high_resolution_clock::now();
delta_t = std::chrono::duration_cast<std::chrono::nanoseconds>(stop-start);
time_ns = double(delta_t.count());
// printf("processed events = %7lu in %6.5f s, or %10.3f ns/event. \n", total_events, time_ns*1.e-9, time_ns/total_events);

c->Print("demo1.pdf");

c->Clear();
c->Divide(2, 2);
auto disp =
df.Filter("ATOF_tdc_layer.size() > 0")
//.Display({"s","l","c","o","tdc","tot"},50,48);
.Display({"ATOF_tdc_sector", "ATOF_tdc_layer", "ATOF_tdc_component",
"ATOF_tdc_order", "ATOF_tdc_TDC", "ATOF_tdc_ToT"},
5, 48);
disp->Print();

start = std::chrono::high_resolution_clock::now();
auto p1 = c->cd(1);
p1->SetLogy();
h_px->DrawClone();

auto p2 = c->cd(2);
p2->SetLogy();
h_py->DrawClone();

auto p3 = c->cd(3);
p3->SetLogy();
h_pz->DrawClone();

auto p4 = c->cd(4);
p4->SetLogy();
h_p->DrawClone();

stop = std::chrono::high_resolution_clock::now();
delta_t = std::chrono::duration_cast<std::chrono::nanoseconds>(stop-start);
time_ns = double(delta_t.count());
// printf("processed events = %7lu in %6.5f s, or %10.3f ns/event. \n", total_events, time_ns*1.e-9, time_ns/total_events);
c->Print("demo2.pdf");
}

0 comments on commit 61ecdd8

Please sign in to comment.