From 9629defa5e4e0ab552363425f3d26c6869bdfffd Mon Sep 17 00:00:00 2001 From: Maurik Holtrop Date: Tue, 29 Oct 2024 18:22:07 +0000 Subject: [PATCH 1/2] Code was incorrectly up-casting vector to vector but interpreting it as vector causing doubling issue. We no longer up-cast short to int, but still upcast char to short. --- extensions/dataframes/RHipoDS.cxx | 40 ++++++++++- extensions/dataframes/RHipoDS.hxx | 11 ++- extensions/dataframes/test_hipo_ds.cxx | 95 ++++---------------------- 3 files changed, 61 insertions(+), 85 deletions(-) diff --git a/extensions/dataframes/RHipoDS.cxx b/extensions/dataframes/RHipoDS.cxx index 595b524..4bbfa11 100644 --- a/extensions/dataframes/RHipoDS.cxx +++ b/extensions/dataframes/RHipoDS.cxx @@ -413,6 +413,12 @@ void RHipoDS::ClearData(int slot){ return; } +// for(auto &vecvec: fVecCharData){ +// vecvec[slot].clear(); +// } + for(auto &vecvec: fVecShortData){ + vecvec[slot].clear(); + } for(auto &vecvec: fVecIntData){ vecvec[slot].clear(); } @@ -476,11 +482,20 @@ bool RHipoDS::SetEntry(unsigned int slot, ULong64_t entry){ // So here, we can assume that the bank has the data we are interested in. int nrows = fBanks[bank_index].getRows(); int data_index = fIndexToData[active_index]; + char cnum = 0; + short snum = 0; if( fColumnTypeIsVector[col_index] ){ for(int irow=0; irow < nrows; ++irow) { switch (fColumnType.at(col_index)) { case 1: // vector - case 2: // vector + cnum = fBanks[bank_index].getByte(fColumnItem[col_index], irow); + fVecShortData.at(data_index).at(slot).push_back( (short) cnum ); +// short snum = fBanks[bank_index].getByte(fColumnItem[col_index], irow); +// fVecCharData.at(data_index).at(slot).push_back(cnum); + break; + case 2: // + fVecShortData.at(data_index).at(slot).push_back( (short) fBanks[bank_index].getShort(fColumnItem[col_index], irow)); + break; case 3: // vector fVecIntData.at(data_index).at(slot).push_back(fBanks[bank_index].getInt(fColumnItem[col_index], irow)); break; @@ -503,6 +518,11 @@ bool RHipoDS::SetEntry(unsigned int slot, ULong64_t entry){ switch (fColumnType.at(col_index)) { case 1: // char case 2: // short + if(nrows>0){ + fShortData.at(data_index).at(slot) = fBanks[bank_index].getShort(fColumnItem[col_index], 0); + } else + fShortData.at(data_index).at(slot) = 0; + break; case 3: // int if(nrows>0){ fIntData.at(data_index).at(slot) = fBanks[bank_index].getInt(fColumnItem[col_index], 0); @@ -569,8 +589,16 @@ std::vector RHipoDS::GetColumnReadersImpl(std::string_view col_name, con fActiveColumns.push_back(col_index); if( fColumnTypeIsVector.at(col_index)) { // VECTORS switch (fColumnType.at(col_index)) { - case 1: // vector + case 1: // vector is up-cast to vector case 2: // vector + fVecShortData.emplace_back( fNSlots, std::vector(fNColumnDepth)); + fIndexToData.push_back((int)fVecShortData.size()-1); + fColumnPointers.emplace_back(fNSlots); + for(int i=0; i fVecIntData.emplace_back( fNSlots, std::vector(fNColumnDepth)); fIndexToData.push_back((int)fVecIntData.size()-1); @@ -616,6 +644,14 @@ std::vector RHipoDS::GetColumnReadersImpl(std::string_view col_name, con switch (fColumnType.at(col_index)) { case 1: // char case 2: // short + fShortData.emplace_back(fNSlots); + fIndexToData.push_back((int)fShortData.size()-1); + fColumnPointers.emplace_back(fNSlots); + for(int i=0; i fgCollTypeNumToString{ // ORDER is important here. C++ so go +1 "zero", "short", "short", "int", "float", "double", "long", "None1", "long"}; std::vector fHeaders; @@ -79,11 +84,15 @@ public: std::vector< std::vector > fColumnPointers; // [active_index][slot] - The anonymous store of the pointers to the data. // The data, one of each type, per slot. data_index = fIndexToData[active_index] + //std::vector< std::vector > fCharData; + std::vector< std::vector > fShortData; std::vector< std::vector > fIntData; // [data_index][slot] - to integer. std::vector< std::vector > fLongData; std::vector< std::vector > fFloatData; std::vector< std::vector > fDoubleData; + //std::vector< std::vector< std::vector > > fVecCharData; + std::vector< std::vector< std::vector > > fVecShortData; std::vector< std::vector< std::vector > > fVecIntData; // [data_index][slot] to vector. std::vector< std::vector< std::vector > > fVecLongData; std::vector< std::vector< std::vector > > fVecFloatData; diff --git a/extensions/dataframes/test_hipo_ds.cxx b/extensions/dataframes/test_hipo_ds.cxx index 712e93b..df1e2d5 100644 --- a/extensions/dataframes/test_hipo_ds.cxx +++ b/extensions/dataframes/test_hipo_ds.cxx @@ -22,7 +22,7 @@ std::vector v_abs(std::vector &x, std::vector &y, std::vec int main(int argc, char **argv) { // Very simple test of the Hipo DataFrame. // ROOT::EnableImplicitMT(); - int N_open = 100000; + int N_open = 100; std::chrono::nanoseconds delta_t; if(argc < 2){ @@ -37,95 +37,26 @@ int main(int argc, char **argv) { auto cols_ds = ds->GetColumnNames(); bool translated = ds->fColumnNameTranslation; auto stop = std::chrono::high_resolution_clock::now(); + auto total_events = ds->GetEntries(); delta_t = std::chrono::duration_cast(stop-start); - printf("Open file in %6.5f ms for %6d events = %6.5f ns/event\n", - delta_t.count()*1.e-6, N_open, double(delta_t.count())/N_open ); + printf("Open file in %6.5f ms for %6lu events = %6.5f ns/event\n", + delta_t.count()*1.e-6, total_events, double(delta_t.count())/total_events ); //("/data/CLAS12/data/hipo/rec_clas_016321.evio.00001.hipo"); // auto all_columns = ds->GetColumnNames(); // for(int i=0; i< all_columns.size(); ++i){ // printf("%40s bank id: %4d %s \n", all_columns[i].c_str(), i, ds->fColumnTypeIsVector[i] ? "vector":"scaler" ); // } - auto total_events = ds->GetEntries(); + + ds->fDebug = 5; auto df = RDataFrame(std::move(ds)); auto cols_df = df.GetColumnNames(); - RInterface df2 = df; - std::string run_config_event = "RUN::config.event"; - if(translated){ - df2 = df.Alias("px", "REC_Particle_px").Alias("py", "REC_Particle_py").Alias("pz", "REC_Particle_pz").Alias("pid", "REC_Particle_pid").Alias("status","REC_Particle_status"); - run_config_event = "RUN_config_event"; - }else{ - df2 = df.Alias("px", "REC::Particle.px").Alias("py", "REC::Particle.py").Alias("pz", "REC::Particle.pz").Alias("pid", "REC::Particle.pid").Alias("status","REC::Particle.status"); - } - - auto h_pid=df2.Histo1D({"h_pid","Particle ID",4601,-2300,2300},"pid"); - auto h_evt = df2.Histo1D({"h_evt", "Event number", 1000001, 0, 1000000}, run_config_event); - auto h_px = df2.Histo1D({"h_px", "P_x", 1000, 0., 12.},"px"); - auto h_py = df2.Histo1D({"h_py", "P_y", 1000, 0., 12.},"py"); - auto h_pz = df2.Histo1D({"h_pz", "P_z", 1000, 0., 12.},"pz"); - - // Lambda function for the absolute of a vector component set. - auto v_abs_l = []( - std::vector &x, std::vector &y, std::vector &z) - { RVec out; - for(int i=0; i< x.size(); ++i){ - out.push_back(sqrt(x[i]*x[i]+y[i]*y[i]+z[i]*z[i])); - }; - return out; - }; - - auto h_p = df2.Define("p",v_abs,{"px","py","pz"}).Histo1D({"h_p","P (Momentum)", 1000, 0., 12.}, "p"); - // - // Note that for the following style of DataFrame definitions, you *must* use aliasses. The original names - // of columns in HIPO are incompatible with C++ (or Python or anything really) code direct access to these variables. - // - // auto h_p = df2.Define("p","vector out;for(int i=0; i< px.size(); ++i){out.push_back(sqrt(px[i]*px[i]+py[i]*py[i]+pz[i]*pz[i]));}; return out;").Histo1D({"h_p","P (Momentum)", 1000, 0., 12.}, "p"); - - TCanvas* c = new TCanvas("c", "Test RHipoDS", 0, 0, 2000, 1000); - c->Divide(2, 1); - c->cd(1); - // First pass through the data - start = std::chrono::high_resolution_clock::now(); - h_pid->DrawClone(); - stop = std::chrono::high_resolution_clock::now(); - delta_t = std::chrono::duration_cast(stop-start); - double time_ns = double(delta_t.count()); - printf("processed events = %7lu in %6.5f s, or %10.3f ns/event. \n", total_events, time_ns*1.e-9, - time_ns/total_events); - - c->cd(2); - start = std::chrono::high_resolution_clock::now(); - h_evt->DrawClone(); - stop = std::chrono::high_resolution_clock::now(); - delta_t = std::chrono::duration_cast(stop-start); - time_ns = double(delta_t.count()); -// printf("processed events = %7lu in %6.5f s, or %10.3f ns/event. \n", total_events, time_ns*1.e-9, time_ns/total_events); - - c->Print("demo1.pdf"); - - c->Clear(); - c->Divide(2, 2); + auto disp = + df.Filter("ATOF_tdc_layer.size() > 0") + //.Display({"s","l","c","o","tdc","tot"},50,48); + .Display({"ATOF_tdc_sector", "ATOF_tdc_layer", "ATOF_tdc_component", + "ATOF_tdc_order", "ATOF_tdc_TDC", "ATOF_tdc_ToT"}, + 5, 48); + disp->Print(); - start = std::chrono::high_resolution_clock::now(); - auto p1 = c->cd(1); - p1->SetLogy(); - h_px->DrawClone(); - - auto p2 = c->cd(2); - p2->SetLogy(); - h_py->DrawClone(); - - auto p3 = c->cd(3); - p3->SetLogy(); - h_pz->DrawClone(); - - auto p4 = c->cd(4); - p4->SetLogy(); - h_p->DrawClone(); - - stop = std::chrono::high_resolution_clock::now(); - delta_t = std::chrono::duration_cast(stop-start); - time_ns = double(delta_t.count()); -// printf("processed events = %7lu in %6.5f s, or %10.3f ns/event. \n", total_events, time_ns*1.e-9, time_ns/total_events); - c->Print("demo2.pdf"); } \ No newline at end of file From cff2ca226d8c799ba1d5becc2154f6096b8112dd Mon Sep 17 00:00:00 2001 From: Maurik Holtrop Date: Tue, 29 Oct 2024 18:56:41 +0000 Subject: [PATCH 2/2] Some cleanup of comments and code simplification. --- extensions/dataframes/RHipoDS.cxx | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/extensions/dataframes/RHipoDS.cxx b/extensions/dataframes/RHipoDS.cxx index 4bbfa11..8e8fa35 100644 --- a/extensions/dataframes/RHipoDS.cxx +++ b/extensions/dataframes/RHipoDS.cxx @@ -413,9 +413,6 @@ void RHipoDS::ClearData(int slot){ return; } -// for(auto &vecvec: fVecCharData){ -// vecvec[slot].clear(); -// } for(auto &vecvec: fVecShortData){ vecvec[slot].clear(); } @@ -482,18 +479,11 @@ bool RHipoDS::SetEntry(unsigned int slot, ULong64_t entry){ // So here, we can assume that the bank has the data we are interested in. int nrows = fBanks[bank_index].getRows(); int data_index = fIndexToData[active_index]; - char cnum = 0; - short snum = 0; if( fColumnTypeIsVector[col_index] ){ for(int irow=0; irow < nrows; ++irow) { switch (fColumnType.at(col_index)) { - case 1: // vector - cnum = fBanks[bank_index].getByte(fColumnItem[col_index], irow); - fVecShortData.at(data_index).at(slot).push_back( (short) cnum ); -// short snum = fBanks[bank_index].getByte(fColumnItem[col_index], irow); -// fVecCharData.at(data_index).at(slot).push_back(cnum); - break; - case 2: // + case 1: // vector -- is upcast to short. + case 2: // vector fVecShortData.at(data_index).at(slot).push_back( (short) fBanks[bank_index].getShort(fColumnItem[col_index], irow)); break; case 3: // vector