diff --git a/_episodes/06-developing-parallel-workflows.md b/_episodes/06-developing-parallel-workflows.md index f6e3bb6..2e327be 100644 --- a/_episodes/06-developing-parallel-workflows.md +++ b/_episodes/06-developing-parallel-workflows.md @@ -64,7 +64,7 @@ stages: data: outfilename environment: environment_type: 'docker-encapsulated' - image: 'reanahub/reana-env-root6' + image: 'docker.io/reanahub/reana-env-root6' imagetag: '6.18.04' - name: fitdata dependencies: [gendata] @@ -84,10 +84,10 @@ stages: plot: outfile environment: environment_type: 'docker-encapsulated' - image: 'reanahub/reana-env-root6' + image: 'docker.io/reanahub/reana-env-root6' imagetag: '6.18.04' ``` - +{: .source} We can see that the workflow consists of two steps: ``gendata``, which does not depend on anything (``[init]``), and ``fitdata``, which depends on ``gendata``. This is how linear workflows are expressed @@ -101,8 +101,11 @@ How can we run the example on REANA platform? We have to instruct REANA that we Yadage as our workflow engine. 
We can do that by editing ``reana.yaml`` and specifying: ```yaml -version: 0.6.0 inputs: + files: + - code/gendata.C + - code/fitdata.C + - workflow.yaml parameters: events: 20000 gendata: code/gendata.C @@ -114,12 +117,14 @@ outputs: files: - fitdata/plot.png ``` +{: .source} We now can run this example on REANA in the usual way: ```bash -$ reana-client run -w roofityadage -f reana-yadage.yaml +reana-client run -w roofityadage ``` +{: .source} > ## Exercise > @@ -133,15 +138,15 @@ $ reana-client run -w roofityadage -f reana-yadage.yaml > Nothing changes in the usual user interaction with the REANA platform: > > ```bash -> $ reana-client create -w roofityadage -f ./reana-yadage.yaml -> $ reana-client upload ./code -w roofityadage -> $ reana-client start -w roofityadage -> $ reana-client status -w roofityadage -> $ reana-client logs -w roofityadage -> $ reana-client ls -w roofityadage -> $ reana-client download plot.png -w roofityadage +> reana-client create -w roofityadage -f ./reana-yadage.yaml +> reana-client upload ./code -w roofityadage +> reana-client start -w roofityadage +> reana-client status -w roofityadage +> reana-client logs -w roofityadage +> reana-client ls -w roofityadage +> reana-client download plot.png -w roofityadage > ``` -> +> {: .source} {: .solution} ## Physics code vs orchestration code @@ -203,6 +208,7 @@ stages: output_dir: '{workdir}/output' step: {$ref: 'steps.yaml#/plot'} ``` +{: .source} where steps are expressed as: @@ -267,6 +273,7 @@ fit: publish: fitting_plot: '{output_dir}/fit.png' ``` +{: .source} The workflow definition is similar to that of the Serial workflow, and, as we can see, it can already lead to certain parallelism, because the fitting step and the plotting step can run @@ -287,23 +294,27 @@ Let us try to run it on REANA cloud. 
> ## Solution > -> ```yaml -> $ vim workflow.yaml # take contents above and store it as workflow.yaml -> $ vim steps.yaml # take contents above and store it as steps.yaml -> $ vim reana.yaml # this was the task -> $ cat reana.yaml -> version: 0.6.0 +> ```bash +> mkdir awesome-analysis-yadage-simple +> cd awesome-analysis-yadage-simple +> vim workflow.yaml # take contents above and store it as workflow.yaml +> vim steps.yaml # take contents above and store it as steps.yaml +> vim reana.yaml # to create this file was the task +> cat reana.yaml +> ``` +> {: .source} +> ``` > inputs: +> files: +> - steps.yaml +> - workflow.yaml > parameters: > input_dir: root://eospublic.cern.ch//eos/root-eos/HiggsTauTauReduced > workflow: > type: yadage > file: workflow.yaml -> outputs: -> files: -> - fit/output/fit.png > ``` -> +> {: .output} {: .solution} ## Parallelism via scatter-gather paradigm @@ -348,6 +359,7 @@ stages: parameters: input: {stages: 'map2', output: outputA} ``` +{: .source} Note the "scatter" happening over "input" with a wanted batch size. 
diff --git a/fig/awesome-analysis-yadage-simple/reana.yaml b/fig/awesome-analysis-yadage-simple/reana.yaml index 62ab63e..38999a6 100644 --- a/fig/awesome-analysis-yadage-simple/reana.yaml +++ b/fig/awesome-analysis-yadage-simple/reana.yaml @@ -1,10 +1,9 @@ -version: 0.6.0 inputs: + files: + - steps.yaml + - workflow.yaml parameters: input_dir: root://eospublic.cern.ch//eos/root-eos/HiggsTauTauReduced workflow: type: yadage file: workflow.yaml -outputs: - files: - - outputs/statanalysis/fitresults/pre.png diff --git a/fig/roofit-analysis-yadage/code/fitdata.C b/fig/roofit-analysis-yadage/code/fitdata.C new file mode 100644 index 0000000..d22186e --- /dev/null +++ b/fig/roofit-analysis-yadage/code/fitdata.C @@ -0,0 +1,55 @@ +#ifndef __CINT__ +#include "RooGlobalFunc.h" +#endif +#include "RooRealVar.h" +#include "RooDataSet.h" +#include "RooGaussian.h" +#include "RooChebychev.h" +#include "RooAddPdf.h" +#include "RooExtendPdf.h" +#include "TCanvas.h" +#include "TAxis.h" +#include "RooPlot.h" +using namespace RooFit ; +// fitdata: open ROOT file `input`, fit pdf "model" to dataset "modelData" from workspace "w", and save the plot of x to `output`. +void fitdata(const char* input, const char* output) +{ + // Open input file with workspace (in this workflow: the file written by gendata.C) + TFile *f = new TFile(input) ; + + // Retrieve workspace from file (NOTE(review): f and w are used without a null check) + RooWorkspace* w = (RooWorkspace*) f->Get("w") ; + + // Retrieve x, model and data from workspace + RooRealVar* x = w->var("x") ; + RooAbsPdf* model = w->pdf("model") ; + RooAbsData* data = w->data("modelData") ; + + // Fit model to data, extended ML term automatically included + model->fitTo(*data) ; + + // Plot data and PDF overlaid + RooPlot* xframe = x->frame(Title("Fit example")) ; + data->plotOn(xframe) ; + model->plotOn(xframe,Normalization(1.0,RooAbsReal::RelativeExpected)) ; + + // Overlay the background component of model with a dashed line + model->plotOn(xframe,Components("bkg"),LineStyle(kDashed),Normalization(1.0,RooAbsReal::RelativeExpected)) ; + + // (disabled) Overlay the background components of model with a dotted line + 
//model->plotOn(xframe,Components(RooArgSet("bkg")),LineStyle(kDotted),Normalization(1.0,RooAbsReal::RelativeExpected)) ; + + // (disabled) Print structure of composite p.d.f. + //model.Print("t") ; + // Draw the frame on the canvas + TCanvas res("rf202_composite","rf202_composite",600,600) ; + gPad->SetLeftMargin(0.15) ; + xframe->GetYaxis()->SetTitleOffset(1.4) ; + xframe->Draw(); + + res.Update(); + res.SaveAs(output); + res.Close(); + + +} diff --git a/fig/roofit-analysis-yadage/code/gendata.C b/fig/roofit-analysis-yadage/code/gendata.C new file mode 100644 index 0000000..8397d4f --- /dev/null +++ b/fig/roofit-analysis-yadage/code/gendata.C @@ -0,0 +1,57 @@ +#ifndef __CINT__ +#include "RooGlobalFunc.h" +#endif +#include "RooRealVar.h" +#include "RooDataSet.h" +#include "RooGaussian.h" +#include "RooChebychev.h" +#include "RooAddPdf.h" +#include "RooExtendPdf.h" +#include "TCanvas.h" +#include "TAxis.h" +#include "RooPlot.h" +using namespace RooFit ; +// gendata: generate `numevents` events from a Gaussian signal plus Chebychev background model and write model and data (workspace "w") to ROOT file `outfilename`. +void gendata(int numevents, const char* outfilename) +{ + // Declare observable x + RooRealVar x("x","x",0,10) ; + + // Create Gaussian PDF sig1(x,mean,sigma1) and its parameters (second Gaussian sig2/sigma2 is commented out) + RooRealVar mean("mean","mean of gaussians",5) ; + RooRealVar sigma1("sigma1","width of gaussians",0.5) ; + // RooRealVar sigma2("sigma2","width of gaussians",1) ; + + RooGaussian sig1("sig1","Signal component 1",x,mean,sigma1) ; + //RooGaussian sig2("sig2","Signal component 2",x,mean,sigma2) ; + + // Build Chebychev polynomial p.d.f. NOTE(review): a1 starts at -0.2, outside its declared range [0.,1.] - confirm this is intended + RooRealVar a0("a0","a0",0.5,0.,1.) ; + RooRealVar a1("a1","a1",-0.2,0.,1.) ; + RooChebychev bkg("bkg","Background",x,RooArgSet(a0,a1)) ; + + // Build the composite signal p.d.f. (only sig1 is active; the two-component version is commented out) + RooRealVar sig1frac("sig1frac","fraction of component 1 in signal",0.8,0.,1.) 
; + //RooAddPdf sig("sig","Signal",RooArgList(sig1,sig2),sig1frac) ; + RooAddPdf sig("sig","Signal",RooArgList(sig1),sig1frac) ; + + // Sum the composite signal and background into an extended pdf nsig*sig+nbkg*bkg + RooRealVar nsig("nsig","number of signal events",500,0.,10000) ; + RooRealVar nbkg("nbkg","number of background events",500,0,10000) ; + RooAddPdf model("model","(g1+g2)+a",RooArgList(bkg,sig),RooArgList(nbkg,nsig)) ; + // Generate numevents events of observable x from the model + RooDataSet *data = model.generate(x, numevents) ; + + // Create a new workspace + RooWorkspace *w = new RooWorkspace("w","workspace") ; + w->import(model) ; + w->import(*data) ; + + // Print workspace contents + w->Print() ; + // Save the workspace into a ROOT file + w->writeToFile(outfilename) ; + // Workspace will remain in memory after macro finishes + gDirectory->Add(w) ; + +} diff --git a/fig/roofit-analysis-yadage/reana.yaml b/fig/roofit-analysis-yadage/reana.yaml new file mode 100644 index 0000000..a6ec4a6 --- /dev/null +++ b/fig/roofit-analysis-yadage/reana.yaml @@ -0,0 +1,16 @@ +inputs: + files: + - code/gendata.C + - code/fitdata.C + - workflow.yaml + parameters: + events: 20000 + gendata: code/gendata.C + fitdata: code/fitdata.C +workflow: + type: yadage + file: workflow.yaml +outputs: + files: + - fitdata/plot.png + diff --git a/fig/roofit-analysis-yadage/workflow.yaml b/fig/roofit-analysis-yadage/workflow.yaml new file mode 100644 index 0000000..ac6b0d7 --- /dev/null +++ b/fig/roofit-analysis-yadage/workflow.yaml @@ -0,0 +1,42 @@ +stages: + - name: gendata + dependencies: [init] + scheduler: + scheduler_type: 'singlestep-stage' + parameters: + events: {step: init, output: events} + gendata: {step: init, output: gendata} + outfilename: '{workdir}/data.root' + step: + process: + process_type: 'interpolated-script-cmd' + script: root -b -q '{gendata}({events},"{outfilename}")' + publisher: + publisher_type: 'frompar-pub' + outputmap: + data: outfilename + environment: + environment_type: 'docker-encapsulated' + image: 
'docker.io/reanahub/reana-env-root6' + imagetag: '6.18.04' + - name: fitdata + dependencies: [gendata] + scheduler: + scheduler_type: 'singlestep-stage' + parameters: + fitdata: {step: init, output: fitdata} + data: {step: gendata, output: data} + outfile: '{workdir}/plot.png' + step: + process: + process_type: 'interpolated-script-cmd' + script: root -b -q '{fitdata}("{data}","{outfile}")' + publisher: + publisher_type: 'frompar-pub' + outputmap: + plot: outfile + environment: + environment_type: 'docker-encapsulated' + image: 'docker.io/reanahub/reana-env-root6' + imagetag: '6.18.04' +