Skip to content

Commit

Permalink
Merge pull request #14 from brucala/CMGTools-from-CMSSW_7_2_3-crab
Browse files Browse the repository at this point in the history
tools and integration for CRAB production
  • Loading branch information
brucala committed Mar 14, 2015
2 parents c495ea4 + 90a4c2b commit 97d1856
Show file tree
Hide file tree
Showing 49 changed files with 473 additions and 649 deletions.
2 changes: 1 addition & 1 deletion CMGTools/TTHAnalysis/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
<use name="CommonTools/Utils"/>
<use name="TrackingTools/IPTools"/>
<use name="MagneticField/UniformEngine"/>
<use name="MagneticField/ParametrizedEngine"/>
<use name="EgammaAnalysis/ElectronTools"/>
<use name="SimDataFormats/GeneratorProducts"/>
<use name="RecoVertex/KalmanVertexFit"/>
<use name="rootrflx"/>
<use name="roottmva"/>
<use name="rootminuit2"/>
<use name="lhapdf"/>
<flags LDFLAGS="/afs/cern.ch/cms/slc6_amd64_gcc481/cms/cmssw/CMSSW_7_0_2/lib/slc6_amd64_gcc481/pluginMagneticFieldParametrizedEnginePlugins.so" />
<export>
<lib name="1"/>
</export>
17 changes: 17 additions & 0 deletions CMGTools/TTHAnalysis/cfg/crab/FrameworkJobReport.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<FrameworkJobReport>
<ReadBranches>
</ReadBranches>
<PerformanceReport>
<PerformanceSummary Metric="StorageStatistics">
<Metric Name="Parameter-untracked-bool-enabled" Value="true"/>
<Metric Name="Parameter-untracked-bool-stats" Value="true"/>
<Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>
<Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>
<Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>
<Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>
</PerformanceSummary>
</PerformanceReport>

<GeneratorInfo>
</GeneratorInfo>
</FrameworkJobReport>
14 changes: 14 additions & 0 deletions CMGTools/TTHAnalysis/cfg/crab/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
1) edit ../run_susyMT2.cfg to run over your favorite components

2) edit launchall.py to change the CMG-version/tag and the production name

3) run!!!
> voms-proxy-init -voms cms --valid=50:00
Enter GRID pass phrase for this identity: <your GRID pass phrase>
> python launchall.py


Notes:
- debugging: choose a single component and set debug=True in heppy_crab_config_env.py

- modify heppy_crab_config.py to run only on your favorite sites
1 change: 1 addition & 0 deletions CMGTools/TTHAnalysis/cfg/crab/heppy_config.py
30 changes: 30 additions & 0 deletions CMGTools/TTHAnalysis/cfg/crab/heppy_crab_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from WMCore.Configuration import Configuration
import os

# Fixed part of the CRAB configuration. The per-dataset options are added
# on top of this object by heppy_crab_config_env.py.
config = Configuration()

# ---- General ----
config.section_("General")
config.General.transferLogs = True

# ---- JobType ----
config.section_("JobType")
config.JobType.pluginName = 'PrivateMC'
config.JobType.psetName = 'heppy_crab_fake_pset.py'
config.JobType.scriptExe = 'heppy_crab_script.sh'
# config.JobType.sendPythonFolder = True #doesn't work, not supported yet? do it by hand

# Pack by hand everything the job script unpacks on the worker node:
# the CMSSW python area, the local .cmgdataset cache and the CAF python area.
for tar_command in (
    "tar czf python.tar.gz --dereference --directory $CMSSW_BASE python",
    "tar czf cmgdataset.tar.gz --directory $HOME .cmgdataset",
    "tar czf cafpython.tar.gz --directory /afs/cern.ch/cms/caf/ python",
):
    os.system(tar_command)

config.JobType.inputFiles = [
    'FrameworkJobReport.xml',
    'heppy_config.py',
    'heppy_crab_script.py',
    'cmgdataset.tar.gz',
    'python.tar.gz',
    'cafpython.tar.gz',
]
# mt2.root is sent automatically because it is declared in the pset file
config.JobType.outputFiles = ['output.log.tgz']

# ---- Data ----
config.section_("Data")
config.Data.inputDBS = 'global'
config.Data.splitting = 'EventBased'
config.Data.outLFN = '/store/user/' + os.environ["USER"]
config.Data.publication = False

# ---- Site ----
config.section_("Site")
#config.Site.whitelist = ["T2_CH_CSCS"]
# Sites currently left out of the whitelist:
#   "T2_FR_IPHC", "T2_BE_IIHE", "T2_IT_Pisa", "T2_ES_IFCA", "T2_PK_NCP",
#   "T2_US_Purdue", "T2_UA_KIPT", "T2_KR_KNU", "T2_RU_IHEP", "T2_RU_INR",
#   "T2_RU_JINR", "T2_US_MIT", "T2_US_Wisconsin", "T2_US_UCSD",
#   "T2_US_Vanderbilt", "T2_US_Caltech"
config.Site.whitelist = [
    "T2_CH_CSCS",
    "T2_IT_Legnaro",
    "T2_UK_London_IC",
    "T2_UK_SGrid_Bristol",
    "T2_DE_DESY",
    "T2_ES_CIEMAT",
    "T2_IT_Rome",
    "T2_UK_London_Brunel",
    "T2_AT_Vienna",
    "T2_DE_RWTH",
    "T2_FR_GRIF_IRFU",
    "T2_HU_Budapest",
]
config.Site.storageSite = "T3_CH_PSI"
#config.Data.ignoreLocality = True

47 changes: 47 additions & 0 deletions CMGTools/TTHAnalysis/cfg/crab/heppy_crab_config_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# here we set all crab options that are not fixed
# values we'll be taken from environment variables set in launchall.py
# fixed options will be taken from heppy_crab_config.py

debug = False

import imp
file = open( "heppy_crab_config.py", 'r' )
cfg = imp.load_source( 'cfg', "heppy_crab_config.py", file)
config = cfg.config
import os
import re
dataset=os.environ["DATASET"]
NJOBS=int(os.environ["NJOBS"])
production_label = os.environ["PROD_LABEL"]
cmg_version = os.environ["CMG_VERSION"]

print dataset, NJOBS

if debug:
NJOBS = 2
NEVENTS = 200

config.General.requestName = dataset + "_" + cmg_version # task name
config.General.workArea = 'crab_' + production_label # crab dir name

# this will divide task in *exactly* NJOBS jobs (for this we need JobType.pluginName = 'PrivateMC' and Data.splitting = 'EventBased')
config.Data.unitsPerJob = 10
config.Data.totalUnits = config.Data.unitsPerJob * NJOBS

# arguments to pass to scriptExe. They have to be like "arg=value".
config.JobType.scriptArgs = ["dataset="+dataset, "total="+str(NJOBS)]

# output will be .../$outLFN/$PRIMARY_DS/$PUBLISH_NAME/$TIMESTAMP/$COUNTER/$FILENAME
# https://twiki.cern.ch/twiki/bin/view/CMSPublic/Crab3DataHandling
config.Data.outLFN += '/babies/' + cmg_version
config.Data.primaryDataset = production_label
config.Data.publishDataName = dataset
#final output: /store/user/$USER/babies/cmg_version/production_label/dataset/150313_114158/0000/foo.bar

# if NEVENTS variable is set then only nevents will be run
try:
NEVENTS
except NameError:
pass
else:
config.JobType.scriptArgs += ["nevents="+str(NEVENTS)]
6 changes: 6 additions & 0 deletions CMGTools/TTHAnalysis/cfg/crab/heppy_crab_fake_pset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import FWCore.ParameterSet.Config as cms

# Placeholder cmsRun configuration handed to CRAB as JobType.psetName.
# The output module declares 'mt2.root', the file name that
# heppy_crab_script.py gives to the job output.
process = cms.Process('FAKE')

process.source = cms.Source(
    "PoolSource",
    fileNames = cms.untracked.vstring(),
)

process.maxEvents = cms.untracked.PSet(
    input = cms.untracked.int32(10),
)

process.output = cms.OutputModule(
    "PoolOutputModule",
    fileName = cms.untracked.string('mt2.root'),
)

process.out = cms.EndPath(process.output)
101 changes: 101 additions & 0 deletions CMGTools/TTHAnalysis/cfg/crab/heppy_crab_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env python
import os
import PhysicsTools.HeppyCore.framework.config as cfg
#cfg.Analyzer.nosubdir=True

import ROOT
from DataFormats.FWLite import *
import sys
import re
#import PSet

# Load the shared libraries used by the job through ROOT's gSystem,
# then enable the FWLite AutoLibraryLoader.
ROOT.gSystem.Load("libCMGToolsTTHAnalysis")
ROOT.gSystem.Load("libFWCoreFWLite.so")
ROOT.gSystem.Load("libDataFormatsFWLite.so")
ROOT.gSystem.Load("libPhysicsToolsHeppy")
ROOT.gSystem.Load("libCMGToolsRootTools")
ROOT.AutoLibraryLoader.enable()

dataset = ""
total = 0 # total number of jobs for given dataset, not used at the moment
nevents = None # this means run all events
nprint = 0 # quiet printout, change if you want to print the first nprint events

# arguments of scriptExe
print "ARGV:",sys.argv
JobNumber=sys.argv[1] # 1st crab argument is jobID
job = int(JobNumber)
# if one wants to include more options to be passed to the crab scriptExe add a corresponding argument below
# crab only allows arguments of the type 'arg=value'
for arg in sys.argv[2:]:
if arg.split("=")[0] == "dataset": # this argument is strictly necessary
dataset = arg.split("=")[1]
elif arg.split("=")[0] == "total":
total = int(arg.split("=")[1])
elif arg.split("=")[0] == "nevents":
nevents = int(arg.split("=")[1])
print "selected to run over", nevents, "events"

print "dataset:", dataset
print "job", job , " out of", total

# fetch config file
import imp
handle = open("heppy_config.py", 'r')
cfo = imp.load_source("heppy_config", "heppy_config.py", handle)
config = cfo.config
handle.close()

# pick right component from dataset and file from jobID
selectedComponents = []
for comp in config.components:
if comp.name == dataset:
comp.files = comp.files[job-1: job] # first job number is 1
comp.name = comp.name+"_Chunk"+str(job)
selectedComponents.append(comp)

# check selectedComponents
if len(selectedComponents) == 0:
print "No selected components found!!"
print " - dataset:", dataset
print " - components:", config.components
if len(selectedComponents)>1:
print "More than one selected component:"
cfg.printComps(selectedComponents)
else:
print "Selected component:"
print selectedComponents[0]
print "files: ", selectedComponents[0].files

# set component to run
config.components = selectedComponents

# run!!!
from PhysicsTools.HeppyCore.framework.looper import Looper
looper = Looper( 'Output', config, nPrint = nprint, nEvents = nevents)
looper.loop()
looper.write()

#os.system("ls -lR") # for debugging

# assign the right name
os.rename("Output/mt2.root", "mt2.root")

# print in crab log file the content of the job log files, so one can see it from 'crab getlog'
print "-"*25
print "printing output txt files"
os.system('for i in Output/*.txt; do echo $i; cat $i; echo "---------"; done')

# pack job log files to be sent to output site
os.system("tar czf output.log.tgz Output/")
#os.system("mkdir log")
#os.rename("output.log.tgz log/output.log.tgz")


import ROOT
f=ROOT.TFile.Open('mt2.root')
entries=f.Get('tree').GetEntries()
f.Close()

print entries, "events processed"
print "job succesful"
32 changes: 32 additions & 0 deletions CMGTools/TTHAnalysis/cfg/crab/heppy_crab_script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Wrapper run on the worker node as the CRAB scriptExe: it unpacks the
# tarballs shipped with the job, copies libraries and auxiliary data into
# the CMSSW area and then runs heppy_crab_script.py with the job arguments.

# extract exported necessary stuff
tar xzf cmgdataset.tar.gz --directory "$HOME"
tar xzf python.tar.gz --directory "$CMSSW_BASE"
tar xzf cafpython.tar.gz --directory "$CMSSW_BASE"

# uncomment for debugging purposes

#ls -lR .
#echo "ARGS:"
#echo "$@"
#echo "ENV..................................."
#env
#echo "VOMS"
#voms-proxy-info -all
#echo "CMSSW BASE, python path, pwd, home"
#echo $CMSSW_BASE
#echo $PYTHONPATH
#echo $PWD
#echo $HOME

# copy auxiliary data to the right place (json, pu, lep eff, jet corr, ...)
cp lib/slc*/* "$CMSSW_BASE"/lib/slc*
# read the paths line by line so directory names are never word-split
find src/ -name data -type d | while IFS= read -r i
do
  echo "$i"
  mkdir -p "$CMSSW_BASE/$i"
  cp -r "$i"/* "$CMSSW_BASE/$i"
done

#ls -lR

# "$@" forwards every argument unchanged (jobID followed by the arg=value pairs)
python heppy_crab_script.py "$@"
29 changes: 29 additions & 0 deletions CMGTools/TTHAnalysis/cfg/crab/launchall.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import imp, os
import subprocess
import sys

# datasets to run as defined from run_susyMT2.cfg
# right now configured for maximal job splitting
# in principle one only needs to modify the following two lines:
production_label = "fullProd_test3"
cmg_version = 'MT2_CMGTools-from-CMSSW_7_2_3'

# load the heppy configuration to get the list of components to submit
handle = open("heppy_config.py", 'r')
try:
    cfo = imp.load_source("heppy_config", "heppy_config.py", handle)
finally:
    handle.close()
conf = cfo.config

# The CMSSW and CRAB environments have to be set up in the same shell that
# runs "crab submit". Running them through separate os.system() calls has no
# effect, because each call gets its own shell and that shell's environment
# is gone when the call returns. The setup steps are joined with ';' so that
# a failing setup step does not prevent a submission from an already
# configured session.
submit_command = (
    "eval `scramv1 runtime -sh`; "
    "source /cvmfs/cms.cern.ch/crab3/crab.sh; "
    "crab submit -c heppy_crab_config_env.py"
)

# options read back by heppy_crab_config_env.py
os.environ["PROD_LABEL"] = production_label
os.environ["CMG_VERSION"] = cmg_version

failed = []
for comp in conf.components:
    #set maximal splitting: one job per input file
    NJOBS = len(comp.files)
    os.environ["NJOBS"] = str(NJOBS)
    os.environ["DATASET"] = str(comp.name)
    status = subprocess.call(["bash", "-c", submit_command])
    if status != 0:
        failed.append(str(comp.name))

# remove the tarballs created by heppy_crab_config.py at submission time
for tarball in ("python.tar.gz", "cmgdataset.tar.gz", "cafpython.tar.gz"):
    if os.path.exists(tarball):
        os.remove(tarball)

# report submissions that did not succeed instead of silently ignoring them
if failed:
    sys.stderr.write("crab submit failed for: " + ", ".join(failed) + "\n")
3 changes: 2 additions & 1 deletion CMGTools/TTHAnalysis/python/samples/samples_13TeV_PHYS14.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,8 @@

#-----------DATA---------------

dataDir = os.environ['CMSSW_BASE']+"/src/CMGTools/TTHAnalysis/data"
#dataDir = os.environ['CMSSW_BASE']+"/src/CMGTools/TTHAnalysis/data"
dataDir = "$CMSSW_BASE/src/CMGTools/TTHAnalysis/data" # use environmental variable, useful for instance to run on CRAB
#lumi: 12.21+7.27+0.134 = 19.62 /fb @ 8TeV

json=dataDir+'/json/Cert_Run2012ABCD_22Jan2013ReReco.json'
Expand Down
2 changes: 1 addition & 1 deletion CMGTools/TTHAnalysis/src/SignedImpactParameter.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "CMGTools/TTHAnalysis/interface/SignedImpactParameter.h"
#include "MagneticField/UniformEngine/src/UniformMagneticField.h"
#include "MagneticField/ParametrizedEngine/plugins/OAEParametrizedMagneticField.h"
#include "MagneticField/ParametrizedEngine/src/OAEParametrizedMagneticField.h"
#include "TrackingTools/PatternTools/interface/TwoTrackMinimumDistance.h"
#include "TrackingTools/TransientTrack/interface/TransientTrack.h"
#include "TrackingTools/IPTools/interface/IPTools.h"
Expand Down
9 changes: 9 additions & 0 deletions MagneticField/ParametrizedEngine/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<use name="DataFormats/GeometryVector"/>
<!--use name="FWCore/Framework"/-->
<use name="FWCore/ParameterSet"/>
<use name="MagneticField/Engine"/>
<use name="MagneticField/UniformEngine"/>
<use name="MagneticField/Records"/>
<export>
<lib name="1"/>
</export>
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#ifndef ParametrizedMagneticFieldFactory_h
#define ParametrizedMagneticFieldFactory_h

/** \class ParametrizedMagneticFieldFactory
*
* Create a parametrized field map with the specified configuration.
*
* \author N. Amapane - Torino
*/


#include <MagneticField/Engine/interface/MagneticField.h>
#include <vector>
#include <string>
#include <memory>

namespace edm{
class ParameterSet;
}

namespace magneticfield{
class ParametrizedMagneticFieldProducer;
class VolumeBasedMagneticFieldESProducerFromDB;
}

class ParametrizedMagneticFieldFactory {
public:
  /// Default constructor.
  ParametrizedMagneticFieldFactory();

private:
  // The factory functions below are private; only these two producer
  // classes are granted access to them.
  friend class magneticfield::VolumeBasedMagneticFieldESProducerFromDB;
  friend class magneticfield::ParametrizedMagneticFieldProducer;

  /// Get the map selected by \a version, configured from the type name
  /// and a list of numerical parameters.
  static std::auto_ptr<MagneticField>
  get(std::string version, std::vector<double> parameters);

  /// Get the map selected by \a version, configured from a parameter set
  /// (deprecated).
  static std::auto_ptr<MagneticField>
  get(std::string version, const edm::ParameterSet& parameters);
};
#endif

1 change: 1 addition & 0 deletions MagneticField/ParametrizedEngine/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
<use name="FWCore/ParameterSet"/>
<use name="MagneticField/Engine"/>
<use name="MagneticField/Records"/>
<use name="MagneticField/ParametrizedEngine"/>
<library file="*.cc" name="MagneticFieldParametrizedEnginePlugins">
<flags EDM_PLUGIN="1"/>
</library>
Loading

0 comments on commit 97d1856

Please sign in to comment.