KaliVeda  1.12/06
Heavy-Ion Analysis Toolkit
KV_CCIN2P3_Slurm.cpp
Go to the documentation of this file.
1 //Created by KVClassFactory on Mon Jan 24 16:54:04 2022
2 //Author: John Frankland,,,
3 
4 #include "KV_CCIN2P3_Slurm.h"
5 #include "TSystem.h"
6 #include "TEnv.h"
7 #include "KVDataAnalyser.h"
8 #include "KVDataAnalysisTask.h"
9 #include "KVGEBatchJob.h"
10 #include "KVDataRepository.h"
11 #include "KVDataSetAnalyser.h"
12 #include "KVSimDirAnalyser.h"
13 
14 using namespace std;
15 
17 
18 
19 
24  : KVBatchSystem(name), fMultiJobs(kTRUE)
25 {
26  //Default constructor: forwards 'name' to KVBatchSystem and starts with multi-job mode enabled
27  //Sets default job time and memory from gEnv resources (as defined in $KVROOT/KVFiles/.kvrootrc)
    //Fallback values when a resource is not defined: "00:05:00" (time) and "3G" (memory).
    //NOTE(review): the resource names carry a "GE." prefix although this class is named for
    //Slurm - presumably shared with the Grid Engine interface; confirm.
28 
29  fDefJobTime = gEnv->GetValue("GE.BatchSystem.DefaultJobTime", "00:05:00");
30  fDefJobMem = gEnv->GetValue("GE.BatchSystem.DefaultJobMemory", "3G");
    //neither time nor memory has been requested yet: CheckJobParameters() prompts for them
31  fTimeSet = fMemSet = kFALSE;
32  //default number of runs per job in multi jobs mode (default=1)
33  SetRunsPerJob(gEnv->GetValue("GE.BatchSystem.RunsPerJob", 1));
34 }
35 
36 
37 
38 
41 
43 {
44  //Clear previously set parameters in order to create a new job submission command
    //Job time and memory are marked as "not set" again, so that CheckJobParameters()
    //will ask for them, and multi-job mode is switched back on.
46  fTimeSet = fMemSet = kFALSE;
47  fMultiJobs = kTRUE;
48 }
49 
50 
51 
52 
55 
56 KV_CCIN2P3_Slurm::~KV_CCIN2P3_Slurm()
57 {
58  //Destructor: empty body, nothing is released explicitly here
59 }
60 
61 
62 
66 
68 {
69  //Set CPU time for batch job.
70  // SetJobTime() => use default time (fDefJobTime); an empty string has the same effect
    // The value is stored in fParList under the key "--time " (note the trailing space)
    // and fTimeSet is raised, even when the default value was used.
71  KVString tmp(time);
72  if (tmp == "") tmp = fDefJobTime;
73  //time is normalised to "hh:mm:ss": missing leading fields are filled with "00"
    //(assumes GetNValues(":") returns the number of ':'-separated fields - TODO confirm)
    //  "mm:ss" -> "00:mm:ss"
74  if (tmp.GetNValues(":") == 2) tmp.Prepend("00:");
    //  "ss"    -> "00:00:ss"
75  else if (tmp.GetNValues(":") == 1) tmp.Prepend("00:00:");
76  fParList.SetValue("--time ", tmp);
77  fTimeSet = kTRUE;
78 }
79 
80 
81 
86 
88 {
89  //Set maximum memory used by job.
90  //Include units in string, i.e. "100M", "1G" etc.
91  //If mem="", use default value (fDefJobMem)
    //The string is stored as given (no validation is performed here) in fParList under
    //the key "--mem " (note the trailing space), and fMemSet is raised.
92  KVString tmp(mem);
93  if (tmp == "") tmp = fDefJobMem;
94  fParList.SetValue("--mem ", tmp);
95  fMemSet = kTRUE;
96 }
97 
98 
99 
102 
104 {
105  //Print list of owner's jobs.
     //The list returned by GetListOfJobs() is printed with ls() and then deleted here.
     //NOTE(review): the pointer is dereferenced without a null check - confirm that
     //GetListOfJobs() can never return a null pointer.
106  KVList* j = GetListOfJobs();
107  j->ls();
108  delete j;
109 }
110 
111 
112 
115 
117 {
118  // Checks the job and asks for any missing parameters
     // Job time and memory are requested interactively (ChooseJobTime/ChooseJobMemory)
     // only if they have not already been set, i.e. if fTimeSet/fMemSet are false.
     // The visible code always ends by returning kTRUE.
119 
121 
122  if (!fTimeSet) ChooseJobTime();
123 
124  if (!fMemSet) ChooseJobMemory();
125 
126  return kTRUE;
127 }
128 
129 
130 
132 
134 {
     //Interactive choice of the job time: prints a prompt showing the default value
     //(fDefJobTime) on cout, then reads the reply from cin.
135  KVString tmp = "";
136  cout << "Enter max CPU time per job (ss/mn:ss/hh:mn:ss) ["
137  << fDefJobTime << "] : ";
     //flush so that the prompt is displayed before we wait for input
138  cout.flush();
139  tmp.ReadToDelim(cin);
     //empty reply => SetJobTime() with no argument, i.e. the default time is used
140  if (!tmp.Length()) {
141  SetJobTime();
142  return;
143  } else
144  SetJobTime(tmp);
145 }
146 
147 
148 
150 
152 {
     //Interactive choice of the job memory: prints a prompt showing the default value
     //(fDefJobMem) on cout, then reads the reply from cin.
153  KVString tmp = "";
154  cout << "Enter max memory per job (xKB/xMB/xGB) ["
155  << fDefJobMem.Data() << "] : ";
     //flush so that the prompt is displayed before we wait for input
156  cout.flush();
157  tmp.ReadToDelim(cin);
     //an empty reply is passed on as "": SetJobMemory() then uses the default value
158  SetJobMemory(tmp.Data());
159 }
160 
161 
162 
165 
167 {
168 // returns the parameter string corresponding to the job CPU time,
    // i.e. the value stored in fParList under the key "--time " by SetJobTime()
169  return fParList.GetStringValue("--time ");
170 }
171 
172 
173 
176 
178 {
179 // returns the parameter string corresponding to the job Memory,
    // i.e. the value stored in fParList under the key "--mem " by SetJobMemory()
180  return fParList.GetStringValue("--mem ");
181 }
182 
183 
184 
185 
189 
191 {
192  //Store any useful information on batch system in the TEnv
193  //(this method is used by KVDataAnalyser::WriteBatchEnvFile)
     //Values written here:
     //  BatchSystem.MultiJobs       current multi-job mode
     //  BatchSystem.CurrentRunList  run list of the current job (multi-job mode only)
     //  BatchSystem.Time            job time, as returned by GetJobTime()
     //  BatchSystem.Memory          job memory, as returned by GetJobMemory()
195  env->SetValue("BatchSystem.MultiJobs", MultiJobsMode());
196  if (MultiJobsMode()) env->SetValue("BatchSystem.CurrentRunList", fCurrJobRunList.AsString());
197  env->SetValue("BatchSystem.Time", GetJobTime());
198  env->SetValue("BatchSystem.Memory", GetJobMemory());
199  // if analysis of simulated data is being used, we copy the files to analyse to the
200  // scratch disk of the batch job (make sure enough disk space is requested)
     // (the flag is written unconditionally, whatever the type of analysis)
201  env->SetValue("SimDirAnalyser.CopyFilesToWorkingDirectory", true);
202 }
203 
204 
205 
206 
210 
212 {
213  //Read any useful information on batch system from the TEnv
214  //(this method is used by KVDataAnalyser::ReadBatchEnvFile)
     //Counterpart of WriteBatchEnvFile: the same "BatchSystem.*" keys are read back.
     //If "BatchSystem.MultiJobs" is absent, multi-job mode is switched off.
216  SetMultiJobsMode(env->GetValue("BatchSystem.MultiJobs", kFALSE));
217  if (MultiJobsMode()) fCurrJobRunList.SetList(env->GetValue("BatchSystem.CurrentRunList", ""));
     //a missing time or memory entry gives "", for which the setters use the default values
218  SetJobTime(env->GetValue("BatchSystem.Time", ""));
219  SetJobMemory(env->GetValue("BatchSystem.Memory", ""));
220 }
221 
222 
223 
224 
228 
230 {
231  //if option="log", print infos for batch log file
232  //if option="all", print detailed info on batch system
     //In every case the printing is delegated to KVBatchSystem::Print(option); the only
     //addition made here is the "MEM_REQ" line (requested job memory) for option="log".
233  if (!strcmp(option, "log")) {
234  KVBatchSystem::Print(option);
235  cout << "* MEM_REQ: " << GetJobMemory() << " *" << endl;
236  } else
237  KVBatchSystem::Print(option);
238 }
239 
240 
241 
242 
256 
258 {
259  // PRIVATE method called by SubmitTask() at moment of job submission.
260  // Depending on the current environment, the default job submission options
261  // may be changed by this method.
262  //
263  // This method overrides and augments KVBatchSystem::ChangeDefJobOpt (which
264  // changes the options as a function of the type of analysis task).
265  // Here we add the CCIN2P3-specific case where the job is launched from a directory
266  // on the /sps/ semi-permanent storage facility, or if the data being analysed is
267  // stored in a repository on /sps/. In this case we append
268  // the option ' -L sps' to the default job options (unless they already
269  // contain the string "sps")
270  //
     // NOTE(review): 'taskname' is not used in the code visible here - confirm whether it is still needed
272  KVString taskname = da->GetAnalysisTask()->GetName();
     // data repository counts as "on sps" only if its root directory path starts with "/sps/"
274  Bool_t repIsSPS = rootdir.BeginsWith("/sps/");
275 
276  KVString wrkdir(gSystem->WorkingDirectory());
277  KVString oldoptions(GetDefaultJobOptions());
278 
     // nothing to do if the options already mention "sps" (any occurrence of the substring)
279  if (!oldoptions.Contains("sps")) {
     // working directory counts as "on sps" if "/sps/" appears anywhere in its path
280  Bool_t NeedToAddSPS = wrkdir.Contains("/sps/");
281  if ((NeedToAddSPS || repIsSPS)) {
282  oldoptions += " -L sps";
283  SetDefaultJobOptions(oldoptions.Data());
284  }
285  }
286 }
287 
288 
289 
290 
297 
299 {
300  // Batch-system dependent sanitization of jobnames
301  // Grid Engine does not allow:
302  // :
303  // Any such character appearing in the current jobname will be replaced
304  // with '_'
     // NOTE(review): this comment refers to Grid Engine although the class is named for
     // Slurm - confirm that ':' also has to be removed from job names for this batch system.
     // The replacement is made in place in fCurrJobName.
305 
306  fCurrJobName.ReplaceAll(":", "_");
307 }
308 
309 
310 
315 
317 {
318  //Processes the job requests for the batch system.
319  //In normal mode, this submits one job for the data analyser fAnalyser
320  //In multijobs mode, this submits one job for each group of GetRunsPerJob() runs (or files) associated to fAnalyser
321 
     //CheckJobParameters() asks for any missing job time/memory before anything is submitted
322  if (!CheckJobParameters()) return;
323 
324  if (MultiJobsMode()) {
     //NOTE(review): in multijobs mode, an analyser which is neither a KVDataSetAnalyser nor a
     //KVSimDirAnalyser matches neither branch below and no job is submitted - confirm intended.
325  if (fAnalyser->InheritsFrom("KVDataSetAnalyser")) {
326  //submit jobs for every GetRunsPerJob() runs in runlist
     //NOTE(review): result of dynamic_cast is used without a null check; it relies on the
     //InheritsFrom() test above.
327  KVDataSetAnalyser* ana = dynamic_cast<KVDataSetAnalyser*>(fAnalyser);
     //work on a copy of the full run list: the analyser's list is changed for each job below
328  KVNumberList runs = ana->GetRunList();
329  runs.Begin();
330  Int_t remaining_runs = runs.GetNValues();
     //fCurrJobRunList accumulates the runs of the job currently being prepared
331  fCurrJobRunList.Clear();
332  while (remaining_runs && !runs.End()) {
333  Int_t run = runs.Next();
334  remaining_runs--;
335  fCurrJobRunList.Add(run);
336  if ((fCurrJobRunList.GetNValues() == GetRunsPerJob()) || runs.End()) {
337  // submit job for GetRunsPerJob() runs (or less if we have reached end of runlist 'runs')
     // (second argument of SetRuns is its 'check' flag, set to kFALSE here)
338  ana->SetRuns(fCurrJobRunList, kFALSE);
339  ana->SetFullRunList(runs);
340  SubmitJob();
341  fCurrJobRunList.Clear();
342  }
343  }
     //give the complete run list back to the analyser once all jobs have been submitted
344  ana->SetRuns(runs, kFALSE);
345  } else if (fAnalyser->InheritsFrom("KVSimDirAnalyser")) {
346  // here we understand "run" to mean "file"
     //NOTE(review): as above, result of dynamic_cast is used without a null check
347  KVSimDirAnalyser* ana = dynamic_cast<KVSimDirAnalyser*>(fAnalyser);
348  TList* file_list = ana->GetFileList();
349  Int_t remaining_runs = ana->GetNumberOfFilesToAnalyse();
350  fCurrJobRunList.Clear();
     //cur_file_list holds the files of the job currently being prepared
351  TList cur_file_list;
352  TObject* of;
353  TIter it(file_list);
     //files are numbered from 1 in list order; these numbers play the role of run numbers
354  Int_t file_no = 1;
355  while ((of = it())) {
356  cur_file_list.Add(of);
357  fCurrJobRunList.Add(file_no);
358  remaining_runs--;
359  file_no++;
360  if ((fCurrJobRunList.GetNValues() == GetRunsPerJob()) || (remaining_runs == 0)) {
361  // submit job for GetRunsPerJob() files (or less if we have reached end of list)
362  ana->SetFileList(&cur_file_list);
363  SubmitJob();
364  fCurrJobRunList.Clear();
365  cur_file_list.Clear();
366  }
367  }
     //give the complete file list back to the analyser once all jobs have been submitted
368  ana->SetFileList(file_list);
369  }
370  } else {
     //normal mode: a single job for the whole analysis
371  SubmitJob();
372  }
373 
374 }
375 
376 
377 
389 
391 {
392  //Returns name of batch job, either during submission of batch jobs or when an analysis
393  //task is running in batch mode (access through gBatchSystem global pointer).
394  //
395  //In multi-job mode, the job name is generated from the base name set by SetJobName()
396  //plus the extension "_Rxxxx-yyyy" with "xxxx" and "yyyy" the number of the first and last run
397  //which will be analysed by the current job (just "_Rxxxx" if the job has a single run).
398  //
399  // Depending on the batch system, some sanitization of the jobname may be required
400  // e.g. to remove "illegal" characters from the jobname. This is done by SanitizeJobName()
401  // before the jobname is returned.
     //
     // The returned pointer refers to the contents of fCurrJobName, which is overwritten
     // on every call.
402 
403  if (!fAnalyser) {
404  //stand-alone batch submission ?
405  fCurrJobName = fJobName;
406  } else {
407  //replace any special symbols with their current values
408  fCurrJobName = fAnalyser->ExpandAutoBatchName(fJobName.Data());
     //the run extension is only added at submission time, not when the analyser reports
     //that it is running in batch mode
409  if (MultiJobsMode() && !fAnalyser->BatchMode()) {
410  KVString tmp;
     //NOTE(review): First() is also called when fCurrJobRunList is empty - confirm what it
     //returns in that case.
411  if (fCurrJobRunList.GetNValues() > 1)
412  tmp.Form("_R%d-%d", fCurrJobRunList.First(), fCurrJobRunList.Last());
413  else
414  tmp.Form("_R%d", fCurrJobRunList.First());
415  fCurrJobName += tmp;
416  }
417  }
418  SanitizeJobName();
419  return fCurrJobName.Data();
420 }
421 
422 
423 
436 
438 {
439  // Fill the list with all relevant parameters for batch system,
440  // set to their default values.
441  //
442  // Parameters defined here are:
443  // JobTime [string]
444  // JobMemory [string]
445  // MultiJobsMode [bool]
446  // RunsPerJob [int]
447  // EMailOnStart [bool]
448  // EMailOnEnd [bool]
449  // EMailAddress [string]
     //
     // JobTime and JobMemory receive the default values (fDefJobTime, fDefJobMem), not any
     // values previously given to SetJobTime()/SetJobMemory(); MultiJobsMode and RunsPerJob
     // receive the current settings.
     // NOTE(review): the EMail* parameters are not set by the code visible here -
     // presumably they are handled elsewhere; confirm.
450 
452  nl.SetValue("JobTime", fDefJobTime);
453  nl.SetValue("JobMemory", fDefJobMem);
454  nl.SetValue("MultiJobsMode", MultiJobsMode());
455  nl.SetValue("RunsPerJob", fRunsPerJob);
456 }
457 
458 
459 
462 
464 {
465  // Use the parameters in the list to set all relevant parameters for batch system.
     // The parameters read here are "JobTime", "JobMemory", "MultiJobsMode" and "RunsPerJob",
     // i.e. the names filled in by GetBatchSystemParameterList(); each one is passed to the
     // corresponding setter.
466 
468  SetJobTime(nl.GetStringValue("JobTime"));
469  SetJobMemory(nl.GetStringValue("JobMemory"));
470  SetMultiJobsMode(nl.GetBoolValue("MultiJobsMode"));
471  SetRunsPerJob(nl.GetIntValue("RunsPerJob"));
472 }
473 
474 
int Int_t
ClassImp(KVPartitionList) void KVPartitionList
Initialisation.
char Char_t
const Bool_t kFALSE
bool Bool_t
const Bool_t kTRUE
const char Option_t
R__EXTERN TEnv * gEnv
const char rootdir[]
R__EXTERN TSystem * gSystem
Base class for interface to a batch job management system.
Definition: KVBatchSystem.h:77
virtual void WriteBatchEnvFile(TEnv *)
virtual void Print(Option_t *="") const
virtual void ChangeDefJobOpt(KVDataAnalyser *da)
virtual void ReadBatchEnvFile(TEnv *)
virtual void SetBatchSystemParameters(const KVNameValueList &)
Use the parameters in the list to set all relevant parameters for batch system.
virtual void GetBatchSystemParameterList(KVNameValueList &)
virtual void Clear(Option_t *opt="")
virtual Bool_t CheckJobParameters()
Checks the job and asks for the job name if needed.
Manager class which sets up and runs data analysis tasks.
virtual KVString GetRootDirectoryOfDataToAnalyse() const
KVDataAnalysisTask * GetAnalysisTask() const
Pilots user analysis of experimental data.
void SetFullRunList(const KVNumberList &nl)
void SetRuns(const KVNumberList &nl, Bool_t check=kTRUE)
const KVNumberList & GetRunList() const
Extended TList class which owns its objects by default.
Definition: KVList.h:27
Handles lists of named parameters with different types, a list of KVNamedParameter objects.
Int_t GetIntValue(const Char_t *name) const
void SetValue(const Char_t *name, value_type value)
Bool_t GetBoolValue(const Char_t *name) const
const Char_t * GetStringValue(const Char_t *name) const
Strings used to represent a set of ranges of values.
Definition: KVNumberList.h:83
Bool_t End(void) const
Definition: KVNumberList.h:196
Int_t GetNValues() const
void Begin(void) const
Int_t Next(void) const
Class piloting analyses of simulated data.
Int_t GetNumberOfFilesToAnalyse() const
TList * GetFileList() const
void SetFileList(TList *l)
Extension of ROOT TString class which allows backwards compatibility with ROOT v3....
Definition: KVString.h:72
Int_t GetNValues(TString delim) const
Definition: KVString.cpp:859
Interface to CCIN2P3 Slurm batch job management system.
virtual void Print(Option_t *="") const
virtual void ChangeDefJobOpt(KVDataAnalyser *)
const Char_t * GetJobName() const
const Char_t * GetJobTime(void) const
returns the parameter string corresponding to the job CPU time
virtual void Clear(Option_t *opt="")
Clear previously set parameters in order to create a new job submission command.
virtual Bool_t CheckJobParameters()
Checks the job and asks for any missing parameters.
void SetJobMemory(const Char_t *h="")
virtual void GetBatchSystemParameterList(KVNameValueList &)
const Char_t * GetJobMemory(void) const
returns the parameter string corresponding to the job Memory
void ChooseJobMemory(void)
void SetJobTime(const Char_t *h="")
virtual void SetBatchSystemParameters(const KVNameValueList &)
Use the parameters in the list to set all relevant parameters for batch system.
void PrintJobs(Option_t *opt="")
Print list of owner's jobs.
virtual void ReadBatchEnvFile(TEnv *)
virtual void SanitizeJobName() const
virtual void WriteBatchEnvFile(TEnv *)
virtual void ls(Option_t *option="") const
virtual const char * GetValue(const char *name, const char *dflt) const
virtual void SetValue(const char *name, const char *value, EEnvLevel level=kEnvChange, const char *type=nullptr)
virtual void Add(TObject *obj)
virtual void Clear(Option_t *option="")
virtual const char * GetName() const
Ssiz_t Length() const
std::istream & ReadToDelim(std::istream &str, char delim='\n')
const char * Data() const
TString & Prepend(char c, Ssiz_t rep=1)
void Form(const char *fmt,...)
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
virtual const char * WorkingDirectory()
UInt_t GetListOfJobs(TFile *file, TList &jobdirs)