KaliVeda  1.13/01
Heavy-Ion Analysis Toolkit
KVDataSetManager.cpp
Go to the documentation of this file.
1 /*
2 $Id: KVDataSetManager.cpp,v 1.17 2007/10/01 15:03:38 franklan Exp $
3 $Revision: 1.17 $
4 $Date: 2007/10/01 15:03:38 $
5 $Author: franklan $
6 */
7 
8 #include "KVBase.h"
9 #include "KVDataSetManager.h"
11 #include "KVString.h"
12 #include "TObjString.h"
13 #include "TObjArray.h"
14 #include "Riostream.h"
15 #include "TEnv.h"
16 #include "KVBase.h"
17 #include "TPluginManager.h"
18 #include "TError.h"
19 
//Build a file-mode value from three permission digits (user, group, other).
//Each digit is an octal permission digit 0-7, e.g. CHMODE(6,6,4) gives octal 0664
//(= u+rw, g+rw, o+r), which can be passed to gSystem->Chmod().
//Every argument is parenthesised so that expressions such as CHMODE(4|2, 6, 4)
//expand with the intended precedence.
#define CHMODE(u,g,o) (((u) << 6) + ((g) << 3) + (o))
23 
24 using namespace std;
25 
27 
29 
30 
32 
34 {
    //Initialise all members to their 'empty' state: no available datasets,
    //no associated repository, availability caching switched off, no cache file name.
    //SetOwner() is called on both the dataset list and the analysis task list.
35  fNavailable = 0;
36  fRepository = 0;
37  fCacheAvailable = kFALSE;
38  fMaxCacheTime = 0;
39  fCacheFileName = "";
40  fDataSets.SetOwner();
41  fTasks.SetOwner();
42 }
43 
44 
45 
47 
48 KVDataSetManager::~KVDataSetManager()
49 {
    //Destructor: no explicit clean-up is performed here.
50 }
51 
52 
53 
64 
66 {
67  //Initialisation of dataset manager for the repository 'dr'.
68  //If dr=0x0 (default) then all known datasets are 'available', otherwise
69  //we check availability of datasets based on data present in repository.
70  //Initialise all possible data analysis tasks,
71  //then set list of possible tasks for each available dataset.
72  //
73  //Returns kTRUE if all goes well.
74  //For remote data repositories we return kFALSE if no datasets are available
75  //(as the access to these is read-only)
    //Also returns kFALSE if ReadDataSetList() or ReadTaskList() fails.
76 
77  fRepository = dr;
78 
79  ReadUserGroups();
80 
81  if (!ReadDataSetList())
82  return kFALSE;
83 
84  //use caching for dataset availability ? read the per-repository settings
    //[repository name].DataRepository.CacheAvailable and [repository name].DataRepository.MaxCacheTime
85  if (dr) {
86  fCacheAvailable = gEnv->GetValue(Form("%s.DataRepository.CacheAvailable", dr->GetName()),
87  kFALSE);
88  fMaxCacheTime = (UInt_t)gEnv->GetValue(Form("%s.DataRepository.MaxCacheTime", dr->GetName()),
89  0);
90  }
91  //name of cache file: [repository name].available.datasets (only set when a repository is given)
92  if (dr) fCacheFileName.Form("%s.available.datasets", dr->GetName());
93 
94  //check which datasets are available
95  CheckAvailability();
96  if (!GetNavailable() && dr && dr->IsRemote()) return kFALSE;
97 
98  if (!ReadTaskList())
99  return kFALSE;
100 
101  //set available data analysis tasks for available datasets
     //(the index used by GetAvailableDataSet begins at 1)
102  if (GetNavailable()) {
103  for (Int_t i = 1; i <= GetNavailable(); i++)
104  GetAvailableDataSet(i)->SetAnalysisTasks(&fTasks);
105  }
106 
107  // stand-alone dataset manager: make it the default
108  if (!dr) gDataSetManager = this;
109 
110  return kTRUE;
111 }
112 
113 
114 
119 
121 {
122  //Sets up list of user groups defining restricted access to certain datasets.
123  //Definition of different user groups is given in $KVROOT/KVFiles/.kvrootrc
     //If no value is defined for 'UserGroup' we return immediately, leaving fUserGroups untouched.
124 
125  //the 'UserGroup' env var contains a whitespace-separated list of group names
126  TString groups = gEnv->GetValue("UserGroup", "");
127  if (groups == "") {
128  //cout << "No value for UserGroup" << endl;
129  return;
130  }
131  //split into array of group names (the array must be deleted after use)
132  TObjArray* toks = groups.Tokenize(' ');
133  TObjString* group_name;
134  TIter next_name(toks);
135  fUserGroups.Clear();
136  while ((group_name = (TObjString*) next_name())) {
137  //for each group_name, the env var 'group_name.Users' contains a whitespace-separated list of user names
138  //we store this string in the fUserGroups parameter list with parameter name 'group_name'
139  TString users =
140  gEnv->
141  GetValue(Form("%s.Users", group_name->String().Data()), "");
142  fUserGroups.SetValue(group_name->String().Data(), users);
143  }
144  delete toks;
145 }
146 
147 
148 
152 
154 {
155  //Initialise list of all known datasets from information in $KVROOT/KVFiles/.kvrootrc
156  //(and user's .kvrootrc)
     //The 'DataSet' env var gives the space-separated list of dataset names.
     //Any previous contents of the dataset list are cleared first.
     //Always returns kTRUE.
157 
158  KVString manip_list = gEnv->GetValue("DataSet", "");
159 
160  fDataSets.Clear();
161 
162  TObjArray* manips = manip_list.Tokenize(" ");
163  TIter next(manips);
164  TObjString* manip;
165  while ((manip = (TObjString*)next())) {
166 
     //for each name: create a dataset with NewDataSet(), then set its title
     //([name].DataSet.Title, default "Experimental dataset"), its repository subdirectory
     //([name].DataSet.RepositoryDir, default = dataset name), its user groups
     //([name].DataSet.UserGroup, default "") and the repository of this manager
167  KVDataSet* ds = NewDataSet();
168  ds->SetName(manip->GetString().Data());
169  ds->SetTitle(gEnv->GetValue(Form("%s.DataSet.Title", manip->GetString().Data()), "Experimental dataset"));
170  ds->SetDataPathSubdir(gEnv->GetValue(Form("%s.DataSet.RepositoryDir", manip->GetString().Data()), manip->GetString().Data()));
171  ds->SetUserGroups(gEnv->GetValue(Form("%s.DataSet.UserGroup", manip->GetString().Data()), ""));
172  ds->SetRepository(fRepository);
173  fDataSets.Add(ds);
174 
175  }
176 
177  delete manips;
178 
179  return kTRUE;
180 }
181 
182 
183 
187 
189 {
190  //Initialise list of all known analysis tasks from informations in $KVROOT/KVFIles/.kvrootrc
191  //(and user's .kvrootrc)
192 
193  KVString task_list = gEnv->GetValue("DataAnalysisTask", "");
194 
195  fTasks.Clear();
196 
197  task_list.Begin(" ");
198  while (!task_list.End()) {
199 
201  TString name = task_list.Next();
202  dat->SetName(name.Data());
203  dat->SetTitle(gEnv->GetValue(Form("%s.DataAnalysisTask.Title", name.Data()), ""));
204  dat->SetPrereq(gEnv->GetValue(Form("%s.DataAnalysisTask.Prereq", name.Data()), ""));
205  dat->SetDataAnalyser(gEnv->GetValue(Form("%s.DataAnalysisTask.Analyser", name.Data()), "KVDataAnalyser"));
206  dat->SetWithUserClass(gEnv->GetValue(Form("%s.DataAnalysisTask.UserClass", name.Data()), kFALSE));
207  dat->SetUserBaseClass(gEnv->GetValue(Form("%s.DataAnalysisTask.UserClass.Base", name.Data()), ""));
208  dat->SetStatusUpdateInterval(gEnv->GetValue(Form("%s.DataAnalysisTask.StatusUpdateInterval", name.Data()), 1000));
209  fTasks.Add(dat);
210 
211  }
212 
213  return kTRUE;
214 }
215 
216 
217 
223 
225 {
226  //Print list of datasets
227  //If opt="" (default) all datasets are shown with full information
228  //if opt="available" only available datasets are shown, each with a number which can
229  //be used with GetAvailableDataSet(Int_t) in order to retrieve the corresponding dataset.
     //The option is case-insensitive and only needs to begin with "avail".
230 
231  TString Sopt(opt);
232  Sopt.ToUpper();
233  if (Sopt.BeginsWith("AVAIL")) {
234  if (!fNavailable) {
235  cout << " *** No available datasets ***" <<
236  endl;
237  return;
238  }
239  else {
     //numbered list of the titles of available datasets (numbering starts at 1)
240  for (int i = 1; i <= fNavailable; i++) {
241  KVDataSet* ds = GetAvailableDataSet(i);
242  cout << "\t" << i << ". " << ds->GetTitle() << endl;
243  }
244  }
245  return;
246  }
     //default: call ls() for every known dataset
247  if (fDataSets.GetSize()) {
248  TIter next(&fDataSets);
249  KVDataSet* ds;
250  while ((ds = (KVDataSet*) next()))
251  ds->ls();
252  }
253 }
254 
255 
256 
278 
280 {
281  //Check availability of datasets in repository associated to this data set manager
282  //
283  //If caching is activated for the parent repository, i.e. if
284  //
285  // [repository name].DataRepository.CacheAvailable: yes
286  //
287  //then instead of directly checking the existence of the directories for each dataset,
288  //we use the cached information written in the file
289  //KVBase::WorkingRepository()/[repository name].available.datasets
290  //unless (1) it doesn't exist, or (2) the file is older than the maximum
291  //cache time (in seconds) defined by
292  //
293  // [repository name].DataRepository.MaxCacheSeconds:
294  //
295  //In either of these 2 cases, we check the existence of the directories and update/
296  //create the cache file.
297  //
298  //If the repository appears to be empty (perhaps because we are using a remote access
299  //protocol to check it, and the protocol has some problems...), then as a last resort we
300  //we will use the cache if it exists, whatever its age.
301 
302  if (fCacheAvailable) {
303  //caching of dataset availability is activated
304  if (CheckCacheStatus()) {
305  //cache file exists and is not out of date
306  if (ReadAvailableDatasetsFile()) return;
307  }
308  }
309 
310  // print (repository-dependent) warning/informational message
311  if (fRepository) fRepository->PrintAvailableDatasetsUpdateWarning();
312 
313  //open temporary file
314  ofstream tmp_file;
315  TString tmp_file_path = fCacheFileName;
316  KVBase::OpenTempFile(tmp_file_path, tmp_file);
317 
318  fNavailable = 0;
319  if (fDataSets.GetSize()) {
320  TIter next(&fDataSets);
321  KVDataSet* ds;
322  while ((ds = (KVDataSet*) next())) {
323  //The results of this check are written in $KVROOT/KVFiles/[repository name].available.datasets
324  //This file may be read by KVRemoteDataSetManager::CheckAvailability when this
325  //data repository is accessed as a remote data repository from a remote machine.
326  //In this case we do not want the identity of the user to influence the contents of the file.
327  //Therefore even for 'unavailable' datasets we write the available datatypes (if any)
328  //in the file.
329  tmp_file << ds->GetName() << " : ";
330  ds->CheckAvailable();
331  tmp_file << ds->GetAvailableDataTypes() << endl;
332  if (ds->IsAvailable()) {
333  fNavailable++;
334  }
335  }
336 
337  //close temp file
338  tmp_file.close();
339  //if datasets are found, then we copy the temporary file to KVFiles directory,
340  //overwriting any previous version. if no datasets were found, we try the cache
341  //file (if it exists)
342  if (fNavailable && fRepository) { //if no repository is associated, no need to keep file
343  TString runlist = KVBase::GetWORKDIRFilePath(fCacheFileName.Data());
344  gSystem->CopyFile(tmp_file_path, runlist, kTRUE);
345  //set access permissions to 664
346  gSystem->Chmod(runlist.Data(), CHMODE(6, 6, 4));
347  }
348 
349  //delete temp file
350  gSystem->Unlink(tmp_file_path);
351 
352  if (!fNavailable) {
353  //no datasets found when checking file system ?
354  //can we rely on the cache file ?
355  ReadAvailableDatasetsFile();
356  }
357  else {
358  //now set up array of available datasets' indices
359  fIndex.clear();
360  next.Reset();
361  Int_t j(0);
362  while ((ds = (KVDataSet*) next())) {
363  if (ds->IsAvailable()) {
364  fIndex.push_back(j);
365  }
366  j++;
367  }
368  }
369  }
370 }
371 
372 
373 
374 
377 
379 {
380  //Return pointer to DataSet using index in list of all datasets, index>=0
     //Returns 0 if the list is empty or if index is not smaller than the size of the list.
     //NOTE(review): a negative index is not rejected here, it is passed on to fDataSets.At() - confirm this is harmless
381  if (fDataSets.GetSize() && index < fDataSets.GetSize())
382  return (KVDataSet*) fDataSets.At(index);
383  return 0;
384 }
385 
386 
387 
390 
392 {
393  //Return pointer to DataSet using name
     //The result of fDataSets.FindObjectByName(name) is returned, cast to KVDataSet*
     //(presumably 0 if no dataset has this name - confirm against FindObjectByName)
394  return (KVDataSet*) fDataSets.FindObjectByName(name);
395 }
397 
398 
403 
405 {
406  //Return pointer to available DataSet using index of available datasets
407  //Note this index begins at 1, and corresponds to the number printed next to the dataset
408  //when Print("available") is called
409  if (fNavailable && index && index <= fNavailable)
410  return GetDataSet(fIndex[index - 1]);
411  return 0;
412 }
413 
414 
415 
418 
420 {
421  //Return pointer to named data analysis task
     //The result of fTasks.FindObjectByName(name) is returned, cast to KVDataAnalysisTask*
     //(presumably 0 if no task has this name - confirm against FindObjectByName)
422  return (KVDataAnalysisTask*) fTasks.FindObjectByName(name);
423 }
425 
426 
430 
432  const Char_t* username)
433 {
434  //Check in list of groups fUserGroups if the user name 'username' is part of the group 'groupname'.
435  //If 'username' is not given (default) we use current user info (gSystem->GetUserInof()->fUser).
436 
437  TString Username = strcmp(username,
438  "") ? username : gSystem->GetUserInfo()->
439  fUser.Data();
440 
441  if (fUserGroups.HasParameter(groupname)) {
442  if (fUserGroups.GetTStringValue(groupname).Contains(Username.Data()))
443  return kTRUE;
444  }
445  return kFALSE;
446 }
447 
448 
449 
452 
454 {
455  //Creates and returns pointer to new data set object
     //The object is allocated with 'new'. ReadDataSetList() adds the returned object to fDataSets.
456  return (new KVDataSet);
457 }
458 
459 
460 
466 
468 {
469  //Called when the physical state of the repository has changed i.e. a subdirectory for
470  //a new dataset or datatype has been added or removed. We update the available datasets,
471  //datatypes and analysis tasks.
472 
473  //check which datasets are available
474  CheckAvailability();
475 
476  //set available data analysis tasks for available datasets
     //(the index used by GetAvailableDataSet begins at 1)
477  if (GetNavailable()) {
478  for (Int_t i = 1; i <= GetNavailable(); i++)
479  GetAvailableDataSet(i)->SetAnalysisTasks(&fTasks);
480  }
481 }
482 
483 
484 
491 
493 {
494  //Opens file [repository name].available.datasets (path given by KVBase::GetWORKDIRFilePath)
495  //containing cached info on available datasets and
496  //associated subdirectories in data repository.
497  //Opens file for reading, & if all goes well returns kTRUE.
498  //Returns kFALSE in case of problems.
     //The file is opened on the member stream fDatasets; the return value is that of KVBase::SearchAndOpenKVFile.
499 
500  return KVBase::SearchAndOpenKVFile(KVBase::GetWORKDIRFilePath(fCacheFileName), fDatasets);
501 }
502 
503 
504 
510 
512 {
513  //Opens and reads file containing cached info on available datasets, and sets
514  //the availability of the concerned datasets.
515  //Returns kTRUE if all goes well.
516  //Returns kFALSE if no cache exists or if file cannot be opened.
517  if (OpenAvailableDatasetsFile()) {
518  Info("ReadAvailableDataSetsFile",
519  "Reading cached information in file %s", fCacheFileName.Data());
520  //read file
521  TString line;
522  line.ReadLine(fDatasets);
523  while (fDatasets.good()) {
524 
525  TObjArray* toks = line.Tokenize(": ,");
526 
527  //first entry is dataset name
528  TString datasetname = ((TObjString*) toks->At(0))->String();
529  KVDataSet* dataset = GetDataSet(datasetname.Data());
530 
531  if (dataset) { //check dataset is known to local version of KaliVeda
532  //in case of remote repository, there may be datasets in the remote repository which are not defined here
533  if (toks->GetEntries() > 1 && dataset->CheckUserCanAccess()) {
534  //AVAILABLE DATASET
535  dataset->SetAvailable();
536  fNavailable++;
537  for (int i = 1; i < toks->GetEntries(); i++) {
538  //each following entry is a subdirectory name
539  dataset->AddAvailableDataType(((TObjString*) toks->At(i))->String().
540  Data());
541  }
542  }
543  else {
544  //UNAVAILABLE DATASET (no subdirs)
545  dataset->SetAvailable(kFALSE);
546  }
547  }
548 
549  delete toks;
550  line.ReadLine(fDatasets);
551  }
552 
553  //close file
554  fDatasets.close();
555  fDatasets.clear();
556 
557  if (fNavailable) {
558  TIter next(&fDataSets);
559  //now set up array of available datasets' indices
560  fIndex.clear();
561  Int_t j(0);
562  KVDataSet* ds;
563  while ((ds = (KVDataSet*) next())) {
564  if (ds->IsAvailable()) {
565  fIndex.push_back(j);
566  }
567  j++;
568  }
569  }
570  //all is OK
571  return kTRUE;
572  }
573  //we could not find/open the cache file
574  return kFALSE;
575 }
576 
577 
578 
579 
584 
586 {
587  //We check the status of the available datasets cache file.
588  //We return kTRUE if the file exists & was last modified
589  //less than fMaxCacheTime seconds ago.
     //We return kFALSE if the file is not found or if it is too old.
     //Informational messages are printed at each step.
590 
591  TString fullpath;
592  Info("KVDataSetManager::CheckCacheStatus", "Checking for available datasets cache file...");
593  if (KVBase::SearchKVFile(KVBase::GetWORKDIRFilePath(fCacheFileName), fullpath)) {
594 
595  // file exists - how old is it ?
     // NOTE(review): the return value of GetPathInfo is not checked - confirm that file_info is usable if the call fails
596  FileStat_t file_info;
597  gSystem->GetPathInfo(fullpath.Data(), file_info);
598  TDatime file_date(file_info.fMtime);
599  TDatime now;
     // age in seconds = difference between current time and last modification time.
     // NOTE(review): the difference is unsigned, a modification time in the future would give a very large age
600  UInt_t file_age = now.Convert() - file_date.Convert();
601  Info("KVDataSetManager::CheckCacheStatus", "...file found. It is %u seconds old", file_age);
602  if (file_age < fMaxCacheTime) {
603  Info("KVDataSetManager::CheckCacheStatus", "Using cached file");
604  return kTRUE;
605  }
606  else
607  Info("KVDataSetManager::CheckCacheStatus", "File is too old (max time=%u). Update will be performed.", fMaxCacheTime);
608  }
609  else
610  Info("KVDataSetManager::CheckCacheStatus", "...no file found");
611  return kFALSE;
612 }
613 
614 
615 
621 
623 {
624  // This method returns a pointer to the analysis task whose description (title) contains
625  // all of the whitespace-separated keywords (which may be regular expressions)
626  // given in the string "keywords". The comparison is case-insensitive.
     // The search itself is delegated to FindObjectAny() of the list returned by GetAnalysisTaskList().
     // NOTE(review): the two boolean arguments presumably select 'all keywords must match' and
     // 'case-insensitive comparison' - confirm against the declaration of FindObjectAny
627 
628  //case-insensitive search for matches in list of all analysis tasks, based on 'title' attribute
629  return (KVDataAnalysisTask*)GetAnalysisTaskList()->FindObjectAny("title", keywords, kTRUE, kFALSE);
630 }
631 
632 
int Int_t
unsigned int UInt_t
#define CHMODE(u, g, o)
KVDataSetManager * gDataSetManager
ClassImp(KVPartitionList) void KVPartitionList
Initialisation.
KVString GetValue(KVString &l, char c)
Definition: KVTGID.cpp:800
char Char_t
const Bool_t kFALSE
bool Bool_t
const Bool_t kTRUE
const char Option_t
R__EXTERN TEnv * gEnv
char * Form(const char *fmt,...)
R__EXTERN TSystem * gSystem
static void OpenTempFile(TString &base, std::ofstream &fp)
Definition: KVBase.cpp:827
static const Char_t * GetWORKDIRFilePath(const Char_t *namefile="")
Definition: KVBase.cpp:127
static Bool_t SearchKVFile(const Char_t *name, TString &fullpath, const Char_t *kvsubdir="")
Definition: KVBase.cpp:538
static Bool_t SearchAndOpenKVFile(const Char_t *name, KVSQLite::database &dbfile, const Char_t *kvsubdir="")
Definition: KVBase.cpp:649
Define and manage data analysis tasks.
virtual void SetDataAnalyser(const Char_t *d)
virtual void SetPrereq(const Char_t *p)
virtual void SetStatusUpdateInterval(Long64_t n)
virtual void SetUserBaseClass(const Char_t *d)
virtual void SetWithUserClass(Bool_t w=kTRUE)
Base class for managing repositories of experimental data.
virtual Bool_t IsRemote() const
Returns kTRUE for remote repositories, kFALSE for local repositories.
Manage all datasets contained in a given data repository.
virtual void Print(Option_t *opt="") const
virtual void ReadUserGroups()
virtual Bool_t ReadDataSetList()
virtual Bool_t Init(KVDataRepository *=0)
KVDataAnalysisTask * GetAnalysisTaskAny(const Char_t *keywords) const
virtual Bool_t OpenAvailableDatasetsFile()
virtual void Update()
virtual Bool_t CheckCacheStatus()
virtual Bool_t ReadAvailableDatasetsFile()
virtual Bool_t CheckUser(const Char_t *groupname, const Char_t *username="")
virtual KVDataAnalysisTask * GetTask(const Char_t *name)
Return pointer to named data analysis task.
KVDataSet * GetDataSet(Int_t) const
Return pointer to DataSet using index in list of all datasets, index>=0.
virtual KVDataSet * NewDataSet()
Creates and returns pointer to new data set object.
virtual Bool_t ReadTaskList()
virtual void CheckAvailability()
virtual KVDataSet * GetAvailableDataSet(Int_t) const
Manage an experimental dataset corresponding to a given experiment or campaign.
Definition: KVDataSet.h:207
virtual void SetUserGroups(const Char_t *groups)
Definition: KVDataSet.h:282
virtual void SetDataPathSubdir(const Char_t *s)
Definition: KVDataSet.h:261
virtual const Char_t * GetAvailableDataTypes() const
Definition: KVDataSet.h:277
virtual void ls(Option_t *opt="") const
Print dataset information.
Definition: KVDataSet.cpp:399
virtual void AddAvailableDataType(const Char_t *)
Definition: KVDataSet.cpp:528
void SetName(const char *name)
Definition: KVDataSet.cpp:688
virtual void CheckAvailable()
Definition: KVDataSet.cpp:471
virtual Bool_t IsAvailable() const
Returns kTRUE if this dataset is available for analysis, i.e. if any associated data files are stored...
Definition: KVDataSet.h:287
virtual void SetAvailable(Bool_t yes=kTRUE)
Definition: KVDataSet.h:291
virtual Bool_t CheckUserCanAccess()
Definition: KVDataSet.cpp:1356
void SetRepository(KVDataRepository *)
Set pointer to data repository in which dataset is stored.
Definition: KVDataSet.cpp:1387
Extension of ROOT TString class which allows backwards compatibility with ROOT v3....
Definition: KVString.h:72
void Begin(TString delim) const
Definition: KVString.cpp:565
Bool_t End() const
Definition: KVString.cpp:634
KVString Next(Bool_t strip_whitespace=kFALSE) const
Definition: KVString.cpp:695
UInt_t Convert(Bool_t toGMT=kFALSE) const
virtual const char * GetValue(const char *name, const char *dflt) const
void Reset()
virtual const char * GetName() const
virtual void SetTitle(const char *title="")
virtual const char * GetTitle() const
virtual void SetName(const char *name)
Int_t GetEntries() const
TObject * At(Int_t idx) const
const TString & GetString() const
TString & String()
void ToUpper()
TObjArray * Tokenize(const TString &delim) const
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
const char * Data() const
virtual int Chmod(const char *file, UInt_t mode)
virtual int CopyFile(const char *from, const char *to, Bool_t overwrite=kFALSE)
virtual int GetPathInfo(const char *path, FileStat_t &buf)
virtual UserGroup_t * GetUserInfo(const char *user=nullptr)
virtual int Unlink(const char *name)
TLine * line
void Info(const char *location, const char *va_(fmt),...)
const char * String
Long_t fMtime
size_t fIndex