Skip to content

Commit

Permalink
Replaced CHECFiles with classify_files function which returns a RunFi…
Browse files Browse the repository at this point in the history
…lesCollection object which contains RunFilesRecords
  • Loading branch information
sflis committed Jun 28, 2019
1 parent f1e6bd4 commit 5bef196
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 71 deletions.
2 changes: 1 addition & 1 deletion crundb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from . import modules
from . import core
from .core import CHECFiles, SubmitPluginBase
from .core import SubmitPluginBase
from .core import importdir as _import
import os as _os
from .utils import get_root_folder as _get_root_folder
Expand Down
2 changes: 1 addition & 1 deletion crundb/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .submitplugin import CHECFiles, SubmitPluginBase
from .submitplugin import SubmitPluginBase
9 changes: 4 additions & 5 deletions crundb/core/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import zmq.asyncio

from crundb.core import submitplugin

from crundb import utils
class Client:
def __init__(self, ip, port, zmqcontext=None,verbose = False):
"""Summary
Expand Down Expand Up @@ -48,12 +48,11 @@ def list_submit_plugins(self):
return [p.__name__ for p in submitplugin.SubmitPluginBase.subclasses]

def submit(self, files:list, send=True,dry_run=False):
cf = submitplugin.CHECFiles()
run_collection = cf.classify_files(files)
run_collection = utils.classify_files(files)
self.pr("Number of input files: {}".format(len(files)))
self.pr("Files from {} runs ".format(len(cf._runs_file_collection.keys())))
self.pr("Files from {} runs ".format(len(run_collection.collection.keys())))
self.pr("Type of files:")
for key,count in cf._counters.items():
for key,count in run_collection.counters.items():
self.pr(f" `{key}`: {count}")
if dry_run:
return None
Expand Down
63 changes: 0 additions & 63 deletions crundb/core/submitplugin.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,4 @@
from crundb.core import importdir
from crundb.utils import get_root_folder,get_data_folder
import os
import yaml
import re
from collections import defaultdict
class CHECFiles:
    """Classify data files by run and by configured file type.

    The file-type definitions are read from the ``FileDefs`` section of
    ``pageconf.yaml`` in the folder returned by ``get_data_folder()``.
    Each definition maps a file-type name to a list of file-name patterns
    in which ``*`` stands for the run name (``Run<number>``).
    """

    def __init__(self):
        """Load the file definitions and set up empty bookkeeping."""
        with open(os.path.join(get_data_folder(), 'pageconf.yaml')) as f:
            # safe_load only builds plain data types; a bare yaml.load(f)
            # is deprecated and raises TypeError on PyYAML >= 6.
            conf = yaml.safe_load(f)
        self._file_def = conf['FileDefs']
        self._file_collection = {'Run': None}
        for key in self._file_def.keys():
            self._file_collection[key] = None
        self._has_file_types = set()
        self._runs_file_collection = defaultdict(dict)
        self._counters = defaultdict(int)

    def classify_files(self, files):
        """Sort ``files`` into per-run dictionaries keyed by file type.

        A file is considered only if its base name starts with ``Run``, is
        not followed directly by ``_`` and contains at least one digit; the
        first run of digits is taken as the run number. Anything else is
        reported on stdout and skipped.

        Args:
            files: Iterable of file paths.

        Returns:
            The mapping ``run name -> {'Run': run name, <file type>: list of
            paths or None}`` accumulated on this instance.
        """
        for full_path in files:
            file = os.path.basename(full_path)
            match = re.search(r'[0-9]+', file)
            # file[3:4] (not file[3]) so a name that is exactly 'Run' does
            # not raise IndexError; a missing run number is also rejected
            # here instead of failing on ``None.span()``.
            if not file.startswith('Run') or file[3:4] == '_' or match is None:
                print("unknown format of file at location {}".format(full_path))
                continue

            run_name = f"Run{match.group()}"
            if run_name not in self._runs_file_collection:
                self._runs_file_collection[run_name] = {'Run': run_name}
                for fdef in self._file_def.keys():
                    self._runs_file_collection[run_name][fdef] = None

            tmp = defaultdict(list)
            for fdef, patrns in self._file_def.items():
                for patrn in patrns:
                    # '*' in a pattern is a placeholder for the run name.
                    if patrn.replace('*', run_name) == file:
                        tmp[fdef].append(full_path)
                        self._counters[fdef] += 1
            # Files that match no pattern are silently ignored.

            # NOTE(review): dict.update replaces any list already stored for
            # a file type, so a second file of the same type for the same
            # run overwrites the first while the counter still counts both.
            # Confirm whether accumulation was intended.
            self._runs_file_collection[run_name].update(tmp)
        return self._runs_file_collection

    def find_run_files(self, folder, run):
        """Placeholder; not implemented and always returns ``None``.

        Args:
            folder: Unused.
            run: Unused.
        """
        pass

class SubmitPluginBase:

Expand Down
135 changes: 134 additions & 1 deletion crundb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import errno

import io

import yaml
from collections import defaultdict
import re

def get_root_folder()->str:
"""Summary
Expand Down Expand Up @@ -102,6 +104,137 @@ def savefig_to_buffer(fig:Figure)->bytes:
return figbuf.read()




class RunFilesRecord:
    """Files belonging to a single run, grouped by file type.

    Each file type maps to a list of paths, or to ``None`` when no file of
    that type is known. Every entry is also exposed as an instance
    attribute of the same name.
    """

    def __init__(self, run, filedefs, **kwargs):
        """Create a record for ``run``.

        Args:
            run: Run identifier; exposed through the ``run`` property.
            filedefs: Mapping whose keys are the known file-type names;
                each starts out as ``None``.
            **kwargs: ``file_type=path`` pairs; each path is stored as a
                one-element list.
        """
        self._record = defaultdict(lambda: None)
        self._run = run
        for k in filedefs.keys():
            self._record[k] = None
        for k, v in kwargs.items():
            self._record[k] = [v]
        # Mirror all entries, including those passed as kwargs, so that
        # ``rec.<type>`` and ``rec['<type>']`` agree right after construction.
        self._sync_attributes()

    def _sync_attributes(self):
        """Expose every record entry as an instance attribute."""
        for k, v in self._record.items():
            setattr(self, k, v)

    @property
    def run(self):
        """The run identifier given at construction."""
        return self._run

    def __getitem__(self, key):
        """Return the entry for ``key``, or ``None`` if it is unknown."""
        # .get() so that looking up an unknown key does not insert it.
        return self._record.get(key)

    def items(self):
        """Return a view of the ``(file type, list of paths or None)`` pairs."""
        return self._record.items()

    def update(self, d):
        """Merge the entries of ``d`` into this record.

        Args:
            d: Object with an ``items()`` method yielding
                ``(file type, list of paths or None)`` pairs, for example
                another ``RunFilesRecord``. ``None`` values are skipped;
                other values are appended to what is already stored.
        """
        for k, v in d.items():
            if v is None:
                continue
            current = self._record.get(k)
            if current is not None:
                current.extend(v)
            else:
                # Copy so that later in-place extensions of this record do
                # not modify the list owned by ``d``.
                self._record[k] = list(v)
        self._sync_attributes()

    def __str__(self):
        """Return the run followed by one line per non-empty file type."""
        lines = [f"<RunFilesRecord>:\n{self._run}"]
        lines.extend(f"{k}: {v}" for k, v in self._record.items() if v is not None)
        return "\n".join(lines)

class RunFilesCollection:
    """Container of run-file records keyed by run, with per-type counters."""

    def __init__(self):
        """Start with no records and all counters at zero."""
        self._counters = defaultdict(int)
        self._collection = {}

    def add(self, record):
        """Insert ``record``, merging it into an existing record of the same run.

        Args:
            record: Object exposing ``run``, ``items()`` and ``update()``.
                Every entry of ``record`` whose value is not ``None``
                increments the counter of its key by one.
        """
        run = record.run
        if run not in self._collection:
            self._collection[run] = record
        else:
            self._collection[run].update(record)

        for file_type, paths in record.items():
            if paths is None:
                continue
            self._counters[file_type] += 1

    def items(self):
        """Return a view of the ``(run, record)`` pairs."""
        return self._collection.items()

    @property
    def collection(self):
        """The underlying ``run -> record`` dictionary."""
        return self._collection

    @property
    def counters(self):
        """The ``key -> count`` mapping maintained by ``add``."""
        return self._counters

def classify_files(files, filename_conf=os.path.join(get_data_folder(), 'pageconf.yaml')):
    """Group ``files`` by run and file type.

    The ``FileDefs`` section of the YAML configuration maps each file-type
    name to a list of file-name patterns in which ``*`` stands for the run
    name (``Run<number>``). A file is considered only if its base name
    starts with ``Run``, is not followed directly by ``_`` and contains at
    least one digit; the first run of digits is taken as the run number.
    Anything else is reported on stdout and skipped.

    Args:
        files: Iterable of file paths.
        filename_conf: Path of the YAML configuration file.

    Returns:
        RunFilesCollection: One record per run that had at least one file
        matching a configured pattern.
    """
    with open(filename_conf) as f:
        # safe_load only builds plain data types; a bare yaml.load(f) is
        # deprecated and raises TypeError on PyYAML >= 6.
        conf = yaml.safe_load(f)
    file_def = conf['FileDefs']
    collection = RunFilesCollection()

    for full_path in files:
        file = os.path.basename(full_path)
        match = re.search(r'[0-9]+', file)
        # file[3:4] (not file[3]) so a name that is exactly 'Run' does not
        # raise IndexError; a missing run number is also rejected here
        # instead of failing on ``None.span()``.
        if not file.startswith('Run') or file[3:4] == '_' or match is None:
            print("unknown format of file at location {}".format(full_path))
            continue

        run_name = f"Run{match.group()}"
        for fdef, patrns in file_def.items():
            for patrn in patrns:
                # '*' in a pattern is a placeholder for the run name.
                if patrn.replace('*', run_name) == file:
                    collection.add(
                        RunFilesRecord(run=run_name, filedefs=file_def, **{fdef: full_path})
                    )
        # Files that match no pattern are silently ignored.
    return collection


def pid_exists(pid: int)->bool:
"""Check whether pid exists in the current process table.
UNIX only.
Expand Down

0 comments on commit 5bef196

Please sign in to comment.