diff --git a/cgi/cloud_mode_active_cgi.py b/cgi/cloud_mode_active_cgi.py new file mode 100755 index 0000000..c6d4e80 --- /dev/null +++ b/cgi/cloud_mode_active_cgi.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python2.6 +import cgi +import os +print "Content-Type: text/html" # HTML is following +print + +try: + cloud = os.listdir('/etc/appliance/resources/cloud') + print len(cloud) +except Exception as ex: + print ex diff --git a/cgi/exclude_cgi.py b/cgi/exclude_cgi.py new file mode 100755 index 0000000..5fc9a86 --- /dev/null +++ b/cgi/exclude_cgi.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python2.6 +import cgi +import os +form = cgi.FieldStorage() +print "Content-Type: text/html" # HTML is following +print +print "CGI script exclude" + +try: + os.unlink('exclude') +except: + pass +fp = open('exclude','w+') +fp.close() + diff --git a/cgi/include_cgi.py b/cgi/include_cgi.py new file mode 100755 index 0000000..34d37e5 --- /dev/null +++ b/cgi/include_cgi.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python2.6 +import cgi +import os +form = cgi.FieldStorage() +print "Content-Type: text/html" # HTML is following +print +print "CGI script include" + +try: + os.unlink('include') +except: + pass +fp = open('include','w+') +fp.close() + diff --git a/cgi/suspend_cgi.py b/cgi/suspend_cgi.py index 6a2f97c..ad8e9b6 100755 --- a/cgi/suspend_cgi.py +++ b/cgi/suspend_cgi.py @@ -5,10 +5,15 @@ print "Content-Type: text/html" # HTML is following print print "CGI script suspend" + +portsuffix="" +if "port" in form: + portsuffix=form["port"].value + try: - os.unlink('suspend') + os.unlink('suspend'+portsuffix) except: pass -fp = open('suspend','w+') +fp = open('suspend'+portsuffix,'w+') fp.close() diff --git a/esplugins/head-master.zip b/esplugins/head-master.zip new file mode 100644 index 0000000..4d16a1e Binary files /dev/null and b/esplugins/head-master.zip differ diff --git a/esplugins/hq-master.zip b/esplugins/hq-master.zip new file mode 100644 index 0000000..6f50d38 Binary files /dev/null and b/esplugins/hq-master.zip differ diff --git a/esplugins/install.sh b/esplugins/install.sh index 7bd2e8f..1e63fda 100644 --- a/esplugins/install.sh +++ b/esplugins/install.sh @@ -1,4 +1,4 @@ cd $1 -echo installing elasticsearch plugins... -bin/plugin --url file:///opt/fff/esplugins/$2 --install $3 +echo installing elasticsearch plugin $3 ... +bin/plugin -s --url file:///opt/fff/esplugins/$2 --install $3 diff --git a/esplugins/paramedic-master.zip b/esplugins/paramedic-master.zip new file mode 100644 index 0000000..b0fc5e5 Binary files /dev/null and b/esplugins/paramedic-master.zip differ diff --git a/esplugins/uninstall.sh b/esplugins/uninstall.sh index c22303c..301411a 100644 --- a/esplugins/uninstall.sh +++ b/esplugins/uninstall.sh @@ -1,4 +1,5 @@ #!/bin/bash cd $1 -bin/plugin --remove $2 +echo uninstalling elastic plugin $2 ...
+bin/plugin -s --remove $2 diff --git a/etc/hltd.conf b/etc/hltd.conf index a91699b..c1643a4 100644 --- a/etc/hltd.conf +++ b/etc/hltd.conf @@ -1,5 +1,6 @@ [General] enabled = False +instance = main exec_directory = /opt/hltd user = daqlocal watch_directory = /fff/data @@ -9,19 +10,19 @@ mount_command = mount mount_type = nfs4 mount_options_ramdisk = rw,noatime,vers=4,rsize=65536,wsize=65536,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,noac mount_options_output = rw,vers=4,rsize=65536,wsize=65536,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys -micromerge_output = /fff/BU0/output delete_run_dir = True output_adler32 = True [Monitoring] use_elasticsearch = True -close_es_index = False +close_es_index = True es_cmssw_log_level = DISABLED es_hltd_log_level = ERROR es_local = localhost [Web] cgi_port = 9000 +cgi_instance_port_offset = 0 soap2file_port = 8010 [Resources] diff --git a/etc/instances.input b/etc/instances.input new file mode 100644 index 0000000..f826192 --- /dev/null +++ b/etc/instances.input @@ -0,0 +1,27 @@ +{ + "DISABLED-dvbu-c2f34-30-01": + { + "names":["main","testing"], + "sizes":[20,30] + }, + "DISABLED-dvrubu-c2f34-17-03": + { + "names":["testing"], + "sizes":[0] + }, + "DISABLED-dvrubu-c2f34-17-04": + { + "names":["testing"], + "sizes":[0] + }, + "bu-vm-01-01.cern.ch": + { + "names":["main","testing"], + "sizes":[1000,500] + }, + "fu-vm-02-02.cern.ch": + { + "names":["testing"], + "sizes":[0] + } +} diff --git a/json/runapplianceTemplate.json b/json/runapplianceTemplate.json index d410952..8578066 100644 --- a/json/runapplianceTemplate.json +++ b/json/runapplianceTemplate.json @@ -274,50 +274,35 @@ } } }, - "hltrates-legend": { + "qstatus": { "properties": { - "path-names": { - "type": "string", - "index": "not_analyzed" - }, - "dataset-names": { - "type": "string", - "index": "not_analyzed" - } - } - }, - "hltrates": { - "properties": { - "ls": { - "type": "integer" - }, - "pid": { - "type": "integer" - }, - "processed": { - "type": "integer" - }, - "path-wasrun": { + "numQueuedLS": { "type": "integer" }, - "path-afterl1seed": { + "maxQueuedLS": { "type": "integer" }, - "path-afterprescale": { + "numReadFromQueueLS": { "type": "integer" }, - "path-accepted": { + "maxClosedLS": { "type": "integer" }, - "path-rejected": { + "numReadOpenLS": { "type": "integer" }, - "path-errors": { - "type": "integer" + "fm_date": { + "type": "date" }, - "dataset-accepted": { - "type": "integer" + "host": { + "type": "string", + "index":"not_analyzed" } + }, + "_timestamp": { + "enabled": true, + "store": "yes", + "path": "fm_date" } }, "cmsswlog": { diff --git a/lib/python-procname/procnamemodule.c b/lib/python-procname/procnamemodule.c new file mode 100644 index 0000000..e447032 --- /dev/null +++ b/lib/python-procname/procnamemodule.c @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2008 Eugene A. Lisitsky + * + * The procname library for Python. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * */ + +#include +#include + +void Py_GetArgcArgv(int*, char***); + +PyDoc_STRVAR(procname__doc__, "Module for setting/getting process name"); + +static PyObject * +procname_check(PyObject *self, PyObject *args) { + return Py_BuildValue("i", 1); +}; + + +static PyObject * +procname_getprocname(PyObject *self, PyObject *args) { + int argc; + char **argv; + Py_GetArgcArgv(&argc, &argv); + return Py_BuildValue("s", argv[0]); +}; + + +static PyObject * +procname_setprocname(PyObject *self, PyObject *args) { + int argc; + char **argv; + char *name; + if (!PyArg_ParseTuple(args, "s", &name)) + return NULL; + Py_GetArgcArgv(&argc, &argv); + strncpy(argv[0], name , strlen(name)); + memset(&argv[0][strlen(name)], '\0', strlen(&argv[0][strlen(name)])); + prctl (15 /* PR_SET_NAME */, name, 0, 0, 0); + Py_INCREF(Py_None); + return Py_None; +}; + + +static PyMethodDef procname_methods[] = { + {"check", procname_check, METH_VARARGS, "Test func"}, + {"getprocname", procname_getprocname, METH_VARARGS, + "Get procname.\nReturns name (string)"}, + {"setprocname", procname_setprocname, METH_VARARGS, + "Set procname.\n name (string) -> new process name.\nReturns None."}, + {NULL, NULL, 0, NULL} +}; + +PyMODINIT_FUNC +initprocname(void) { + (void) Py_InitModule3("procname", procname_methods, procname__doc__); +} + diff --git a/lib/python-procname/setup.py b/lib/python-procname/setup.py new file mode 100755 index 0000000..cf97d9e --- /dev/null +++ b/lib/python-procname/setup.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +import distutils.core +import distutils.util + +platform = distutils.util.get_platform() + + +distutils.core.setup( + name='procname', + version='0.1', + description='Process name renaming', + author="Eugene A Lisitsky", + license='LGPL', + platforms='Linux', + ext_modules=[distutils.core.Extension('procname', sources=['procnamemodule.c'])], + ) diff --git a/python/aUtils.py b/python/aUtils.py index df226e3..76c9499 100644 --- a/python/aUtils.py +++ b/python/aUtils.py @@ -2,17 +2,19 @@ import os,stat import time,datetime import shutil -import json +import simplejson as json import logging import zlib import subprocess +import threading +#import fcntl from inotifywrapper import InotifyWrapper import _inotify as inotify ES_DIR_NAME = "TEMP_ES_DIRECTORY" -UNKNOWN,OUTPUTJSD,JSD,STREAM,INDEX,FAST,SLOW,OUTPUT,STREAMERR,STREAMDQMHISTOUTPUT,INI,EOLS,EOR,COMPLETE,DAT,PDAT,PJSNDATA,PIDPB,PB,CRASH,MODULELEGEND,PATHLEGEND,BOX,BOLS,HLTRATES,HLTRATESLEGEND = range(26) #file types +UNKNOWN,OUTPUTJSD,DEFINITION,STREAM,INDEX,FAST,SLOW,OUTPUT,STREAMERR,STREAMDQMHISTOUTPUT,INI,EOLS,EOR,COMPLETE,DAT,PDAT,PJSNDATA,PIDPB,PB,CRASH,MODULELEGEND,PATHLEGEND,BOX,BOLS,QSTATUS = range(25) #file types TO_ELASTICIZE = [STREAM,INDEX,OUTPUT,STREAMERR,STREAMDQMHISTOUTPUT,EOLS,EOR,COMPLETE] TEMPEXT = ".recv" ZEROLS = 'ls0000' @@ -40,6 +42,16 @@ def __init__(self,recursiveMode=False): self.logger = logging.getLogger(self.__class__.__name__) self.eventQueue = False self.inotifyWrapper = InotifyWrapper(self,recursiveMode) + self.queueStatusPath = None + self.queueStatusPathMon = None + self.queueStatusPathDir = None + self.queuedLumiList = [] + self.maxQueuedLumi=-1 + #max seen/closed by anelastic thread + self.maxReceivedEoLS=-1 + self.maxClosedLumi=-1 + self.numOpenLumis=-1 + self.lock = threading.Lock() def 
register_inotify_path(self,path,mask): self.inotifyWrapper.registerPath(path,mask) @@ -48,20 +60,101 @@ def start_inotify(self): self.inotifyWrapper.start() def stop_inotify(self): - logging.info("MonitorRanger: Stop inotify wrapper") + self.logger.info("MonitorRanger: Stop inotify wrapper") self.inotifyWrapper.stop() - logging.info("MonitorRanger: Join inotify wrapper") + self.logger.info("MonitorRanger: Join inotify wrapper") self.inotifyWrapper.join() - logging.info("MonitorRanger: Inotify wrapper returned") + self.logger.info("MonitorRanger: Inotify wrapper returned") def process_default(self, event): self.logger.debug("event: %s on: %s" %(str(event.mask),event.fullpath)) if self.eventQueue: - self.eventQueue.put(event) + + if self.queueStatusPath!=None: + if self.checkNewLumi(event): + self.eventQueue.put(event) + else: + self.eventQueue.put(event) def setEventQueue(self,queue): self.eventQueue = queue + def checkNewLumi(self,event): + if event.fullpath.endswith("_EoLS.jsn"): + try: + queuedLumi = int(os.path.basename(event.fullpath).split('_')[1][2:]) + self.lock.acquire() + if queuedLumi not in self.queuedLumiList: + if queuedLumi>self.maxQueuedLumi: + self.maxQueuedLumi=queuedLumi + self.queuedLumiList.append(queuedLumi) + self.lock.release() + self.updateQueueStatusFile() + else: + self.lock.release() + #skip if EoL for LS in queue has already been written once (e.g. double file create race) + return False + except Exception as ex: + self.logger.warning("Problem checking new EoLS filename: "+str(os.path.basename(event.fullpath)) + " error:"+str(ex)) + try:self.lock.release() + except:pass + return True + + def notifyLumi(self,ls,maxReceivedEoLS,maxClosedLumi,numOpenLumis): + if self.queueStatusPath==None:return + self.lock.acquire() + if ls!=None and ls in self.queuedLumiList: + self.queuedLumiList.remove(ls) + self.maxReceivedEoLS=maxReceivedEoLS + self.maxClosedLumi=maxClosedLumi + self.numOpenLumis=numOpenLumis + self.lock.release() + self.updateQueueStatusFile() + + def setQueueStatusPath(self,path,monpath): + self.queueStatusPath = path + self.queueStatusPathMon = monpath + self.queueStatusPathDir = path[:path.rfind('/')] + + def updateQueueStatusFile(self): + if self.queueStatusPath==None:return + num_queued_lumis = len(self.queuedLumiList) + if not os.path.exists(self.queueStatusPathDir): + self.logger.error("No directory to write queueStatusFile: "+str(self.queueStatusPathDir)) + else: + self.logger.info("Update status file - queued lumis:"+str(num_queued_lumis)+ " EoLS:: max queued:"+str(self.maxQueuedLumi) \ +" un-queued:"+str(self.maxReceivedEoLS)+" Lumis:: last closed:"+str(self.maxClosedLumi)+ " num open:"+str(self.numOpenLumis)) + #write json + doc = {"numQueuedLS":num_queued_lumis, + "maxQueuedLS":self.maxQueuedLumi, + "numReadFromQueueLS":self.maxReceivedEoLS, + "maxClosedLS":self.maxClosedLumi, + "numReadOpenLS":self.numOpenLumis + } + try: + if self.queueStatusPath!=None: + attempts=3 + while attempts>0: + try: + with open(self.queueStatusPath+TEMPEXT,"w") as fp: + #fcntl.flock(fp, fcntl.LOCK_EX) + json.dump(doc,fp) + os.rename(self.queueStatusPath+TEMPEXT,self.queueStatusPath) + break + except Exception as ex: + attempts-=1 + if attempts==0: + raise ex + self.logger.warning("Unable to write status file, with error:" + str(ex)+".retrying...") + time.sleep(0.05) + try: + shutil.copyfile(self.queueStatusPath,self.queueStatusPathMon) + except: + pass + except Exception as ex: + self.logger.error("Unable to open/write " + self.queueStatusPath) + self.logger.exception(ex) + class 
fileHandler(object): def __eq__(self,other): @@ -106,6 +199,7 @@ def getFiletype(self,filepath = None): if not filepath: filepath = self.filepath filename = self.basename name,ext = self.name,self.ext + if ext==TEMPEXT:return UNKNOWN name = name.upper() if "mon" not in filepath: if ext == ".dat" and "_PID" not in name: return DAT @@ -113,26 +207,26 @@ def getFiletype(self,filepath = None): if ext == ".jsndata" and "_PID" in name: return PJSNDATA if ext == ".ini" and "_PID" in name: return INI if ext == ".jsd" and "OUTPUT_" in name: return OUTPUTJSD - if ext == ".jsd" : return JSD + if ext == ".jsd" : return DEFINITION if ext == ".jsn": if STREAMERRORNAME.upper() in name: return STREAMERR - elif "BOLS" in name : return BOLS - elif "STREAM" in name and "_PID" in name: return STREAM - elif "INDEX" in name and "_PID" in name: return INDEX - elif "CRASH" in name and "_PID" in name: return CRASH - elif "EOLS" in name: return EOLS - elif "EOR" in name: return EOR + elif "_BOLS" in name : return BOLS + elif "_STREAM" in name and "_PID" in name: return STREAM + elif "_INDEX" in name and "_PID" in name: return INDEX + elif "_CRASH" in name and "_PID" in name: return CRASH + elif "_EOLS" in name: return EOLS + elif "_EOR" in name: return EOR + elif "_TRANSFER" in name: return DEFINITION if ext==".jsn": if STREAMDQMHISTNAME.upper() in name and "_PID" not in name: return STREAMDQMHISTOUTPUT - if "STREAM" in name and "_PID" not in name: return OUTPUT - if "_HLTRATESLEGEND" in name: return HLTRATESLEGEND - elif "_HLTRATES" in name: return HLTRATES + if "_STREAM" in name and "_PID" not in name: return OUTPUT + if name.startswith("QUEUE_STATUS"): return QSTATUS if ext==".pb": if "_PID" not in name: return PB else: return PIDPB if name.endswith("COMPLETE"): return COMPLETE - if ".fast" in filename: return FAST - if "slow" in filename: return SLOW + if ext == ".fast" in filename: return FAST + if ext == ".slow" in filename: return SLOW if ext == ".leg" and "MICROSTATELEGEND" in name: return MODULELEGEND if ext == ".leg" and "PATHLEGEND" in name: return PATHLEGEND if "boxes" in filepath : return BOX @@ -149,7 +243,6 @@ def getFileHeaders(self): elif filetype in [DAT,PB,OUTPUT,STREAMERR,STREAMDQMHISTOUTPUT]: self.run,self.ls,self.stream,self.host = splitname elif filetype == INDEX: self.run,self.ls,self.index,self.pid = splitname elif filetype == EOLS: self.run,self.ls,self.eols = splitname - elif filetype == HLTRATES:self.run,self.ls,self.ftype,self.pid = splitname else: self.logger.warning("Bad filetype: %s" %self.filepath) self.run,self.ls,self.stream = [None]*3 @@ -167,11 +260,12 @@ def getBoxData(self,filepath = None): data = fi.read() data = data.strip(sep).split(sep) data = dict([d.split('=') for d in data]) + except IOError,e: + data = {} except StandardError,e: self.logger.exception(e) data = {} - return data #get data from json file @@ -247,7 +341,12 @@ def setFieldByName(self,field,value,warning=True): #get definitions from jsd file def getDefinitions(self): if self.filetype in [STREAM]: + #try: self.jsdfile = self.data["definition"] + #except: + # self.logger.error("no definition field in "+str(self.filepath)) + # self.definitions = {} + # return False elif not self.jsdfile: self.logger.warning("jsd file not set") self.definitions = {} @@ -256,10 +355,11 @@ def getDefinitions(self): return True - def deleteFile(self): + def deleteFile(self,silent=False): #return True filepath = self.filepath - self.logger.info(filepath) + if silent==False: + self.logger.info(filepath) if os.path.isfile(filepath): 
try: os.remove(filepath) @@ -389,21 +489,37 @@ def writeout(self,empty=False): return False return True + #TODO:make sure that the file is copied only once def esCopy(self): if not self.exists(): return if self.filetype in TO_ELASTICIZE: esDir = os.path.join(self.dir,ES_DIR_NAME) if os.path.isdir(esDir): + newpathTemp = os.path.join(esDir,self.basename+TEMPEXT) newpath = os.path.join(esDir,self.basename) retries = 5 while True: try: - shutil.copy(self.filepath,newpath) + shutil.copy(self.filepath,newpathTemp) + break + except (OSError,IOError),e: + retries-=1 + if retries == 0: + self.logger.exception(e) + return + #raise e #non-critical exception + else: + time.sleep(0.5) + retries = 5 + while True: + try: + os.rename(newpathTemp,newpath) break except (OSError,IOError),e: retries-=1 if retries == 0: self.logger.exception(e) + return #raise e #non-critical exception else: time.sleep(0.5) diff --git a/python/anelastic.py b/python/anelastic.py index 99428ef..63db0b2 100755 --- a/python/anelastic.py +++ b/python/anelastic.py @@ -11,7 +11,7 @@ import _inotify as inotify import threading import Queue -import json +import simplejson as json import logging @@ -21,8 +21,9 @@ class LumiSectionRanger(): host = os.uname()[1] - def __init__(self,tempdir,outdir,run_number): + def __init__(self,mr,tempdir,outdir,run_number): self.logger = logging.getLogger(self.__class__.__name__) + self.mr = mr self.stoprequest = threading.Event() self.emptyQueue = threading.Event() self.firstStream = threading.Event() @@ -41,7 +42,10 @@ def __init__(self,tempdir,outdir,run_number): self.jsdfile = None self.buffer = [] # file list before the first stream file self.emptyOutTemplate = None - + self.useTimeout=60 + self.maxQueuedLumi=0 + self.maxReceivedEoLS=0 + self.maxClosedLumi=0 def join(self, stop=False, timeout=None): @@ -52,7 +56,8 @@ def join(self, stop=False, timeout=None): def start(self): self.run() - def stop(self): + def stop(self,timeout=60): + self.useTimeout=timeout self.stoprequest.set() def setSource(self,source): @@ -71,11 +76,15 @@ def run(self): self.process() except (KeyboardInterrupt,Queue.Empty) as e: self.emptyQueue.set() + except Exception as ex: + self.logger.exception(ex) + self.logger.fatal("Exiting on unhandled exception") + os._exit(1) else: time.sleep(0.5) #allow timeout in case 'complete' file is received and lumi is not closed if self.stoprequest.isSet() and self.emptyQueue.isSet() and self.checkClosure()==False: - if endTimeout<=-1: endTimeout=100 + if endTimeout<=-1: endTimeout=self.useTimeout*2 if endTimeout==0: break endTimeout-=1 @@ -105,8 +114,8 @@ def process(self): eventtype = self.eventtype if eventtype:# & inotify.IN_CLOSE_WRITE: - if filetype == JSD: - self.processJsdFile() + if filetype == DEFINITION: + self.processDefinitionFile() if filetype == OUTPUTJSD and not self.jsdfile: self.jsdfile=self.infile.filepath self.createEmptyOutputTemplate() @@ -119,18 +128,28 @@ def process(self): elif filetype in [STREAM,STREAMDQMHISTOUTPUT,INDEX,EOLS,DAT,PB]: run,ls = (self.infile.run,self.infile.ls) key = (run,ls) + ls_num=int(ls[2:]) if filetype == EOLS : + if self.maxReceivedEoLS=0: - if numFiles == 1: - #fastHadd crashes trying to merge only one file - os.rename(command_args[4],command_args[3]) - else: - p = subprocess.Popen(command_args,stdout=subprocess.PIPE,stderr=subprocess.STDOUT) - p.wait() - if p.returncode!=0: - self.logger.error('fastHadd returned with exit code '+str(p.returncode)+' and response: ' + str(p.communicate()) + '. 
Merging parameters given:'+str(command_args) +' ,file sizes(B):'+str(inFileSizes)) - #DQM more verbose debugging - try: - filesize = os.stat(fullOutputPath).st_size - self.logger.error('fastHadd reported to fail at merging, while output pb file exists! '+ fullOutputPath + ' with size(B): '+str(filesize)) - except: - pass - outfile.setFieldByName('ReturnCodeMask', str(p.returncode)) - hasError=True - if True: - if numFiles==1: - try: - filesize = os.stat(fullOutputPath).st_size - except: - self.logger.error('Error checking fastHadd output file size: '+ fullOutputPath) - hasError=True - try: - os.chmod(fullOutputPath,0666) - except: - self.logger.error('Error fixing permissions of fastHadd output file: '+ fullOutputPath) - if numFiles>1: - for f in command_args[4:]: - try: - if hasError==False:os.remove(f) - except OSError as ex: - self.logger.warning('exception removing file '+f+' : '+str(ex)) + p = subprocess.Popen(command_args,stdout=subprocess.PIPE,stderr=subprocess.STDOUT) + p.wait() + if p.returncode!=0: + self.logger.error('fastHadd returned with exit code '+str(p.returncode)+' and response: ' + str(p.communicate()) + '. Merging parameters given:'+str(command_args) +' ,file sizes(B):'+str(inFileSizes)) + #DQM more verbose debugging + try: + filesize = os.stat(fullOutputPath).st_size + self.logger.error('fastHadd reported to fail at merging, while output pb file exists! '+ fullOutputPath + ' with size(B): '+str(filesize)) + except: + pass + outfile.setFieldByName('ReturnCodeMask', str(p.returncode)) + hasError=True + + for f in command_args[4:]: + try: + if hasError==False:os.remove(f) + except OSError as ex: + self.logger.warning('exception removing file '+f+' : '+str(ex)) else: hasError=True @@ -830,8 +869,14 @@ def abortMerging(self): if __name__ == "__main__": + + import procname + procname.setprocname('anelastic') + + conf=initConf() + logging.basicConfig(filename=os.path.join(conf.log_dir,"anelastic.log"), - level=logging.INFO, + level=conf.service_log_level, format='%(levelname)s:%(asctime)s - %(funcName)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S') logger = logging.getLogger(os.path.basename(__file__)) @@ -848,7 +893,7 @@ def abortMerging(self): rawinputdir = sys.argv[3] dirname = os.path.basename(os.path.normpath(dirname)) watchDir = os.path.join(conf.watch_directory,dirname) - outputDir = conf.micromerge_output + outputDir = sys.argv[4] dqmHandler = None @@ -868,11 +913,12 @@ def abortMerging(self): #starting inotify thread mr = MonitorRanger() mr.setEventQueue(eventQueue) + mr.setQueueStatusPath(os.path.join(watchDir,"open","queue_status.jsn"),os.path.join(watchDir,"mon","queue_status.jsn")) mr.register_inotify_path(watchDir,mask) mr.start_inotify() #starting lsRanger thread - ls = LumiSectionRanger(watchDir,outputDir,run_number) + ls = LumiSectionRanger(mr,watchDir,outputDir,run_number) ls.setSource(eventQueue) ls.start() diff --git a/python/applianceumount.py b/python/applianceumount.py index 523e259..ee3c13f 100644 --- a/python/applianceumount.py +++ b/python/applianceumount.py @@ -36,8 +36,8 @@ def run(self): os.symlink('/opt/hltd/cgi',self.watch_directory+'/cgi-bin') handler.cgi_directories = ['/cgi-bin'] - print("starting http server on port "+str(self.cgi_port+5)) - self.httpd = BaseHTTPServer.HTTPServer(("", self.cgi_port+5), handler) + print("starting http server on port "+str(self.cgi_port+20)) + self.httpd = BaseHTTPServer.HTTPServer(("", self.cgi_port+20), handler) self.httpd.serve_forever() self.finished=True @@ -51,9 +51,10 @@ def run(self): def stop(self): 
self.httpd.shutdown() -def checkMode(): +def checkMode(instance): try: hltdconf='/etc/hltd.conf' + if instance != "main": hltdconf='/etc/hltd-'+instance+'.conf' with open(hltdconf,'r') as f: for l in f.readlines(): ls=l.strip(' \n') @@ -63,31 +64,37 @@ def checkMode(): pass return "unknown" -def stopFUs(): +def stopFUs(instance): hltdconf='/etc/hltd.conf' watch_directory='/fff/ramdisk' + if instance != "main": hltdconf='/etc/hltd-'+instance+'.conf' machine_is_bu=False machine_is_fu=False cgi_port=9000 + cgi_offset=0 try: f=open(hltdconf,'r') for l in f.readlines(): ls=l.strip(' \n') - if not ls.startswith('#') and ls.startswith('watch_directory'): + if ls.startswith('watch_directory'): watch_directory=ls.split('=')[1].strip(' ') - if not ls.startswith('#') and ls.startswith('role'): + elif ls.startswith('role'): if 'bu' in ls.split('=')[1].strip(' '): machine_is_bu=True if 'fu' in ls.split('=')[1].strip(' ')=='fu': machine_is_fu=True - if not ls.startswith('#') and ls.startswith('cgi_port'): + elif ls.startswith('cgi_instance_port_offset'): + cgi_offset=int(ls.split('=')[1].strip(' ')) + elif ls.startswith('cgi_port'): cgi_port=int(ls.split('=')[1].strip(' ')) f.close() except Exception as ex: - print "Unable to read parameters",str(ex),"using defaults" + if instance!="main": raise ex + else: + print "Unable to read parameters",str(ex),"using defaults" if machine_is_bu==False:return True - syslog.syslog("hltd:Initiating FU unmount procedure") + syslog.syslog("hltd-"+str(instance)+": initiating FU unmount procedure") #continue with notifying FUs boxinfodir=os.path.join(watch_directory,'appliance/boxes') @@ -106,15 +113,16 @@ def stopFUs(): current_time = time.time() age = current_time - os.path.getmtime(os.path.join(boxinfodir,machine)) print "found machine",machine," which is ",str(age)," seconds old" - syslog.syslog("hltd: found machine "+str(machine) + " which is "+ str(age)+" seconds old") + syslog.syslog("hltd-"+str(instance)+": found machine "+str(machine) + " which is "+ str(age)+" seconds old") if age < 30: if receiver==None: receiver = UmountResponseReceiver(watch_directory,cgi_port) receiver.start() time.sleep(1) try: - connection = httplib.HTTPConnection(machine, cgi_port,timeout=5) - connection.request("GET",'cgi-bin/suspend_cgi.py') + #subtract cgi offset when connecting machine + connection = httplib.HTTPConnection(machine, cgi_port-cgi_offset,timeout=5) + connection.request("GET",'cgi-bin/suspend_cgi.py?port='+str(cgi_port)) response = connection.getresponse() machinelist.append(machine) except: @@ -133,7 +141,7 @@ def stopFUs(): machinePending=True activeMachines.append(machine) - syslog.syslog("hltd: waiting for machines to respond:"+str(activeMachines)) + syslog.syslog("hltd-"+str(instance)+": waiting for machines to respond:"+str(activeMachines)) if machinePending: usedTimeout+=2 time.sleep(2) @@ -142,12 +150,12 @@ def stopFUs(): except: #handle interrupt print "Interrupted!" 
- syslog.syslog("hltd: FU suspend was interrupted") + syslog.syslog("hltd-"+str(instance)+": FU suspend was interrupted") count=0 if receiver!=None: while receiver.finished==False: count+=1 - if count%100==0:syslog.syslog("hltd stop: trying to stop suspend receiver HTTP server thread (script interrupted)") + if count%100==0:syslog.syslog("hltd-"+str(instance)+": stop: trying to stop suspend receiver HTTP server thread (script interrupted)") try: receiver.stop() time.sleep(.1) @@ -161,7 +169,7 @@ def stopFUs(): if receiver!=None: while receiver.finished==False: count+=1 - if count%100==0:syslog.syslog("hltd stop: trying to stop suspend receiver HTTP server thread") + if count%100==0:syslog.syslog("hltd-"+str(instance)+": stop: trying to stop suspend receiver HTTP server thread") try: receiver.stop() time.sleep(.1) @@ -172,10 +180,10 @@ def stopFUs(): print "Finished FU suspend for:",str(machinelist) print "Not successful:",str(activeMachines) - syslog.syslog("hltd: unmount script completed. remaining machines :"+str(activeMachines)) + syslog.syslog("hltd-"+str(instance)+": unmount script completed. remaining machines :"+str(activeMachines)) if usedTimeout==maxTimeout: print "FU suspend failed for hosts:",activeMachines - syslog.syslog("hltd: FU suspend failed for hosts"+str(activeMachines)) + syslog.syslog("hltd-"+str(instance)+": FU suspend failed for hosts"+str(activeMachines)) return False return True diff --git a/python/daemon2.py b/python/daemon2.py index 97e51f6..a5c78c6 100644 --- a/python/daemon2.py +++ b/python/daemon2.py @@ -17,12 +17,25 @@ class Daemon2: attn: May change in the near future to use PEP daemon """ - def __init__(self, pidfile, processname, stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'): + def __init__(self, processname, instance, confname=None, stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'): self.stdin = stdin self.stdout = stdout self.stderr = stderr - self.pidfile = pidfile self.processname = processname + self.instance = instance + if confname==None:confname=processname + if instance=="main": + instsuffix="" + self.instancemsg="" + else: + instsuffix="-"+instance + self.instancemsg=" instance"+instance + + self.pidfile = "/var/run/" + processname + instsuffix + ".pid" + self.conffile = "/etc/" + confname + instsuffix + ".conf" + self.lockfile = '/var/lock/subsys/'+processname + instsuffix + + def daemonize(self): @@ -35,7 +48,7 @@ def daemonize(self): pid = os.fork() if pid > 0: # exit first parent - sys.exit(0) + return -1 except OSError, e: sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror)) sys.exit(1) @@ -71,14 +84,21 @@ def daemonize(self): atexit.register(self.delpid) pid = str(os.getpid()) file(self.pidfile,'w+').write("%s\n" % pid) + return 0 def delpid(self): - os.remove(self.pidfile) + if os.path.exists(self.pidfile): + os.remove(self.pidfile) def start(self): """ Start the daemon """ + if not os.path.exists(self.conffile): + print "Missing "+self.conffile+" - can not start instance" + #raise Exception("Missing "+self.conffile) + sys.exit(4) # Check for a pidfile to see if the daemon already runs + try: pf = file(self.pidfile,'r') pid = int(pf.read().strip()) @@ -89,10 +109,13 @@ def start(self): if pid: message = "pidfile %s already exists. 
Daemon already running?\n" sys.stderr.write(message % self.pidfile) - sys.exit(1) + sys.exit(3) # Start the daemon - self.daemonize() - self.run() + ret = self.daemonize() + if ret == 0: + self.run() + ret = 0 + return ret def status(self): """ @@ -107,16 +130,22 @@ def status(self): except IOError: pid = None if not pid: - message = self.processname+" not running, no pidfile %s\n" + message = self.processname + self.instancemsg +" not running, no pidfile %s\n" else: try: os.kill(pid,0) - message = self.processname+" is running with pidfile %s\n" + message = self.processname + self.instancemsg + " is running with pidfile %s\n" retval = True + except OSError as ex: + if ex.errno==1: + message = self.processname + self.instancemsg + " is running with pidfile %s\n" + else: + message = self.processname + self.instancemsg + " pid exist in %s but process is not running\n" except: - message = self.processname+" pid exist in %s but process is not running\n" + message = self.processname + self.instancemsg + " pid exist in %s but process is not running\n" + #should return true for puppet to detect service crash (also when stopped) - sys.stderr.write(message % self.pidfile) + sys.stdout.write(message % self.pidfile) return retval def silentStatus(self): @@ -132,7 +161,7 @@ def silentStatus(self): except IOError: pid = None if not pid: - message = self.processname+" not running, no pidfile %s\n" + message = self.processname + self.instancemsg +" not running, no pidfile %s\n" else: try: os.kill(pid,0) @@ -155,12 +184,18 @@ def stop(self): pid = None if not pid: - message = "pidfile %s does not exist. Daemon not running?\n" - sys.stderr.write(message % self.pidfile) + message = " not running, no pidfile %s\n" + sys.stdout.write(message % self.pidfile) + sys.stdout.flush() return # not an error in a restart # Try killing the daemon process + processPresent=False try: + #check is process is alive + os.kill(pid,0) + processPresent=True + sys.stdout.flush() # signal the daemon to stop timeout = 5.0 #kill timeout os.kill(pid, SIGINT) @@ -183,25 +218,37 @@ def stop(self): time.sleep(0.5) timeout-=0.5 except OSError, err: + time.sleep(.1) err = str(err) if err.find("No such process") > 0: #this handles the successful stopping of the daemon... if os.path.exists(self.pidfile): - print 'removing pidfile' - os.remove(self.pidfile) - sys.stdout.write('[OK]\n') - sys.stdout.flush() + if processPresent==False: + sys.stdout.write(" process "+str(pid)+" is dead. Removing pidfile" + self.pidfile+ " pid:" + str(pid)) + try: + os.remove(self.pidfile) + except Exception as ex: + sys.stdout.write(' [ \033[1;31mFAILED\033[0;39m ]\n') + sys.stderr.write(str(ex)+'\n') + sys.exit(1) + elif not os.path.exists(self.pidfile): + if processPresent==False: + sys.stdout.write(' service is not running') else: - print str(err) + sys.stdout.write(' [ \033[1;31mFAILED\033[0;39m ]\n') + sys.stderr.write(str(err)+'\n') sys.exit(1) - sys.stdout.write('[OK]\n') + + if (self.processname!="hltd"):sys.stdout.write("\t\t") + sys.stdout.write('\t\t\t [ \033[1;32mOK\033[0;39m ]\n') + sys.stdout.flush() def restart(self): """ Restart the daemon """ self.stop() - self.start() + return self.start() def run(self): """ @@ -212,7 +259,7 @@ def run(self): def emergencyUmount(self): cfg = ConfigParser.SafeConfigParser() - cfg.read('/etc/hltd.conf') + cfg.read(self.conffile) bu_base_dir=None#/fff/BU0? 
ramdisk_subdirectory = 'ramdisk' @@ -229,7 +276,7 @@ def emergencyUmount(self): process = subprocess.Popen(['mount'],stdout=subprocess.PIPE) out = process.communicate()[0] mounts = re.findall('/'+bu_base_dir+'[0-9]+',out) - if len(mounts)>1 and mounts[0]==mounts[1]: mounts=[mounts[0]] + mounts = sorted(list(set(mounts))) for point in mounts: sys.stdout.write("trying emergency umount of "+point+"\n") try: @@ -237,7 +284,8 @@ def emergencyUmount(self): except subprocess.CalledProcessError, err1: pass except Exception as ex: - sys.stdout.write(ex.args[0]+"\n") + #ok(legacy mountpoint) + pass try: subprocess.check_call(['umount',os.path.join('/'+point,ramdisk_subdirectory)]) except subprocess.CalledProcessError, err1: @@ -252,4 +300,20 @@ def emergencyUmount(self): sys.stdout.write(str(err1.returncode)+"\n") except Exception as ex: sys.stdout.write(ex.args[0]+"\n") + + + def touchLockFile(self): + try: + with open(self.lockfile,"w+") as fi: + pass + except: + pass + + def removeLockFile(self): + try: + os.unlink(self.lockfile) + except: + pass + + diff --git a/python/elastic.py b/python/elastic.py index 28fccdd..f5c6048 100755 --- a/python/elastic.py +++ b/python/elastic.py @@ -25,7 +25,6 @@ def __init__(self, esDir, inMonDir): self.inputMonDir = inMonDir self.movedModuleLegend = False self.movedPathLegend = False - self.processedHLTRatesLegend = False def start(self): self.run() @@ -44,7 +43,11 @@ def run(self): self.emptyQueue.clear() self.process() except (KeyboardInterrupt,Queue.Empty) as e: - self.emptyQueue.set() + self.emptyQueue.set() + except Exception as ex: + self.logger.exception(ex) + self.logger.fatal("Exiting on unhandled exception") + os._exit(1) else: time.sleep(0.5) @@ -60,8 +63,8 @@ def process(self): infile = self.infile filetype = infile.filetype eventtype = self.eventtype - if eventtype & inotify.IN_CLOSE_WRITE: - if filetype in [FAST,SLOW]: + if eventtype & (inotify.IN_CLOSE_WRITE | inotify.IN_MOVED_TO) : + if filetype in [FAST,SLOW,QSTATUS]: self.elasticize() elif self.esDirName in infile.dir: if filetype in [INDEX,STREAM,OUTPUT,STREAMDQMHISTOUTPUT]:self.elasticize() @@ -85,13 +88,6 @@ def process(self): logger.error(ex) pass self.movedPathLegend = True - elif filetype == HLTRATES: - self.logger.debug('received json HLT rates') - self.elasticize() - elif filetype == HLTRATESLEGEND and self.processedHLTRatesLegend==False: - self.logger.debug('received json HLT legend rates') - self.elasticize() - @@ -106,47 +102,46 @@ def elasticize(self): elif filetype == SLOW: es.elasticize_prc_sstate(infile) self.logger.debug(name+" going into prc-sstate") - self.infile.deleteFile() + self.infile.deleteFile(silent=True) elif filetype == INDEX: self.logger.info(name+" going into prc-in") es.elasticize_prc_in(infile) - self.infile.deleteFile() + self.infile.deleteFile(silent=True) elif filetype == STREAM: self.logger.info(name+" going into prc-out") es.elasticize_prc_out(infile) - self.infile.deleteFile() + self.infile.deleteFile(silent=True) elif filetype in [OUTPUT,STREAMDQMHISTOUTPUT]: self.logger.info(name+" going into fu-out") es.elasticize_fu_out(infile) - self.infile.deleteFile() + self.infile.deleteFile(silent=True) + elif filetype == QSTATUS: + self.logger.debug(name+" going into qstatus") + es.elasticize_queue_status(infile) elif filetype == COMPLETE: self.logger.info(name+" going into fu-complete") dt=os.path.getctime(infile.filepath) completed = datetime.datetime.utcfromtimestamp(dt).isoformat() es.elasticize_fu_complete(completed) - self.infile.deleteFile() + 
self.infile.deleteFile(silent=True) self.stop() - elif filetype == HLTRATESLEGEND: - if self.processedHLTRatesLegend==False: - es.elasticize_hltrateslegend(infile) - self.processedHLTRatesLegend=True - self.infile.deleteFile() - elif filetype == HLTRATES: - self.logger.info(name+" going into hlt-rates") - es.elasticize_hltrates(infile) - self.infile.deleteFile() def elasticizeLS(self): ls = self.infile.ls es.flushLS(ls) - self.infile.deleteFile() + self.infile.deleteFile(silent=True) if __name__ == "__main__": + + import procname + procname.setprocname('elastic') + + conf=initConf() logging.basicConfig(filename=os.path.join(conf.log_dir,"elastic.log"), - level=logging.INFO, + level=conf.service_log_level, format='%(levelname)s:%(asctime)s - %(funcName)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S') logger = logging.getLogger(os.path.basename(__file__)) @@ -165,17 +160,14 @@ def elasticizeLS(self): expected_processes = int(sys.argv[3]) indexSuffix = conf.elastic_cluster update_modulo=conf.fastmon_insert_modulo - dirname = os.path.basename(os.path.normpath(dirname)) - watchDir = os.path.join(conf.watch_directory,dirname)#??? - outputDir = conf.micromerge_output - monDir = os.path.join(watchDir,"mon") - tempDir = os.path.join(watchDir,ES_DIR_NAME) + rundirname = os.path.basename(os.path.normpath(dirname)) + monDir = os.path.join(dirname,"mon") + tempDir = os.path.join(dirname,ES_DIR_NAME) - mask = inotify.IN_CLOSE_WRITE | inotify.IN_MOVED_TO - monMask = inotify.IN_CLOSE_WRITE - tempMask = inotify.IN_CLOSE_WRITE + monMask = inotify.IN_CLOSE_WRITE | inotify.IN_MOVED_TO + tempMask = inotify.IN_CLOSE_WRITE | inotify.IN_MOVED_TO - logger.info("starting elastic for "+dirname) + logger.info("starting elastic for "+rundirname[:3]+' '+rundirname[3:]) try: os.makedirs(monDir) @@ -191,12 +183,11 @@ def elasticizeLS(self): #starting inotify thread mr = MonitorRanger() mr.setEventQueue(eventQueue) - #mr.register_inotify_path(watchDir,mask) mr.register_inotify_path(monDir,monMask) mr.register_inotify_path(tempDir,tempMask) mr.start_inotify() - es = elasticBand.elasticBand('http://'+conf.es_local+':9200',dirname,indexSuffix,expected_processes,update_modulo) + es = elasticBand.elasticBand('http://'+conf.es_local+':9200',rundirname,indexSuffix,expected_processes,update_modulo) #starting elasticCollector thread ec = elasticCollector(ES_DIR_NAME,inmondir) diff --git a/python/elasticBand.py b/python/elasticBand.py index 20c5093..978feed 100644 --- a/python/elasticBand.py +++ b/python/elasticBand.py @@ -5,15 +5,13 @@ from pyelasticsearch.client import ElasticHttpError from pyelasticsearch.client import ConnectionError from pyelasticsearch.client import Timeout -import json +import simplejson as json import csv import math import logging from aUtils import * -#MONBUFFERSIZE = 50 -es_server_url = 'http://localhost:9200' class elasticBand(): @@ -24,7 +22,7 @@ def __init__(self,es_server_url,runstring,indexSuffix,monBufferSize,fastUpdateMo self.prcinBuffer = {} self.prcoutBuffer = {} self.fuoutBuffer = {} - self.es = ElasticSearch(es_server_url,timeout=20) + self.es = ElasticSearch(es_server_url,timeout=20,revival_delay=60) self.hostname = os.uname()[1] self.hostip = socket.gethostbyname_ex(self.hostname)[2][0] #self.number_of_data_nodes = self.es.health()['number_of_data_nodes'] @@ -36,12 +34,13 @@ def __init__(self,es_server_url,runstring,indexSuffix,monBufferSize,fastUpdateMo aliasName = runstring + "_" + indexSuffix self.indexName = aliasName# + "_" + self.hostname - def imbue_jsn(self,infile): + def 
imbue_jsn(self,infile,silent=False): with open(infile.filepath,'r') as fp: try: document = json.load(fp) except json.scanner.JSONDecodeError,ex: - logger.exception(ex) + if silent==False: + self.logger.exception(ex) return None,-1 return document,0 @@ -155,58 +154,24 @@ def elasticize_prc_in(self,infile): document['data']=datadict document['ls']=int(ls[2:]) document['index']=int(index[5:]) - document['dest']=os.uname()[1] + document['dest']=self.hostname document['process']=int(prc[3:]) try:document.pop('definition') except:pass self.prcinBuffer.setdefault(ls,[]).append(document) #self.es.index(self.indexName,'prc-in',document) - - def elasticize_hltrateslegend(self,infile): - document,ret = self.imbue_jsn(infile) + def elasticize_queue_status(self,infile): + document,ret = self.imbue_jsn(infile,silent=True) if ret<0:return False - datadict={} - #datadict['pid'] = int(infile.pid[3:]) - try: - paths=document['data'][0].strip('[]') - datasets=document['data'][1].strip('[]') - datadict['dataset-names']=datasets.split(',') if len(datasets)>0 else [] - datadict['path-names']=paths.split(',') if len(paths)>0 else [] - except: - pass - self.tryIndex('hltrates-legend',datadict) + document['fm_date']=str(infile.mtime) + document['host']=self.hostname + self.tryIndex('qstatus',document) return True - - def elasticize_hltrates(self,infile): - document,ret = self.imbue_jsn(infile) - if ret<0:return False - datadict={} - try: - datadict['ls'] = int(infile.ls[2:]) - datadict['pid'] = int(infile.pid[3:]) - try: - if json.loads(document['data'][0])[0]==0:return True - except: - pass - datadict['processed']=json.loads(document['data'][0])[0] - datadict['path-wasrun']=json.loads(document['data'][1]) - datadict['path-afterl1seed']=json.loads(document['data'][2]) - datadict['path-afterprescale']=json.loads(document['data'][3]) - datadict['path-accepted']=json.loads(document['data'][4]) - datadict['path-rejected']=json.loads(document['data'][5]) - datadict['path-errors']=json.loads(document['data'][6]) - datadict['dataset-accepted']=json.loads(document['data'][7]) - except: - return False - self.tryIndex('hltrates',datadict) - return True - - def elasticize_fu_complete(self,timestamp): document = {} - document['host']=os.uname()[1] + document['host']=self.hostname document['fm_date']=timestamp self.tryIndex('fu-complete',document) @@ -264,7 +229,7 @@ def tryBulkIndex(self,docname,documents,attempts=1): if attempts==0: self.indexFailures+=1 if self.indexFailures<2: - self.logger.error("Elasticsearch connection error.") + self.logger.warning("Elasticsearch connection error.") time.sleep(5) except ElasticHttpError as ex: if attempts==0: diff --git a/python/elasticbu.py b/python/elasticbu.py index 8565615..2fad372 100755 --- a/python/elasticbu.py +++ b/python/elasticbu.py @@ -23,10 +23,12 @@ import requests import simplejson as json - import socket -def getURLwithIP(url): +#silence HTTP connection info from requests package +logging.getLogger("urllib3").setLevel(logging.WARNING) + +def getURLwithIP(url,nsslock=None): try: prefix = '' if url.startswith('http://'): @@ -41,7 +43,17 @@ def getURLwithIP(url): logging.error('could not parse URL ' +url) raise(ex) if url!='localhost': - ip = socket.gethostbyname(url) + if nsslock is not None: + try: + nsslock.acquire() + ip = socket.gethostbyname(url) + nsslock.release() + except Exception as ex: + try:nsslock.release() + except:pass + raise ex + else: + ip = socket.gethostbyname(url) else: ip='127.0.0.1' return prefix+str(ip)+suffix @@ -49,8 +61,9 @@ def 
getURLwithIP(url): class elasticBandBU: - def __init__(self,runnumber,startTime,runMode=True): + def __init__(self,conf,runnumber,startTime,runMode=True,nsslock=None): self.logger = logging.getLogger(self.__class__.__name__) + self.conf=conf self.es_server_url=conf.elastic_runindex_url self.runindex_write="runindex_"+conf.elastic_runindex_name+"_write" self.runindex_read="runindex_"+conf.elastic_runindex_name+"_read" @@ -66,8 +79,14 @@ def __init__(self,runnumber,startTime,runMode=True): self.runMode=runMode self.boxinfoFUMap = {} self.ip_url=None + self.nsslock=nsslock self.updateIndexMaybe(self.runindex_name,self.runindex_write,self.runindex_read,mappings.central_es_settings,mappings.central_runindex_mapping) self.updateIndexMaybe(self.boxinfo_name,self.boxinfo_write,self.boxinfo_read,mappings.central_es_settings,mappings.central_boxinfo_mapping) + self.black_list=None + if self.conf.instance=='main': + self.hostinst = self.host + else: + self.hostinst = self.host+'_'+self.conf.instance #write run number document if runMode == True and self.stopping==False: @@ -89,14 +108,19 @@ def updateIndexMaybe(self,index_name,alias_write,alias_read,settings,mapping): connectionAttempts+=1 try: if retry or self.ip_url==None: - self.ip_url=getURLwithIP(self.es_server_url) - self.es = ElasticSearch(self.es_server_url) + self.ip_url=getURLwithIP(self.es_server_url,self.nsslock) + self.es = ElasticSearch(self.ip_url,timeout=20,revival_delay=60) #check if runindex alias exists - self.logger.info('writing to elastic index '+alias_write) if requests.get(self.es_server_url+'/_alias/'+alias_write).status_code == 200: + self.logger.info('writing to elastic index '+alias_write + ' on '+self.es_server_url+' - '+self.ip_url ) self.createDocMappingsMaybe(alias_write,mapping) - break + break + else: + time.sleep(.5) + if (connectionAttempts%10)==0: + self.logger.error('unable to access to elasticsearch alias ' + alias_write + ' on '+self.es_server_url+' / '+self.ip_url) + continue except ElasticHttpError as ex: #es error, retry self.logger.error(ex) @@ -110,7 +134,7 @@ def updateIndexMaybe(self,index_name,alias_write,alias_read,settings,mapping): retry=True continue - except (ConnectionError,Timeout) as ex: + except (socket.gaierror,ConnectionError,Timeout) as ex: #try to reconnect with different IP from DNS load balancing if self.runMode and connectionAttempts>100: self.logger.error('elastic (BU): exiting after 100 connection attempts to '+ self.es_server_url) @@ -128,12 +152,19 @@ def createDocMappingsMaybe(self,index_name,mapping): doc = {key:mapping[key]} res = requests.get(self.ip_url+'/'+index_name+'/'+key+'/_mapping') #only update if mapping is empty - if res.status_code==200 and res.content.strip()=='{}': - requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc)) - - def resetURL(url): - self.es = None - self.es = ElasticSearch(url) + if res.status_code==200: + if res.content.strip()=='{}': + requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc)) + else: + #still check if number of properties is identical in each type + inmapping = json.loads(res.content) + for indexname in inmapping: + properties = inmapping[indexname]['mappings'][key]['properties'] + #should be size 1 + for pdoc in mapping[key]['properties']: + if pdoc not in properties: + requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc)) + break def read_line(self,fullpath): with open(fullpath,'r') as fp: @@ -177,7 +208,31 @@ def elasticize_box(self,infile): basename = 
infile.basename self.logger.debug(basename) current_time = time.time() - if basename.startswith('fu'): + + if infile.data=={}:return + + bu_doc=False + if basename.startswith('bu') or basename.startswith('dvbu'): + bu_doc=True + + #check box file against blacklist + if bu_doc or self.black_list==None: + self.black_list=[] + + try: + with open(os.path.join(self.conf.watch_directory,'appliance','blacklist'),"r") as fi: + try: + self.black_list = json.load(fi) + except ValueError: + #file is being written or corrupted + return + except: + #blacklist file is not present, do not filter + pass + + if basename in self.black_list:return + + if bu_doc==False: try: self.boxinfoFUMap[basename] = [infile.data,current_time] except Exception as ex: @@ -185,34 +240,56 @@ def elasticize_box(self,infile): return try: document = infile.data - document['id']=basename + #unique id for separate instances + if bu_doc: + document['id']=self.hostinst + else: + document['id']=basename + + #both here and in "boxinfo_appliance" + document['appliance']=self.host + document['instance']=self.conf.instance + #only here + document['host']=basename + self.index_documents('boxinfo',[document]) except Exception as ex: self.logger.warning('box info not injected: '+str(ex)) return - if basename.startswith('bu') or basename.startswith('dvbu'): + if bu_doc: try: document = infile.data + try: + document.pop('id') + except:pass + try: + document.pop('host') + except:pass #aggregation from FUs document['idles']=0 document['used']=0 document['broken']=0 document['quarantined']=0 + document['cloud']=0 document['usedDataDir']=0 document['totalDataDir']=0 document['hosts']=[basename] + document['blacklistedHosts']=[] for key in self.boxinfoFUMap: - dpair = self.boxinfoFUMap[key] - d = dpair[0] - #check if entry is not older than 10 seconds - if current_time - dpair[1] > 10:continue - document['idles']+=int(d['idles']) - document['used']+=int(d['used']) - document['broken']+=int(d['broken']) - document['quarantined']+=int(d['quarantined']) - document['usedDataDir']+=int(d['usedDataDir']) - document['totalDataDir']+=int(d['totalDataDir']) - document['hosts'].append(key) + dpair = self.boxinfoFUMap[key] + d = dpair[0] + #check if entry is not older than 10 seconds + if current_time - dpair[1] > 10:continue + document['idles']+=int(d['idles']) + document['used']+=int(d['used']) + document['broken']+=int(d['broken']) + document['quarantined']+=int(d['quarantined']) + document['cloud']+=int(d['cloud']) + document['usedDataDir']+=int(d['usedDataDir']) + document['totalDataDir']+=int(d['totalDataDir']) + document['hosts'].append(key) + for blacklistedHost in self.black_list: + document['blacklistedHosts'].append(blacklistedHost) self.index_documents('boxinfo_appliance',[document],bulk=False) except Exception as ex: #in case of malformed box info @@ -238,8 +315,10 @@ def elasticize_eols(self,infile): def index_documents(self,name,documents,bulk=True): attempts=0 destination_index = "" + is_box=False if name.startswith("boxinfo"): destination_index = self.boxinfo_write + is_box=True else: destination_index = self.runindex_write while True: @@ -253,16 +332,18 @@ def index_documents(self,name,documents,bulk=True): except ElasticHttpError as ex: if attempts<=1:continue self.logger.error('elasticsearch HTTP error. 
skipping document '+name) + if is_box==True:break #self.logger.exception(ex) return False - except (ConnectionError,Timeout) as ex: + except (socket.gaierror,ConnectionError,Timeout) as ex: if attempts>100 and self.runMode: raise(ex) self.logger.error('elasticsearch connection error. retry.') + if is_box==True:break if self.stopping:return False time.sleep(0.1) - ip_url=getURLwithIP(self.es_server_url) - self.es = ElasticSearch(ip_url) + ip_url=getURLwithIP(self.es_server_url,self.nsslock) + self.es = ElasticSearch(ip_url,timeout=20,revival_delay=60) return False @@ -290,7 +371,7 @@ def stop(self): self.stoprequest.set() def run(self): - self.logger.info("Start main loop") + self.logger.info("elasticCollectorBU: start main loop (monitoring:"+self.inRunDir+")") count = 0 while not (self.stoprequest.isSet() and self.emptyQueue.isSet()) : if self.source: @@ -300,16 +381,16 @@ def run(self): self.infile = fileHandler(event.fullpath) self.emptyQueue.clear() if self.infile.filetype==EOR: - if self.es: - try: - dt=os.path.getctime(event.fullpath) - endtime = datetime.datetime.utcfromtimestamp(dt).isoformat() - self.es.elasticize_runend_time(endtime) - except Exception as ex: - self.logger.warning(str(ex)) - endtime = datetime.datetime.utcnow().isoformat() - self.es.elasticize_runend_time(endtime) - break + if self.es: + try: + dt=os.path.getctime(event.fullpath) + endtime = datetime.datetime.utcfromtimestamp(dt).isoformat() + self.es.elasticize_runend_time(endtime) + except Exception as ex: + self.logger.warning(str(ex)) + endtime = datetime.datetime.utcnow().isoformat() + self.es.elasticize_runend_time(endtime) + break self.process() except (KeyboardInterrupt,Queue.Empty) as e: self.emptyQueue.set() @@ -325,9 +406,9 @@ def run(self): #if run dir deleted if os.path.exists(self.inRunDir)==False: self.logger.info("Exiting because run directory in has disappeared") - #nevertheless put run end time if self.es: - endtime = datetime.datetime.utcnow().isoformat() + #write end timestamp in case EoR file was not seen + endtime = datetime.datetime.utcnow().isoformat() self.es.elasticize_runend_time(endtime) break self.logger.info("Stop main loop (watching directory " + str(self.inRunDir) + ")") @@ -374,7 +455,7 @@ def stop(self): self.stoprequest.set() def run(self): - self.logger.info("Start main loop") + self.logger.info("elasticBoxCollectorBU: start main loop") while not (self.stoprequest.isSet() and self.emptyQueue.isSet()) : if self.source: try: @@ -391,7 +472,7 @@ def run(self): self.logger.warning("IOError on reading "+event.fullpath) else: time.sleep(1.0) - self.logger.info("Stop main loop") + self.logger.info("elasticBoxCollectorBU: stop main loop") def setSource(self,source): self.source = source @@ -408,9 +489,12 @@ def process(self): class BoxInfoUpdater(threading.Thread): - def __init__(self,ramdisk): + def __init__(self,ramdisk,conf,nsslock): self.logger = logging.getLogger(self.__class__.__name__) self.stopping = False + self.es=None + self.conf=conf + self.nsslock=nsslock try: threading.Thread.__init__(self) @@ -435,7 +519,7 @@ def __init__(self,ramdisk): def run(self): try: - self.es = elasticBandBU(0,'',False) + self.es = elasticBandBU(self.conf,0,'',False,self.nsslock) if self.stopping:return self.ec = elasticBoxCollectorBU(self.es) @@ -450,7 +534,7 @@ def stop(self): try: self.stopping=True self.threadEvent.set() - if self.es: + if self.es is not None: self.es.stopping=True self.es.threadEvent.set() if self.mr is not None: @@ -465,16 +549,19 @@ def stop(self): class 
RunCompletedChecker(threading.Thread): - def __init__(self,mode,nr,nresources,run_dir,active_runs,elastic_process): + def __init__(self,conf,mode,nr,nresources,run_dir,active_runs,active_runs_errors,elastic_process): self.logger = logging.getLogger(self.__class__.__name__) + self.conf=conf self.mode = mode self.nr = nr self.nresources = nresources - self.rundirCheckPath = conf.watch_directory +'/run'+ str(nr).zfill(conf.run_number_padding) + rundir = 'run'+ str(nr).zfill(conf.run_number_padding) + self.rundirCheckPath = os.path.join(conf.watch_directory, rundir) self.eorCheckPath = os.path.join(self.rundirCheckPath,'run' + str(nr).zfill(conf.run_number_padding) + '_ls0000_EoR.jsn') - self.url = 'http://localhost:9200/run'+str(nr).zfill(conf.run_number_padding)+'*/fu-complete/_count' - self.urlclose = 'http://localhost:9200/run'+str(nr).zfill(conf.run_number_padding)+'*/_close' - self.urlsearch = 'http://localhost:9200/run'+str(nr).zfill(conf.run_number_padding)+'*/fu-complete/_search?size=1' + self.indexPrefix = 'run'+str(nr).zfill(conf.run_number_padding) + '_' + conf.elastic_cluster + self.url = 'http://'+conf.es_local+':9200/' + self.indexPrefix + '*/fu-complete/_count' + self.urlclose = 'http://'+conf.es_local+':9200/' + self.indexPrefix + '*/_close' + self.urlsearch = 'http://'+conf.es_local+':9200/' + self.indexPrefix + '*/fu-complete/_search?size=1' self.url_query = '{ "query": { "filtered": {"query": {"match_all": {}}}}, "sort": { "fm_date": { "order": "desc" }}}' @@ -482,6 +569,7 @@ def __init__(self,mode,nr,nresources,run_dir,active_runs,elastic_process): self.threadEvent = threading.Event() self.run_dir = run_dir self.active_runs = active_runs + self.active_runs_errors = active_runs_errors self.elastic_process=elastic_process try: threading.Thread.__init__(self) @@ -492,7 +580,6 @@ def __init__(self,mode,nr,nresources,run_dir,active_runs,elastic_process): def checkBoxes(self,dir): - files = os.listdir(dir) endAllowed=True runFound=False @@ -553,7 +640,7 @@ def run(self): if os.path.exists(self.eorCheckPath) or os.path.exists(self.rundirCheckPath)==False: break - dir = conf.resource_base+'/boxes/' + dir = self.conf.resource_base+'/boxes/' check_boxes=True check_es_complete=True total_es_elapsed=0 @@ -563,9 +650,14 @@ def run(self): check_boxes = self.checkBoxes(dir) if check_boxes==False: + try: + self.active_runs_errors.pop(self.active_runs.index(int(self.nr))) + except: + pass try: self.active_runs.remove(int(self.nr)) - except:pass + except: + pass if check_es_complete: try: @@ -578,29 +670,21 @@ def run(self): fm_time = str(dataq['hits']['hits'][0]['_source']['fm_date']) #fill in central index completition time postq = "{runNumber\":\"" + str(self.nr) + "\",\"completedTime\" : \"" + fm_time + "\"}" - requests.post(conf.elastic_runindex_url+'/'+"runindex_"+conf.elastic_runindex_name+'_write/run',postq,timeout=5) - self.logger.info("filled in completition time for run"+str(self.nr)) + requests.post(self.conf.elastic_runindex_url+'/'+"runindex_"+self.conf.elastic_runindex_name+'_write/run',postq,timeout=5) + self.logger.info("filled in completition time for run "+str(self.nr)) except IndexError: # 0 FU resources present in this run, skip writing completition time pass except Exception as ex: self.logger.exception(ex) - try: - if conf.close_es_index==True: - #wait a bit for central ES queries to complete - time.sleep(10) - resp = requests.post(self.urlclose,timeout=5) - self.logger.info('closed appliance ES index for run '+str(self.nr)) - except Exception as exc: - 
self.logger.error('Error in run completition check') - self.logger.exception(exc) check_es_complete=False continue else: + #TODO:do this only using active runs time.sleep(5) total_es_elapsed+=5 if total_es_elapsed>600: - self.logger.error('run index complete flag was not written by all FUs, giving up after 10 minutes.') + self.logger.warning('run index complete flag was not written by all FUs, giving up checks after 10 minutes.') check_es_complete=False continue except Exception,ex: @@ -609,7 +693,17 @@ def run(self): check_es_complete=False #exit if both checks are complete - if check_boxes==False and check_es_complete==False:break + if check_boxes==False and check_es_complete==False: + try: + if self.conf.close_es_index==True: + #wait a bit for queries to complete + time.sleep(10) + resp = requests.post(self.urlclose,timeout=5) + self.logger.info('closed appliance ES index for run '+str(self.nr)) + except Exception as exc: + self.logger.error('Error in closing run index') + self.logger.exception(exc) + break #check every 10 seconds self.threadEvent.wait(10) @@ -622,10 +716,15 @@ def stop(self): self.threadEvent.set() - if __name__ == "__main__": + + import procname + procname.setprocname('elasticbu') + + conf=initConf(sys.argv[1]) + logging.basicConfig(filename=os.path.join(conf.log_dir,"elasticbu.log"), - level=logging.INFO, + level=conf.service_log_level, format='%(levelname)s:%(asctime)s - %(funcName)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S') logger = logging.getLogger(os.path.basename(__file__)) @@ -636,9 +735,8 @@ def stop(self): eventQueue = Queue.Queue() - runnumber = sys.argv[1] + runnumber = sys.argv[2] watchdir = conf.watch_directory - mainDir = os.path.join(watchdir,'run'+ runnumber.zfill(conf.run_number_padding)) dt=os.path.getctime(mainDir) startTime = datetime.datetime.utcfromtimestamp(dt).isoformat() @@ -668,7 +766,7 @@ def stop(self): mr.start_inotify() - es = elasticBandBU(runnumber,startTime) + es = elasticBandBU(conf,runnumber,startTime) #starting elasticCollector thread ec = elasticCollectorBU(es,mainDir) diff --git a/python/fillresources.py b/python/fillresources.py index 902c548..cc3c7d1 100755 --- a/python/fillresources.py +++ b/python/fillresources.py @@ -3,6 +3,18 @@ import os import shutil import hltdconf +import time + +def clearDir(dir): + try: + files = os.listdir(dir) + for file in files: + try: + os.unlink(os.path.join(dir,file)) + except: + pass + except: + pass conf=hltdconf.hltdConf('/etc/hltd.conf') @@ -13,26 +25,14 @@ elif 'fu' in os.uname()[1]: role='fu' else: role = conf.role -if role=='fu' and conf.dqm_machine=="False": - - try: - shutil.rmtree('/etc/appliance/online/*') - except: - pass - try: - shutil.rmtree('/etc/appliance/offline/*') - except: - pass - try: - shutil.rmtree('/etc/appliance/except/*') - except: - pass - try: - shutil.rmtree('/etc/appliance/quarantined/*') - except: - pass - +if role=='fu' and not conf.dqm_machine: + clearDir(conf.resource_base+'/idle') + clearDir(conf.resource_base+'/online') + clearDir(conf.resource_base+'/except') + clearDir(conf.resource_base+'/quarantined') + clearDir(conf.resource_base+'/cloud') + fp=open('/proc/cpuinfo','r') resource_count = 0 for line in fp: diff --git a/python/genTestFakeBu_cfg.py b/python/genTestFakeBu_cfg.py index 39424da..f1963f5 100644 --- a/python/genTestFakeBu_cfg.py +++ b/python/genTestFakeBu_cfg.py @@ -50,7 +50,7 @@ process.source = cms.Source("EmptySource", firstRun= cms.untracked.uint32(options.runNumber), - numberEventsInLuminosityBlock = cms.untracked.uint32(500), + 
numberEventsInLuminosityBlock = cms.untracked.uint32(200), numberEventsInRun = cms.untracked.uint32(0) ) @@ -79,7 +79,7 @@ process.out = cms.OutputModule("RawStreamFileWriterForBU", ProductLabel = cms.untracked.string("s"), - numEventsPerFile = cms.untracked.uint32(100), + numEventsPerFile = cms.untracked.uint32(50), jsonDefLocation = cms.untracked.string(cmsswbase+"/src/EventFilter/Utilities/plugins/budef.jsd"), debug = cms.untracked.bool(True) ) diff --git a/python/hltd b/python/hltd index edaedd4..6b125e5 100755 --- a/python/hltd +++ b/python/hltd @@ -13,35 +13,47 @@ from applianceumount import checkMode import time import syslog -def touchLockFile(): - try: - with open('/var/lock/subsys/hltd',"w+") as fi: - pass - except: - pass - -def removeLockFile(): - try: - os.unlink('/var/lock/subsys/hltd') - except: - pass + +def startService(daemon,srvInstance): + daemon.touchLockFile() + proc = Popen(["/opt/hltd/python/hltd.py",srvInstance], stdout=PIPE) + output = proc.communicate()[0] + time.sleep(.1) + if daemon.silentStatus() and proc.returncode==0: + print 'Starting hltd instance',srvInstance,':\t\t\t\t [ \033[1;32mOK\033[0;39m ]' + + daemon.touchLockFile() + else: + if proc.returncode==3:sys.exit(0) + print 'Starting hltd instance',srvInstance,':\t\t\t\t [ \033[1;31mFAILED\033[0;39m ]' + print output + sys.exit(1) if __name__ == "__main__": - daemon = hltd('/var/run/hltd.pid') - if len(sys.argv) == 2: + + if len(sys.argv) <=2 or sys.argv[2]=="all": + try: + instances=[] + with open('/etc/hltd.instances','r') as fi: + for line in fi.readlines(): + lnstrip = line.strip(' \n') + if len(lnstrip)>0 and lnstrip.startswith("#")==False: + instances.append(lnstrip) + except: + instances = ["main"] + else: + instances = [sys.argv[2]] + + for instance in instances: + daemon = hltd(instance) + + if len(sys.argv) >= 2: if 'start' == sys.argv[1]: - touchLockFile() - output = Popen(["/opt/hltd/python/hltd.py"], stdout=PIPE).communicate()[0] - if daemon.silentStatus(): - print '[OK]' - else: - print '[Failed]' - print output + startService(daemon,instance) + elif 'stop' == sys.argv[1]: - if daemon.status(): - daemon.stop() - elif os.path.exists('/var/run/hltd.pid'): - daemon.delpid() + sys.stdout.write('Stopping hltd instance '+instance+':') + daemon.stop() #determine runlevel std_out="" @@ -52,58 +64,55 @@ if __name__ == "__main__": from_level = std_out.split('\t')[0].rstrip('\n').strip().split(' ')[0] to_level = std_out.split('\t')[0].rstrip('\n').strip().split(' ')[1] if to_level.isdigit() and int(to_level) in [0,1,6] and str(from_level)!="1": - - if stopFUs()==False: + + if stopFUs(instance)==False: msg = "Shutdown or reboot is cancelled by hltd - FU umount failed! Switching to runlevel 3..." - syslog.syslog(msg) + syslog.syslog("hltd-"+str(instance)+":"+msg) time.sleep(2) p = Popen("init 3", shell=True, stdout=PIPE) p.wait() else: - removeLockFile() + daemon.removeLockFile() else: - if checkMode()=="fu": - removeLockFile() + if checkMode(instance)=="fu": + daemon.removeLockFile() else: print "Lock file remains. Run stop-appliance to unmount FUs." except: print "Runlevel:",std_out - syslog.syslog("Exception when determining runlevel:"+str(std_out)) - + syslog.syslog("hltd-"+str(instance)+":Exception when determining runlevel:"+str(std_out)) + elif 'stop-appliance' == sys.argv[1]: - if daemon.status(): - daemon.stop() - elif os.path.exists('/var/run/hltd.pid'): - daemon.delpid() - - if checkMode()=="fu": - print "This command is not supported on FU." 
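The init-script rewrite above loops over service instances read from /etc/hltd.instances. A minimal sketch of that lookup, assuming the same one-name-per-line format with '#' comments and the fallback to a single 'main' instance; the sample file contents shown in the comment are hypothetical:

def read_instances(path='/etc/hltd.instances'):
    # one service instance name per line; blank lines and '#' comments are skipped
    instances = []
    try:
        with open(path, 'r') as fi:
            for line in fi.readlines():
                name = line.strip(' \n')
                if len(name) > 0 and not name.startswith('#'):
                    instances.append(name)
    except IOError:
        # no instances file: run only the default instance
        return ['main']
    return instances

# a hypothetical /etc/hltd.instances could contain e.g.
#   main
#   testing
print(read_instances())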
- - elif stopFUs()==False: - print "FU umount failed, lock file remains. FU umount failed." + sys.stdout.write('Stopping hltd instance '+instance+':') + daemon.stop() + + if checkMode(instance)=="fu": + print "This command is not supported on FU. Performed only service stop." + + elif stopFUs(instance)==False: + print "FU umount failed, lock file remains." else: - removeLockFile() + daemon.removeLockFile() elif 'stop-light' == sys.argv[1]: - if daemon.status(): - daemon.stop() - elif os.path.exists('/var/run/hltd.pid'): - daemon.delpid() - removeLockFile() - + sys.stdout.write('Stopping hltd instance '+instance+':') + daemon.stop() + daemon.removeLockFile() + elif 'restart' == sys.argv[1]: - daemon.restart() - touchLockFile() + sys.stdout.write('Stopping hltd instance '+instance+':') + daemon.stop() + startService(daemon,instance) + elif 'status' == sys.argv[1]: daemon.status() else: print "Unknown command" sys.exit(2) -# print "hltd "+sys.argv[1]+"ed" -# logging.debug("executed "+sys.argv[1]) - sys.exit(0) else: - print "usage: %s start|stop|stop-light|restart|status" % sys.argv[0] + print "usage: %s start|stop|stop-light|restart|status |all|main|instance" % sys.argv[0] sys.exit(2) + +sys.exit(0) diff --git a/python/hltd.py b/python/hltd.py index 3a1ea6f..5adbb70 100755 --- a/python/hltd.py +++ b/python/hltd.py @@ -9,10 +9,9 @@ import subprocess from signal import SIGKILL from signal import SIGINT -import json +import simplejson as json #import SOAPpy import threading -import fcntl import CGIHTTPServer import BaseHTTPServer import cgitb @@ -21,6 +20,8 @@ import re import shutil import socket +#import fcntl +#import random #modules distributed with hltd import prctl @@ -34,26 +35,63 @@ from elasticbu import BoxInfoUpdater from elasticbu import RunCompletedChecker -idles = conf.resource_base+'/idle/' -used = conf.resource_base+'/online/' -broken = conf.resource_base+'/except/' -quarantined = conf.resource_base+'/quarantined/' +from aUtils import fileHandler + nthreads = None nstreams = None expected_processes = None run_list=[] +runs_pending_shutdown=[] bu_disk_list_ramdisk=[] bu_disk_list_output=[] +bu_disk_list_ramdisk_instance=[] +bu_disk_list_output_instance=[] active_runs=[] +active_runs_errors=[] resource_lock = threading.Lock() +nsslock = threading.Lock() suspended=False +entering_cloud_mode=False +cloud_mode=False + +ramdisk_submount_size=0 +machine_blacklist=[] +boxinfoFUMap = {} + +logCollector = None + +def setFromConf(myinstance): + + global conf + global logger + global idles + global used + global broken + global quarantined + global cloud + + conf=initConf(myinstance) -logging.basicConfig(filename=os.path.join(conf.log_dir,"hltd.log"), + idles = conf.resource_base+'/idle/' + used = conf.resource_base+'/online/' + broken = conf.resource_base+'/except/' + quarantined = conf.resource_base+'/quarantined/' + cloud = conf.resource_base+'/cloud/' + + #prepare log directory + if myinstance!='main': + if not os.path.exists(conf.log_dir): os.makedirs(conf.log_dir) + if not os.path.exists(os.path.join(conf.log_dir,'pid')): os.makedirs(os.path.join(conf.log_dir,'pid')) + os.chmod(conf.log_dir,0777) + os.chmod(os.path.join(conf.log_dir,'pid'),0777) + + logging.basicConfig(filename=os.path.join(conf.log_dir,"hltd.log"), level=conf.service_log_level, format='%(levelname)s:%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + logger = logging.getLogger(os.path.basename(__file__)) + conf.dump() -conf.dump() def preexec_function(): dem = demote.demote(conf.user) @@ -62,35 +100,72 @@ def 
preexec_function(): # os.setpgrp() def cleanup_resources(): + try: + dirlist = os.listdir(cloud) + for cpu in dirlist: + os.rename(cloud+cpu,idles+cpu) + dirlist = os.listdir(broken) + for cpu in dirlist: + os.rename(broken+cpu,idles+cpu) + dirlist = os.listdir(used) + for cpu in dirlist: + os.rename(used+cpu,idles+cpu) + dirlist = os.listdir(quarantined) + for cpu in dirlist: + os.rename(quarantined+cpu,idles+cpu) + dirlist = os.listdir(idles) + #quarantine files beyond use fraction limit (rounded to closest integer) + num_excluded = round(len(dirlist)*(1.-conf.resource_use_fraction)) + for i in range(0,int(num_excluded)): + os.rename(idles+dirlist[i],quarantined+dirlist[i]) + return True + except Exception as ex: + logger.warning(str(ex)) + return False +def move_resources_to_cloud(): dirlist = os.listdir(broken) for cpu in dirlist: - os.rename(broken+cpu,idles+cpu) + os.rename(broken+cpu,cloud+cpu) dirlist = os.listdir(used) for cpu in dirlist: - os.rename(used+cpu,idles+cpu) + os.rename(used+cpu,cloud+cpu) dirlist = os.listdir(quarantined) for cpu in dirlist: - os.rename(quarantined+cpu,idles+cpu) + os.rename(quarantined+cpu,cloud+cpu) dirlist = os.listdir(idles) - #quarantine files beyond use fraction limit (rounded to closest integer) - num_excluded = round(len(dirlist)*(1.-conf.resource_use_fraction)) - for i in range(0,int(num_excluded)): - os.rename(idles+dirlist[i],quarantined+dirlist[i]) + for cpu in dirlist: + os.rename(idles+cpu,cloud+cpu) + dirlist = os.listdir(idles) + for cpu in dirlist: + os.rename(idles+cpu,cloud+cpu) + def cleanup_mountpoints(remount=True): - bu_disk_list_ramdisk[:] = [] - bu_disk_list_output[:] = [] + + global bu_disk_list_ramdisk + global bu_disk_list_ramdisk_instance + global bu_disk_list_output + global bu_disk_list_output_instance + + bu_disk_list_ramdisk = [] + bu_disk_list_output = [] + bu_disk_list_ramdisk_instance = [] + bu_disk_list_output_instance = [] + if conf.bu_base_dir[0] == '/': - bu_disk_list_ramdisk[:] = [os.path.join(conf.bu_base_dir,conf.ramdisk_subdirectory)] - bu_disk_list_output[:] = [os.path.join(conf.bu_base_dir,conf.output_subdirectory)] + bu_disk_list_ramdisk = [os.path.join(conf.bu_base_dir,conf.ramdisk_subdirectory)] + bu_disk_list_output = [os.path.join(conf.bu_base_dir,conf.output_subdirectory)] + if conf.instance=="main": + bu_disk_list_ramdisk_instance = bu_disk_list_ramdisk + bu_disk_list_output_instance = bu_disk_list_output + else: + bu_disk_list_ramdisk_instance = [os.path.join(bu_disk_list_ramdisk[0],conf.instance)] + bu_disk_list_output_instance = [os.path.join(bu_disk_list_output[0],conf.instance)] + #make subdirectories if necessary and return if remount==True: - try: - os.makedirs(conf.bu_base_dir) - except OSError: - pass try: os.makedirs(os.path.join(conf.bu_base_dir,conf.ramdisk_subdirectory)) except OSError: @@ -104,59 +179,63 @@ def cleanup_mountpoints(remount=True): process = subprocess.Popen(['mount'],stdout=subprocess.PIPE) out = process.communicate()[0] mounts = re.findall('/'+conf.bu_base_dir+'[0-9]+',out) - mounts = list(set(mounts)) - #if len(mounts)>1 and mounts[0]==mounts[1]: mounts=[mounts[0]] - logging.info("cleanup_mountpoints: found following mount points ") - logging.info(mounts) + mounts = sorted(list(set(mounts))) + logger.info("cleanup_mountpoints: found following mount points: ") + logger.info(mounts) umount_failure=False for point in mounts: - logging.info("trying umount of "+point) + try: + #try to unmount old style mountpoint(ok if fails) subprocess.check_call(['umount','/'+point]) - 
except subprocess.CalledProcessError, err1: - pass - except Exception as ex: - logging.exception(ex) + except:pass try: subprocess.check_call(['umount',os.path.join('/'+point,conf.ramdisk_subdirectory)]) except subprocess.CalledProcessError, err1: - logging.error("Error calling umount in cleanup_mountpoints") - logging.error(str(err1.returncode)) - umount_failure=True + logger.info("trying to kill users of ramdisk") + try: + subprocess.check_call(['fuser','-km',os.path.join('/'+point,conf.ramdisk_subdirectory)]) + except subprocess.CalledProcessError, err2: + logger.error("Error calling umount in cleanup_mountpoints (ramdisk), return code:"+str(err2.returncode)) + try: + subprocess.check_call(['umount',os.path.join('/'+point,conf.ramdisk_subdirectory)]) + except subprocess.CalledProcessError, err2: + logger.error("Error calling umount in cleanup_mountpoints (ramdisk), return code:"+str(err2.returncode)) + umount_failure=True try: subprocess.check_call(['umount',os.path.join('/'+point,conf.output_subdirectory)]) except subprocess.CalledProcessError, err1: - logging.error("Error calling umount in cleanup_mountpoints") - logging.error(str(err1.returncode)) - umount_failure=True - #this will remove directories only if they are empty (as unomunted mount point should be) + logger.info("trying to kill users of output") + try: + subprocess.check_call(['fuser','-km',os.path.join('/'+point,conf.output_subdirectory)]) + except subprocess.CalledProcessError, err2: + logger.error("Error calling umount in cleanup_mountpoints (output), return code:"+str(err2.returncode)) + try: + subprocess.check_call(['umount',os.path.join('/'+point,conf.output_subdirectory)]) + except subprocess.CalledProcessError, err2: + logger.error("Error calling umount in cleanup_mountpoints (output), return code:"+str(err2.returncode)) + umount_failure=True + + #this will remove directories only if they are empty (as unmounted mount point should be) try: if os.path.join('/'+point,conf.ramdisk_subdirectory)!='/': os.rmdir(os.path.join('/'+point,conf.ramdisk_subdirectory)) except Exception as ex: - logging.exception(ex) + logger.exception(ex) try: if os.path.join('/'+point,conf.output_subdirectory)!='/': os.rmdir(os.path.join('/'+point,conf.output_subdirectory)) except Exception as ex: - logging.exception(ex) - try: - if os.path.join('/',point)!='/': - os.rmdir('/'+point) - except Exception as ex: - logging.exception(ex) + logger.exception(ex) if remount==False: if umount_failure:return False return True i = 0 bus_config = os.path.join(os.path.dirname(conf.resource_base.rstrip(os.path.sep)),'bus.config') if os.path.exists(bus_config): + busconfig_age = os.path.getmtime(bus_config) for line in open(bus_config): - logging.info("found BU to mount at "+line.strip()) - try: - os.makedirs('/'+conf.bu_base_dir+str(i)) - except OSError: - pass + logger.info("found BU to mount at "+line.strip()) try: os.makedirs(os.path.join('/'+conf.bu_base_dir+str(i),conf.ramdisk_subdirectory)) except OSError: @@ -174,16 +253,20 @@ def cleanup_mountpoints(remount=True): break else: p_end = datetime.datetime.now() - logging.warn('unable to ping '+line.strip()) + logger.warn('unable to ping '+line.strip()) dt = p_end - p_begin if dt.seconds < 10: time.sleep(10-dt.seconds) attemptsLeft-=1 - if attemptsLeft==0: - logging.fatal('hltd was unable to ping BU '+line.strip()) - sys.exit(1) - else: - logging.info("trying to mount "+line.strip()+':/fff/'+conf.ramdisk_subdirectory+' '+os.path.join('/'+conf.bu_base_dir+str(i),conf.ramdisk_subdirectory)) + if 
attemptsLeft==0: + logger.fatal('hltd was unable to ping BU '+line.strip()) + #check if bus.config has been updated + if (os.path.getmtime(bus_config) - busconfig_age)>1: + return cleanup_mountpoints(remount) + attemptsLeft=8 + #sys.exit(1) + if True: + logger.info("trying to mount "+line.strip()+':/fff/'+conf.ramdisk_subdirectory+' '+os.path.join('/'+conf.bu_base_dir+str(i),conf.ramdisk_subdirectory)) try: subprocess.check_call( [conf.mount_command, @@ -194,13 +277,18 @@ def cleanup_mountpoints(remount=True): line.strip()+':/fff/'+conf.ramdisk_subdirectory, os.path.join('/'+conf.bu_base_dir+str(i),conf.ramdisk_subdirectory)] ) - bu_disk_list_ramdisk.append(os.path.join('/'+conf.bu_base_dir+str(i),conf.ramdisk_subdirectory)) + toappend = os.path.join('/'+conf.bu_base_dir+str(i),conf.ramdisk_subdirectory) + bu_disk_list_ramdisk.append(toappend) + if conf.instance=="main": + bu_disk_list_ramdisk_instance.append(toappend) + else: + bu_disk_list_ramdisk_instance.append(os.path.join(toappend,conf.instance)) except subprocess.CalledProcessError, err2: - logging.exception(err2) - logging.fatal("Unable to mount ramdisk - exiting.") + logger.exception(err2) + logger.fatal("Unable to mount ramdisk - exiting.") sys.exit(1) - logging.info("trying to mount "+line.strip()+':/fff/'+conf.output_subdirectory+' '+os.path.join('/'+conf.bu_base_dir+str(i),conf.output_subdirectory)) + logger.info("trying to mount "+line.strip()+':/fff/'+conf.output_subdirectory+' '+os.path.join('/'+conf.bu_base_dir+str(i),conf.output_subdirectory)) try: subprocess.check_call( [conf.mount_command, @@ -211,26 +299,49 @@ def cleanup_mountpoints(remount=True): line.strip()+':/fff/'+conf.output_subdirectory, os.path.join('/'+conf.bu_base_dir+str(i),conf.output_subdirectory)] ) - bu_disk_list_output.append(os.path.join('/'+conf.bu_base_dir+str(i),conf.output_subdirectory)) + toappend = os.path.join('/'+conf.bu_base_dir+str(i),conf.output_subdirectory) + bu_disk_list_output.append(toappend) + if conf.instance=="main" or conf.instance_same_destination==True: + bu_disk_list_output_instance.append(toappend) + else: + bu_disk_list_output_instance.append(os.path.join(toappend,conf.instance)) except subprocess.CalledProcessError, err2: - logging.exception(err2) - logging.fatal("Unable to mount output - exiting.") + logger.exception(err2) + logger.fatal("Unable to mount output - exiting.") sys.exit(1) - i+=1 #clean up suspended state try: - if remount==True:os.unlink(conf.watch_directory+'/suspend') + if remount==True:os.popen('rm -rf '+conf.watch_directory+'/suspend*') except:pass except Exception as ex: - logging.error("Exception in cleanup_mountpoints") - logging.exception(ex) + logger.error("Exception in cleanup_mountpoints") + logger.exception(ex) if remount==True: - logging.fatal("Unable to handle (un)mounting") + logger.fatal("Unable to handle (un)mounting") return False else:return False +def submount_size(basedir): + loop_size=0 + try: + p = subprocess.Popen("mount", shell=False, stdout=subprocess.PIPE) + p.wait() + std_out=p.stdout.read().split("\n") + for l in std_out: + try: + ls = l.strip() + toks = l.split() + if toks[0].startswith(basedir) and toks[2].startswith(basedir) and 'loop' in toks[5]: + imgstat = os.stat(toks[0]) + imgsize = imgstat.st_size + loop_size+=imgsize + except:pass + except:pass + return loop_size + + def calculate_threadnumber(): global nthreads global nstreams @@ -240,12 +351,58 @@ def calculate_threadnumber(): nthreads = idlecount/conf.cmssw_threads_autosplit nstreams = 
idlecount/conf.cmssw_threads_autosplit if nthreads*conf.cmssw_threads_autosplit != nthreads: - logging.error("idle cores can not be evenly split to cmssw threads") + logger.error("idle cores can not be evenly split to cmssw threads") else: nthreads = conf.cmssw_threads - nstreams = conf.cmssw_threads + nstreams = conf.cmssw_streams expected_processes = idlecount/nstreams + +def updateBlacklist(): + black_list=[] + active_black_list=[] + #TODO:this will be updated to read blacklist from database + if conf.role=='bu': + try: + if os.stat('/etc/appliance/blacklist').st_size>0: + with open('/etc/appliance/blacklist','r') as fi: + try: + static_black_list = json.load(fi) + for item in static_black_list: + black_list.append(item) + logger.info("found these resources in /etc/appliance/blacklist: "+str(black_list)) + except ValueError: + logger.error("error parsing /etc/appliance/blacklist") + except: + #no blacklist file, this is ok + pass + black_list=list(set(black_list)) + try: + forceUpdate=False + with open(os.path.join(conf.watch_directory,'appliance','blacklist'),'r') as fi: + active_black_list = json.load(fi) + except: + forceUpdate=True + if forceUpdate==True or active_black_list != black_list: + try: + with open(os.path.join(conf.watch_directory,'appliance','blacklist'),'w') as fi: + json.dump(black_list,fi) + except: + return False,black_list + #TODO:check on FU if blacklisted + return True,black_list + +def restartLogCollector(instanceParam): + global logCollector + if logCollector!=None: + logger.info("terminating logCollector") + logCollector.terminate() + logCollector = None + logger.info("starting logcollector.py") + logcollector_args = ['/opt/hltd/python/logcollector.py'] + logcollector_args.append(instanceParam) + logCollector = subprocess.Popen(logcollector_args,preexec_fn=preexec_function,close_fds=True) + class system_monitor(threading.Thread): def __init__(self): @@ -259,65 +416,152 @@ def __init__(self): def rehash(self): if conf.role == 'fu': - self.directory = ['/'+x+'/appliance/boxes/' for x in bu_disk_list_ramdisk] + self.directory = [os.path.join(bu_disk_list_ramdisk_instance[0],'appliance','boxes')] + #self.directory = ['/'+x+'/appliance/boxes/' for x in bu_disk_list_ramdisk_instance] + #write only in one location else: - self.directory = [conf.watch_directory+'/appliance/boxes/'] - self.file = [x+self.hostname for x in self.directory] - for dir in self.directory: + self.directory = [os.path.join(conf.watch_directory,'appliance/boxes/')] try: - os.makedirs(dir) + #if directory does not exist: check if it is renamed to specific name (non-main instance) + if not os.path.exists(self.directory[0]) and conf.instance=="main": + os.makedirs(self.directory[0]) except OSError: pass - logging.info("system_monitor: rehash found the following BU disks") + + self.file = [os.path.join(x,self.hostname) for x in self.directory] + + logger.info("system_monitor: rehash found the following BU disk(s):"+str(self.file)) for disk in self.file: - logging.info(disk) + logger.info(disk) def run(self): try: - logging.debug('entered system monitor thread ') + logger.debug('entered system monitor thread ') global suspended + global ramdisk_submount_size + res_path_temp = os.path.join(conf.watch_directory,'appliance','resource_summary_temp') + res_path = os.path.join(conf.watch_directory,'appliance','resource_summary') + selfhost = os.uname()[1] + counter=0 while self.running: -# logging.info('system monitor - running '+str(self.running)) - self.threadEvent.wait(5) + self.threadEvent.wait(5 if 
counter>0 else 1) + counter+=1 + counter=counter%5 if suspended:continue tstring = datetime.datetime.utcfromtimestamp(time.time()).isoformat() - fp = None + ramdisk = None + if conf.role == 'bu': + ramdisk = os.statvfs(conf.watch_directory) + ramdisk_occ=1 + try:ramdisk_occ = float((ramdisk.f_blocks - ramdisk.f_bavail)*ramdisk.f_bsize - ramdisk_submount_size)/float(ramdisk.f_blocks*ramdisk.f_bsize - ramdisk_submount_size) + except:pass + if ramdisk_occ<0: + ramdisk_occ=0 + logger.info('incorrect ramdisk occupancy',ramdisk_occ) + if ramdisk_occ>1: + ramdisk_occ=1 + logger.info('incorrect ramdisk occupancy',ramdisk_occ) + + resource_count_idle = 0 + resource_count_used = 0 + resource_count_broken = 0 + cloud_count = 0 + lastFURuns = [] + lastFURun=-1 + activeRunQueuedLumisNum = -1 + current_time = time.time() + for key in boxinfoFUMap: + if key==selfhost:continue + entry = boxinfoFUMap[key] + if current_time - entry[1] > 10:continue + resource_count_idle+=int(entry[0]['idles']) + resource_count_used+=int(entry[0]['used']) + resource_count_broken+=int(entry[0]['broken']) + cloud_count+=int(entry[0]['cloud']) + try: + lastFURuns.append(int(entry[0]['activeRuns'].strip('[]').split(',')[-1])) + except:pass + fuRuns = sorted(list(set(lastFURuns))) + if len(fuRuns)>0: + lastFURun = fuRuns[-1] + #second pass + for key in boxinfoFUMap: + if key==selfhost:continue + entry = boxinfoFUMap[key] + if current_time - entry[1] > 10:continue + try: + lastrun = int(entry[0]['activeRuns'].strip('[]').split(',')[-1]) + if lastrun==lastFURun: + qlumis = int(entry[0]['activeRunNumQueuedLS']) + if qlumis>activeRunQueuedLumisNum:activeRunQueuedLumisNum=qlumis + except:pass + res_doc = { + "active_resources":resource_count_idle+resource_count_used, + "idle":resource_count_idle, + "used":resource_count_used, + "broken":resource_count_broken, + "cloud":cloud_count, + "activeFURun":lastFURun, + "activeRunNumQueuedLS":activeRunQueuedLumisNum, + "ramdisk_occupancy":ramdisk_occ + } + with open(res_path_temp,'w') as fp: + json.dump(res_doc,fp) + os.rename(res_path_temp,res_path) + for mfile in self.file: if conf.role == 'fu': dirstat = os.statvfs(conf.watch_directory) - fp=open(mfile,'w+') - fp.write('fm_date='+tstring+'\n') - fp.write('idles='+str(len(os.listdir(idles)))+'\n') - fp.write('used='+str(len(os.listdir(used)))+'\n') - fp.write('broken='+str(len(os.listdir(broken)))+'\n') - fp.write('quarantined='+str(len(os.listdir(quarantined)))+'\n') - fp.write('usedDataDir='+str(((dirstat.f_blocks - dirstat.f_bavail)*dirstat.f_bsize)>>20)+'\n') - fp.write('totalDataDir='+str((dirstat.f_blocks*dirstat.f_bsize)>>20)+'\n') - #two lines with active runs (used to check file consistency) - fp.write('activeRuns='+str(active_runs).strip('[]')+'\n') - fp.write('activeRuns='+str(active_runs).strip('[]')+'\n') - fp.write('entriesComplete=True') - fp.close() + try: + with open(mfile,'w+') as fp: + fp.write('fm_date='+tstring+'\n') + if cloud_mode==True and entering_cloud_mode==True: + #lie about cores in cloud if cloud mode enabled, even if still processing + fp.write('idles=0\n') + fp.write('used=0\n') + fp.write('broken=0\n') + fp.write('cloud='+str(len(os.listdir(cloud))+len(os.listdir(idles))+len(os.listdir(used))+len(os.listdir(broken)))+'\n') + else: + fp.write('idles='+str(len(os.listdir(idles)))+'\n') + fp.write('used='+str(len(os.listdir(used)))+'\n') + fp.write('broken='+str(len(os.listdir(broken)))+'\n') + fp.write('cloud='+str(len(os.listdir(cloud)))+'\n') + + fp.write('quarantined='+str(len(os.listdir(quarantined)))+'\n') 
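The BU branch above publishes the appliance resource summary by writing resource_summary_temp and then renaming it, so readers never observe a half-written JSON document. A minimal sketch of that write-then-rename pattern, using a hypothetical target directory and an example document shaped like res_doc:

import json
import os

def publish_summary(res_doc, appliance_dir):
    # write the complete document to a temporary file first ...
    tmp_path = os.path.join(appliance_dir, 'resource_summary_temp')
    final_path = os.path.join(appliance_dir, 'resource_summary')
    with open(tmp_path, 'w') as fp:
        json.dump(res_doc, fp)
    # ... then atomically replace the published file (rename within one filesystem)
    os.rename(tmp_path, final_path)

doc = {'active_resources': 8, 'idle': 4, 'used': 4, 'broken': 0,
       'cloud': 0, 'activeFURun': -1, 'activeRunNumQueuedLS': -1,
       'ramdisk_occupancy': 0.0}
target = '/tmp/appliance-example'          # hypothetical directory, not the real watch_directory
if not os.path.isdir(target):
    os.makedirs(target)
publish_summary(doc, target)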
+ fp.write('usedDataDir='+str(((dirstat.f_blocks - dirstat.f_bavail)*dirstat.f_bsize)>>20)+'\n') + fp.write('totalDataDir='+str((dirstat.f_blocks*dirstat.f_bsize)>>20)+'\n') + #two lines with active runs (used to check file consistency) + fp.write('activeRuns='+str(active_runs).strip('[]')+'\n') + fp.write('activeRuns='+str(active_runs).strip('[]')+'\n') + fp.write('activeRunsErrors='+str(active_runs_errors).strip('[]')+'\n') + fp.write('activeRunNumQueuedLS='+self.getLumiQueueStat()+'\n') + fp.write('entriesComplete=True') + except Exception as ex: + logger.warning('boxinfo file write failed +'+str(ex)) + if counter==0: + #in case something happened with the BU server, try remount + cleanup_mountpoints() + if conf.role == 'bu': - ramdisk = os.statvfs(conf.watch_directory) + #ramdisk = os.statvfs(conf.watch_directory) outdir = os.statvfs('/fff/output') - fp=open(mfile,'w+') - - fp.write('fm_date='+tstring+'\n') - fp.write('idles=0\n') - fp.write('used=0\n') - fp.write('broken=0\n') - fp.write('quarantined=0\n') - fp.write('usedRamdisk='+str(((ramdisk.f_blocks - ramdisk.f_bavail)*ramdisk.f_bsize)>>20)+'\n') - fp.write('totalRamdisk='+str((ramdisk.f_blocks*ramdisk.f_bsize)>>20)+'\n') - fp.write('usedOutput='+str(((outdir.f_blocks - outdir.f_bavail)*outdir.f_bsize)>>20)+'\n') - fp.write('totalOutput='+str((outdir.f_blocks*outdir.f_bsize)>>20)+'\n') - fp.write('activeRuns='+str(active_runs).strip('[]')+'\n') - fp.write('activeRuns='+str(active_runs).strip('[]')+'\n') - fp.write('entriesComplete=True') - fp.close() - + with open(mfile,'w+') as fp: + fp.write('fm_date='+tstring+'\n') + fp.write('idles=0\n') + fp.write('used=0\n') + fp.write('broken=0\n') + fp.write('quarantined=0\n') + fp.write('cloud=0\n') + fp.write('usedRamdisk='+str(((ramdisk.f_blocks - ramdisk.f_bavail)*ramdisk.f_bsize - ramdisk_submount_size)>>20)+'\n') + fp.write('totalRamdisk='+str((ramdisk.f_blocks*ramdisk.f_bsize - ramdisk_submount_size)>>20)+'\n') + fp.write('usedOutput='+str(((outdir.f_blocks - outdir.f_bavail)*outdir.f_bsize)>>20)+'\n') + fp.write('totalOutput='+str((outdir.f_blocks*outdir.f_bsize)>>20)+'\n') + fp.write('activeRuns='+str(active_runs).strip('[]')+'\n') + fp.write('activeRuns='+str(active_runs).strip('[]')+'\n') + fp.write('entriesComplete=True') + + #deprecated if conf.role == 'bu': mfile = conf.resource_base+'/disk.jsn' stat=[] @@ -336,7 +580,7 @@ def run(self): json.dump(stat,fp) fp.close() except Exception as ex: - logging.error(ex) + logger.error(ex) for mfile in self.file: try: @@ -344,10 +588,20 @@ def run(self): except OSError: pass - logging.debug('exiting system monitor thread ') + logger.debug('exiting system monitor thread ') + + def getLumiQueueStat(self): + try: + with open(os.path.join(conf.watch_directory,'run'+str(active_runs[-1]).zfill(conf.run_number_padding), + 'open','queue_status.jsn'),'r') as fp: + #fcntl.flock(fp, fcntl.LOCK_EX) + statusDoc = json.load(fp) + return str(statusDoc["numQueuedLS"]) + except: + return "-1" def stop(self): - logging.debug("system_monitor: request to stop") + logger.debug("system_monitor: request to stop") self.running = False self.threadEvent.set() @@ -358,13 +612,13 @@ def __init__(self): def startNewRun(self,nr): if self.runnumber: - logging.error("Another BU emulator run "+str(self.runnumber)+" is already ongoing") + logger.error("Another BU emulator run "+str(self.runnumber)+" is already ongoing") return self.runnumber = nr configtouse = conf.test_bu_config destination_base = None if role == 'fu': - destination_base = 
bu_disk_list_ramdisk[startindex%len(bu_disk_list_ramdisk)] + destination_base = bu_disk_list_ramdisk_instance[startindex%len(bu_disk_list_ramdisk_instance)] else: destination_base = conf.watch_directory @@ -393,8 +647,8 @@ def startNewRun(self,nr): close_fds=True ) except Exception as ex: - logging.error("Error in forking BU emulator process") - logging.error(ex) + logger.error("Error in forking BU emulator process") + logger.error(ex) def stop(self): os.kill(self.process.pid,SIGINT) @@ -424,22 +678,22 @@ def ping(self): def NotifyNewRun(self,runnumber): self.runnumber = runnumber - logging.info("calling start of run on "+self.cpu[0]); + logger.info("calling start of run on "+self.cpu[0]); try: - connection = httplib.HTTPConnection(self.cpu[0], conf.cgi_port) + connection = httplib.HTTPConnection(self.cpu[0], conf.cgi_port - conf.cgi_instance_port_offset) connection.request("GET",'cgi-bin/start_cgi.py?run='+str(runnumber)) response = connection.getresponse() #do something intelligent with the response code - logging.error("response was "+str(response.status)) + logger.error("response was "+str(response.status)) if response.status > 300: self.hoststate = 1 else: - logging.info(response.read()) + logger.info(response.read()) except Exception as ex: - logging.exception(ex) + logger.exception(ex) def NotifyShutdown(self): try: - connection = httplib.HTTPConnection(self.cpu[0], conf.cgi_port) + connection = httplib.HTTPConnection(self.cpu[0], conf.cgi_port - self.cgi_instance_port_offset) connection.request("GET",'cgi-bin/stop_cgi.py?run='+str(self.runnumber)) time.sleep(0.05) response = connection.getresponse() @@ -447,10 +701,10 @@ def NotifyShutdown(self): #do something intelligent with the response code if response.status > 300: self.hoststate = 0 except Exception as ex: - logging.exception(ex) + logger.exception(ex) def StartNewProcess(self ,runnumber, startindex, arch, version, menu,num_threads,num_streams): - logging.debug("OnlineResource: StartNewProcess called") + logger.debug("OnlineResource: StartNewProcess called") self.runnumber = runnumber """ @@ -458,10 +712,10 @@ def StartNewProcess(self ,runnumber, startindex, arch, version, menu,num_threads independent mounts of the BU - it should not be necessary in due course IFF it is necessary, it should address "any" number of mounts, not just 2 """ - input_disk = bu_disk_list_ramdisk[startindex%len(bu_disk_list_ramdisk)] + input_disk = bu_disk_list_ramdisk_instance[startindex%len(bu_disk_list_ramdisk_instance)] #run_dir = input_disk + '/run' + str(self.runnumber).zfill(conf.run_number_padding) - logging.info("starting process with "+version+" and run number "+str(runnumber)) + logger.info("starting process with "+version+" and run number "+str(runnumber)) if "_patch" in version: full_release="cmssw-patch" @@ -492,7 +746,7 @@ def StartNewProcess(self ,runnumber, startindex, arch, version, menu,num_threads if self.watchdog: new_run_args.append("skipFirstLumis=True") - logging.info("arg array "+str(new_run_args).translate(None, "'")) + logger.info("arg array "+str(new_run_args).translate(None, "'")) try: # dem = demote.demote(conf.user) self.process = subprocess.Popen(new_run_args, @@ -500,29 +754,29 @@ def StartNewProcess(self ,runnumber, startindex, arch, version, menu,num_threads close_fds=True ) self.processstate = 100 - logging.info("started process "+str(self.process.pid)) + logger.info("started process "+str(self.process.pid)) # time.sleep(1.) 
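NotifyNewRun above asks a FU to start a run with a plain HTTP GET against its hltd CGI server, on the configured cgi_port minus the per-instance offset. A rough sketch of such a call; hostname, port, offset and run number here are placeholders, not values from this patch:

import httplib   # Python 2; http.client in Python 3

def notify_new_run(host, cgi_port, instance_port_offset, runnumber):
    # the effective CGI port is the configured port minus the per-instance offset
    connection = httplib.HTTPConnection(host, cgi_port - instance_port_offset, timeout=5)
    connection.request('GET', 'cgi-bin/start_cgi.py?run=' + str(runnumber))
    response = connection.getresponse()
    body = response.read()
    connection.close()
    # a status above 300 is treated as "resource could not be started"
    return response.status, body

# hypothetical call: FU host 'fu-example-01', port 9000, offset 0, run 230000
# status, body = notify_new_run('fu-example-01', 9000, 0, 230000)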
if self.watchdog==None: self.watchdog = ProcessWatchdog(self,self.lock) self.watchdog.start() - logging.debug("watchdog thread for "+str(self.process.pid)+" is alive " + logger.debug("watchdog thread for "+str(self.process.pid)+" is alive " + str(self.watchdog.is_alive())) else: self.watchdog.join() self.watchdog = ProcessWatchdog(self,self.lock) self.watchdog.start() - logging.debug("watchdog thread restarted for "+str(self.process.pid)+" is alive " + logger.debug("watchdog thread restarted for "+str(self.process.pid)+" is alive " + str(self.watchdog.is_alive())) except Exception as ex: - logging.info("OnlineResource: exception encountered in forking hlt slave") - logging.info(ex) + logger.info("OnlineResource: exception encountered in forking hlt slave") + logger.info(ex) def join(self): - logging.debug('calling join on thread ' +self.watchdog.name) + logger.debug('calling join on thread ' +self.watchdog.name) self.watchdog.join() def disableRestart(self): - logging.debug("OnlineResource "+str(self.cpu)+" restart is now disabled") + logger.debug("OnlineResource "+str(self.cpu)+" restart is now disabled") if self.watchdog: self.watchdog.disableRestart() @@ -530,11 +784,11 @@ def clearQuarantined(self): resource_lock.acquire() try: for cpu in self.quarantined: - logging.info('Clearing quarantined resource '+cpu) + logger.info('Clearing quarantined resource '+cpu) os.rename(quarantined+cpu,idles+cpu) self.quarantined = [] except Exception as ex: - logging.exception(ex) + logger.exception(ex) resource_lock.release() class ProcessWatchdog(threading.Thread): @@ -549,16 +803,16 @@ def __init__(self,resource,lock): def run(self): try: monfile = self.resource.associateddir+'/hltd.jsn' - logging.info('watchdog for process '+str(self.resource.process.pid)) + logger.info('watchdog for process '+str(self.resource.process.pid)) self.resource.process.wait() returncode = self.resource.process.returncode pid = self.resource.process.pid #update json process monitoring file self.resource.processstate=returncode - logging.debug('ProcessWatchdog: acquire lock thread '+str(pid)) + logger.debug('ProcessWatchdog: acquire lock thread '+str(pid)) self.lock.acquire() - logging.debug('ProcessWatchdog: acquired lock thread '+str(pid)) + logger.debug('ProcessWatchdog: acquired lock thread '+str(pid)) try: with open(monfile,"r+") as fp: @@ -573,13 +827,13 @@ def run(self): fp.flush() except IOError,ex: - logging.exception(ex) + logger.exception(ex) except ValueError: pass - logging.debug('ProcessWatchdog: release lock thread '+str(pid)) + logger.debug('ProcessWatchdog: release lock thread '+str(pid)) self.lock.release() - logging.debug('ProcessWatchdog: released lock thread '+str(pid)) + logger.debug('ProcessWatchdog: released lock thread '+str(pid)) abortedmarker = self.resource.statefiledir+'/'+Run.ABORTED @@ -591,20 +845,24 @@ def run(self): try: os.rename(used+cpu,idles+cpu) except Exception as ex: - logging.exception(ex) + logger.exception(ex) except:pass resource_lock.release() return - #quit codes (configuration errors): - quit_codes = [127,90,65,73] + #bump error count in active_runs_errors which is logged in the box file + if returncode!=0: + try: + global active_runs + global active_runs_errors + active_runs_errors[active_runs.index(self.resource.runnumber)]+=1 + except: + pass - #cleanup actions- remove process from list and - # attempt restart on same resource - #dqm mode will treat configuration error as a crash and eventually move to quarantined - if returncode != 0 and ( returncode not in quit_codes or 
conf.dqm_machine==True): + #cleanup actions- remove process from list and attempt restart on same resource + if returncode != 0: if returncode < 0: - logging.error("process "+str(pid) + logger.error("process "+str(pid) +" for run "+str(self.resource.runnumber) +" on resource(s) " + str(self.resource.cpu) +" exited with signal " @@ -613,7 +871,7 @@ def run(self): +str(self.retry_enabled) ) else: - logging.error("process "+str(pid) + logger.error("process "+str(pid) +" for run "+str(self.resource.runnumber) +" on resource(s) " + str(self.resource.cpu) +" exited with code " @@ -621,8 +879,23 @@ def run(self): +" restart is enabled ? " +str(self.retry_enabled) ) - - + #quit codes (configuration errors): + quit_codes = [127,90,73] + + #removed 65 because it is not only configuration error + #quit_codes = [127,90,65,73] + + #dqm mode will treat configuration error as a crash and eventually move to quarantined + if conf.dqm_machine==False and returncode in quit_codes: + if self.resource.retry_attempts < self.retry_limit: + logger.warning('for this type of error, restarting this process is disabled') + self.resource.retry_attempts=self.retry_limit + if returncode==127: + logger.fatal('Exit code indicates that CMSSW environment might not be available (cmsRun executable not in path).') + elif returncode==90: + logger.fatal('Exit code indicates that there might be a python error in the CMSSW configuration.') + else: + logger.fatal('Exit code indicates that there might be a C/C++ error in the CMSSW configuration.') #generate crashed pid json file like: run000001_ls0000_crash_pid12345.jsn oldpid = "pid"+str(pid).zfill(5) @@ -635,8 +908,8 @@ def run(self): try: with open(filepath,"w+") as fi: json.dump(document,fi) - except: logging.exception("unable to create %r" %filename) - logging.info("pid crash file: %r" %filename) + except: logger.exception("unable to create %r" %filename) + logger.info("pid crash file: %r" %filename) if self.resource.retry_attempts < self.retry_limit: @@ -649,7 +922,7 @@ def run(self): self.resource.process = None self.resource.retry_attempts += 1 - logging.info("try to restart process for resource(s) " + logger.info("try to restart process for resource(s) " +str(self.resource.cpu) +" attempt " + str(self.resource.retry_attempts)) @@ -657,10 +930,10 @@ def run(self): for cpu in self.resource.cpu: os.rename(used+cpu,broken+cpu) resource_lock.release() - logging.debug("resource(s) " +str(self.resource.cpu)+ + logger.debug("resource(s) " +str(self.resource.cpu)+ " successfully moved to except") elif self.resource.retry_attempts >= self.retry_limit: - logging.error("process for run " + logger.error("process for run " +str(self.resource.runnumber) +" on resources " + str(self.resource.cpu) +" reached max retry limit " @@ -680,20 +953,11 @@ def run(self): fp = open(conf.watch_directory+'/quarantined'+str(self.resource.runnumber).zfill(conf.run_number_padding),'w+') fp.close() except Exception as ex: - logging.exception(ex) + logger.exception(ex) #successful end= release resource (TODO:maybe should mark aborted for non-0 error codes) - elif returncode == 0 or returncode in quit_codes: - if returncode==0: - logging.info('releasing resource, exit 0 meaning end of run '+str(self.resource.cpu)) - elif returncode==127: - logging.fatal('error executing start script. 
Maybe CMSSW environment is not available (cmsRun executable not in path).') - elif returncode==90: - logging.fatal('error executing start script: python error.') - elif returncode in quit_codes: - logging.fatal('error executing start script: CMSSW configuration error.') - else: - logging.fatal('error executing start script: unspecified error.') + elif returncode == 0: + logger.info('releasing resource, exit 0 meaning end of run '+str(self.resource.cpu)) # generate an end-of-run marker if it isn't already there - it will be picked up by the RunRanger endmarker = conf.watch_directory+'/end'+str(self.resource.runnumber).zfill(conf.run_number_padding) @@ -714,12 +978,12 @@ def run(self): #self.resource.process=None - # logging.info('exiting thread '+str(self.resource.process.pid)) + # logger.info('exiting thread '+str(self.resource.process.pid)) except Exception as ex: resource_lock.release() - logging.info("OnlineResource watchdog: exception") - logging.exception(ex) + logger.info("OnlineResource watchdog: exception") + logger.exception(ex) return def disableRestart(self): @@ -736,7 +1000,8 @@ class Run: VALID_MARKERS = [STARTING,ACTIVE,STOPPING,COMPLETE,ABORTED] - def __init__(self,nr,dirname,bu_dir): + def __init__(self,nr,dirname,bu_dir,instance): + self.instance = instance self.runnumber = nr self.dirname = dirname self.online_resource_list = [] @@ -754,22 +1019,23 @@ def __init__(self,nr,dirname,bu_dir): self.anelasticWatchdog = None self.threadEvent = threading.Event() global active_runs + global active_runs_errors if conf.role == 'fu': self.changeMarkerMaybe(Run.STARTING) if int(self.runnumber) in active_runs: raise Exception("Run "+str(self.runnumber)+ "already active") active_runs.append(int(self.runnumber)) + active_runs_errors.append(0) else: - #currently unused on BU active_runs.append(int(self.runnumber)) + active_runs_errors.append(0) self.menu_directory = bu_dir+'/'+conf.menu_directory readMenuAttempts=0 #polling for HLT menu directory while os.path.exists(self.menu_directory)==False and conf.dqm_machine==False and conf.role=='fu': - time.sleep(.2) readMenuAttempts+=1 #10 seconds allowed before defaulting to local configuration if readMenuAttempts>50: break @@ -780,19 +1046,17 @@ def __init__(self,nr,dirname,bu_dir): while True: self.menu = self.menu_directory+'/'+conf.menu_name if os.path.exists(self.menu_directory+'/'+conf.arch_file): - fp = open(self.menu_directory+'/'+conf.arch_file,'r') - self.arch = fp.readline().strip() - fp.close() + with open(self.menu_directory+'/'+conf.arch_file,'r') as fp: + self.arch = fp.readline().strip() if os.path.exists(self.menu_directory+'/'+conf.version_file): - fp = open(self.menu_directory+'/'+conf.version_file,'r') - self.version = fp.readline().strip() - fp.close() + with open(self.menu_directory+'/'+conf.version_file,'r') as fp: + self.version = fp.readline().strip() try: - logging.info("Run "+str(self.runnumber)+" uses "+ self.version+" ("+self.arch+") with "+self.menu) + logger.info("Run "+str(self.runnumber)+" uses "+ self.version+" ("+self.arch+") with "+self.menu) break except Exception as ex: - logging.exception(ex) - logging.error("Run parameters obtained for run "+str(self.runnumber)+": "+ str(self.version)+" ("+str(self.arch)+") with "+str(self.menu)) + logger.exception(ex) + logger.error("Run parameters obtained for run "+str(self.runnumber)+": "+ str(self.version)+" ("+str(self.arch)+") with "+str(self.menu)) time.sleep(.5) readMenuAttempts+=1 if readMenuAttempts==3: raise Exception("Unable to parse HLT parameters") @@ 
-802,73 +1066,83 @@ def __init__(self,nr,dirname,bu_dir): self.version = conf.cmssw_default_version self.menu = conf.test_hlt_config1 if conf.role=='fu': - logging.warn("Using default values for run "+str(self.runnumber)+": "+self.version+" ("+self.arch+") with "+self.menu) + logger.warn("Using default values for run "+str(self.runnumber)+": "+self.version+" ("+self.arch+") with "+self.menu) self.rawinputdir = None + # if conf.role == "bu": try: self.rawinputdir = conf.watch_directory+'/run'+str(self.runnumber).zfill(conf.run_number_padding) - self.buoutputdir = conf.micromerge_output+'/run'+str(self.runnumber).zfill(conf.run_number_padding) + #if conf.instance!="main" and conf.instance_same_destination==False: + # try:os.mkdir(os.path.join(conf.micromerge_output,conf.instance)) + # except:pass + # self.buoutputdir = os.path.join(conf.micromerge_output,instance,'run'+str(self.runnumber).zfill(conf.run_number_padding)) + #else: + # self.buoutputdir = os.path.join(conf.micromerge_output,'run'+str(self.runnumber).zfill(conf.run_number_padding)) os.mkdir(self.rawinputdir+'/mon') except Exception, ex: - logging.error("could not create mon dir inside the run input directory") + logger.error("could not create mon dir inside the run input directory") else: - self.rawinputdir= bu_disk_list_ramdisk[0]+'/run' + str(self.runnumber).zfill(conf.run_number_padding) + #self.rawinputdir= os.path.join(random.choice(bu_disk_list_ramdisk_instance),'run' + str(self.runnumber).zfill(conf.run_number_padding)) + self.rawinputdir= os.path.join(bu_disk_list_ramdisk_instance[0],'run' + str(self.runnumber).zfill(conf.run_number_padding)) self.lock = threading.Lock() - #conf.use_elasticsearch = False - #note: start elastic.py first! + if conf.use_elasticsearch == True: + global nsslock try: if conf.role == "bu": - logging.info("starting elasticbu.py with arguments:"+self.dirname) - elastic_args = ['/opt/hltd/python/elasticbu.py',str(self.runnumber)] + nsslock.acquire() + logger.info("starting elasticbu.py with arguments:"+self.dirname) + elastic_args = ['/opt/hltd/python/elasticbu.py',self.instance,str(self.runnumber)] else: - logging.info("starting elastic.py with arguments:"+self.dirname) - elastic_args = ['/opt/hltd/python/elastic.py',self.dirname,self.rawinputdir+'/mon',str(expected_processes),str(conf.elastic_cluster)] + logger.info("starting elastic.py with arguments:"+self.dirname) + elastic_args = ['/opt/hltd/python/elastic.py',self.dirname,self.rawinputdir+'/mon',str(expected_processes)] self.elastic_monitor = subprocess.Popen(elastic_args, preexec_fn=preexec_function, close_fds=True ) - except OSError as ex: - logging.error("failed to start elasticsearch client") - logging.error(ex) + logger.error("failed to start elasticsearch client") + logger.error(ex) + try:nsslock.release() + except:pass if conf.role == "fu" and conf.dqm_machine==False: try: - logging.info("starting anelastic.py with arguments:"+self.dirname) - elastic_args = ['/opt/hltd/python/anelastic.py',self.dirname,str(self.runnumber), self.rawinputdir] + logger.info("starting anelastic.py with arguments:"+self.dirname) + #elastic_args = ['/opt/hltd/python/anelastic.py',self.dirname,str(self.runnumber), self.rawinputdir,random.choice(bu_disk_list_output_instance)] + elastic_args = ['/opt/hltd/python/anelastic.py',self.dirname,str(self.runnumber), self.rawinputdir,bu_disk_list_output_instance[0]] self.anelastic_monitor = subprocess.Popen(elastic_args, preexec_fn=preexec_function, close_fds=True ) except OSError as ex: - logging.fatal("failed to 
start anelastic.py client:") - logging.exception(ex) + logger.fatal("failed to start anelastic.py client:") + logger.exception(ex) sys.exit(1) def AcquireResource(self,resourcenames,fromstate): idles = conf.resource_base+'/'+fromstate+'/' try: - logging.debug("Trying to acquire resource " + logger.debug("Trying to acquire resource " +str(resourcenames) +" from "+fromstate) for resourcename in resourcenames: os.rename(idles+resourcename,used+resourcename) if not filter(lambda x: x.cpu==resourcenames,self.online_resource_list): - logging.debug("resource(s) "+str(resourcenames) + logger.debug("resource(s) "+str(resourcenames) +" not found in online_resource_list, creating new") self.online_resource_list.append(OnlineResource(resourcenames,self.lock)) return self.online_resource_list[-1] - logging.debug("resource(s) "+str(resourcenames) + logger.debug("resource(s) "+str(resourcenames) +" found in online_resource_list") return filter(lambda x: x.cpu==resourcenames,self.online_resource_list)[0] except Exception as ex: - logging.info("exception encountered in looking for resources") - logging.info(ex) + logger.info("exception encountered in looking for resources") + logger.info(ex) def ContactResource(self,resourcename): self.online_resource_list.append(OnlineResource(resourcename,self.lock)) @@ -878,28 +1152,39 @@ def ReleaseResource(self,res): self.online_resource_list.remove(res) def AcquireResources(self,mode): - logging.info("acquiring resources from "+conf.resource_base) + logger.info("acquiring resources from "+conf.resource_base) idles = conf.resource_base idles += '/idle/' if conf.role == 'fu' else '/boxes/' try: dirlist = os.listdir(idles) except Exception as ex: - logging.info("exception encountered in looking for resources") - logging.info(ex) - logging.info(dirlist) + logger.info("exception encountered in looking for resources") + logger.info(ex) + logger.info(str(dirlist)) current_time = time.time() count = 0 cpu_group=[] #self.lock.acquire() + global machine_blacklist + if conf.role=='bu': + update_success,machine_blacklist=updateBlacklist() + if update_success==False: + logger.fatal("unable to check blacklist: giving up on run start") + return False + for cpu in dirlist: #skip self - if conf.role=='bu' and cpu == os.uname()[1]:continue - + if conf.role=='bu': + if cpu == os.uname()[1]:continue + if cpu in machine_blacklist: + logger.info("skipping blacklisted resource "+str(cpu)) + continue + count = count+1 cpu_group.append(cpu) age = current_time - os.path.getmtime(idles+cpu) - logging.info("found resource "+cpu+" which is "+str(age)+" seconds old") + logger.info("found resource "+cpu+" which is "+str(age)+" seconds old") if conf.role == 'fu': if count == nstreams: self.AcquireResource(cpu_group,'idle') @@ -909,12 +1194,13 @@ def AcquireResources(self,mode): if age < 10: cpus = [cpu] self.ContactResource(cpus) + return True #self.lock.release() def Start(self): self.is_active_run = True for resource in self.online_resource_list: - logging.info('start run '+str(self.runnumber)+' on cpu(s) '+str(resource.cpu)) + logger.info('start run '+str(self.runnumber)+' on cpu(s) '+str(resource.cpu)) if conf.role == 'fu': self.StartOnResource(resource) else: @@ -929,11 +1215,11 @@ def Start(self): self.startCompletedChecker() def StartOnResource(self, resource): - logging.debug("StartOnResource called") + logger.debug("StartOnResource called") resource.statefiledir=conf.watch_directory+'/run'+str(self.runnumber).zfill(conf.run_number_padding) mondir = os.path.join(resource.statefiledir,'mon') 
resource.associateddir=mondir - logging.info(str(nthreads)+' '+str(nstreams)) + logger.info(str(nthreads)+' '+str(nstreams)) resource.StartNewProcess(self.runnumber, self.online_resource_list.index(resource), self.arch, @@ -941,10 +1227,10 @@ def StartOnResource(self, resource): self.menu, int(round((len(resource.cpu)*float(nthreads)/nstreams))), len(resource.cpu)) - logging.debug("StartOnResource process started") - #logging.debug("StartOnResource going to acquire lock") + logger.debug("StartOnResource process started") + #logger.debug("StartOnResource going to acquire lock") #self.lock.acquire() - #logging.debug("StartOnResource lock acquired") + #logger.debug("StartOnResource lock acquired") try: os.makedirs(mondir) except OSError: @@ -954,7 +1240,7 @@ def StartOnResource(self, resource): fp=None stat = [] if not os.path.exists(monfile): - logging.debug("No log file "+monfile+" found, creating one") + logger.debug("No log file "+monfile+" found, creating one") fp=open(monfile,'w+') attempts=0 while True: @@ -966,12 +1252,12 @@ def StartOnResource(self, resource): attempts+=1 continue else: - logging.error("could not retrieve process parameters") - logging.exception(ex) + logger.error("could not retrieve process parameters") + logger.exception(ex) break else: - logging.debug("Updating existing log file "+monfile) + logger.debug("Updating existing log file "+monfile) fp=open(monfile,'r+') stat=json.load(fp) attempts=0 @@ -990,8 +1276,8 @@ def StartOnResource(self, resource): time.sleep(.05) continue else: - logging.error("could not retrieve process parameters") - logging.exception(ex) + logger.error("could not retrieve process parameters") + logger.exception(ex) break fp.seek(0) fp.truncate() @@ -1000,11 +1286,34 @@ def StartOnResource(self, resource): fp.flush() fp.close() #self.lock.release() - #logging.debug("StartOnResource lock released") + #logger.debug("StartOnResource lock released") + + def Stop(self): + #used to gracefully stop CMSSW and finish scripts + with open(os.path.join(self.dirname,"temp_CMSSW_STOP"),'w') as f: + writedoc = {} + bu_lumis = [] + try: + bu_eols_files = filter( lambda x: x.endswith("_EoLS.jsn"),os.listdir(self.rawinputdir)) + bu_lumis = (sorted([int(x.split('_')[1][2:]) for x in bu_eols_files])) + except: + logger.error("Unable to parse BU EoLS files") + if len(bu_lumis): + logger.info('last closed lumisection in ramdisk is '+str(bu_lumis[-1])) + writedoc['lastLS']=bu_lumis[-1]+2 #current+2 + else: writedoc['lastLS']=2 + json.dump(writedoc,f) + try: + os.rename(os.path.join(self.dirname,"temp_CMSSW_STOP"),os.path.join(self.dirname,"CMSSW_STOP")) + except:pass + - def Shutdown(self,herod=False): + def Shutdown(self,herod): #herod mode sends sigkill to all process, however waits for all scripts to finish - logging.debug("Run:Shutdown called") + logger.debug("Run:Shutdown called") + global runs_pending_shutdown + if self.runnumber in runs_pending_shutdown: runs_pending_shutdown.remove(self.runnumber) + self.is_active_run = False try: self.changeMarkerMaybe(Run.ABORTED) @@ -1017,16 +1326,16 @@ def Shutdown(self,herod=False): for resource in self.online_resource_list: if conf.role == 'fu': if resource.processstate==100: - logging.info('terminating process '+str(resource.process.pid)+ + logger.info('terminating process '+str(resource.process.pid)+ ' in state '+str(resource.processstate)) if herod:resource.process.kill() else:resource.process.terminate() - logging.info('process '+str(resource.process.pid)+' join watchdog thread') + logger.info('process 
'+str(resource.process.pid)+' join watchdog thread') # time.sleep(.1) resource.join() - logging.info('process '+str(resource.process.pid)+' terminated') - logging.info('releasing resource(s) '+str(resource.cpu)) + logger.info('process '+str(resource.process.pid)+' terminated') + logger.info('releasing resource(s) '+str(resource.cpu)) resource.clearQuarantined() resource_lock.acquire() @@ -1034,8 +1343,8 @@ def Shutdown(self,herod=False): try: os.rename(used+cpu,idles+cpu) except OSError: - #@SM:happens if t was quarantined - logging.warning('Unable to find resource file '+used+cpu+'.') + #@SM:happens if it was quarantined + logger.warning('Unable to find resource file '+used+cpu+'.') except Exception as ex: resource_lock.release() raise(ex) @@ -1055,8 +1364,8 @@ def Shutdown(self,herod=False): self.anelastic_monitor.terminate() self.anelastic_monitor.wait() except Exception as ex: - logging.info("exception encountered in shutting down anelastic.py "+ str(ex)) - #logging.exception(ex) + logger.info("exception encountered in shutting down anelastic.py "+ str(ex)) + #logger.exception(ex) if conf.use_elasticsearch == True: try: if self.elastic_monitor: @@ -1066,18 +1375,21 @@ def Shutdown(self,herod=False): self.elastic_monitor.terminate() self.elastic_monitor.wait() except Exception as ex: - logging.info("exception encountered in shutting down elastic.py") - logging.exception(ex) + logger.info("exception encountered in shutting down elastic.py") + if "No child processes" in str(ex):pass + else:logger.exception(ex) if self.waitForEndThread is not None: self.waitForEndThread.join() except Exception as ex: - logging.info("exception encountered in shutting down resources") - logging.exception(ex) + logger.info("exception encountered in shutting down resources") + logger.exception(ex) global active_runs + global active_runs_errors active_runs_copy = active_runs[:] for run_num in active_runs_copy: if run_num == self.runnumber: + active_runs_errors.pop(active_runs.index(run_num)) active_runs.remove(run_num) try: @@ -1087,7 +1399,7 @@ def Shutdown(self,herod=False): except: pass - logging.info('Shutdown of run '+str(self.runnumber).zfill(conf.run_number_padding)+' completed') + logger.info('Shutdown of run '+str(self.runnumber).zfill(conf.run_number_padding)+' completed') def ShutdownBU(self): @@ -1108,16 +1420,18 @@ def ShutdownBU(self): time.sleep(.1) self.elastic_monitor.wait() except Exception as ex: - logging.info("exception encountered in shutting down elasticbu.py: " + str(ex)) - #logging.exception(ex) + logger.info("exception encountered in shutting down elasticbu.py: " + str(ex)) + #logger.exception(ex) global active_runs + global active_runs_errors active_runs_copy = active_runs[:] for run_num in active_runs_copy: if run_num == self.runnumber: + active_runs_errors.pop(active_runs.index(run_num)) active_runs.remove(run_num) - logging.info('Shutdown of run '+str(self.runnumber).zfill(conf.run_number_padding)+' on BU completed') + logger.info('Shutdown of run '+str(self.runnumber).zfill(conf.run_number_padding)+' on BU completed') def StartWaitForEnd(self): @@ -1127,11 +1441,13 @@ def StartWaitForEnd(self): self.waitForEndThread = threading.Thread(target = self.WaitForEnd) self.waitForEndThread.start() except Exception as ex: - logging.info("exception encountered in starting run end thread") - logging.info(ex) + logger.info("exception encountered in starting run end thread") + logger.info(ex) def WaitForEnd(self): - logging.info("wait for end thread!") + logger.info("wait for end thread!") + 
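The graceful Stop() path above derives the lumisection at which CMSSW should stop from the BU's _EoLS.jsn markers before publishing CMSSW_STOP. A small sketch of that filename parsing, assuming the run..._ls..._EoLS.jsn naming and the +2 margin used in the patch; the sample filenames are invented:

def last_ls_from_eols(filenames):
    # BU end-of-lumisection markers look like run000123_ls0042_EoLS.jsn;
    # the second underscore-separated field carries the lumisection number
    eols = [f for f in filenames if f.endswith('_EoLS.jsn')]
    lumis = sorted(int(f.split('_')[1][2:]) for f in eols)
    # stop two lumisections after the last one already closed on the BU
    return (lumis[-1] + 2) if lumis else 2

sample = ['run000123_ls0001_EoLS.jsn', 'run000123_ls0002_EoLS.jsn', 'run000123_ls0002.jsn']
print(last_ls_from_eols(sample))   # -> 4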
global cloud_mode + global entering_cloud_mode try: for resource in self.online_resource_list: resource.disableRestart() @@ -1139,19 +1455,19 @@ def WaitForEnd(self): if resource.processstate is not None:#was:100 if resource.process is not None and resource.process.pid is not None: ppid = resource.process.pid else: ppid="None" - logging.info('waiting for process '+str(ppid)+ + logger.info('waiting for process '+str(ppid)+ ' in state '+str(resource.processstate) + ' to complete ') try: resource.join() - logging.info('process '+str(resource.process.pid)+' completed') + logger.info('process '+str(resource.process.pid)+' completed') except:pass # os.rename(used+resource.cpu,idles+resource.cpu) resource.clearQuarantined() resource.process=None self.online_resource_list = [] if conf.role == 'fu': - logging.info('writing complete file') + logger.info('writing complete file') self.changeMarkerMaybe(Run.COMPLETE) try: os.remove(conf.watch_directory+'/end'+str(self.runnumber).zfill(conf.run_number_padding)) @@ -1160,29 +1476,43 @@ def WaitForEnd(self): if conf.dqm_machine==False: self.anelastic_monitor.wait() except OSError,ex: - logging.info("Exception encountered in waiting for termination of anelastic:" +str(ex)) + logger.info("Exception encountered in waiting for termination of anelastic:" +str(ex)) if conf.use_elasticsearch == True: try: self.elastic_monitor.wait() except OSError,ex: - logging.info("Exception encountered in waiting for termination of anelastic:" +str(ex)) + logger.info("Exception encountered in waiting for termination of anelastic:" +str(ex)) if conf.delete_run_dir is not None and conf.delete_run_dir == True: try: shutil.rmtree(self.dirname) except Exception as ex: - logging.exception(ex) + logger.exception(ex) global active_runs - logging.info("active runs.."+str(active_runs)) + global active_runs_errors + logger.info("active runs.."+str(active_runs)) for run_num in active_runs: if run_num == self.runnumber: + active_runs_errors.pop(active_runs.index(run_num)) active_runs.remove(run_num) - logging.info("new active runs.."+str(active_runs)) + logger.info("new active runs.."+str(active_runs)) + + if cloud_mode==True: + resource_lock.acquire() + if len(active_runs)>=1: + logger.info("VM mode: waiting for runs: "+str(active_runs)+" to finish") + else: + logger.info("No active runs. 
moving all resource files to cloud") + #give resources to cloud and bail out + move_resources_to_cloud() + entering_cloud_mode=False + resource_lock.release() except Exception as ex: - logging.error("exception encountered in ending run") - logging.exception(ex) + resource_lock.release() + logger.error("exception encountered in ending run") + logger.exception(ex) def changeMarkerMaybe(self,marker): dir = self.dirname @@ -1192,7 +1522,7 @@ def changeMarkerMaybe(self,marker): fp = open(dir+'/'+marker,'w+') fp.close() else: - logging.error("There are more than one markers for run " + logger.error("There are more than one markers for run " +str(self.runnumber)) return @@ -1201,8 +1531,8 @@ def startAnelasticWatchdog(self): self.anelasticWatchdog = threading.Thread(target = self.runAnelasticWatchdog) self.anelasticWatchdog.start() except Exception as ex: - logging.info("exception encountered in starting anelastic watchdog thread") - logging.info(ex) + logger.info("exception encountered in starting anelastic watchdog thread") + logger.info(ex) def runAnelasticWatchdog(self): try: @@ -1210,8 +1540,8 @@ def runAnelasticWatchdog(self): if self.is_active_run == True: #abort the run self.anelasticWatchdog=None - logging.fatal("Premature end of anelastic.py") - self.Shutdown() + logger.fatal("Premature end of anelastic.py") + self.Shutdown(False) except: pass @@ -1223,14 +1553,14 @@ def stopAnelasticWatchdog(self): def startCompletedChecker(self): if conf.role == 'bu': #and conf.use_elasticsearch == True: try: - logging.info('start checking completition of run '+str(self.runnumber)) + logger.info('start checking completition of run '+str(self.runnumber)) #mode 1: check for complete entries in ES #mode 2: check for runs in 'boxes' files - self.endChecker = RunCompletedChecker(1,int(self.runnumber),self.online_resource_list,self.dirname, active_runs,self.elastic_monitor) + self.endChecker = RunCompletedChecker(conf,1,int(self.runnumber),self.online_resource_list,self.dirname,active_runs,active_runs_errors,self.elastic_monitor) self.endChecker.start() except Exception,ex: - logging.error('failure to start run completition checker:') - logging.exception(ex) + logger.error('failure to start run completition checker:') + logger.exception(ex) def checkQuarantinedLimit(self): allQuarantined=True @@ -1248,8 +1578,9 @@ def checkQuarantinedLimit(self): class RunRanger: - def __init__(self): + def __init__(self,instance): self.inotifyWrapper = InotifyWrapper(self) + self.instance = instance def register_inotify_path(self,path,mask): self.inotifyWrapper.registerPath(path,mask) @@ -1258,25 +1589,64 @@ def start_inotify(self): self.inotifyWrapper.start() def stop_inotify(self): - logging.info("RunRanger: Stop inotify wrapper") self.inotifyWrapper.stop() - logging.info("RunRanger: Join inotify wrapper") self.inotifyWrapper.join() - logging.info("RunRanger: Inotify wrapper returned") + logger.info("RunRanger: Inotify wrapper shutdown done") def process_IN_CREATE(self, event): nr=0 global run_list - logging.info('RunRanger: event '+event.fullpath) + global runs_pending_shutdown + global active_runs + global active_runs_errors + global cloud_mode + global entering_cloud_mode + logger.info('RunRanger: event '+event.fullpath) dirname=event.fullpath[event.fullpath.rfind("/")+1:] - logging.info('RunRanger: new filename '+dirname) + logger.info('RunRanger: new filename '+dirname) if dirname.startswith('run'): + + if os.path.islink(event.fullpath): + logger.info('directory ' + event.fullpath + ' is link. 
Ignoring this run') + return + if not os.path.isdir(event.fullpath): + logger.info(event.fullpath +' is a file. A directory is needed to start a run.') + return nr=int(dirname[3:]) if nr!=0: try: - logging.info('new run '+str(nr)) + logger.info('new run '+str(nr)) + #terminate quarantined runs + for q_runnumber in runs_pending_shutdown: + q_run = filter(lambda x: x.runnumber==q_runnumber,run_list) + if len(q_run): + q_run[0].Shutdown(True)#run abort in herod mode (wait for anelastic/elastic to shut down) + time.sleep(.1) + + if cloud_mode==True and entering_cloud_mode==False: + logger.info("received new run notification in VM mode. Checking if idle cores are available...") + try: + if len(os.listdir(idles))<1: + logger.info("this run is skipped because FU is in VM mode and resources have not been returned") + return + #return all resources to HLTD (TODO:check if VM tool is done) + while True: + resource_lock.acquire() + #retry this operation in case cores get moved around by other means + if cleanup_resources()==True: + resource_lock.release() + break + resource_lock.release() + time.sleep(0.1) + logger.warning("could not move all resources, retrying.") + cloud_mode=False + except Exception as ex: + #resource_lock.release() + logger.fatal("failed to disable VM mode when receiving notification for run "+str(nr)) + logger.exception(ex) if conf.role == 'fu': - bu_dir = bu_disk_list_ramdisk[0]+'/'+dirname + #bu_dir = random.choice(bu_disk_list_ramdisk_instance)+'/'+dirname + bu_dir = bu_disk_list_ramdisk_instance[0]+'/'+dirname try: os.symlink(bu_dir+'/jsd',event.fullpath+'/jsd') except: @@ -1294,17 +1664,23 @@ def process_IN_CREATE(self, event): # create an EoR file that will trigger all the running jobs to exit nicely open(EoR_file_name, 'w').close() - run_list.append(Run(nr,event.fullpath,bu_dir)) + run_list.append(Run(nr,event.fullpath,bu_dir,self.instance)) resource_lock.acquire() - run_list[-1].AcquireResources(mode='greedy') - run_list[-1].Start() + if run_list[-1].AcquireResources(mode='greedy'): + run_list[-1].Start() + else: + run_list.remove(run_list[-1]) resource_lock.release() + if conf.role == 'bu' and conf.instance != 'main': + logger.info('creating run symlink in main ramdisk directory') + main_ramdisk = os.path.dirname(os.path.normpath(conf.watch_directory)) + os.symlink(event.fullpath,os.path.join(main_ramdisk,os.path.basename(event.fullpath))) except OSError as ex: - logging.error("RunRanger: "+str(ex)+" "+ex.filename) - logging.exception(ex) + logger.error("RunRanger: "+str(ex)+" "+ex.filename) + logger.exception(ex) except Exception as ex: - logging.error("RunRanger: unexpected exception encountered in forking hlt slave") - logging.exception(ex) + logger.error("RunRanger: unexpected exception encountered in forking hlt slave") + logger.exception(ex) elif dirname.startswith('emu'): nr=int(dirname[3:]) @@ -1316,8 +1692,8 @@ def process_IN_CREATE(self, event): bu_emulator.startNewRun(nr) except Exception as ex: - logging.info("exception encountered in starting BU emulator run") - logging.info(ex) + logger.info("exception encountered in starting BU emulator run") + logger.info(ex) os.remove(event.fullpath) @@ -1330,7 +1706,7 @@ def process_IN_CREATE(self, event): try: runtoend = filter(lambda x: x.runnumber==nr,run_list) if len(runtoend)==1: - logging.info('end run '+str(nr)) + logger.info('end run '+str(nr)) #remove from run_list to prevent intermittent restarts #lock used to fix a race condition when core files are being moved around resource_lock.acquire() @@ -1341,34 
+1717,34 @@ def process_IN_CREATE(self, event): runtoend[0].StartWaitForEnd() if bu_emulator and bu_emulator.runnumber != None: bu_emulator.stop() - #logging.info('run '+str(nr)+' removing end-of-run marker') + #logger.info('run '+str(nr)+' removing end-of-run marker') #os.remove(event.fullpath) elif len(runtoend)==0: - logging.warning('request to end run '+str(nr) + logger.warning('request to end run '+str(nr) +' which does not exist') os.remove(event.fullpath) else: - logging.error('request to end run '+str(nr) + logger.error('request to end run '+str(nr) +' has more than one run object - this should ' +'*never* happen') except Exception as ex: resource_lock.release() - logging.info("exception encountered when waiting hltrun to end") - logging.info(ex) + logger.info("exception encountered when waiting hltrun to end") + logger.info(ex) else: - logging.error('request to end run '+str(nr) + logger.error('request to end run '+str(nr) +' which is an invalid run number - this should ' +'*never* happen') else: - logging.error('request to end run '+str(nr) + logger.error('request to end run '+str(nr) +' which is NOT a run number - this should ' +'*never* happen') elif dirname.startswith('herod'): os.remove(event.fullpath) if conf.role == 'fu': - logging.info("killing all CMSSW child processes") + logger.info("killing all CMSSW child processes") for run in run_list: run.Shutdown(True) elif conf.role == 'bu': @@ -1378,46 +1754,48 @@ def process_IN_CREATE(self, event): try: dirlist = os.listdir(boxdir) current_time = time.time() - logging.info("sending herod to child FUs") + logger.info("sending herod to child FUs") for name in dirlist: if name == os.uname()[1]:continue age = current_time - os.path.getmtime(boxdir+name) - logging.info('found box '+name+' with keepalive age '+str(age)) + logger.info('found box '+name+' with keepalive age '+str(age)) if age < 20: - connection = httplib.HTTPConnection(name, conf.cgi_port) + connection = httplib.HTTPConnection(name, conf.cgi_port - self.cgi_instance_port_offset) connection.request("GET",'cgi-bin/herod_cgi.py') response = connection.getresponse() - logging.info("sent herod to all child FUs") + logger.info("sent herod to all child FUs") except Exception as ex: - logging.error("exception encountered in contacting resources") - logging.info(ex) + logger.error("exception encountered in contacting resources") + logger.info(ex) run_list=[] + active_runs_errors=[] active_runs=[] - elif dirname.startswith('populationcontrol'): - logging.info("terminating all ongoing runs") - for run in run_list: - if conf.role=='fu': - run.Shutdown() - elif conf.role=='bu': - run.ShutdownBU() + if len(run_list)>0: + logger.info("terminating all ongoing runs via cgi interface (populationcontrol): "+str(run_list)) + for run in run_list: + if conf.role=='fu': + run.Shutdown(run.runnumber in runs_pending_shutdown) + elif conf.role=='bu': + run.ShutdownBU() + logger.info("terminated all ongoing runs via cgi interface (populationcontrol)") run_list = [] + active_runs_errors=[] active_runs=[] - logging.info("terminated all ongoing runs via cgi interface (populationcontrol)") os.remove(event.fullpath) elif dirname.startswith('harakiri') and conf.role == 'fu': os.remove(event.fullpath) pid=os.getpid() - logging.info('asked to commit seppuku:'+str(pid)) + logger.info('asked to commit seppuku:'+str(pid)) try: - logging.info('sending signal '+str(SIGKILL)+' to myself:'+str(pid)) + logger.info('sending signal '+str(SIGKILL)+' to myself:'+str(pid)) retval = os.kill(pid, SIGKILL) - 
logging.info('sent SIGINT to myself:'+str(pid)) - logging.info('got return '+str(retval)+'waiting to die...and hope for the best') + logger.info('sent SIGINT to myself:'+str(pid)) + logger.info('got return '+str(retval)+'waiting to die...and hope for the best') except Exception as ex: - logging.error("exception in committing harakiri - the blade is not sharp enough...") - logging.error(ex) + logger.error("exception in committing harakiri - the blade is not sharp enough...") + logger.error(ex) elif dirname.startswith('quarantined'): try: @@ -1431,30 +1809,35 @@ def process_IN_CREATE(self, event): runtoend = filter(lambda x: x.runnumber==nr,run_list) if len(runtoend)==1: if runtoend[0].checkQuarantinedLimit()==True: - runtoend[0].Shutdown(True)#run abort in herod mode (wait for anelastic/elastic to shut down) + hasHigherRuns = filter(lambda x: x.runnumber>nr,run_list) + if len(hasHigherRuns)>0: + runtoend[0].Shutdown(True) + else: + runs_pending_shutdown.append(nr) except Exception as ex: - logging.exception(ex) + logger.exception(ex) elif dirname.startswith('suspend') and conf.role == 'fu': - logging.info('suspend mountpoints initiated') + logger.info('suspend mountpoints initiated') + replyport = int(dirname[7:]) if dirname[7:].isdigit()==True else conf.cgi_port global suspended suspended=True for run in run_list: - run.Shutdown(False)#terminate all ongoing runs + run.Shutdown(run.runnumber in runs_pending_shutdown)#terminate all ongoing runs run_list=[] time.sleep(.5) umount_success = cleanup_mountpoints(remount=False) if umount_success==False: time.sleep(1) - logging.error("Suspend initiated from BU failed, trying again...") + logger.error("Suspend initiated from BU failed, trying again...") #notifying itself again try:os.remove(event.fullpath) except:pass fp = open(event.fullpath,"w+") fp.close() return - #logging.info("Suspend failed, preparing for harakiri...") + #logger.info("Suspend failed, preparing for harakiri...") #time.sleep(.1) #fp = open(os.path.join(os.path.dirname(event.fullpath.rstrip(os.path.sep)),'harakiri'),"w+") #fp.close() @@ -1471,15 +1854,15 @@ def process_IN_CREATE(self, event): #first report to BU that umount was done try: if bu_name==None: - logging.fatal("No BU name was found in the bus.config file. Leaving mount points unmounted until the hltd service restart.") + logger.fatal("No BU name was found in the bus.config file. 
Leaving mount points unmounted until the hltd service restart.") os.remove(event.fullpath) return - connection = httplib.HTTPConnection(bu_name, conf.cgi_port+5,timeout=5) + connection = httplib.HTTPConnection(bu_name, replyport+20,timeout=5) connection.request("GET",'cgi-bin/report_suspend_cgi.py?host='+os.uname()[1]) response = connection.getresponse() except Exception as ex: - logging.error("Unable to report suspend state to BU "+str(bu_name)+':'+str(conf.cgi_port+5)) - logging.exception(ex) + logger.error("Unable to report suspend state to BU "+str(bu_name)+':'+str(replyport+20)) + logger.exception(ex) #loop while BU is not reachable while True: @@ -1493,26 +1876,26 @@ def process_IN_CREATE(self, event): bu_name=line.split('.')[0] break except: - logging.info('exception test 1') + logger.info('exception test 1') time.sleep(5) continue if bu_name==None: - logging.info('exception test 2') + logger.info('exception test 2') time.sleep(5) continue - logging.info('checking if BU hltd is available...') - connection = httplib.HTTPConnection(bu_name, conf.cgi_port,timeout=5) + logger.info('checking if BU hltd is available...') + connection = httplib.HTTPConnection(bu_name, replyport,timeout=5) connection.request("GET",'cgi-bin/getcwd_cgi.py') response = connection.getresponse() - logging.info('BU hltd is running !...') + logger.info('BU hltd is running !...') #if we got here, the service is back up break except Exception as ex: try: - logging.info('Failed to contact BU hltd service: ' + str(ex.args[0]) +" "+ str(ex.args[1])) + logger.info('Failed to contact BU hltd service: ' + str(ex.args[0]) +" "+ str(ex.args[1])) except: - logging.info('Failed to contact BU hltd service: ') + logger.info('Failed to contact BU hltd service '+str(ex)) time.sleep(5) #mount again @@ -1520,12 +1903,59 @@ def process_IN_CREATE(self, event): try:os.remove(event.fullpath) except:pass suspended=False - logging.info("Remount is performed") + logger.info("Remount is performed") + + elif dirname.startswith('exclude') and conf.role == 'fu': + #service on this machine is asked to be excluded for cloud use + logger.info('machine exclude initiated') + resource_lock.acquire() + cloud_mode=True + entering_cloud_mode=True + try: + for run in run_list: + if run.runnumber in runs_pending_shutdown: + run.Shutdown(True) + else: + #write signal file for CMSSW to quit with 0 after certain LS + run.Stop() + except Exception as ex: + logger.fatal("Unable to clear runs. Will not enter VM mode.") + logger.exception(ex) + cloud_mode=False + resource_lock.release() + os.remove(event.fullpath) + + elif dirname.startswith('include') and conf.role == 'fu': + #TODO: pick up latest working run.. 
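# A minimal illustration (paths assumed, not part of the original patch) of how the 'exclude'/'include'
# markers handled in this section are meant to be driven from outside: they are plain files created in
# hltd's watch directory, which on an FU defaults to /fff/data.
#
#   open('/fff/data/exclude', 'w').close()   # drain/stop running CMSSW jobs, hand cores over to 'cloud'
#   # ... virtual machines use the freed cores ...
#   open('/fff/data/include', 'w').close()   # reclaim the cores into 'idle' so new HLT runs can start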
+ tries=1000 + if cloud_mode==True: + while True: + resource_lock.acquire() + #retry this operation in case cores get moved around by other means + if entering_cloud_mode==False and cleanup_resources()==True: + resource_lock.release() + break + resource_lock.release() + time.sleep(0.1) + tries-=1 + if tries==0: + logger.fatal("Timeout: taking resources from cloud after waiting for 100 seconds") + cleanup_resources() + entering_cloud_mode=False + break + if (tries%10)==0: + logger.warning("could not move all resources, retrying.") + cloud_mode=False + os.remove(event.fullpath) + elif dirname.startswith('logrestart'): + #hook to restart logcollector process manually + restartLogCollector(self.instance) + os.remove(event.fullpath) - logging.debug("RunRanger completed handling of event "+event.fullpath) + logger.debug("RunRanger completed handling of event "+event.fullpath) def process_default(self, event): - logging.info('RunRanger: event '+event.fullpath+' type '+str(event.mask)) + logger.info('RunRanger: event '+event.fullpath+' type '+str(event.mask)) filename=event.fullpath[event.fullpath.rfind("/")+1:] class ResourceRanger: @@ -1543,29 +1973,27 @@ def start_inotify(self): self.inotifyWrapper.start() def stop_managed_monitor(self): - logging.info("ResourceRanger: Stop managed monitor") self.managed_monitor.stop() - logging.info("ResourceRanger: Join managed monitor") self.managed_monitor.join() - logging.info("ResourceRanger: managed monitor returned") + logger.info("ResourceRanger: managed monitor shutdown done") def stop_inotify(self): - logging.info("ResourceRanger: Stop inotify wrapper") self.inotifyWrapper.stop() - logging.info("ResourceRanger: Join inotify wrapper") self.inotifyWrapper.join() - logging.info("ResourceRanger: Inotify wrapper returned") + logger.info("ResourceRanger: Inotify wrapper shutdown done") def process_IN_MOVED_TO(self, event): - logging.debug('ResourceRanger-MOVEDTO: event '+event.fullpath) + logger.debug('ResourceRanger-MOVEDTO: event '+event.fullpath) + basename = os.path.basename(event.fullpath) + if basename.startswith('resource_summary'):return try: resourcepath=event.fullpath[1:event.fullpath.rfind("/")] resourcestate=resourcepath[resourcepath.rfind("/")+1:] resourcename=event.fullpath[event.fullpath.rfind("/")+1:] resource_lock.acquire() - if not (resourcestate == 'online' or resourcestate == 'offline' + if not (resourcestate == 'online' or resourcestate == 'cloud' or resourcestate == 'quarantined'): - logging.debug('ResourceNotifier: new resource ' + logger.debug('ResourceNotifier: new resource ' +resourcename +' in ' +resourcepath @@ -1575,7 +2003,7 @@ def process_IN_MOVED_TO(self, event): ongoing_runs = filter(lambda x: x.is_active_run==True,run_list) if ongoing_runs: ongoing_run = ongoing_runs[0] - logging.info("ResourceRanger: found active run "+str(ongoing_run.runnumber)) + logger.info("ResourceRanger: found active run "+str(ongoing_run.runnumber)) """grab resources that become available #@@EM implement threaded acquisition of resources here """ @@ -1584,8 +2012,8 @@ def process_IN_MOVED_TO(self, event): try: reslist = os.listdir(idlesdir) except Exception as ex: - logging.info("exception encountered in looking for resources") - logging.exception(ex) + logger.info("exception encountered in looking for resources") + logger.exception(ex) #put inotify-ed resource as the first item for resindex,resname in enumerate(reslist): fileFound=False @@ -1614,9 +2042,9 @@ def process_IN_MOVED_TO(self, event): res = 
ongoing_run.AcquireResource(resourcenames,resourcestate) if acquired_sufficient: - logging.info("ResourceRanger: acquired resource(s) "+str(res.cpu)) + logger.info("ResourceRanger: acquired resource(s) "+str(res.cpu)) ongoing_run.StartOnResource(res) - logging.info("ResourceRanger: started process on resource " + logger.info("ResourceRanger: started process on resource " +str(res.cpu)) else: #if no run is active, move (x N threads) files from except to idle to be picked up for the next run @@ -1650,19 +2078,20 @@ def process_IN_MOVED_TO(self, event): os.rename(broken+resname,idles+resname) except Exception as ex: - logging.info("exception encountered in looking for resources in except") - logging.info(ex) + logger.info("exception encountered in looking for resources in except") + logger.info(ex) except Exception as ex: - logging.error("exception in ResourceRanger") - logging.error(ex) + logger.error("exception in ResourceRanger") + logger.error(ex) try: resource_lock.release() except:pass def process_IN_MODIFY(self, event): - - logging.debug('ResourceRanger-MODIFY: event '+event.fullpath) + logger.debug('ResourceRanger-MODIFY: event '+event.fullpath) + basename = os.path.basename(event.fullpath) + if basename.startswith('resource_summary'):return try: bus_config = os.path.join(os.path.dirname(conf.resource_base.rstrip(os.path.sep)),'bus.config') if event.fullpath == bus_config: @@ -1673,21 +2102,58 @@ def process_IN_MODIFY(self, event): if self.managed_monitor: self.managed_monitor = system_monitor() self.managed_monitor.start() - logging.info("ResouceRanger: managed monitor is "+str(self.managed_monitor)) + logger.info("ResourceRanger: managed monitor is "+str(self.managed_monitor)) except Exception as ex: - logging.error("exception in ResourceRanger") - logging.error(ex) + logger.error("exception in ResourceRanger") + logger.error(ex) def process_default(self, event): - logging.debug('ResourceRanger: event '+event.fullpath +' type '+ str(event.mask)) + logger.debug('ResourceRanger: event '+event.fullpath +' type '+ str(event.mask)) filename=event.fullpath[event.fullpath.rfind("/")+1:] + def process_IN_CLOSE_WRITE(self, event): + logger.debug('ResourceRanger-IN_CLOSE_WRITE: event '+event.fullpath) + global machine_blacklist + resourcepath=event.fullpath[0:event.fullpath.rfind("/")] + basename = os.path.basename(event.fullpath) + if basename.startswith('resource_summary'):return + if conf.role=='fu':return + if basename == os.uname()[1]:return + if basename == 'blacklist': + with open(os.path.join(conf.watch_directory,'appliance','blacklist'),'r') as fi: + try: + machine_blacklist = json.load(fi) + except: + pass + if resourcepath.endswith('boxes'): + global boxinfoFUMap + if basename in machine_blacklist: + try:boxinfoFUMap.pop(basename) + except:pass + else: + try: + infile = fileHandler(event.fullpath) + current_time = time.time() + boxinfoFUMap[basename] = [infile.data,current_time] + except Exception as ex: + logger.error("Unable to read or parse boxinfo file "+basename) + logger.exception(ex) + + class hltd(Daemon2,object): - def __init__(self, pidfile): - Daemon2.__init__(self,pidfile,'hltd') + def __init__(self, instance): + self.instance=instance + Daemon2.__init__(self,'hltd',instance,'hltd') def stop(self): + #read configuration file + try: + setFromConf(self.instance) + except Exception as ex: + print " CONFIGURATION error:",str(ex),"(check configuration file) [ \033[1;31mFAILED\033[0;39m ]" + sys.exit(4) + if self.silentStatus(): try: if
os.path.exists(conf.watch_directory+'/populationcontrol'): @@ -1697,13 +2163,18 @@ def stop(self): count = 10 while count: os.stat(conf.watch_directory+'/populationcontrol') - sys.stdout.write('o.o') + if count==10: + sys.stdout.write(' o.o') + else: + sys.stdout.write('o.o') sys.stdout.flush() - time.sleep(1.) + time.sleep(.5) count-=1 except OSError, err: + time.sleep(.1) pass except IOError, err: + time.sleep(.1) pass super(hltd,self).stop() @@ -1713,8 +2184,15 @@ def run(self): infer it from the name of the machine """ + #read configuration file + setFromConf(self.instance) + logger.info(" ") + logger.info(" ") + logger.info("<<<< ---- hltd start : instance " + self.instance + " ---- >>>>") + logger.info(" ") + if conf.enabled==False: - logging.warning("Service is currently disabled.") + logger.warning("Service is currently disabled.") sys.exit(1) if conf.role == 'fu': @@ -1722,8 +2200,11 @@ def run(self): """ cleanup resources """ + while True: + if cleanup_resources()==True:break + time.sleep(0.1) + logger.warning("retrying cleanup_resources") - cleanup_resources() """ recheck mount points this is done at start and whenever the file /etc/appliance/bus.config is modified @@ -1740,6 +2221,13 @@ def run(self): except: pass + if conf.role == 'bu': + global machine_blacklist + update_success,machine_blacklist=updateBlacklist() + global ramdisk_submount_size + if self.instance == 'main': + #if there are other instance mountpoints in ramdisk, they will be subtracted from size estimate + ramdisk_submount_size = submount_size(conf.watch_directory) """ the line below is a VERY DIRTY trick to address the fact that @@ -1751,39 +2239,47 @@ def run(self): watch_directory = os.readlink(conf.watch_directory) if os.path.islink(conf.watch_directory) else conf.watch_directory resource_base = os.readlink(conf.resource_base) if os.path.islink(conf.resource_base) else conf.resource_base + if conf.use_elasticsearch == True: + time.sleep(.2) + restartLogCollector(self.instance) + #start boxinfo elasticsearch updater + global nsslock boxInfo = None if conf.role == 'bu' and conf.use_elasticsearch == True: - boxInfo = BoxInfoUpdater(watch_directory) + boxInfo = BoxInfoUpdater(watch_directory,conf,nsslock) boxInfo.start() - logCollector = None - if conf.use_elasticsearch == True: - logging.info("starting logcollector.py") - logcolleccor_args = ['/opt/hltd/python/logcollector.py',] - logCollector = subprocess.Popen(['/opt/hltd/python/logcollector.py'],preexec_fn=preexec_function,close_fds=True) - - runRanger = RunRanger() + runRanger = RunRanger(self.instance) runRanger.register_inotify_path(watch_directory,inotify.IN_CREATE) runRanger.start_inotify() - logging.info("started RunRanger - watch_directory " + watch_directory) + logger.info("started RunRanger - watch_directory " + watch_directory) + + appliance_base=resource_base + if resource_base.endswith('/'): + resource_base = resource_base[:-1] + if resource_base.rfind('/')>0: + appliance_base = resource_base[:resource_base.rfind('/')] rr = ResourceRanger() try: - imask = inotify.IN_MOVED_TO | inotify.IN_CREATE | inotify.IN_DELETE | inotify.IN_MODIFY if conf.role == 'bu': + pass #currently does nothing on bu + imask = inotify.IN_MOVED_TO | inotify.IN_CLOSE_WRITE | inotify.IN_DELETE rr.register_inotify_path(resource_base, imask) rr.register_inotify_path(resource_base+'/boxes', imask) else: - rr.register_inotify_path(resource_base, imask) + imask_appl = inotify.IN_MODIFY + imask = inotify.IN_MOVED_TO + rr.register_inotify_path(appliance_base, imask_appl) 
rr.register_inotify_path(resource_base+'/idle', imask) rr.register_inotify_path(resource_base+'/except', imask) rr.start_inotify() - logging.info("started ResourceRanger - watch_directory "+resource_base) + logger.info("started ResourceRanger - watch_directory "+resource_base) except Exception as ex: - logging.error("Exception caught in starting notifier2") - logging.error(ex) + logger.error("Exception caught in starting ResourceRanger notifier") + logger.error(ex) try: cgitb.enable(display=0, logdir="/tmp") @@ -1791,48 +2287,53 @@ def run(self): # the following allows the base directory of the http # server to be 'conf.watch_directory, which is writeable # to everybody - if os.path.exists(conf.watch_directory+'/cgi-bin'): - os.remove(conf.watch_directory+'/cgi-bin') - os.symlink('/opt/hltd/cgi',conf.watch_directory+'/cgi-bin') + if os.path.exists(watch_directory+'/cgi-bin'): + os.remove(watch_directory+'/cgi-bin') + os.symlink('/opt/hltd/cgi',watch_directory+'/cgi-bin') handler.cgi_directories = ['/cgi-bin'] - logging.info("starting http server on port "+str(conf.cgi_port)) + logger.info("starting http server on port "+str(conf.cgi_port)) httpd = BaseHTTPServer.HTTPServer(("", conf.cgi_port), handler) - logging.info("hltd serving at port "+str(conf.cgi_port)+" with role "+conf.role) - os.chdir(conf.watch_directory) + logger.info("hltd serving at port "+str(conf.cgi_port)+" with role "+conf.role) + os.chdir(watch_directory) + logger.info("<<<< ---- hltd instance " + self.instance + ": init complete, starting httpd ---- >>>>") + logger.info("") httpd.serve_forever() except KeyboardInterrupt: - logging.info("terminating all ongoing runs") - for run in run_list: - if conf.role=='fu': - run.Shutdown() - elif conf.role=='bu': - run.ShutdownBU() - logging.info("terminated all ongoing runs") - logging.info("stopping run ranger inotify helper") + logger.info("stop signal detected") + if len(run_list)>0: + logger.info("terminating all ongoing runs") + for run in run_list: + if conf.role=='fu': + global runs_pending_shutdown + run.Shutdown(run.runnumber in runs_pending_shutdown) + elif conf.role=='bu': + run.ShutdownBU() + logger.info("terminated all ongoing runs") runRanger.stop_inotify() - logging.info("stopping resource ranger inotify helper") rr.stop_inotify() if boxInfo is not None: - logging.info("stopping boxinfo updater") + logger.info("stopping boxinfo updater") boxInfo.stop() + global logCollector if logCollector is not None: + logger.info("terminating logCollector") logCollector.terminate() - logging.info("stopping system monitor") + logger.info("stopping system monitor") rr.stop_managed_monitor() - logging.info("closing httpd socket") + logger.info("closing httpd socket") httpd.socket.close() - logging.info(threading.enumerate()) - logging.info("unmounting mount points") + logger.info(threading.enumerate()) + logger.info("unmounting mount points") if cleanup_mountpoints(remount=False)==False: time.sleep(1) cleanup_mountpoints(remount=False) - logging.info("shutdown of service completed") + logger.info("shutdown of service (main thread) completed") except Exception as ex: - logging.info("exception encountered in operating hltd") - logging.info(ex) + logger.info("exception encountered in operating hltd") + logger.info(ex) runRanger.stop_inotify() rr.stop_inotify() rr.stop_managed_monitor() @@ -1840,5 +2341,7 @@ def run(self): if __name__ == "__main__": - daemon = hltd('/var/run/hltd.pid') + import procname + procname.setprocname('hltd') + daemon = hltd(sys.argv[1]) daemon.start() diff 
--git a/python/hltdconf.py b/python/hltdconf.py index a93765c..70578d8 100644 --- a/python/hltdconf.py +++ b/python/hltdconf.py @@ -33,8 +33,14 @@ def __init__(self, conffile): self.use_elasticsearch = bool(self.use_elasticsearch=="True") self.close_es_index = bool(self.close_es_index=="True") self.cgi_port = int(self.cgi_port) + self.cgi_instance_port_offset = int(self.cgi_instance_port_offset) self.soap2file_port = int(self.soap2file_port) + try: + self.instance_same_destination=bool(self.instance_same_destination=="True") + except: + self.instance_same_destination = True + self.dqm_machine = bool(self.dqm_machine=="True") if self.dqm_machine: self.resource_base = self.dqm_resource_base @@ -48,7 +54,7 @@ def __init__(self, conffile): self.service_log_level = getattr(logging,self.service_log_level) self.autodetect_parameters() - #read cluster name from elastic search configuration file (used to specify index name) + #read cluster name from elastic search configuration file (if not set up directly) if not self.elastic_cluster and self.use_elasticsearch == True: f = None try: @@ -63,14 +69,10 @@ def __init__(self, conffile): self.elastic_cluster = line.split(':')[1].strip() def dump(self): - logging.info( '') - logging.info( 'conf.user '+self.user) - logging.info( 'conf.role '+ self.role) - logging.info( 'conf.cmssw_base '+ self.cmssw_base) - logging.info( '') + logging.info( '') def autodetect_parameters(self): - if not self.role and 'bu' in os.uname()[1]: + if not self.role and (os.uname()[1].startswith('bu-') or os.uname()[1].startswith('dvbu-')): self.role = 'bu' elif not self.role: self.role = 'fu' @@ -78,5 +80,12 @@ def autodetect_parameters(self): if self.role == 'bu': self.watch_directory='/fff/ramdisk' if self.role == 'fu': self.watch_directory='/fff/data' +def initConf(instance='main'): + conf=None + try: + if instance!='main': + conf = hltdConf('/etc/hltd-'+instance+'.conf') + except:pass + if conf==None and instance=='main': conf = hltdConf('/etc/hltd.conf') + return conf -conf = hltdConf('/etc/hltd.conf') diff --git a/python/logcollector.py b/python/logcollector.py index 4662558..ac82a9e 100755 --- a/python/logcollector.py +++ b/python/logcollector.py @@ -15,7 +15,7 @@ import _inotify as inotify import threading import Queue -import json +import simplejson as json import logging import collections import subprocess @@ -733,12 +733,7 @@ def __init__(self,es_server_url): ip_url=getURLwithIP(es_server_url) self.es = ElasticSearch(ip_url) #update in case of new documents added to mapping definition - for key in mappings.central_hltdlogs_mapping: - doc = mappings.central_hltdlogs_mapping[key] - res = requests.get(ip_url+'/'+self.index_name+'/'+key+'/_mapping') - #only update if mapping is empty - if res.status_code==200 and res.content.strip()=='{}': - requests.post(ip_url+'/'+self.index_name+'/'+key+'/_mapping',str(doc)) + self.updateMappingMaybe(ip_url) break except (ElasticHttpError,ConnectionError,Timeout) as ex: #try to reconnect with different IP from DNS load balancing @@ -783,6 +778,14 @@ def elasticize_log(self,type,severity,timestamp,msg): self.es.index(self.index_name,'hltdlog',document) except: logger.warning('failed connection attempts to ' + self.es_server_url) + + def updateMappingMaybe(self,ip_url): + for key in mappings.central_hltdlogs_mapping: + doc = mappings.central_hltdlogs_mapping[key] + res = requests.get(ip_url+'/'+self.index_name+'/'+key+'/_mapping') + #only update if mapping is empty + if res.status_code==200 and res.content.strip()=='{}': + 
requests.post(ip_url+'/'+self.index_name+'/'+key+'/_mapping',json.dumps(doc)) class HLTDLogParser(threading.Thread): def __init__(self,dir,file,loglevel,esHandler,skipToEnd): @@ -951,8 +954,14 @@ def registerSignal(eventRef): if __name__ == "__main__": + + import procname + procname.setprocname('logcol') + + conf=initConf(sys.argv[1]) + logging.basicConfig(filename=os.path.join(conf.log_dir,"logcollector.log"), - level=logging.INFO, + level=conf.service_log_level, format='%(levelname)s:%(asctime)s - %(funcName)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S') logger = logging.getLogger(os.path.basename(__file__)) @@ -988,9 +997,10 @@ def registerSignal(eventRef): threadEvent = threading.Event() registerSignal(threadEvent) - hltdlogdir = '/var/log/hltd' + hltdlogdir = conf.log_dir hltdlogs = ['hltd.log','anelastic.log','elastic.log','elasticbu.log'] - cmsswlogdir = '/var/log/hltd/pid' + cmsswlogdir = os.path.join(conf.log_dir,'pid') + mask = inotify.IN_CREATE logger.info("starting CMSSW log collector for "+cmsswlogdir) diff --git a/python/mappings.py b/python/mappings.py index fbc6141..9da6b62 100644 --- a/python/mappings.py +++ b/python/mappings.py @@ -139,7 +139,7 @@ 'processed' :{'type':'integer'}, 'accepted' :{'type':'integer'}, 'errorEvents' :{'type':'integer'}, - 'size' :{'type':'integer'}, + 'size' :{'type':'long'}, } }, 'macromerge' : { @@ -154,7 +154,7 @@ 'processed' :{'type':'integer'}, 'accepted' :{'type':'integer'}, 'errorEvents' :{'type':'integer'}, - 'size' :{'type':'integer'}, + 'size' :{'type':'long'}, } } @@ -165,17 +165,22 @@ 'properties' : { 'fm_date' :{'type':'date'}, 'id' :{'type':'string'}, + 'host' :{'type':'string',"index":"not_analyzed"}, + 'appliance' :{'type':'string',"index":"not_analyzed"}, + 'instance' :{'type':'string',"index":"not_analyzed"}, 'broken' :{'type':'integer'}, 'used' :{'type':'integer'}, 'idles' :{'type':'integer'}, 'quarantined' :{'type':'integer'}, + 'cloud' :{'type':'integer'}, 'usedDataDir' :{'type':'integer'}, 'totalDataDir' :{'type':'integer'}, 'usedRamdisk' :{'type':'integer'}, 'totalRamdisk' :{'type':'integer'}, 'usedOutput' :{'type':'integer'}, 'totalOutput' :{'type':'integer'}, - 'activeRuns' :{'type':'string'} + 'activeRuns' :{'type':'string'}, + 'activeRunsErrors':{'type':'string',"index":"not_analyzed"}, }, '_timestamp' : { 'enabled' : True, @@ -193,6 +198,7 @@ 'used' :{'type':'integer'}, 'idles' :{'type':'integer'}, 'quarantined' :{'type':'integer'}, + 'cloud' :{'type':'integer'}, 'usedDataDir' :{'type':'integer'}, 'totalDataDir' :{'type':'integer'}, 'usedRamdisk' :{'type':'integer'}, @@ -200,38 +206,17 @@ 'usedOutput' :{'type':'integer'}, 'totalOutput' :{'type':'integer'}, 'activeRuns' :{'type':'string'}, - 'hosts' :{'type':'string',"index":"not_analyzed"} - }, - '_timestamp' : { - 'enabled' : True, - 'store' : "yes", - "path" : "fm_date" - } - }, - 'boxinfo_last' : {#deprecated - '_id' :{'path':'id'}, - 'properties' : { - 'fm_date' :{'type':'date'}, - 'id' :{'type':'string'}, - 'broken' :{'type':'integer'}, - 'used' :{'type':'integer'}, - 'idles' :{'type':'integer'}, - 'quarantined' :{'type':'integer'}, - 'usedDataDir' :{'type':'integer'}, - 'totalDataDir' :{'type':'integer'}, - 'usedRamdisk' :{'type':'integer'}, - 'totalRamdisk' :{'type':'integer'}, - 'usedOutput' :{'type':'integer'}, - 'totalOutput' :{'type':'integer'}, - 'activeRuns' :{'type':'string'} + 'hosts' :{'type':'string',"index":"not_analyzed"}, + 'blacklistedHosts':{'type':'string',"index":"not_analyzed"}, + 'host' :{'type':'string',"index":"not_analyzed"}, + 'instance' 
:{'type':'string',"index":"not_analyzed"} }, '_timestamp' : { 'enabled' : True, 'store' : "yes", "path" : "fm_date" } - } - + }, } diff --git a/python/setupmachine.py b/python/setupmachine.py index cf5dde4..9e875c9 100755 --- a/python/setupmachine.py +++ b/python/setupmachine.py @@ -2,6 +2,9 @@ import os,sys,socket import shutil +import json +import subprocess +import shutil import time @@ -34,10 +37,6 @@ dblogin = 'empty' dbpwd = 'empty' equipmentSet = 'latest' -default_eqset_daq2val = 'eq_140325_attributes' -#default_eqset_daq2 = 'eq_140430_mounttest' -#default_eqset_daq2 = 'eq_14-508_emu' -default_eqset_daq2 = 'eq_140522_emu' minidaq_list = ["bu-c2f13-21-01","bu-c2f13-23-01","bu-c2f13-25-01","bu-c2f13-27-01", "fu-c2f13-17-01","fu-c2f13-17-02","fu-c2f13-17-03","fu-c2f13-17-04" "fu-c2f13-19-01","fu-c2f13-19-02","fu-c2f13-19-03","fu-c2f13-19-04"] @@ -45,27 +44,52 @@ "fu-c2f13-39-03","fu-c2f13-39-04"] ed_list = ["bu-c2f13-29-01","fu-c2f13-41-01","fu-c2f13-41-02", "fu-c2f13-41-03","fu-c2f13-41-04"] + +#es_cdaq_list = ["srv-c2a11-07-01","srv-c2a11-08-01","srv-c2a11-09-01","srv-c2a11-10-01", +# "srv-c2a11-11-01","srv-c2a11-14-01","srv-c2a11-15-01","srv-c2a11-16-01", +# "srv-c2a11-17-01","srv-c2a11-18-01","srv-c2a11-19-01","srv-c2a11-20-01", +# "srv-c2a11-21-01","srv-c2a11-22-01","srv-c2a11-23-01","srv-c2a11-26-01", +# "srv-c2a11-27-01","srv-c2a11-28-01","srv-c2a11-29-01","srv-c2a11-30-01"] +# +#es_tribe_list = ["srv-c2a11-31-01","srv-c2a11-32-01","srv-c2a11-33-01","srv-c2a11-34-01", +# "srv-c2a11-35-01","srv-c2a11-38-01","srv-c2a11-39-01","srv-c2a11-40-01", +# "srv-c2a11-41-01","srv-c2a11-42-01"] + +tribe_ignore_list = ['bu-c2f13-29-01','bu-c2f13-31-01'] + myhost = os.uname()[1] -def countCPUs(): - fp=open('/proc/cpuinfo','r') - resource_count = 0 - for line in fp: - if line.startswith('processor'): - resource_count+=1 - return resource_count +#testing dual mount point +vm_override_buHNs = { + "fu-vm-01-01.cern.ch":["bu-vm-01-01","bu-vm-01-01"], + "fu-vm-01-02.cern.ch":["bu-vm-01-01"], + "fu-vm-02-01.cern.ch":["bu-vm-01-01","bu-vm-01-01"], + "fu-vm-02-02.cern.ch":["bu-vm-01-01"] + } def getmachinetype(): #print "running on host ",myhost if myhost.startswith('dvrubu-') or myhost.startswith('dvfu-') : return 'daq2val','fu' elif myhost.startswith('dvbu-') : return 'daq2val','bu' - elif myhost.startswith('bu-') : return 'daq2','bu' elif myhost.startswith('fu-') : return 'daq2','fu' - elif myhost.startswith('cmsdaq-401b28') : return 'test','fu' - elif myhost.startswith('dvfu-') : return 'test','fu' + elif myhost.startswith('bu-') : return 'daq2','bu' + elif myhost.startswith('srv-') : + try: + es_cdaq_list = socket.gethostbyname_ex('es-cdaq')[2] + es_tribe_list = socket.gethostbyname_ex('es-tribe')[2] + myaddr = socket.gethostbyname(myhost) + if myaddr in es_cdaq_list: + return 'es','escdaq' + elif myaddr in es_tribe_list: + return 'es','tribe' + else: + return 'unknown','unknown' + except socket.gaierror, ex: + print 'dns lookup error ',str(ex) + raise ex else: - print "debug" + print "unknown machine type" return 'unknown','unknown' @@ -94,7 +118,7 @@ def checkModifiedConfigInFile(file): else:zone=tzones[0] for l in lines: - if l.strip().startswith("#edited by fff meta rpm at "+getTimeString()): + if l.strip().startswith("#edited by fff meta rpm"): return True return False @@ -102,24 +126,36 @@ def checkModifiedConfigInFile(file): def checkModifiedConfig(lines): for l in lines: - if l.strip().startswith("#edited by fff meta rpm at "+getTimeString()): + if l.strip().startswith("#edited by fff meta 
rpm"): return True return False - + + +#alternates between two data inteface indices based on host naming convention +def name_identifier(): + try: + nameParts = os.uname()[1].split('-') + return (int(nameParts[-1]) * int(nameParts[-2]/2)) % 2 + except: + return 0 + + def getBUAddr(parentTag,hostname): global equipmentSet #con = cx_Oracle.connect('CMS_DAQ2_TEST_HW_CONF_W/'+dbpwd+'@'+dbhost+':10121/int2r_lb.cern.ch', - #equipmentSet = 'eq_140325_attributes' - - if equipmentSet == 'default': - if parentTag == 'daq2val': - equipmentSet = default_eqset_daq2val - if parentTag == 'daq2': - equipmentSet = default_eqset_daq2 if env == "vm": + + try: + #cluster in openstack that is not (yet) in mysql + retval = [] + for bu_hn in vm_override_buHNs[hostname]: + retval.append(["myBU",bu_hn]) + return retval + except: + pass con = MySQLdb.connect( host= dbhost, user = dblogin, passwd = dbpwd, db = dbsid) else: if parentTag == 'daq2': @@ -131,7 +167,7 @@ def getBUAddr(parentTag,hostname): con = cx_Oracle.connect(dblogin+'/'+dbpwd+'@'+dbhost+':10121/'+dbsid, cclass="FFFSETUP",purity = cx_Oracle.ATTR_PURITY_SELF) else: - con = cx_Oracle.connect('CMS_DAQ2_TEST_HW_CONF_W/'+dbpwd+'@int2r2-v.cern.ch:10121/int2r_lb.cern.ch', + con = cx_Oracle.connect('CMS_DAQ2_TEST_HW_CONF_R/'+dbpwd+'@int2r2-v.cern.ch:10121/int2r_lb.cern.ch', cclass="FFFSETUP",purity = cx_Oracle.ATTR_PURITY_SELF) #print con.version @@ -175,7 +211,6 @@ def getBUAddr(parentTag,hostname): cur.execute(qstring) else: print "query equipment set",parentTag+'/'+equipmentSet - #print '\n',qstring2 cur.execute(qstring2) retval = [] @@ -185,19 +220,64 @@ def getBUAddr(parentTag,hostname): #print retval return retval +def getAllBU(requireFU=False): + + #setups = ['daq2','daq2val'] + parentTag = 'daq2' + if True: + #if parentTag == 'daq2': + if dbhost.strip()=='null': + #con = cx_Oracle.connect('CMS_DAQ2_HW_CONF_W','pwd','cms_rcms', + con = cx_Oracle.connect(dblogin,dbpwd,dbsid, + cclass="FFFSETUP",purity = cx_Oracle.ATTR_PURITY_SELF) + else: + con = cx_Oracle.connect(dblogin+'/'+dbpwd+'@'+dbhost+':10121/'+dbsid, + cclass="FFFSETUP",purity = cx_Oracle.ATTR_PURITY_SELF) + #else: + # con = cx_Oracle.connect('CMS_DAQ2_TEST_HW_CONF_W/'+dbpwd+'@int2r2-v.cern.ch:10121/int2r_lb.cern.ch', + # cclass="FFFSETUP",purity = cx_Oracle.ATTR_PURITY_SELF) + + cur = con.cursor() + retval = [] + if requireFU==False: + qstring= "select dnsname from DAQ_EQCFG_DNSNAME where (dnsname like 'bu-%' OR dnsname like '__bu-%') \ + AND eqset_id = (select eqset_id from DAQ_EQCFG_EQSET where tag='"+parentTag.upper()+"' AND \ + ctime = (SELECT MAX(CTIME) FROM DAQ_EQCFG_EQSET WHERE tag='"+parentTag.upper()+"'))" + + else: + qstring = "select attr_value from \ + DAQ_EQCFG_HOST_ATTRIBUTE ha, \ + DAQ_EQCFG_HOST_NIC hn, \ + DAQ_EQCFG_DNSNAME d \ + where \ + ha.eqset_id=hn.eqset_id AND \ + hn.eqset_id=d.eqset_id AND \ + ha.host_id = hn.host_id AND \ + ha.attr_name like 'myBU%' AND \ + hn.nic_id = d.nic_id AND \ + d.dnsname like 'fu-%' \ + AND d.eqset_id = (select eqset_id from DAQ_EQCFG_EQSET \ + where tag='"+parentTag.upper()+"' AND \ + ctime = (SELECT MAX(CTIME) FROM DAQ_EQCFG_EQSET WHERE tag='"+parentTag.upper()+"'))" + + + + + cur.execute(qstring) + + for res in cur: + retval.append(res[0]) + cur.close() + retval = sorted(list(set(map(lambda v: v.split('.')[0], retval)))) + print retval + return retval + def getSelfDataAddr(parentTag): global equipmentSet #con = cx_Oracle.connect('CMS_DAQ2_TEST_HW_CONF_W/'+dbpwd+'@'+dbhost+':10121/int2r_lb.cern.ch', - #equipmentSet = 'eq_140325_attributes' 
- - if equipmentSet == 'default': - if parentTag == 'daq2val': - equipmentSet = default_eqset_daq2val - if parentTag == 'daq2': - equipmentSet = default_eqset_daq2 con = cx_Oracle.connect(dblogin+'/'+dbpwd+'@'+dbhost+':10121/'+dbsid, cclass="FFFSETUP",purity = cx_Oracle.ATTR_PURITY_SELF) @@ -235,13 +315,27 @@ def getSelfDataAddr(parentTag): return retval +def getInstances(hostname): + #instance.input example: + #{"cmsdaq-401b28.cern.ch":{"names":["main","ecal"],"sizes":[40,20]}} #size is in megabytes + #BU can have multiple instances, FU should have only one specified. If none, any host is assumed to have only main instance + try: + with open('/opt/fff/instances.input','r') as fi: + doc = json.load(fi) + return doc[hostname]['names'],doc[hostname]['sizes'] + except: + return ["main"],0 + class FileManager: - def __init__(self,file,sep,edited,os1='',os2=''): + def __init__(self,file,sep,edited,os1='',os2='',recreate=False): self.name = file - f = open(file,'r') - self.lines = f.readlines() - f.close() + if recreate==False: + f = open(file,'r') + self.lines = f.readlines() + f.close() + else: + self.lines=[] self.sep = sep self.regs = [] self.remove = [] @@ -259,7 +353,7 @@ def removeEntry(self,key): def commit(self): out = [] if self.edited == False: - out.append('#edited by fff meta rpm\n') + out.append('#edited by fff meta rpm at '+getTimeString()+'\n') #first removing elements for rm in self.remove: @@ -298,6 +392,8 @@ def commit(self): if insertionDone == False: self.lines.append(toAdd) for l in self.lines: + #already written + if l.startswith("#edited by fff meta rpm"):continue out.append(l) #print "file ",self.name,"\n\n" #for o in out: print o @@ -344,11 +440,6 @@ def restoreFileMaybe(file): if 'elasticsearch' in selection: restoreFileMaybe(elasticsysconf) restoreFileMaybe(elasticconf) - if 'hltd' in selection: - try: - os.remove(os.path.join(backup_dir,os.path.basename(busconfig))) - except: - pass sys.exit(0) @@ -444,13 +535,15 @@ def restoreFileMaybe(file): dqmmachine = 'False' execdir = '/opt/hltd' resourcefract = '0.5' + if cluster == 'daq2val': - runindex_name = 'dv' + runindex_name = 'dv' elif cluster == 'daq2': runindex_name = 'cdaq' if myhost in minidaq_list: runindex_name = 'minidaq' if myhost in dqm_list or myhost in ed_list: + use_elasticsearch = 'False' runindex_name = 'dqm' cmsswloglevel = 'DISABLED' @@ -470,66 +563,35 @@ def restoreFileMaybe(file): cmssw_base = '/home/dqmdevlocal' execdir = '/home/dqmdevlocal/output' ##not yet - #hardcode minidaq hosts until role is available - #if cnhostname == 'bu-c2f13-27-01.cms' or cnhostname == 'fu-c2f13-19-03.cms' or cnhostname == 'fu-c2f13-19-04.cms': - # runindex_name = 'runindex_minidaq' - #hardcode dqm hosts until role is available - #if cnhostname == 'bu-c2f13-31-01.cms' or cnhostname == 'fu-c2f13-39-01.cms' or cnhostname == 'fu-c2f13-39-02.cms' or cnhostname == 'fu-c2f13-39-03.cms' or cnhostname == 'fu-c2f13-39-04.cms': - # runindex_name = 'runindex_dqm' - else: - runindex_name = 'test' + buName = None + buDataAddr=[] - buName = '' - budomain = '' if type == 'fu': - if cluster == 'daq2val' or cluster == 'daq2': - addrList = getBUAddr(cluster,cnhostname) - selectedAddr = False - for addr in addrList: - #result = os.system("ping -c 1 "+ str(addr[1])+" >& /dev/null") - result = 0#ping disabled for now - #os.system("clear") - if result == 0: - buDataAddr = addr[1] - if addr[1].find('.'): - buName = addr[1].split('.')[0] - budomain = addr[1][addr[1].find('.'):] - else: - buName = addr[1] - selectedAddr=True - break - else: - 
print "failed to ping",str(addr[1]) + if cluster == 'daq2val' or cluster == 'daq2': + for addr in getBUAddr(cluster,cnhostname): + if buName==None: + buName = addr[1].split('.')[0] + elif buName != addr[1].split('.')[0]: + print "BU name not same for all interfaces:",buName,buNameCheck + continue + buDataAddr.append(addr[1]) #if none are pingable, first one is picked - if selectedAddr==False: - if len(addrList)>0: - addr = addrList[0] - buDataAddr = addr[1] - if addr[1].find('.'): - buName = addr[1].split('.')[0] - else: - buName = addr[1] - if buName == '': + if buName == None or len(buDataAddr)==0: print "no BU found for this FU in the dabatase" sys.exit(-1) + else: + print "FU configuration in cluster",cluster,"not supported yet !!" + sys.exit(-2) - elif cluster =='test': - hn = os.uname()[1].split(".")[0] - addrList = [hn] - buName = hn - buDataAddr = hn - else: - print "FU configuration in cluster",cluster,"not supported yet !!" - sys.exit(-2) - elif type == 'bu': if env == "vm": buName = os.uname()[1].split(".")[0] else: buName = os.uname()[1] - addrList = buName + elif type == 'tribe': + buDataAddr = getAllBU(requireFU=False) + buName='es-tribe' - #print "detected address", addrList," and name ",buName print "running configuration for machine",cnhostname,"of type",type,"in cluster",cluster,"; appliance bu is:",buName clusterName='appliance_'+buName @@ -543,7 +605,7 @@ def restoreFileMaybe(file): #print "will modify sysconfig elasticsearch configuration" #maybe backup vanilla versions essysEdited = checkModifiedConfigInFile(elasticsysconf) - if essysEdited == False and type == 'fu': #modified only on FU + if essysEdited == False: #print "elasticsearch sysconfig configuration was not yet modified" shutil.copy(elasticsysconf,os.path.join(backup_dir,os.path.basename(elasticsysconf))) @@ -551,97 +613,224 @@ def restoreFileMaybe(file): if esEdited == False: shutil.copy(elasticconf,os.path.join(backup_dir,os.path.basename(elasticconf))) - escfg = FileManager(elasticconf,':',esEdited,'',' ') + if type == 'fu' or type == 'bu': - escfg.reg('cluster.name',clusterName) - escfg.reg('node.name',cnhostname) - essyscfg = FileManager(elasticsysconf,'=',essysEdited) - essyscfg.reg('ES_HEAP_SIZE','1G') - essyscfg.commit() + essyscfg = FileManager(elasticsysconf,'=',essysEdited) + essyscfg.reg('ES_HEAP_SIZE','1G') + essyscfg.commit() - if type == 'fu': + escfg = FileManager(elasticconf,':',esEdited,'',' ') + escfg.reg('cluster.name',clusterName) + escfg.reg('node.name',cnhostname) escfg.reg('discovery.zen.ping.multicast.enabled','false') - if env=="vm": - escfg.reg('discovery.zen.ping.unicast.hosts',"[\"" + buName + "\"]") - else: - escfg.reg('discovery.zen.ping.unicast.hosts',"[\"" + buName + ".cms" + "\"]") escfg.reg('network.publish_host',es_publish_host) escfg.reg('transport.tcp.compress','true') - escfg.reg('indices.fielddata.cache.size', '50%') - if cluster != 'test': + + if type == 'fu': + if env=="vm": + escfg.reg('discovery.zen.ping.unicast.hosts',"[\"" + buName + "\"]") + else: + escfg.reg('discovery.zen.ping.unicast.hosts',"[\"" + buName + ".cms" + "\"]") + escfg.reg('indices.fielddata.cache.size', '50%') escfg.reg('node.master','false') escfg.reg('node.data','true') - if type == 'bu': - escfg.reg('network.publish_host',es_publish_host) - #escfg.reg('discovery.zen.ping.multicast.enabled','false') - #escfg.reg('discovery.zen.ping.unicast.hosts','[ \"'+elastic_host2+'\" ]') + if type == 'bu': + #escfg.reg('discovery.zen.ping.unicast.hosts','[ \"'+elastic_host2+'\" ]') + 
escfg.reg('node.master','true') + escfg.reg('node.data','false') + escfg.commit() + + if type == 'tribe': + essyscfg = FileManager(elasticsysconf,'=',essysEdited) + essyscfg.reg('ES_HEAP_SIZE','12G') + essyscfg.commit() + + escfg = FileManager(elasticconf,':',esEdited,'',' ',recreate=True) + escfg.reg('cluster.name','es-tribe') + escfg.reg('discovery.zen.ping.multicast.enabled','false') + #escfg.reg('discovery.zen.ping.unicast.hosts','['+','.join(buDataAddr)+']') + escfg.reg('transport.tcp.compress','true') + bustring = "[" + for bu in buDataAddr: + if bu in tribe_ignore_list:continue + + try: + socket.gethostbyname_ex(bu+'.cms') + except: + print "skipping",bu," - unable to lookup IP address" + continue + if bustring!="[":bustring+=',' + bustring+='"'+bu+'.cms'+'"' + bustring += "]" + escfg.reg('discovery.zen.ping.unicast.hosts',bustring) + + escfg.reg('tribe','') + i=1; + for bu in buDataAddr: + if bu in tribe_ignore_list:continue + + try: + socket.gethostbyname_ex(bu+'.cms') + except: + # print "skipping",bu," - unable to lookup IP address" + continue + + escfg.reg(' t'+str(i),'') + #escfg.reg(' discovery.zen.ping.unicast.hosts', '["'+bu+'.cms"]') + escfg.reg(' cluster.name', 'appliance_'+bu) + i=i+1 + escfg.commit() + + if type == 'escdaq': + essyscfg = FileManager(elasticsysconf,'=',essysEdited) + essyscfg.reg('ES_HEAP_SIZE','10G') + essyscfg.commit() + + escfg = FileManager(elasticconf,':',esEdited,'',' ',recreate=True) + escfg.reg('cluster.name','es-cdaq') + escfg.reg('discovery.zen.minimum_master_nodes','11') + escfg.reg('index.mapper.dynamic','false') + escfg.reg('action.auto_create_index','false') escfg.reg('transport.tcp.compress','true') escfg.reg('node.master','true') - escfg.reg('node.data','false') + escfg.reg('node.data','true') + escfg.commit() - escfg.commit() if "hltd" in selection: #first prepare bus.config file if type == 'fu': - try: - shutil.copy(busconfig,os.path.join(backup_dir,os.path.basename(busconfig))) - os.remove(busconfig) - except Exception,ex: - print "problem with copying bus.config? ",ex - pass + + #permissive:try to remove old bus.config + try:os.remove(os.path.join(backup_dir,os.path.basename(busconfig))) + except:pass + try:os.remove(busconfig) + except:pass #write bu ip address - print "WRITING BUS CONFIG ", busconfig f = open(busconfig,'w+') - f.writelines(getIPs(buDataAddr)[0]) + + #swap entries based on name (only C6100 hosts with two data interfaces): + if len(buDataAddr)>1 and name_identifier()==1: + temp = buDataAddr[0] + buDataAddr[0]=buDataAddr[1] + buDataAddr[1]=temp + + newline=False + for addr in buDataAddr: + if newline:f.writelines('\n') + newline=True + f.writelines(getIPs(addr)[0]) + #break after writing first entry. 
it is not yet safe to use secondary interface + break f.close() + #FU should have one instance assigned, BUs can have multiple + watch_dir_bu = '/fff/ramdisk' + out_dir_bu = '/fff/output' + log_dir_bu = '/var/log/hltd' + + instances,sizes=getInstances(os.uname()[1]) + if len(instances)==0: instances=['main'] + hltdEdited = checkModifiedConfigInFile(hltdconf) - #print "was modified?",hltdEdited + if hltdEdited == False: shutil.copy(hltdconf,os.path.join(backup_dir,os.path.basename(hltdconf))) - hltdcfg = FileManager(hltdconf,'=',hltdEdited,' ',' ') - hltdcfg.reg('enabled','True','[General]') if type=='bu': + try:os.remove('/etc/hltd.instances') + except:pass + + #do major ramdisk cleanup (unmount existing loop mount points, run directories and img files) + try: + subprocess.check_call(['/opt/hltd/scripts/unmountloopfs.sh','/fff/ramdisk']) + #delete existing run directories to ensure there is space (if this machine has a non-main instance) + if instances!=["main"]: + os.popen('rm -rf /fff/ramdisk/run*') + except subprocess.CalledProcessError, err1: + print 'failed to cleanup ramdisk',err1 + except Exception as ex: + print 'failed to cleanup ramdisk',ex + + cgibase=9000 + + for idx,val in enumerate(instances): + if idx!=0 and val=='main': + instances[idx]=instances[0] + instances[0]=val + break + for idx, instance in enumerate(instances): + + watch_dir_bu = '/fff/ramdisk' + out_dir_bu = '/fff/output' + log_dir_bu = '/var/log/hltd' + + cfile = hltdconf + if instance != 'main': + cfile = '/etc/hltd-'+instance+'.conf' + shutil.copy(hltdconf,cfile) + watch_dir_bu = os.path.join(watch_dir_bu,instance) + out_dir_bu = os.path.join(out_dir_bu,instance) + log_dir_bu = os.path.join(log_dir_bu,instance) + + #run loopback setup for non-main instances (is done on every boot since ramdisk is volatile) + try: + subprocess.check_call(['/opt/hltd/scripts/makeloopfs.sh','/fff/ramdisk',instance, str(sizes[idx])]) + except subprocess.CalledProcessError, err1: + print 'failed to configure loopback device mount in ramdisk' + + soap2file_port='0' + + if myhost in dqm_list or myhost in ed_list or cluster == 'daq2val' or env=='vm': + soap2file_port='8010' + + hltdcfg = FileManager(cfile,'=',hltdEdited,' ',' ') + + hltdcfg.reg('enabled','True','[General]') + hltdcfg.reg('role','bu','[General]') - #get needed info here hltdcfg.reg('user',username,'[General]') - hltdcfg.reg('cgi_port','9000','[Web]') + hltdcfg.reg('instance',instance,'[General]') + + #port for multiple instances + hltdcfg.reg('cgi_port',str(cgibase+idx),'[Web]') + hltdcfg.reg('cgi_instance_port_offset',str(idx),'[Web]') + hltdcfg.reg('soap2file_port',soap2file_port,'[Web]') + hltdcfg.reg('elastic_cluster',clusterName,'[Monitoring]') - hltdcfg.reg('watch_directory','/fff/ramdisk','[General]') - hltdcfg.reg('role','bu','[General]') - hltdcfg.reg('micromerge_output','/fff/output','[General]') + hltdcfg.reg('watch_directory',watch_dir_bu,'[General]') + #hltdcfg.reg('micromerge_output',out_dir_bu,'[General]') hltdcfg.reg('elastic_runindex_url',elastic_host,'[Monitoring]') hltdcfg.reg('elastic_runindex_name',runindex_name,'[Monitoring]') hltdcfg.reg('use_elasticsearch',use_elasticsearch,'[Monitoring]') hltdcfg.reg('es_cmssw_log_level',cmsswloglevel,'[Monitoring]') hltdcfg.reg('dqm_machine',dqmmachine,'[DQM]') - #hltdcfg.removeEntry('watch_directory') + hltdcfg.reg('log_dir',log_dir_bu,'[Logs]') hltdcfg.commit() - #remove /fff/data from BU (hack) - try: - shutil.rmtree('/fff/data') - except: - pass + + #write all instances in a file + if 'main' not in 
instances or len(instances)>1: + with open('/etc/hltd.instances',"w") as fi: + for instance in instances: fi.write(instance+"\n") + if type=='fu': + hltdcfg = FileManager(hltdconf,'=',hltdEdited,' ',' ') - #max_cores_done = False - #do_max_cores = True - #num_max_cores = countCPUs() + hltdcfg.reg('enabled','True','[General]') + hltdcfg.reg('role','fu','[General]') - #num_threads_done = False - #do_num_threads = True - #num_threads = nthreads - hltdcfg.reg('exec_directory',execdir,'[General]') hltdcfg.reg('user',username,'[General]') + #FU can only have one instance (so we take instance[0] and ignore others) + hltdcfg.reg('instance',instances[0],'[General]') + + hltdcfg.reg('exec_directory',execdir,'[General]') hltdcfg.reg('watch_directory','/fff/data','[General]') - hltdcfg.reg('role','fu','[General]') hltdcfg.reg('cgi_port','9000','[Web]') - #hltdcfg.reg('mount_options_output','rw,vers=4,rsize=65536,wsize=65536,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys','[General]') + hltdcfg.reg('cgi_instance_port_offset',"0",'[Web]') + hltdcfg.reg('soap2file_port','0','[Web]') hltdcfg.reg('elastic_cluster',clusterName,'[Monitoring]') hltdcfg.reg('es_cmssw_log_level',cmsswloglevel,'[Monitoring]') hltdcfg.reg('elastic_runindex_url',elastic_host,'[Monitoring]') @@ -653,6 +842,11 @@ def restoreFileMaybe(file): hltdcfg.reg('cmssw_threads',nthreads,'[CMSSW]') hltdcfg.reg('cmssw_streams',nfwkstreams,'[CMSSW]') hltdcfg.reg('resource_use_fraction',resourcefract,'[Resources]') - #hltdcfg.removeEntry('watch_directory') hltdcfg.commit() + if "web" in selection: + try:os.rmdir('/var/www/html') + except: + try:os.unlink('/var/www/html') + except:pass + os.symlink('/es-web','/var/www/html') diff --git a/python/soap2file b/python/soap2file new file mode 100755 index 0000000..9126c7d --- /dev/null +++ b/python/soap2file @@ -0,0 +1,64 @@ +#!/bin/env python +# +# chkconfig: 2345 81 03 +# + +import sys +import SOAPpy +import time +from subprocess import Popen +from subprocess import PIPE + +sys.path.append('/opt/hltd/python') +#sys.path.append('/opt/hltd/lib') + +from soap2file import Soap2file + + +def startService(daemon): + proc = Popen(["/opt/hltd/python/soap2file.py"], stdout=PIPE) + output = proc.communicate()[0] + time.sleep(.1) + if daemon.silentStatus() and proc.returncode==0: + print 'Starting soap2file:\t\t\t\t\t [ \033[1;32mOK\033[0;39m ]' + else: + if proc.returncode==3:sys.exit(0) + print 'Starting soap2file instance: [ \032[1;32mFAILED\033[0;39m ]' + print output + sys.exit(1) + + + +if __name__ == "__main__": + + soap2file = Soap2file() + + if not soap2file.checkEnabled(): + print "Soap2file service is disabled" + sys.exit(0) + + if len(sys.argv) == 2: + + if 'start' == sys.argv[1]: + startService(soap2file) + + elif 'stop' == sys.argv[1]: + sys.stdout.write('Stopping soap2file:') + soap2file.stop() + + elif 'restart' == sys.argv[1]: + sys.stdout.write('Stopping soap2file:') + soap2file.stop() + startService(soap2file) + + elif 'status' == sys.argv[1]: + soap2file.status() + + else: + print "Unknown command" + sys.exit(2) + sys.exit(0) + else: + print "usage: %s start|stop|restart|status" % sys.argv[0] + sys.exit(2) + diff --git a/python/soap2file.py b/python/soap2file.py index d8e6cae..ca63a88 100755 --- a/python/soap2file.py +++ b/python/soap2file.py @@ -4,12 +4,11 @@ # import os -import pwd import sys import SOAPpy sys.path.append('/opt/hltd/python') -sys.path.append('/opt/hltd/lib') +#sys.path.append('/opt/hltd/lib') import demote import hltdconf @@ -30,7 +29,6 @@ def 
writeToFile(filename,content,overwrite): except IOError as ex: return "Failed to write data: "+str(ex) - def createDirectory(dirname): try: os.mkdir(dirname) @@ -38,15 +36,25 @@ def createDirectory(dirname): except OSError as ex: return "Failed to create directory: "+str(ex) +def renamePath(oldpath,newpath): + try: + os.rename(oldpath,newpath) + return "Success" + except Exception as ex: + return "Failed to rename file: "+str(ex) class Soap2file(Daemon2): - def __init__(self,pidfile): - Daemon2.__init__(self,pidfile,'soap2file') + def __init__(self): + Daemon2.__init__(self,'soap2file','main','hltd') #SOAPpy.Config.debug = 1 self._conf=hltdconf.hltdConf('/etc/hltd.conf') self._hostname = os.uname()[1] + def checkEnabled(self): + if self._conf.soap2file_port>0:return True + return False + def run(self): dem = demote.demote(self._conf.user) dem() @@ -54,43 +62,13 @@ def run(self): server = SOAPpy.SOAPServer((self._hostname, self._conf.soap2file_port)) server.registerFunction(writeToFile) server.registerFunction(createDirectory) + server.registerFunction(renamePath) server.serve_forever() if __name__ == "__main__": - - pidfile = '/var/run/soap2file.pid' - soap2file = Soap2file(pidfile) - - if len(sys.argv) == 2: - - if 'start' == sys.argv[1]: - try: - soap2file.start() - if soap2file.silentStatus(): - print '[OK]' - else: - print '[Failed]' - except: - pass - - elif 'stop' == sys.argv[1]: - if soap2file.status(): - soap2file.stop() - elif os.path.exists(pidfile): - soap2file.delpid() - - elif 'restart' == sys.argv[1]: - soap2file.restart() - - elif 'status' == sys.argv[1]: - soap2file.status() - - else: - print "Unknown command" - sys.exit(2) - sys.exit(0) - else: - print "usage: %s start|stop|restart|status" % sys.argv[0] - sys.exit(2) + daemon = Soap2file() + import procname + procname.setprocname('soap2file') + daemon.start() diff --git a/python/testFUHistograms_cfg2.py b/python/testFUHistograms_cfg2.py index 796b453..4d69212 100644 --- a/python/testFUHistograms_cfg2.py +++ b/python/testFUHistograms_cfg2.py @@ -1,178 +1,40 @@ -import FWCore.ParameterSet.Config as cms -import FWCore.ParameterSet.VarParsing as VarParsing -import DQMServices.Components.test.checkBooking as booking -import DQMServices.Components.test.createElements as c -import os,sys - -cmsswbase = os.path.expandvars('$CMSSW_BASE/') - -options = VarParsing.VarParsing ('analysis') - -options.register ('runNumber', - 1, # default value - VarParsing.VarParsing.multiplicity.singleton, - VarParsing.VarParsing.varType.int, # string, int, or float - "Run Number") - -options.register ('buBaseDir', - '/fff/BU0', # default value - VarParsing.VarParsing.multiplicity.singleton, - VarParsing.VarParsing.varType.string, # string, int, or float - "BU base directory") - -options.register ('dataDir', - '/fff/data', # default value - VarParsing.VarParsing.multiplicity.singleton, - VarParsing.VarParsing.varType.string, # string, int, or float - "FU data directory") - -options.register ('numThreads', - 1, # default value - VarParsing.VarParsing.multiplicity.singleton, - VarParsing.VarParsing.varType.int, # string, int, or float - "Number of CMSSW threads") - -options.register ('numFwkStreams', - 1, # default value - VarParsing.VarParsing.multiplicity.singleton, - VarParsing.VarParsing.varType.int, # string, int, or float - "Number of CMSSW streams") - - - -options.parseArguments() - -process = cms.Process("HLT") - -# load DQM -process.load("DQMServices.Core.DQM_cfg") -process.load("DQMServices.Components.DQMEnvironment_cfi") - -#b = 
booking.BookingParams(sys.argv) -#b = booking.BookingParams(["CTOR","BJ","BR"]) -#b.doCheck(testOnly=False) - -elements = c.createElements() -readRunElements = c.createReadRunElements() -readLumiElements = c.createReadLumiElements() - +# /users/avetisya/LS1/DAQTest/HLT/V3 (CMSSW_7_2_1) +import FWCore.ParameterSet.Config as cms +process = cms.Process( "HLT" ) -process.maxEvents = cms.untracked.PSet( - input = cms.untracked.int32(-1) +process.HLTConfigVersion = cms.PSet( + tableName = cms.string('/users/avetisya/LS1/DAQTest/HLT/V3') ) -process.options = cms.untracked.PSet( - numberOfThreads = cms.untracked.uint32(options.numThreads), - numberOfStreams = cms.untracked.uint32(options.numFwkStreams), - multiProcesses = cms.untracked.PSet( - maxChildProcesses = cms.untracked.int32(0) - ) +process.streams = cms.PSet( + A = cms.vstring( 'A1' ), + B = cms.vstring( 'B' ), + DQM = cms.vstring( 'DQM1' ) +) +process.datasets = cms.PSet( + A1 = cms.vstring( 'p1' ), + B = cms.vstring( 'p3' ), + DQM1 = cms.vstring( 'p2' ) ) -process.MessageLogger = cms.Service("MessageLogger", - destinations = cms.untracked.vstring( 'cout' ), - cout = cms.untracked.PSet( FwkReport = - cms.untracked.PSet(reportEvery = cms.untracked.int32(10), - optionalPSet = cms.untracked.bool(True), - #limit = cms.untracked.int32(10000000) - ), - threshold = cms.untracked.string( "INFO" ) - ) - ) - -process.FastMonitoringService = cms.Service("FastMonitoringService", - sleepTime = cms.untracked.int32(1), - microstateDefPath = cms.untracked.string( cmsswbase+'/src/EventFilter/Utilities/plugins/microstatedef.jsd' ), - #fastMicrostateDefPath = cms.untracked.string( cmsswbase+'/src/EventFilter/Utilities/plugins/microstatedeffast.jsd' ), - fastName = cms.untracked.string( 'fastmoni' ), - slowName = cms.untracked.string( 'slowmoni' )) - -process.EvFDaqDirector = cms.Service("EvFDaqDirector", - buBaseDir = cms.untracked.string(options.buBaseDir), - baseDir = cms.untracked.string(options.dataDir), - directorIsBU = cms.untracked.bool(False ), - testModeNoBuilderUnit = cms.untracked.bool(False), - runNumber = cms.untracked.uint32(options.runNumber) - ) -process.PrescaleService = cms.Service( "PrescaleService", - lvl1DefaultLabel = cms.string( "B" ), - lvl1Labels = cms.vstring( 'A', - 'B' - ), - prescaleTable = cms.VPSet( - cms.PSet( pathName = cms.string( "p1" ), - prescales = cms.vuint32( 0, 10) - ), - cms.PSet( pathName = cms.string( "p2" ), - prescales = cms.vuint32( 0, 100) - ) - )) - - -process.source = cms.Source("FedRawDataInputSource", - getLSFromFilename = cms.untracked.bool(True), - testModeNoBuilderUnit = cms.untracked.bool(False), - eventChunkSize = cms.untracked.uint32(128), - numBuffers = cms.untracked.uint32(2), - eventChunkBlock = cms.untracked.uint32(128), - useL1EventID=cms.untracked.bool(True) - ) - - -process.filter1 = cms.EDFilter("HLTPrescaler", - L1GtReadoutRecordTag = cms.InputTag( "hltGtDigis" ) - ) -process.filter2 = cms.EDFilter("HLTPrescaler", - L1GtReadoutRecordTag = cms.InputTag( "hltGtDigis" ) - ) - -process.a = cms.EDAnalyzer("ExceptionGenerator", - defaultAction = cms.untracked.int32(0), - defaultQualifier = cms.untracked.int32(120)) - -process.b = cms.EDAnalyzer("ExceptionGenerator", - defaultAction = cms.untracked.int32(0), - defaultQualifier = cms.untracked.int32(0)) - - -process.filler = cms.EDAnalyzer("DummyBookFillDQMStoreMultiThread", - folder = cms.untracked.string("TestFolder/"), - elements = cms.untracked.VPSet(*elements), - fillRuns = cms.untracked.bool(True), - fillLumis = cms.untracked.bool(True), - 
book_at_constructor = cms.untracked.bool(False), - book_at_beginJob = cms.untracked.bool(False), - book_at_beginRun = cms.untracked.bool(True)) - - - - - -process.p1 = cms.Path(process.a*process.filter1) -process.p2 = cms.Path(process.b*process.filter2) - -process.dqmsave_step = cms.Path(process.filler*process.dqmSaver) - -### global options Online ### -process.add_(cms.Service("DQMStore")) -process.DQMStore.LSbasedMode = cms.untracked.bool(True) -process.DQMStore.verbose = cms.untracked.int32(5) -process.DQMStore.enableMultiThread = cms.untracked.bool(True) - -process.dqmSaver.workflow = '' -process.dqmSaver.convention = 'FilterUnit' -process.dqmSaver.saveByLumiSection = True -process.dqmSaver.fileFormat = cms.untracked.string('PB') -process.dqmSaver.fakeFilterUnitMode = cms.untracked.bool(False) - +process.source = cms.Source( "FedRawDataInputSource", + numBuffers = cms.untracked.uint32( 1 ), + useL1EventID = cms.untracked.bool( True ), + eventChunkSize = cms.untracked.uint32( 128 ), + eventChunkBlock = cms.untracked.uint32( 128 ), + getLSFromFilename = cms.untracked.bool( True ), + verifyAdler32 = cms.untracked.bool( True ) +) -process.GlobalTag = cms.ESSource( "PoolDBESSource", +process.PoolDBESSource = cms.ESSource( "PoolDBESSource", globaltag = cms.string( "GR_H_V39::All" ), - toGet = cms.VPSet( + RefreshEachRun = cms.untracked.bool( False ), + RefreshOpenIOVs = cms.untracked.bool( False ), + toGet = cms.VPSet( ), - DBParameters = cms.PSet( + DBParameters = cms.PSet( authenticationPath = cms.untracked.string( "." ), connectionRetrialTimeOut = cms.untracked.int32( 60 ), idleConnectionCleanupPeriod = cms.untracked.int32( 10 ), @@ -181,32 +43,306 @@ enableConnectionSharing = cms.untracked.bool( True ), enableReadOnlySessionOnUpdateConnection = cms.untracked.bool( False ), connectionTimeOut = cms.untracked.int32( 0 ), + authenticationSystem = cms.untracked.int32( 0 ), connectionRetrialPeriod = cms.untracked.int32( 10 ) ), RefreshAlways = cms.untracked.bool( False ), - ReconnectEachRun = cms.untracked.bool( False ), - RefreshEachRun = cms.untracked.bool( False ), - RefreshOpenIOVs = cms.untracked.bool( False ), connect = cms.string( "frontier://(proxyurl=http://localhost:3128)(serverurl=http://localhost:8000/FrontierOnProd)(serverurl=http://localhost:8000/FrontierOnProd)(retrieve-ziplevel=0)/CMS_COND_31X_GLOBALTAG" ), + ReconnectEachRun = cms.untracked.bool( False ), BlobStreamerName = cms.untracked.string( "TBufferBlobStreamingService" ) ) +process.FastTimerService = cms.Service( "FastTimerService", + dqmPath = cms.untracked.string( "HLT/TimerService" ), + dqmModuleTimeRange = cms.untracked.double( 40.0 ), + useRealTimeClock = cms.untracked.bool( True ), + enableTimingModules = cms.untracked.bool( True ), + enableDQM = cms.untracked.bool( True ), + enableDQMbyModule = cms.untracked.bool( False ), + enableTimingExclusive = cms.untracked.bool( False ), + skipFirstPath = cms.untracked.bool( False ), + enableDQMbyLumiSection = cms.untracked.bool( True ), + dqmPathTimeResolution = cms.untracked.double( 0.5 ), + dqmPathTimeRange = cms.untracked.double( 100.0 ), + dqmTimeRange = cms.untracked.double( 1000.0 ), + dqmLumiSectionsRange = cms.untracked.uint32( 2500 ), + enableDQMbyProcesses = cms.untracked.bool( True ), + enableDQMSummary = cms.untracked.bool( True ), + enableTimingSummary = cms.untracked.bool( False ), + enableDQMbyPathTotal = cms.untracked.bool( True ), + enableTimingPaths = cms.untracked.bool( True ), + enableDQMbyPathExclusive = cms.untracked.bool( True ), + dqmTimeResolution = 
cms.untracked.double( 5.0 ), + dqmModuleTimeResolution = cms.untracked.double( 0.2 ), + enableDQMbyPathActive = cms.untracked.bool( True ), + enableDQMbyPathDetails = cms.untracked.bool( True ), + enableDQMbyPathOverhead = cms.untracked.bool( True ), + enableDQMbyPathCounters = cms.untracked.bool( True ), + enableDQMbyModuleType = cms.untracked.bool( False ) +) +process.DQMStore = cms.Service( "DQMStore", + verbose = cms.untracked.int32( 0 ), + collateHistograms = cms.untracked.bool( False ), + enableMultiThread = cms.untracked.bool( True ), + forceResetOnBeginLumi = cms.untracked.bool( False ), + LSbasedMode = cms.untracked.bool( True ), + verboseQT = cms.untracked.int32( 0 ) +) +process.EvFDaqDirector = cms.Service( "EvFDaqDirector", + buBaseDir = cms.untracked.string( "." ), + runNumber = cms.untracked.uint32( 0 ), + outputAdler32Recheck = cms.untracked.bool( False ), + baseDir = cms.untracked.string( "." ) +) +process.FastMonitoringService = cms.Service( "FastMonitoringService", + slowName = cms.untracked.string( "slowmoni" ), + sleepTime = cms.untracked.int32( 1 ), + fastMonIntervals = cms.untracked.uint32( 2 ), + fastName = cms.untracked.string( "fastmoni" ) +) +process.PrescaleService = cms.Service( "PrescaleService", + forceDefault = cms.bool( False ), + prescaleTable = cms.VPSet( + cms.PSet( pathName = cms.string( "p3" ), + prescales = cms.vuint32( 50, 50, 50, 50, 50, 50, 50, 50, 50 ) + ), + cms.PSet( pathName = cms.string( "p2" ), + prescales = cms.vuint32( 100, 100, 100, 100, 100, 100, 100, 100, 100 ) + ), + cms.PSet( pathName = cms.string( "p1" ), + prescales = cms.vuint32( 10, 10, 10, 10, 10, 10, 10, 10, 10 ) + ) + ), + lvl1DefaultLabel = cms.string( "1e33" ), + lvl1Labels = cms.vstring( '2e33', + '1.4e33', + '1e33', + '7e32', + '5e32', + '3e32', + '2e32', + '1.4e32', + '1e32' ) +) +process.MessageLogger = cms.Service( "MessageLogger", + suppressInfo = cms.untracked.vstring( 'hltGtDigis' ), + debugs = cms.untracked.PSet( + threshold = cms.untracked.string( "INFO" ), + placeholder = cms.untracked.bool( True ), + ), + cout = cms.untracked.PSet( + threshold = cms.untracked.string( "ERROR" ), + ), + cerr_stats = cms.untracked.PSet( + threshold = cms.untracked.string( "WARNING" ), + output = cms.untracked.string( "cerr" ), + optionalPSet = cms.untracked.bool( True ) + ), + warnings = cms.untracked.PSet( + threshold = cms.untracked.string( "INFO" ), + placeholder = cms.untracked.bool( True ), + ), + statistics = cms.untracked.vstring( 'cerr' ), + cerr = cms.untracked.PSet( + INFO = cms.untracked.PSet( limit = cms.untracked.int32( 0 ) ), + noTimeStamps = cms.untracked.bool( False ), + FwkReport = cms.untracked.PSet( + reportEvery = cms.untracked.int32( 1 ), + limit = cms.untracked.int32( 0 ) + ), + default = cms.untracked.PSet( limit = cms.untracked.int32( 10000000 ) ), + Root_NoDictionary = cms.untracked.PSet( limit = cms.untracked.int32( 0 ) ), + FwkJob = cms.untracked.PSet( limit = cms.untracked.int32( 0 ) ), + FwkSummary = cms.untracked.PSet( + reportEvery = cms.untracked.int32( 1 ), + limit = cms.untracked.int32( 10000000 ) + ), + threshold = cms.untracked.string( "INFO" ), + ), + FrameworkJobReport = cms.untracked.PSet( + default = cms.untracked.PSet( limit = cms.untracked.int32( 0 ) ), + FwkJob = cms.untracked.PSet( limit = cms.untracked.int32( 10000000 ) ) + ), + suppressWarning = cms.untracked.vstring( 'hltGtDigis' ), + errors = cms.untracked.PSet( + threshold = cms.untracked.string( "INFO" ), + placeholder = cms.untracked.bool( True ), + ), + fwkJobReports = 
cms.untracked.vstring( 'FrameworkJobReport' ), + infos = cms.untracked.PSet( + threshold = cms.untracked.string( "INFO" ), + Root_NoDictionary = cms.untracked.PSet( limit = cms.untracked.int32( 0 ) ), + placeholder = cms.untracked.bool( True ), + ), + categories = cms.untracked.vstring( 'FwkJob', + 'FwkReport', + 'FwkSummary', + 'Root_NoDictionary' ), + destinations = cms.untracked.vstring( 'warnings', + 'errors', + 'infos', + 'debugs', + 'cout', + 'cerr' ), + threshold = cms.untracked.string( "INFO" ), + suppressError = cms.untracked.vstring( 'hltGtDigis' ) +) -process.hltTriggerJSONMonitoring = cms.EDAnalyzer('TriggerJSONMonitoring', - triggerResults = cms.InputTag( 'TriggerResults','','HLT') +process.ExceptionGenerator2 = cms.EDAnalyzer( "ExceptionGenerator", + defaultAction = cms.untracked.int32( 0 ), + defaultQualifier = cms.untracked.int32( 0 ) +) +process.HLTPrescaler = cms.EDFilter( "HLTPrescaler", + L1GtReadoutRecordTag = cms.InputTag( "hltGtDigis" ), + offset = cms.uint32( 0 ) +) +process.HLTPrescaler2 = cms.EDFilter( "HLTPrescaler", + L1GtReadoutRecordTag = cms.InputTag( "hltGtDigis" ), + offset = cms.uint32( 0 ) ) +process.hltL1GtObjectMap = cms.EDProducer( "L1GlobalTrigger", + TechnicalTriggersUnprescaled = cms.bool( True ), + ProduceL1GtObjectMapRecord = cms.bool( True ), + AlgorithmTriggersUnmasked = cms.bool( False ), + EmulateBxInEvent = cms.int32( 1 ), + AlgorithmTriggersUnprescaled = cms.bool( True ), + ProduceL1GtDaqRecord = cms.bool( False ), + ReadTechnicalTriggerRecords = cms.bool( True ), + RecordLength = cms.vint32( 3, 0 ), + TechnicalTriggersUnmasked = cms.bool( False ), + ProduceL1GtEvmRecord = cms.bool( False ), + GmtInputTag = cms.InputTag( "hltGtDigis" ), + TechnicalTriggersVetoUnmasked = cms.bool( True ), + AlternativeNrBxBoardEvm = cms.uint32( 0 ), + TechnicalTriggersInputTags = cms.VInputTag( 'simBscDigis' ), + CastorInputTag = cms.InputTag( "castorL1Digis" ), + GctInputTag = cms.InputTag( "hltGctDigis" ), + AlternativeNrBxBoardDaq = cms.uint32( 0 ), + WritePsbL1GtDaqRecord = cms.bool( False ), + BstLengthBytes = cms.int32( -1 ) +) +process.TriggerJSONMonitoring = cms.EDAnalyzer( "TriggerJSONMonitoring", + triggerResults = cms.InputTag( 'TriggerResults','','HLT' ) +) +process.DQMFileSaver = cms.EDAnalyzer( "DQMFileSaver", + runIsComplete = cms.untracked.bool( False ), + referenceHandling = cms.untracked.string( "all" ), + producer = cms.untracked.string( "DQM" ), + forceRunNumber = cms.untracked.int32( -1 ), + saveByRun = cms.untracked.int32( 1 ), + saveAtJobEnd = cms.untracked.bool( False ), + saveByLumiSection = cms.untracked.int32( 1 ), + version = cms.untracked.int32( 1 ), + referenceRequireStatus = cms.untracked.int32( 100 ), + convention = cms.untracked.string( "FilterUnit" ), + dirName = cms.untracked.string( "." 
), + fileFormat = cms.untracked.string( "PB" ) +) +process.ExceptionGenerator = cms.EDAnalyzer( "ExceptionGenerator", + defaultAction = cms.untracked.int32( 0 ), + defaultQualifier = cms.untracked.int32( 64 ) +) +process.ExceptionGenerator3 = cms.EDAnalyzer( "ExceptionGenerator", + defaultAction = cms.untracked.int32( 0 ), + defaultQualifier = cms.untracked.int32( 0 ) +) +process.HLTPrescaler3 = cms.EDFilter( "HLTPrescaler", + L1GtReadoutRecordTag = cms.InputTag( "hltGtDigis" ), + offset = cms.uint32( 0 ) +) + +process.hltOutputA = cms.OutputModule( "ShmStreamConsumer", + SelectEvents = cms.untracked.PSet( SelectEvents = cms.vstring( 'p1' ) ), + outputCommands = cms.untracked.vstring( 'drop *', + 'keep FEDRawDataCollection_rawDataCollector_*_*', + 'keep FEDRawDataCollection_source_*_*' ) +) +process.hltOutputB = cms.OutputModule( "ShmStreamConsumer", + SelectEvents = cms.untracked.PSet( SelectEvents = cms.vstring( 'p3' ) ), + outputCommands = cms.untracked.vstring( 'drop *', + 'keep FEDRawDataCollection_rawDataCollector_*_*', + 'keep FEDRawDataCollection_source_*_*' ) +) +process.hltOutputDQM = cms.OutputModule( "ShmStreamConsumer", + SelectEvents = cms.untracked.PSet( SelectEvents = cms.vstring( 'p2' ) ), + outputCommands = cms.untracked.vstring( 'drop *', + 'keep FEDRawDataCollection_rawDataCollector_*_*', + 'keep FEDRawDataCollection_source_*_*' ) +) + +process.p3 = cms.Path( process.ExceptionGenerator3 + process.HLTPrescaler3 ) +process.ep3 = cms.EndPath( process.hltOutputB ) +process.pDQMhisto = cms.Path( process.DQMFileSaver ) +process.json = cms.EndPath( process.TriggerJSONMonitoring ) +process.L1Gt = cms.Path( process.hltL1GtObjectMap ) +process.ep2 = cms.EndPath( process.hltOutputDQM ) +process.ep1 = cms.EndPath( process.hltOutputA ) +process.p2 = cms.Path( process.ExceptionGenerator2 + process.HLTPrescaler ) +process.p1 = cms.Path( process.ExceptionGenerator + process.HLTPrescaler2 ) + +process.transferSystem = cms.PSet( + destinations = cms.vstring("Tier0","DQM","ECAL","None"), + transferModes = cms.vstring("tier0_on","tier0_off","test"), + streamA = cms.PSet(tier0_on=cms.vstring( "Tier0" ),tier0_off=cms.vstring( "None" ),test=cms.vstring( "None" )), + streamB = cms.PSet(tier0_on=cms.vstring( "None" ),tier0_off=cms.vstring( "None" ),test=cms.vstring( "None" )), + streamDQM = cms.PSet(tier0_on=cms.vstring( "DQM","Tier0" ),tier0_off=cms.vstring( "DQM" ),test=cms.vstring( "None" )), + streamL1Rates = cms.PSet(tier0_on=cms.vstring( "Tier0" ),tier0_off=cms.vstring( "None" ),test=cms.vstring( "None" )), + streamHLTRates = cms.PSet(tier0_on=cms.vstring( "Tier0" ),tier0_off=cms.vstring( "None" ),test=cms.vstring( "None" )), + streamDQMHistograms = cms.PSet(tier0_on=cms.vstring( "DQM" ),tier0_off=cms.vstring( "DQM" ),test=cms.vstring( "None" )) +) + +import FWCore.ParameterSet.VarParsing as VarParsing + +import os + +cmsswbase = os.path.expandvars('$CMSSW_BASE/') + +options = VarParsing.VarParsing ('analysis') + +options.register ('runNumber', + 1, # default value + VarParsing.VarParsing.multiplicity.singleton, + VarParsing.VarParsing.varType.int, # string, int, or float + "Run Number") + +options.register ('buBaseDir', + '/fff/BU0', # default value + VarParsing.VarParsing.multiplicity.singleton, + VarParsing.VarParsing.varType.string, # string, int, or float + "BU base directory") + +options.register ('dataDir', + '/fff/data', # default value + VarParsing.VarParsing.multiplicity.singleton, + VarParsing.VarParsing.varType.string, # string, int, or float + "FU data directory") + 
+options.register ('numThreads', + 1, # default value + VarParsing.VarParsing.multiplicity.singleton, + VarParsing.VarParsing.varType.int, # string, int, or float + "Number of CMSSW threads") +options.register ('numFwkStreams', + 1, # default value + VarParsing.VarParsing.multiplicity.singleton, + VarParsing.VarParsing.varType.int, # string, int, or float + "Number of CMSSW streams") -process.streamA = cms.OutputModule("EvFOutputModule", - SelectEvents = cms.untracked.PSet(SelectEvents = cms.vstring( 'p1' )) - ) +options.parseArguments() -process.streamDQM = cms.OutputModule("EvFOutputModule", - SelectEvents = cms.untracked.PSet(SelectEvents = cms.vstring( 'p2' )) - ) +process.options = cms.untracked.PSet( + numberOfThreads = cms.untracked.uint32(options.numThreads), + numberOfStreams = cms.untracked.uint32(options.numFwkStreams), + multiProcesses = cms.untracked.PSet( + maxChildProcesses = cms.untracked.int32(0) + ) +) -process.ep = cms.EndPath(process.streamA+process.streamDQM+process.hltTriggerJSONMonitoring) +process.PoolDBESSource.connect = 'frontier://FrontierProd/CMS_COND_31X_GLOBALTAG' +process.PoolDBESSource.pfnPrefix = cms.untracked.string('frontier://FrontierProd/') -process.GlobalTag.connect = 'frontier://FrontierProd/CMS_COND_31X_GLOBALTAG' -process.GlobalTag.pfnPrefix = cms.untracked.string('frontier://FrontierProd/') +process.EvFDaqDirector.buBaseDir = options.buBaseDir +process.EvFDaqDirector.baseDir = options.dataDir +process.EvFDaqDirector.runNumber = options.runNumber diff --git a/rpm/fffmeta-1.5.3-6.noarch.rpm b/rpm/fffmeta-1.5.3-6.noarch.rpm deleted file mode 100644 index 158f1fe..0000000 Binary files a/rpm/fffmeta-1.5.3-6.noarch.rpm and /dev/null differ diff --git a/rpm/fffmeta-1.6.0-0.noarch.rpm b/rpm/fffmeta-1.6.0-0.noarch.rpm new file mode 100644 index 0000000..4723008 Binary files /dev/null and b/rpm/fffmeta-1.6.0-0.noarch.rpm differ diff --git a/rpm/fffmeta-vm-1.5.3-6.noarch.rpm b/rpm/fffmeta-vm-1.5.3-6.noarch.rpm deleted file mode 100644 index 62405c9..0000000 Binary files a/rpm/fffmeta-vm-1.5.3-6.noarch.rpm and /dev/null differ diff --git a/rpm/fffmeta-vm-1.6.0-0.noarch.rpm b/rpm/fffmeta-vm-1.6.0-0.noarch.rpm new file mode 100644 index 0000000..f809c9b Binary files /dev/null and b/rpm/fffmeta-vm-1.6.0-0.noarch.rpm differ diff --git a/rpm/hltd-1.5.3-6.x86_64.rpm b/rpm/hltd-1.5.3-6.x86_64.rpm deleted file mode 100644 index 11c46c5..0000000 Binary files a/rpm/hltd-1.5.3-6.x86_64.rpm and /dev/null differ diff --git a/rpm/hltd-1.6.0-0.x86_64.rpm b/rpm/hltd-1.6.0-0.x86_64.rpm new file mode 100644 index 0000000..54dc3e5 Binary files /dev/null and b/rpm/hltd-1.6.0-0.x86_64.rpm differ diff --git a/scripts/hltdrpm.sh b/scripts/hltdrpm.sh index eb09fa0..4da465c 100755 --- a/scripts/hltdrpm.sh +++ b/scripts/hltdrpm.sh @@ -36,15 +36,17 @@ mkdir -p etc/init.d mkdir -p etc/logrotate.d mkdir -p etc/appliance/resources/idle mkdir -p etc/appliance/resources/online -mkdir -p etc/appliance/resources/offline mkdir -p etc/appliance/resources/except mkdir -p etc/appliance/resources/quarantined +mkdir -p etc/appliance/resources/cloud mkdir -p usr/lib64/python2.6/site-packages mkdir -p usr/lib64/python2.6/site-packages/pyelasticsearch ls cp -r $BASEDIR/python/hltd $TOPDIR/etc/init.d/hltd -cp -r $BASEDIR/python/soap2file.py $TOPDIR/etc/init.d/soap2file +cp -r $BASEDIR/python/soap2file $TOPDIR/etc/init.d/soap2file cp -r $BASEDIR/* $TOPDIR/opt/hltd +rm -rf $TOPDIR/opt/hltd/python/hltd +rm -rf $TOPDIR/opt/hltd/python/soap2file cp -r $BASEDIR/etc/hltd.conf $TOPDIR/etc/ cp -r 
$BASEDIR/etc/logrotate.d/hltd $TOPDIR/etc/logrotate.d/ echo "working in $PWD" @@ -53,9 +55,9 @@ ls opt/hltd echo "Creating DQM directories" mkdir -p etc/appliance/dqm_resources/idle mkdir -p etc/appliance/dqm_resources/online -mkdir -p etc/appliance/dqm_resources/offline mkdir -p etc/appliance/dqm_resources/except mkdir -p etc/appliance/dqm_resources/quarantined +mkdir -p etc/appliance/dqm_resources/cloud cd $TOPDIR #pyelasticsearch @@ -152,12 +154,18 @@ Classifier: Topic :: System :: Filesystems Classifier: Topic :: System :: Monitoring EOF + +cd $TOPDIR +cd opt/hltd/lib/python-procname/ +./setup.py -q build +cp build/lib.linux-x86_64-2.6/procname.so $TOPDIR/usr/lib64/python2.6/site-packages + cd $TOPDIR # we are done here, write the specs and make the fu***** rpm cat > hltd.spec < in fffmeta -#/sbin/service hltd restart #restart delegated to fffmeta! %files %dir %attr(777, -, -) /var/log/hltd %dir %attr(777, -, -) /var/log/hltd/pid @@ -212,10 +216,11 @@ rm -rf /etc/appliance/except/* /usr/lib64/python2.6/site-packages/*_inotify.so* /usr/lib64/python2.6/site-packages/*python_inotify* /usr/lib64/python2.6/site-packages/pyelasticsearch +/usr/lib64/python2.6/site-packages/procname.so %preun if [ \$1 == 0 ]; then - /sbin/service hltd stop - /sbin/service hltd stop + /sbin/service hltd stop || true + /sbin/service soap2file stop || true fi EOF mkdir -p RPMBUILD/{RPMS/{noarch},SPECS,BUILD,SOURCES,SRPMS} diff --git a/scripts/makeloopfs.sh b/scripts/makeloopfs.sh new file mode 100755 index 0000000..f745ad0 --- /dev/null +++ b/scripts/makeloopfs.sh @@ -0,0 +1,106 @@ +#!/bin/bash +if [ -n "$1" ]; then + if [ -n "$2" ]; then + if [ -n "$3" ]; then + + if [ -d $1 ]; then + + basedir=`readlink -e $1` + image=$basedir/$2.img + mountpoint=$basedir/$2 + sizemb=$3 + ret=0 + umask 0 + + #protect from going wrong + if [ "$mountpoint" == "/" ]; then exit 99; fi + if [ "$mountpoint" == "//" ]; then exit 99; fi + if [ "$mountpoint" == "/fff" ]; then exit 99; fi + if [ "$mountpoint" == "/fff/" ]; then exit 99; fi + if [ "$mountpoint" == "/fff/ramdisk" ]; then exit 99; fi + if [ "$mountpoint" == "/fff/ramdisk/" ]; then exit 99; fi + if [ "$mountpoint" == "fff/ramdisk" ]; then exit 99; fi + if [ "$mountpoint" == "fff/ramdisk/" ]; then exit 99; fi + + echo "makeloop script invoked for creating loop device disk $2 in ${basedir} of size $3 MB" + + if [ -d $mountpoint ]; then + + point=`mount | grep $mountpoint | grep /dev/loop | awk '{print $3}'` + + if [ "$point" != "" ]; then + #kill any processes that might use the mount point and remove from NFS + fuser -km $point + exportfs -u *:$point + #unmunt loop device + umount $point + if [ $? != 0 ]; then + sleep 0.1 + fuser -km $point + exportfs -u *:$point + umount $point + if [ $? != 0 ]; then + echo "Unsuccessful umount of $point !" + exit 1 + fi + fi + exportfs -u *:$point + fi + fi + #deleting mount point + rm -rf $mountpoint + if [ $? != 0 ]; then + echo "Unsuccessful delete of unmounted mount point $mountpoint !" + exit 2 + fi + + if [ -f $image ]; then + chmod 755 $image + rm -rf $image + if [ $? != 0 ]; then + echo "Unsuccessful delete old image file $image" + exit 3 + fi + fi + + dd if=/dev/zero of=$image bs=1048576 count=$sizemb >& /dev/null + echo y | mkfs.ext3 $image > /dev/null + #try mount + mkdir $mountpoint + if [ $? != 0 ]; then + echo "Unsuccessful make mount point directory!" + exit 4 + fi + + echo "mounting image directory..." + mount -o loop,noatime $image $mountpoint + if [ $? 
!= 0 ]; then + echo "Unsuccessful mount with parameters $image $mountpoint" + exit 5 + fi + + chmod -R 777 $mountpoint + + exportfs -o rw,sync,no_root_squash,no_subtree_check *:$mountpoint + if [ $? != 0 ]; then + echo "exportfs command failed for $mountpoint !" + exit 6 + fi + exit 0 + #end + else + echo "base directory not found!" + fi + else + echo "No parameter 3 given!" + fi + else + echo "No parameter 2 given!" + fi +else + echo "No parameter 1 given!" +fi + +echo "Usage: makeloopfs.sh basedir subdir imgsize(MB)" +exit 1 + diff --git a/scripts/metarpm.sh b/scripts/metarpm.sh index b303dc3..56c66d4 100755 --- a/scripts/metarpm.sh +++ b/scripts/metarpm.sh @@ -4,16 +4,11 @@ SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" cd $SCRIPTDIR/.. BASEDIR=$PWD -PACKAGENAME="fffmeta" - PARAMCACHE="paramcache" if [ -n "$1" ]; then - PARAMCACHE=$1 -fi - -if [ -n "$2" ]; then - PACKAGENAME=$2 + #PARAMCACHE=$1 + PARAMCACHE=${1##*/} fi echo "Using cache file $PARAMCACHE" @@ -32,10 +27,6 @@ else done fi - - - - echo "Enviroment (prod,vm) (press enter for \"${lines[0]}\"):" readin="" read readin @@ -86,7 +77,7 @@ if [ ${#readin} != "0" ]; then lines[6]=$readin fi -echo "Equipment set (press enter for: \"${lines[7]}\") - type 'latest' to use latest eq set or 'default' for default one or 'test' for VM enviroment:" +echo "Equipment set (press enter for: \"${lines[7]}\") - type 'latest' or enter a specific one:" readin="" read readin if [ ${#readin} != "0" ]; then @@ -107,7 +98,6 @@ if [ ${#readin} != "0" ]; then lines[9]=$readin fi - echo "number of framework streams per process (press enter for: ${lines[10]}):" readin="" read readin @@ -115,8 +105,6 @@ if [ ${#readin} != "0" ]; then lines[10]=$readin fi - - echo "CMSSW log collection level (DEBUG,INFO,WARNING,ERROR or FATAL) (press enter for: ${lines[11]}):" readin="" read readin @@ -145,12 +133,21 @@ done chmod 500 $SCRIPTDIR/$PARAMCACHE # create a build area +if [ ${lines[0]} == "prod" ]; then + PACKAGENAME="fffmeta" +elif [ ${lines[0]} == "vm" ]; then + PACKAGENAME="fffmeta-vm" +else + echo "Environment ${lines[0]} not supported. 
Available: prod or vm" + exit 1 +fi + echo "removing old build area" -rm -rf /tmp/fffmeta-build-tmp +rm -rf /tmp/$PACKAGENAME-build-tmp echo "creating new build area" -mkdir /tmp/fffmeta-build-tmp +mkdir /tmp/$PACKAGENAME-build-tmp ls -cd /tmp/fffmeta-build-tmp +cd /tmp/$PACKAGENAME-build-tmp mkdir BUILD mkdir RPMS TOPDIR=$PWD @@ -160,13 +157,19 @@ ls pluginpath="/opt/fff/esplugins/" pluginname1="bigdesk" pluginfile1="lukas-vlcek-bigdesk-v2.4.0-2-g9807b92-mod.zip" +pluginname2="head" +pluginfile2="head-master.zip" +pluginname3="HQ" +pluginfile3="hq-master.zip" +pluginname4="paramedic" +pluginfile4="paramedic-master.zip" cd $TOPDIR # we are done here, write the specs and make the fu***** rpm cat > fffmeta.spec <= 1.2.0, hltd >= 1.5.3, cx_Oracle >= 5.1.2, java-1.7.0-openjdk +Requires:elasticsearch >= 1.4.2, hltd >= 1.6.0, cx_Oracle >= 5.1.2, java-1.7.0-openjdk Provides:/opt/fff/configurefff.sh Provides:/opt/fff/setupmachine.py +Provides:/opt/fff/instances.input Provides:/etc/init.d/fffmeta #Provides:/opt/fff/backup/elasticsearch.yml @@ -203,10 +207,15 @@ mkdir -p opt/fff/esplugins mkdir -p opt/fff/backup mkdir -p etc/init.d/ cp $BASEDIR/python/setupmachine.py %{buildroot}/opt/fff/setupmachine.py +cp $BASEDIR/etc/instances.input %{buildroot}/opt/fff/instances.input echo "#!/bin/bash" > %{buildroot}/opt/fff/configurefff.sh +echo python2.6 /opt/hltd/python/fillresources.py >> %{buildroot}/opt/fff/configurefff.sh echo python2.6 /opt/fff/setupmachine.py elasticsearch,hltd $params >> %{buildroot}/opt/fff/configurefff.sh cp $BASEDIR/esplugins/$pluginfile1 %{buildroot}/opt/fff/esplugins/$pluginfile1 +cp $BASEDIR/esplugins/$pluginfile2 %{buildroot}/opt/fff/esplugins/$pluginfile2 +cp $BASEDIR/esplugins/$pluginfile3 %{buildroot}/opt/fff/esplugins/$pluginfile3 +cp $BASEDIR/esplugins/$pluginfile4 %{buildroot}/opt/fff/esplugins/$pluginfile4 cp $BASEDIR/esplugins/install.sh %{buildroot}/opt/fff/esplugins/install.sh cp $BASEDIR/esplugins/uninstall.sh %{buildroot}/opt/fff/esplugins/uninstall.sh @@ -234,9 +243,13 @@ echo "fi" >> %{buildroot}/etc/init.d/fffmeta %attr( 755 ,root, root) /opt/fff/setupmachine.py %attr( 755 ,root, root) /opt/fff/setupmachine.pyc %attr( 755 ,root, root) /opt/fff/setupmachine.pyo +%attr( 755 ,root, root) /opt/fff/instances.input %attr( 700 ,root, root) /opt/fff/configurefff.sh %attr( 755 ,root, root) /etc/init.d/fffmeta %attr( 444 ,root, root) /opt/fff/esplugins/$pluginfile1 +%attr( 444 ,root, root) /opt/fff/esplugins/$pluginfile2 +%attr( 444 ,root, root) /opt/fff/esplugins/$pluginfile3 +%attr( 444 ,root, root) /opt/fff/esplugins/$pluginfile4 %attr( 755 ,root, root) /opt/fff/esplugins/install.sh %attr( 755 ,root, root) /opt/fff/esplugins/uninstall.sh @@ -254,10 +267,20 @@ python2.6 /opt/fff/setupmachine.py elasticsearch $params #update permissions in case new rpm changed uid/guid chown -R elasticsearch:elasticsearch /var/log/elasticsearch chown -R elasticsearch:elasticsearch /var/lib/elasticsearch -echo /opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname1 -/opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname1 -echo /opt/fff/esplugins/install.sh /usr/share/elasticsearch $pluginfile1 $pluginname1 + +#plugins +/opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname1 > /dev/null /opt/fff/esplugins/install.sh /usr/share/elasticsearch $pluginfile1 $pluginname1 + +/opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname2 > /dev/null +/opt/fff/esplugins/install.sh /usr/share/elasticsearch $pluginfile2 $pluginname2 + 
+/opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname3 > /dev/null +/opt/fff/esplugins/install.sh /usr/share/elasticsearch $pluginfile3 $pluginname3 + +/opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname4 > /dev/null +/opt/fff/esplugins/install.sh /usr/share/elasticsearch $pluginfile4 $pluginname4 + /sbin/service elasticsearch start chkconfig --del elasticsearch chkconfig --add elasticsearch @@ -271,7 +294,11 @@ chkconfig --add elasticsearch %triggerin -- hltd #echo "triggered on hltd update or install" + /sbin/service hltd stop || true +/sbin/service soap2file stop || true +rm -rf /etc/hltd.instances + python2.6 /opt/fff/setupmachine.py restore,hltd python2.6 /opt/fff/setupmachine.py hltd $params @@ -288,11 +315,14 @@ fi #set up resources for hltd /opt/hltd/python/fillresources.py -/sbin/service hltd restart +/sbin/service hltd restart || true +/sbin/service soap2file restart || true + chkconfig --del hltd -#chkconfig --del soap2file +chkconfig --del soap2file + chkconfig --add hltd -#chkconfig --add soap2file +chkconfig --add soap2file %preun if [ \$1 == 0 ]; then @@ -300,12 +330,16 @@ if [ \$1 == 0 ]; then chkconfig --del fffmeta chkconfig --del elasticsearch chkconfig --del hltd -# chkconfig --del soap2file + chkconfig --del soap2file + + /sbin/service hltd stop || true /sbin/service elasticsearch stop || true /opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname1 || true + /opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname2 || true + /opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname3 || true + /opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname4 || true - /sbin/service hltd stop || true python2.6 /opt/fff/setupmachine.py restore,hltd,elasticsearch fi diff --git a/scripts/paramcache-vm b/scripts/paramcache-vm index e70c022..170fc26 100755 --- a/scripts/paramcache-vm +++ b/scripts/paramcache-vm @@ -1,12 +1,12 @@ vm http://cu-01.cern.ch:9200 -/opt/cmssw +/opt/offline rcms-flightsim fffsetup rcms ominozzo2 test -bufu +daqlocal 1 1 INFO diff --git a/scripts/tribe-metarpm.sh b/scripts/tribe-metarpm.sh new file mode 100755 index 0000000..2521664 --- /dev/null +++ b/scripts/tribe-metarpm.sh @@ -0,0 +1,248 @@ +#!/bin/bash -e +BUILD_ARCH=noarch +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd $SCRIPTDIR/.. 
+BASEDIR=$PWD + +PACKAGENAME="fffmeta-tribe" + +PARAMCACHE="paramcache" + +echo "Using cache file $PARAMCACHE" + +if [ -f $SCRIPTDIR/$PARAMCACHE ]; +then + readarray lines < $SCRIPTDIR/$PARAMCACHE + for (( i=0; i < 12; i++ )) + do + lines[$i]=`echo -n ${lines[$i]} | tr -d "\n"` + done +else + for (( i=0; i < 12; i++ )) + do + lines[$i]="" + done +fi + +echo "Enviroment (prod,vm) (press enter for \"${lines[0]}\"):" +readin="" +read readin +if [ ${#readin} != "0" ]; then +lines[0]=$readin +fi +nousevar=$readin +nousevar=$readin +lines[1]="null" +lines[2]="null" + +echo "HWCFG DB server (press enter for \"${lines[3]}\"):" +readin="" +read readin +if [ ${#readin} != "0" ]; then +lines[3]=$readin +fi + +echo "HWCFG DB SID (or db name in VM enviroment) (press enter for: \"${lines[4]}\"):" +echo "(SPECIFIES address in TNSNAMES.ORA file if DB server field was \"null\"!)" +readin="" +read readin +if [ ${#readin} != "0" ]; then +lines[4]=$readin +fi + +echo "HWCFG DB username (press enter for: \"${lines[5]}\"):" +readin="" +read readin +if [ ${#readin} != "0" ]; then +lines[5]=$readin +fi + +echo "HWCFG DB password (press enter for: \"${lines[6]}\"):" +readin="" +read readin +if [ ${#readin} != "0" ]; then +lines[6]=$readin +fi + +echo "Equipment set (press enter for: \"${lines[7]}\") - type 'latest' or enter a specific one:" +readin="" +read readin +if [ ${#readin} != "0" ]; then +lines[7]=$readin +fi + +lines[8]="null" +lines[9]="null" +lines[10]="null" +lines[11]="null" + +params="" +for (( i=0; i < 12; i++ )) +do + params="$params ${lines[i]}" +done + +# create a build area + +echo "removing old build area" +rm -rf /tmp/fffmeta-tribe-build-tmp +echo "creating new build area" +mkdir /tmp/fffmeta-tribe-build-tmp +ls +cd /tmp/fffmeta-tribe-build-tmp +mkdir BUILD +mkdir RPMS +TOPDIR=$PWD +echo "working in $PWD" +ls + +pluginpath="/opt/fff/esplugins/" +pluginname1="bigdesk" +pluginfile1="lukas-vlcek-bigdesk-v2.4.0-2-g9807b92-mod.zip" +pluginname2="head" +pluginfile2="head-master.zip" +pluginname3="HQ" +pluginfile3="hq-master.zip" +pluginname4="paramedic" +pluginfile4="paramedic-master.zip" + +cd $TOPDIR +# we are done here, write the specs and make the fu***** rpm +cat > fffmeta-tribe.spec <= 1.4.2, cx_Oracle >= 5.1.2, java-1.7.0-openjdk, httpd >= 2.2.15, php >= 5.3.3, php-oci8 >= 1.4.9 + +Provides:/opt/fff/configurefff.sh +Provides:/opt/fff/setupmachine.py +Provides:/etc/init.d/fffmeta + +%description +fffmeta configuration setup package + +%prep +%build + +%install +rm -rf \$RPM_BUILD_ROOT +mkdir -p \$RPM_BUILD_ROOT +%__install -d "%{buildroot}/opt/fff" +%__install -d "%{buildroot}/opt/fff/backup" +%__install -d "%{buildroot}/opt/fff/esplugins" +%__install -d "%{buildroot}/etc/init.d" + +mkdir -p opt/fff/esplugins +mkdir -p opt/fff/backup +mkdir -p etc/init.d/ +cp $BASEDIR/python/setupmachine.py %{buildroot}/opt/fff/setupmachine.py +echo "#!/bin/bash" > %{buildroot}/opt/fff/configurefff.sh +echo python2.6 /opt/fff/setupmachine.py elasticsearch,web $params >> %{buildroot}/opt/fff/configurefff.sh + +cp $BASEDIR/esplugins/$pluginfile1 %{buildroot}/opt/fff/esplugins/$pluginfile1 +cp $BASEDIR/esplugins/$pluginfile2 %{buildroot}/opt/fff/esplugins/$pluginfile2 +cp $BASEDIR/esplugins/$pluginfile3 %{buildroot}/opt/fff/esplugins/$pluginfile3 +cp $BASEDIR/esplugins/$pluginfile4 %{buildroot}/opt/fff/esplugins/$pluginfile4 +cp $BASEDIR/esplugins/install.sh %{buildroot}/opt/fff/esplugins/install.sh +cp $BASEDIR/esplugins/uninstall.sh %{buildroot}/opt/fff/esplugins/uninstall.sh + +echo "#!/bin/bash" >> 
%{buildroot}/etc/init.d/fffmeta +echo "#" >> %{buildroot}/etc/init.d/fffmeta +echo "# chkconfig: 2345 79 22" >> %{buildroot}/etc/init.d/fffmeta +echo "#" >> %{buildroot}/etc/init.d/fffmeta +echo "if [ \\\$1 == \"start\" ]; then" >> %{buildroot}/etc/init.d/fffmeta +echo " /opt/fff/configurefff.sh" >> %{buildroot}/etc/init.d/fffmeta +echo " exit 0" >> %{buildroot}/etc/init.d/fffmeta +echo "fi" >> %{buildroot}/etc/init.d/fffmeta +echo "if [ \\\$1 == \"restart\" ]; then" >> %{buildroot}/etc/init.d/fffmeta +echo "/opt/fff/configurefff.sh" >> %{buildroot}/etc/init.d/fffmeta +echo " exit 0" >> %{buildroot}/etc/init.d/fffmeta +echo "fi" >> %{buildroot}/etc/init.d/fffmeta +echo "if [ \\\$1 == \"status\" ]; then" >> %{buildroot}/etc/init.d/fffmeta +echo "echo fffmeta does not have status" >> %{buildroot}/etc/init.d/fffmeta +echo " exit 0" >> %{buildroot}/etc/init.d/fffmeta +echo "fi" >> %{buildroot}/etc/init.d/fffmeta + + +%files +%defattr(-, root, root, -) +#/opt/fff +%attr( 755 ,root, root) /opt/fff/setupmachine.py +%attr( 755 ,root, root) /opt/fff/setupmachine.pyc +%attr( 755 ,root, root) /opt/fff/setupmachine.pyo +%attr( 700 ,root, root) /opt/fff/configurefff.sh +%attr( 755 ,root, root) /etc/init.d/fffmeta +%attr( 444 ,root, root) /opt/fff/esplugins/$pluginfile1 +%attr( 444 ,root, root) /opt/fff/esplugins/$pluginfile2 +%attr( 444 ,root, root) /opt/fff/esplugins/$pluginfile3 +%attr( 444 ,root, root) /opt/fff/esplugins/$pluginfile4 +%attr( 755 ,root, root) /opt/fff/esplugins/install.sh +%attr( 755 ,root, root) /opt/fff/esplugins/uninstall.sh + +%post +#echo "post install trigger" +chkconfig --del fffmeta +chkconfig --add fffmeta +#disabled, can be run manually for now + +%triggerin -- elasticsearch +#echo "triggered on elasticsearch update or install" +/sbin/service elasticsearch stop +python2.6 /opt/fff/setupmachine.py restore,elasticsearch +python2.6 /opt/fff/setupmachine.py elasticsearch,web $params +#update permissions in case new rpm changed uid/guid +chown -R elasticsearch:elasticsearch /var/log/elasticsearch +chown -R elasticsearch:elasticsearch /var/lib/elasticsearch + +/opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname1 > /dev/null +/opt/fff/esplugins/install.sh /usr/share/elasticsearch $pluginfile1 $pluginname1 + +/opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname2 > /dev/null +/opt/fff/esplugins/install.sh /usr/share/elasticsearch $pluginfile2 $pluginname2 + +/opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname3 > /dev/null +/opt/fff/esplugins/install.sh /usr/share/elasticsearch $pluginfile3 $pluginname3 + +/opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname4 > /dev/null +/opt/fff/esplugins/install.sh /usr/share/elasticsearch $pluginfile4 $pluginname4 + +chkconfig --del elasticsearch +chkconfig --add elasticsearch +chkconfig --add httpd +#todo:kill java process if running to have clean restart +/sbin/service elasticsearch start +/sbin/service httpd restart || true + +%preun + +if [ \$1 == 0 ]; then + + chkconfig --del fffmeta + chkconfig --del elasticsearch + chkconfig --del httpd + + /sbin/service elasticsearch stop || true + /opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname1 || true + /opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname2 || true + /opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname3 || true + /opt/fff/esplugins/uninstall.sh /usr/share/elasticsearch $pluginname4 || true + /sbin/service httpd stop || true + + + python2.6 /opt/fff/setupmachine.py 
restore,elasticsearch +fi + +#%verifyscript + +EOF + +rpmbuild --target noarch --define "_topdir `pwd`/RPMBUILD" -bb fffmeta-tribe.spec + diff --git a/scripts/unmountloopfs.sh b/scripts/unmountloopfs.sh new file mode 100755 index 0000000..7079446 --- /dev/null +++ b/scripts/unmountloopfs.sh @@ -0,0 +1,74 @@ +#!/bin/bash +if [ -n "$1" ]; then + if [ -d $1 ]; then + + basedir=`readlink -e $1` + umask 0 + points=`mount | grep $basedir/ | grep /dev/loop | awk '{print $3}'` + imgs=`mount | grep $basedir/ | grep /dev/loop | awk '{print $1}'` + pointarr=( $points ) + imgarr=( $imgs ) + + len=${#pointarr[@]} + len2=${#imgarr[@]} + if [[ $len == 0 ]]; then + exit 0 + fi + max=$((len)) + + for i in $(seq 0 1 $max) + do + if [ $i == $max ]; then continue; fi + point=${pointarr[$i]} + image=${imgarr[$i]} + #protect from dangerous action + if [ $point == "/" ]; then continue; fi + if [ $point == "//" ]; then continue; fi + if [ $point == "/fff" ]; then continue; fi + if [ $point == "/fff/" ]; then continue; fi + if [ $point == "/fff/ramdisk" ]; then continue; fi + if [ $point == "/fff/ramdisk/" ]; then continue; fi + if [ $point == "fff/ramdisk" ]; then continue; fi + if [ $point == "fff/ramdisk/" ]; then continue; fi + + echo "found mountpoint $point $image" + #kill any processes that might use the mount point and remove from NFS + fuser -km $point + #unmunt loop device + sleep 0.2 + exportfs -u *:$point + umount $point + if [ $? != 0 ]; then + sleep 0.1 + fuser -km $point + sleep 0.2 + exportfs -u *:$point + umount $point + if [ $? != 0 ]; then + echo "Unsuccessful unmount of $point !" + exit 1 + fi + fi + + #deleting mount point + exportfs -u *:$point + rm -rf $point + if [ $? != 0 ]; then + echo "Unsuccessful delete of unmounted mount point $point !" + exit 2 + fi + + #remove image + chmod 755 $image + rm -rf $image + if [ $? != 0 ]; then + echo "Unsuccessful delete of image file $image" + exit 3 + fi + done + exit 0 + else + echo "base directory not found!" + fi +fi +exit 1 diff --git a/test/crashtest.py b/test/crashtest.py index 52d8d46..72b6d49 100644 --- a/test/crashtest.py +++ b/test/crashtest.py @@ -88,7 +88,7 @@ def process(self): dirname = sys.argv[1] dirname = os.path.basename(os.path.normpath(dirname)) watchDir = os.path.join(conf.watch_directory,dirname) - outputDir = conf.micromerge_output + #outputDir = conf.micromerge_output @@ -119,4 +119,4 @@ def process(self): notifier.stop() print "Quit" - sys.exit(0) \ No newline at end of file + sys.exit(0)
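
The tribe branch added to setupmachine.py above builds the discovery.zen.ping.unicast.hosts value by appending ".cms" to every BU data address and skipping entries that are on the ignore list or cannot be resolved via DNS. The standalone sketch below (Python 2.6 style, as used throughout the repository) only illustrates that filtering; the host names are placeholders, and the real code writes the resulting string into elasticsearch.yml through FileManager.

    # Illustrative sketch of the unicast-host string assembled for the es-tribe
    # cluster; the example hosts are made up and will not resolve.
    import socket

    def unicast_hosts(bu_data_addr, ignore_list):
        entries = []
        for bu in bu_data_addr:
            if bu in ignore_list:
                continue
            try:
                # hosts without a resolvable .cms alias are left out
                socket.gethostbyname_ex(bu + '.cms')
            except Exception:
                print "skipping", bu, "- unable to lookup IP address"
                continue
            entries.append('"' + bu + '.cms' + '"')
        return '[' + ','.join(entries) + ']'

    if __name__ == '__main__':
        print unicast_hosts(['bu-example-01', 'bu-example-02'], ['bu-example-02'])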
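
On a BU, setupmachine.py now loops over all instances assigned to the host (with "main" moved to the front so it keeps the default port), writing one hltd config file per instance with its own watch, output and log directories and a CGI port offset from the base port 9000. The sketch below shows only that bookkeeping, assuming "main", if present, is already first in the list; FileManager, getInstances and the actual config writing are the patch's own code and are not reproduced here.

    # Illustrative per-instance layout for a multi-instance BU; returns, for each
    # instance, the config file, directories and CGI port that setupmachine.py
    # would register. Purely a sketch, not the installer itself.
    import os

    def instance_layout(instances, cgibase=9000):
        layout = []
        for idx, instance in enumerate(instances):
            watch_dir = '/fff/ramdisk'
            out_dir = '/fff/output'
            log_dir = '/var/log/hltd'
            cfile = '/etc/hltd.conf'
            if instance != 'main':
                # non-main instances get their own config file and subdirectories
                cfile = '/etc/hltd-' + instance + '.conf'
                watch_dir = os.path.join(watch_dir, instance)
                out_dir = os.path.join(out_dir, instance)
                log_dir = os.path.join(log_dir, instance)
            # cgi_port = base + index, cgi_instance_port_offset = index
            layout.append((instance, cfile, watch_dir, out_dir, log_dir,
                           cgibase + idx, idx))
        return layout

    if __name__ == '__main__':
        for entry in instance_layout(['main', 'testing']):
            print entry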
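
The ramdisk area of a non-main instance lives on a loop-mounted image file created by the new scripts/makeloopfs.sh (arguments: base directory, instance name, size in MB), while scripts/unmountloopfs.sh first tears down any existing loop mounts under the base directory; both scripts need root and report failures through non-zero exit codes. Below is a hedged sketch of the caller side, mirroring the subprocess calls added in setupmachine.py; the instance name and size are made-up examples, real sizes come from the host's entry in instances.input.

    # Sketch of driving the new loopback scripts the way setupmachine.py does;
    # must run as root, and the 'testing'/20000 values are examples only.
    import subprocess

    def rebuild_instance_ramdisks(instances_and_sizes, ramdisk='/fff/ramdisk'):
        # tear down any stale loop-mounted instance areas under the ramdisk first
        subprocess.check_call(['/opt/hltd/scripts/unmountloopfs.sh', ramdisk])
        for instance, size_mb in instances_and_sizes:
            # create <ramdisk>/<instance>.img, mount it on <ramdisk>/<instance>
            # and export it over NFS
            subprocess.check_call(['/opt/hltd/scripts/makeloopfs.sh', ramdisk,
                                   instance, str(size_mb)])

    if __name__ == '__main__':
        try:
            rebuild_instance_ramdisks([('testing', 20000)])
        except subprocess.CalledProcessError, err:
            print 'failed to configure loopback device mount in ramdisk', err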
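
The soap2file daemon is reworked into a Daemon2-based service with its own init script; it only runs when soap2file_port in hltd.conf is non-zero (the BU setup above writes 8010 for hosts in the dqm/ed lists, the daq2val cluster or the VM environment, and 0 elsewhere), it drops privileges to the configured user, and it now exposes renamePath alongside writeToFile and createDirectory. A hedged client-side sketch using SOAPpy against a host where the service is enabled; the paths are made-up examples and each call returns a status string from the server-side helper.

    # Minimal SOAPpy client sketch for the soap2file service; assumes the service
    # is enabled (soap2file_port = 8010) on this host. Paths are examples only.
    import os
    import SOAPpy

    server = SOAPpy.SOAPProxy('http://%s:8010/' % os.uname()[1])
    print server.createDirectory('/fff/ramdisk/example_dir')
    print server.renamePath('/fff/ramdisk/example_dir',
                            '/fff/ramdisk/example_dir_renamed')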