diff --git a/ConfigurationSystem/Client/Helpers/Operations.py b/ConfigurationSystem/Client/Helpers/Operations.py index e0560900dfd..a2953be2e78 100644 --- a/ConfigurationSystem/Client/Helpers/Operations.py +++ b/ConfigurationSystem/Client/Helpers/Operations.py @@ -9,7 +9,7 @@ class Operations( object ): __cache = {} __cacheVersion = 0 - __cacheLock = LockRing.LockRing().getLock( "CSOperations.cache" ) + __cacheLock = LockRing.LockRing().getLock() def __init__( self, vo = False, group = False, setup = False ): self.__uVO = vo diff --git a/ConfigurationSystem/Client/Helpers/Registry.py b/ConfigurationSystem/Client/Helpers/Registry.py index 39880319c62..9da94c0b631 100644 --- a/ConfigurationSystem/Client/Helpers/Registry.py +++ b/ConfigurationSystem/Client/Helpers/Registry.py @@ -25,25 +25,31 @@ def getDNForUsername( username ): return S_OK( dnList ) return S_ERROR( "No DN found for user %s" % username ) -def getGroupsForUser( username ): +def getGroupsForDN( dn ): + retVal = getUsernameForDN( dn ) + if not retVal[ 'OK' ]: + return retVal + return getGroupsForUser( retVal[ 'Value' ] ) + +def __getGroupsWithProperty( property,value ): retVal = gConfig.getSections( "%s/Groups" % gBaseSecuritySection ) if not retVal[ 'OK' ]: return retVal groupsList = retVal[ 'Value' ] - userGroups = [] + groups = [] for group in groupsList: - if username in gConfig.getValue( "%s/Groups/%s/Users" % ( gBaseSecuritySection, group ), [] ): - userGroups.append( group ) - if not userGroups: - return S_ERROR( "No groups found for user %s" % username ) - userGroups.sort() - return S_OK( userGroups ) + if value in gConfig.getValue( "%s/Groups/%s/%s" % ( gBaseSecuritySection, group, property ), [] ): + groups.append( group ) + if not groups: + return S_ERROR( "No groups found for %s=%s" % ( property,value ) ) + groups.sort() + return S_OK( groups ) -def getGroupsForDN( dn ): - retVal = getUsernameForDN( dn ) - if not retVal[ 'OK' ]: - return retVal - return getGroupsForUser( retVal[ 'Value' ] ) +def getGroupsForUser( username ): + return __getGroupsWithProperty( 'Users',username ) + +def getGroupsForVO( vo ): + return __getGroupsWithProperty( 'VO',vo ) def getHostnameForDN( dn ): retVal = gConfig.getSections( "%s/Hosts" % gBaseSecuritySection ) diff --git a/ConfigurationSystem/Client/Helpers/Resources.py b/ConfigurationSystem/Client/Helpers/Resources.py index d887c8c277c..3b46eef085e 100644 --- a/ConfigurationSystem/Client/Helpers/Resources.py +++ b/ConfigurationSystem/Client/Helpers/Resources.py @@ -59,6 +59,70 @@ def getStorageElementOptions( seName ): return S_OK( options ) +def getQueues( siteList=None, ceList=None, ceTypeList=None, community=None, mode=None ): + """ Get CE/queue options according to the specified selection + """ + + result = gConfig.getSections('/Resources/Sites') + if not result['OK']: + return result + + resultDict = {} + + grids = result['Value'] + for grid in grids: + result = gConfig.getSections( '/Resources/Sites/%s' % grid ) + if not result['OK']: + continue + sites = result['Value'] + for site in sites: + if siteList is not None and not site in siteList: + continue + if community: + comList = gConfig.getValue( '/Resources/Sites/%s/%s/VO' % (grid,site), [] ) + if comList and not community in comList: + continue + result = gConfig.getSections( '/Resources/Sites/%s/%s/CEs' % (grid,site) ) + if not result['OK']: + continue + ces = result['Value'] + for ce in ces: + if mode: + ceMode = gConfig.getValue( '/Resources/Sites/%s/%s/CEs/%s/SubmissionMode' % (grid,site,ce), 'InDirect' ) + if not ceMode or ceMode != mode: + continue + if ceTypeList: + ceType = gConfig.getValue( '/Resources/Sites/%s/%s/CEs/%s/CEType' % (grid,site,ce), None ) + if not ceType or not ceType in ceTypeList: + continue + if community: + comList = gConfig.getValue( '/Resources/Sites/%s/%s/CEs/%s/VO' % (grid,site,ce), [] ) + if comList and not community in comList: + continue + result = gConfig.getOptionsDict( '/Resources/Sites/%s/%s/CEs/%s' % (grid,site,ce) ) + if not result['OK']: + continue + ceOptionsDict = result['Value'] + result = gConfig.getSections( '/Resources/Sites/%s/%s/CEs/%s/Queues' % (grid,site,ce) ) + if not result['OK']: + continue + queues = result['Value'] + for queue in queues: + if community: + comList = gConfig.getValue( '/Resources/Sites/%s/%s/CEs/%s/Queues/%s/VO' % (grid,site,ce,queue), [] ) + if comList and not community in comList: + continue + resultDict.setdefault(site,{}) + resultDict[site].setdefault(ce,ceOptionsDict) + resultDict[site][ce].setdefault('Queues',{}) + result = gConfig.getOptionsDict( '/Resources/Sites/%s/%s/CEs/%s/Queues/%s' % (grid,site,ce,queue) ) + if not result['OK']: + continue + queueOptionsDict = result['Value'] + resultDict[site][ce]['Queues'][queue] = queueOptionsDict + + return S_OK(resultDict) + def getCatalogPath(catalogName): """ Return the configuration path of the description for a a given catalog """ diff --git a/ConfigurationSystem/private/ConfigurationData.py b/ConfigurationSystem/private/ConfigurationData.py index 6ef51b818a6..34495fc2a13 100755 --- a/ConfigurationSystem/private/ConfigurationData.py +++ b/ConfigurationSystem/private/ConfigurationData.py @@ -17,9 +17,9 @@ class ConfigurationData: def __init__( self, loadDefaultCFG = True ): lr = LockRing() - self.threadingEvent = lr.getEvent( "configdata.dangerZone" ) + self.threadingEvent = lr.getEvent() self.threadingEvent.set() - self.threadingLock = lr.getLock( "configdata.update" ) + self.threadingLock = lr.getLock() self.runningThreadsNumber = 0 self.compressedConfigurationData = "" self.configurationPath = "/DIRAC/Configuration" diff --git a/ConfigurationSystem/private/Refresher.py b/ConfigurationSystem/private/Refresher.py index 9a042eb6574..1ebddac7edd 100755 --- a/ConfigurationSystem/private/Refresher.py +++ b/ConfigurationSystem/private/Refresher.py @@ -39,7 +39,7 @@ def __init__( self ): self.__callbacks = { 'newVersion' : [] } gEventDispatcher.registerEvent( "CSNewVersion" ) random.seed() - self.__triggeredRefreshLock = LockRing.LockRing().getLock( "Refresher.triggerUpdate" ) + self.__triggeredRefreshLock = LockRing.LockRing().getLock() def disable( self ): self.__refreshEnabled = False diff --git a/Core/DISET/private/GatewayService.py b/Core/DISET/private/GatewayService.py index 57cade8a8dd..f6012e7d4c2 100644 --- a/Core/DISET/private/GatewayService.py +++ b/Core/DISET/private/GatewayService.py @@ -387,7 +387,7 @@ def forwardListBulk( self, clientFileHelper, params ): class MessageForwarder: def __init__( self, msgBroker ): - self.__inOutLock = LockRing().getLock( "DISET.MsgFwd.IO" ) + self.__inOutLock = LockRing().getLock() self.__msgBroker = msgBroker self.__byClient = {} self.__srvToCliTrid = {} diff --git a/Core/DISET/private/Transports/SSL/SocketInfo.py b/Core/DISET/private/Transports/SSL/SocketInfo.py index adb089d1a9a..fad98b91367 100755 --- a/Core/DISET/private/Transports/SSL/SocketInfo.py +++ b/Core/DISET/private/Transports/SSL/SocketInfo.py @@ -16,7 +16,7 @@ class SocketInfo: __cachedCAsCRLs = False __cachedCAsCRLsLastLoaded = 0 - __cachedCAsCRLsLoadLock = LockRing().getLock( "DISET.SocketInfo.CAs" ) + __cachedCAsCRLsLoadLock = LockRing().getLock() def __init__( self, infoDict, sslContext = False ): @@ -162,7 +162,7 @@ def __getCAStore( self ): if fileName.find( ".0" ) == len( fileName ) - 2: gLogger.exception( "LOADING %s" % filePath ) if 'IgnoreCRLs' not in self.infoDict or not self.infoDict[ 'IgnoreCRLs' ]: - #Try to load CRL + #Try to load CRL try: crl = GSI.crypto.load_crl( GSI.crypto.FILETYPE_PEM, pemData ) if crl.has_expired(): diff --git a/Core/DISET/private/Transports/SSL/ThreadSafeSSLObject.py b/Core/DISET/private/Transports/SSL/ThreadSafeSSLObject.py index 5514e4d5883..c028f80e4c8 100755 --- a/Core/DISET/private/Transports/SSL/ThreadSafeSSLObject.py +++ b/Core/DISET/private/Transports/SSL/ThreadSafeSSLObject.py @@ -6,7 +6,7 @@ from DIRAC.FrameworkSystem.Client.Logger import gLogger class ThreadSafeSSLObject: - cLock = LockRing().getLock( "DISET.TSSSLObj" ) + cLock = LockRing().getLock() def __init__( self, object ): self.cObject = object def __getattr__( self, name ): diff --git a/Core/DISET/private/Transports/SSLTransport.py b/Core/DISET/private/Transports/SSLTransport.py index f8a74d7aace..613f19ccde1 100755 --- a/Core/DISET/private/Transports/SSLTransport.py +++ b/Core/DISET/private/Transports/SSLTransport.py @@ -18,7 +18,7 @@ class SSLTransport( BaseTransport ): - __readWriteLock = LockRing().getLock( "DISET.SSLTrans.RW" ) + __readWriteLock = LockRing().getLock() def __init__( self, *args, **kwargs ): self.__writesDone = 0 diff --git a/Core/Utilities/InstallTools.py b/Core/Utilities/InstallTools.py index eb6d6587f3e..3a9943079d7 100644 --- a/Core/Utilities/InstallTools.py +++ b/Core/Utilities/InstallTools.py @@ -71,7 +71,7 @@ from DIRAC.Core.Utilities.CFG import CFG from DIRAC.Core.Utilities.Version import getVersion from DIRAC.ConfigurationSystem.Client.CSAPI import CSAPI -from DIRAC.ConfigurationSystem.Client.Helpers import cfgPath, cfgPathToList, cfgInstallPath, cfgInstallSection, ResourcesDefaults +from DIRAC.ConfigurationSystem.Client.Helpers import cfgPath, cfgPathToList, cfgInstallPath, cfgInstallSection, ResourcesDefaults, CSGlobals from DIRAC.Core.Security.Properties import * # On command line tools this can be set to True to abort after the first error. @@ -464,7 +464,8 @@ def addOptionToDiracCfg( option, value ): return S_ERROR( 'Could not merge %s=%s with local configuration' % ( option, value ) ) def addDefaultOptionsToCS( gConfig, componentType, systemName, - component, extensions, mySetup = setup, overwrite = False ): + component, extensions, mySetup = setup, + specialOptions={}, overwrite = False ): """ Add the section with the component options to the CS """ system = systemName.replace( 'System', '' ) @@ -493,9 +494,7 @@ def addDefaultOptionsToCS( gConfig, componentType, systemName, return S_OK( 'Component options already exist' ) # Add the component options now - # print "AT >>>", componentType, system, component, compInstance, extensions - result = getComponentCfg( componentType, system, component, compInstance, extensions ) - # print result + result = getComponentCfg( componentType, system, component, compInstance, extensions, specialOptions ) if not result['OK']: return result compCfg = result['Value'] @@ -551,7 +550,7 @@ def addCfgToComponentCfg( componentType, systemName, component, cfg ): gLogger.error( error ) return S_ERROR( error ) -def getComponentCfg( componentType, system, component, compInstance, extensions ): +def getComponentCfg( componentType, system, component, compInstance, extensions, specialOptions={} ): """ Get the CFG object of the component configuration """ @@ -562,9 +561,11 @@ def getComponentCfg( componentType, system, component, compInstance, extensions sectionName = 'Executors' extensionsDIRAC = [ x + 'DIRAC' for x in extensions ] + extensions + componentModule = component + if "Module" in specialOptions: + componentModule = specialOptions['Module'] compCfg = CFG() - for ext in extensionsDIRAC + ['DIRAC']: cfgTemplatePath = os.path.join( rootPath, ext, '%sSystem' % system, 'ConfigTemplate.cfg' ) if os.path.exists( cfgTemplatePath ): @@ -573,7 +574,16 @@ def getComponentCfg( componentType, system, component, compInstance, extensions loadCfg = CFG() loadCfg.loadFromFile( cfgTemplatePath ) compCfg = loadCfg.mergeWith( compCfg ) - + try: + compCfg = loadCfg[sectionName][componentModule] + # section found + break + except Exception: + error = 'Can not find %s in template' % cfgPath( sectionName, componentModule ) + gLogger.error( error ) + if exitOnError: + DIRAC.exit( -1 ) + return S_ERROR( error ) compPath = cfgPath( sectionName, component ) if not compCfg.isSection( compPath ): @@ -592,6 +602,9 @@ def getComponentCfg( componentType, system, component, compInstance, extensions cfg = __getCfg( sectionPath ) cfg.createNewSection( cfgPath( sectionPath, component ), '', compCfg ) + for option,value in specialOptions.items(): + cfg.setOption( cfgPath( sectionPath, component, option ), value ) + # Add the service URL if componentType == "service": port = compCfg.getOption( 'Port' , 0 ) @@ -707,6 +720,7 @@ def getSoftwareComponents( extensions ): # The Gateway does not need a handler services = { 'Framework' : ['Gateway'] } agents = {} + executors = {} for extension in ['DIRAC'] + [ x + 'DIRAC' for x in extensions]: if not os.path.exists( os.path.join( rootPath, extension ) ): @@ -746,6 +760,7 @@ def getSoftwareComponents( extensions ): resultDict = {} resultDict['Services'] = services resultDict['Agents'] = agents + resultDict['Executors'] = executors return S_OK( resultDict ) def getInstalledComponents(): @@ -756,6 +771,7 @@ def getInstalledComponents(): services = {} agents = {} + executors = {} systemList = os.listdir( runitDir ) for system in systemList: systemDir = os.path.join( runitDir, system ) @@ -774,12 +790,17 @@ def getInstalledComponents(): if not agents.has_key( system ): agents[system] = [] agents[system].append( component ) + elif body.find( 'dirac-executor' ) != -1: + if not executors.has_key( system ): + executors[system] = [] + executors[system].append( component ) except IOError: pass resultDict = {} resultDict['Services'] = services resultDict['Agents'] = agents + resultDict['Executors'] = executors return S_OK( resultDict ) def getSetupComponents(): @@ -789,6 +810,7 @@ def getSetupComponents(): services = {} agents = {} + executors = {} if not os.path.isdir( startDir ): return S_ERROR( 'Startup Directory does not exit: %s' % startDir ) componentList = os.listdir( startDir ) @@ -808,12 +830,18 @@ def getSetupComponents(): if not agents.has_key( system ): agents[system] = [] agents[system].append( agent ) + elif body.find( 'dirac-executor' ) != -1: + system, executor = component.split( '_' ) + if not executorss.has_key( system ): + executors[system] = [] + executors[system].append( agent ) except IOError: pass resultDict = {} resultDict['Services'] = services resultDict['Agents'] = agents + resultDict['Executors'] = executors return S_OK( resultDict ) def getStartupComponentStatus( componentTupleList ): @@ -882,6 +910,18 @@ def getStartupComponentStatus( componentTupleList ): return S_OK( componentDict ) +def getComponentModule( gConfig,system,component,compType ): + """ Get the component software module + """ + setup = CSGlobals.getSetup() + instance = gConfig.getValue( cfgPath( 'DIRAC', 'Setups', setup, system ),'' ) + if not instance: + return S_OK(component) + module = gConfig.getValue( cfgPath( 'Systems',system,instance,compType,component,'Module' ),'' ) + if not module: + module = component + return S_OK(module) + def getOverallStatus( extensions ): """ Get the list of all the components ( services and agents ) set up for running with runsvdir in startup directory @@ -908,8 +948,8 @@ def getOverallStatus( extensions ): runitDict = result['Value'] # Collect the info now - resultDict = {'Services':{}, 'Agents':{}} - for compType in ['Services', 'Agents']: + resultDict = {'Services':{}, 'Agents':{}, 'Executors':{} } + for compType in ['Services', 'Agents', 'Executors' ]: if softDict.has_key( 'Services' ): for system in softDict[compType]: resultDict[compType][system] = {} @@ -944,6 +984,36 @@ def getOverallStatus( extensions ): #print str(x) pass + # Installed components can be not the same as in the software list + if installedDict.has_key( 'Services' ): + for system in installedDict[compType]: + for component in installedDict[compType][system]: + if compType in resultDict: + if system in resultDict[compType]: + if component in resultDict[compType][system]: + continue + resultDict[compType][system][component] = {} + resultDict[compType][system][component]['Setup'] = False + resultDict[compType][system][component]['Installed'] = True + resultDict[compType][system][component]['RunitStatus'] = 'Unknown' + resultDict[compType][system][component]['Timeup'] = 0 + resultDict[compType][system][component]['PID'] = 0 + # TODO: why do we need a try here? + try: + if component in setupDict[compType][system]: + resultDict[compType][system][component]['Setup'] = True + except Exception: + pass + try: + compDir = system + '_' + component + if runitDict.has_key( compDir ): + resultDict[compType][system][component]['RunitStatus'] = runitDict[compDir]['RunitStatus'] + resultDict[compType][system][component]['Timeup'] = runitDict[compDir]['Timeup'] + resultDict[compType][system][component]['PID'] = runitDict[compDir]['PID'] + except Exception, x: + #print str(x) + pass + return S_OK( resultDict ) def checkComponentSoftware( componentType, system, component, extensions ): @@ -1317,11 +1387,14 @@ def _createRunitLog( runitCompDir ): os.chmod( logRunFile, gDefaultPerms ) -def installComponent( componentType, system, component, extensions ): +def installComponent( componentType, system, component, extensions, componentModule='' ): """ Install runit directory for the specified component """ # Check that the software for the component is installed - if not checkComponentSoftware( componentType, system, component, extensions )['OK']: + cModule = componentModule + if not cModule: + cModule = component + if not checkComponentSoftware( componentType, system, cModule, extensions )['OK']: error = 'Software for %s %s/%s is not installed' % ( componentType, system, component ) if exitOnError: gLogger.error( error ) @@ -1357,14 +1430,12 @@ def installComponent( componentType, system, component, extensions ): # [ "%(componentType)s" = "agent" ] && renice 20 -p $$ # -exec python %(DIRAC)s/DIRAC/Core/scripts/dirac-%(componentType)s.py %(system)s/%(component)s %(componentCfg)s -o LogLevel=%(logLevel)s < /dev/null +exec dirac-%(componentType)s %(system)s/%(component)s %(componentCfg)s < /dev/null """ % {'bashrc': os.path.join( instancePath, 'bashrc' ), - 'DIRAC': linkedRootPath, 'componentType': componentType, 'system' : system, 'component': component, - 'componentCfg': componentCfg, - 'logLevel': logLevel } ) + 'componentCfg': componentCfg } ) fd.close() os.chmod( runFile, gDefaultPerms ) @@ -1382,11 +1453,11 @@ def installComponent( componentType, system, component, extensions ): return S_OK( runitCompDir ) -def setupComponent( componentType, system, component, extensions ): +def setupComponent( componentType, system, component, extensions, componentModule='' ): """ Install and create link in startup """ - result = installComponent( componentType, system, component, extensions ) + result = installComponent( componentType, system, component, extensions, componentModule ) if not result['OK']: return result diff --git a/Core/Utilities/LockRing.py b/Core/Utilities/LockRing.py index 557e50c565c..912ed795499 100644 --- a/Core/Utilities/LockRing.py +++ b/Core/Utilities/LockRing.py @@ -17,10 +17,10 @@ def __init__( self ): self.__events = {} def __genName( self, container ): - name = md5( str( time.time() + random.random() ) ).hexdump() + name = md5( str( time.time() + random.random() ) ).hexdigest() retries = 10 while name in container and retries: - name = md5( str( time.time() + random.random() ) ).hexdump() + name = md5( str( time.time() + random.random() ) ).hexdigest() retries -= 1 return name @@ -60,11 +60,11 @@ def release( self, lockName ): return S_OK() def _openAll( self ): - """ - WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + """ + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING DO NOT USE EXCEPT IN JUST SPAWNED NEW CHILD PROCESSES!!!!!!!! NEVER IN THE PARENT PROCESS!!!!!! - WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING """ for lockName in self.__locks.keys(): try: @@ -73,11 +73,11 @@ def _openAll( self ): pass def _setAllEvents( self ): - """ - WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + """ + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING DO NOT USE EXCEPT IN JUST SPAWNED NEW CHILD PROCESSES!!!!!!!! NEVER IN THE PARENT PROCESS!!!!!! - WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING """ for evName in self.__events.keys(): try: diff --git a/Core/Utilities/ProcessPool.py b/Core/Utilities/ProcessPool.py index 9e1edbe218e..1dddd743d85 100644 --- a/Core/Utilities/ProcessPool.py +++ b/Core/Utilities/ProcessPool.py @@ -145,11 +145,6 @@ def __init__( self, pendingQueue, resultsQueue ): """ multiprocessing.Process.__init__( self ) self.daemon = True - if LockRing: - #Reset all locks - lr = LockRing() - lr._openAll() - lr._setAllEvents() self.__working = multiprocessing.Value( 'i', 0 ) self.__pendingQueue = pendingQueue self.__resultsQueue = resultsQueue @@ -170,12 +165,12 @@ def run( self ): :param self: self reference """ + if LockRing: + # Reset all locks + lr = LockRing() + lr._openAll() + lr._setAllEvents() while True: - if LockRing: - # Reset all locks - lr = LockRing() - lr._openAll() - lr._setAllEvents() try: task = self.__pendingQueue.get( block = True, timeout = 10 ) except Queue.Empty: @@ -185,10 +180,10 @@ def run( self ): self.__working.value = 1 try: task.process() + if task.hasCallback() or task.usePoolCallbacks(): + self.__resultsQueue.put( task, block = True ) finally: self.__working.value = 0 - if task.hasCallback() or task.usePoolCallbacks(): - self.__resultsQueue.put( task, block = True ) class BulletTask: """ dum-dum bullet """ @@ -250,6 +245,7 @@ def __call__( self ): self.__taskID = taskID self.__resultCallback = callback self.__exceptionCallback = exceptionCallback + self.__timeOut = 0 ## set time out self.setTimeOut( timeOut ) self.__done = False @@ -362,7 +358,6 @@ def doCallback( self ): if self.__done and not self.__exceptionRaised and self.__resultCallback: self.__resultCallback( self, self.__taskResult ) - def process( self ): """ execute task @@ -437,7 +432,9 @@ def __init__( self, minSize = 2, maxSize = 0, maxQueuedRequests = 10, self.__pendingQueue = multiprocessing.Queue( self.__maxQueuedRequests ) self.__resultsQueue = multiprocessing.Queue( 0 ) self.__prListLock = threading.Lock() - self.__workingProcessList = [] + + self.__workersDict = {} + self.__draining = False self.__bullet = BulletTask() self.__bulletCounter = 0 @@ -482,9 +479,7 @@ def getNumWorkingProcesses( self ): counter = 0 self.__prListLock.acquire() try: - for proc in self.__workingProcessList: - if proc.isWorking(): - counter += 1 + counter = len( [ pid for pid, worker in self.__workersDict.items() if worker.isWorking() ] ) finally: self.__prListLock.release() return counter @@ -497,9 +492,7 @@ def getNumIdleProcesses( self ): counter = 0 self.__prListLock.acquire() try: - for proc in self.__workingProcessList: - if not proc.isWorking(): - counter += 1 + counter = len( [ pid for pid, worker in self.__workersDict.items() if not worker.isWorking() ] ) finally: self.__prListLock.release() return counter @@ -516,21 +509,28 @@ def __spawnWorkingProcess( self ): :param self: self reference """ - self.__workingProcessList.append( WorkingProcess( self.__pendingQueue, self.__resultsQueue ) ) + self.__prListLock.acquire() + try: + worker = WorkingProcess( self.__pendingQueue, self.__resultsQueue ) + while worker.pid == None: + time.sleep(0.1) + self.__workersDict[ worker.pid ] = worker + finally: + self.__prListLock.release() def __killWorkingProcess( self ): """ suspend execution of WorkingProcesses exceeding queue limits :param self: self reference """ - try: - self.__pendingQueue.put( self.__bullet, block = True ) - except Queue.Full: - return S_ERROR( "Queue is full" ) self.__prListLock.acquire() try: self.__bulletCounter += 1 + self.__pendingQueue.put( self.__bullet, block = True ) + except Queue.Full: + self.__bulletCounter -= 1 finally: self.__prListLock.release() + self.__cleanDeadProcesses() def __cleanDeadProcesses( self ): @@ -538,13 +538,10 @@ def __cleanDeadProcesses( self ): ## check wounded processes self.__prListLock.acquire() try: - stillAlive = [] - for workingProcess in self.__workingProcessList: - if workingProcess.is_alive(): - stillAlive.append( workingProcess ) - else: + for pid, worker in self.__workersDict.items(): + if not worker.is_alive(): self.__bulletCounter -= 1 - self.__workingProcessList = stillAlive + del self.__workersDict[pid] finally: self.__prListLock.release() @@ -557,12 +554,12 @@ def __spawnNeededWorkingProcesses( self ): # If we are draining do not spawn processes if self.__draining: return - while len( self.__workingProcessList ) < self.__minSize: + while len( self.__workersDict ) < self.__minSize: self.__spawnWorkingProcess() while self.hasPendingTasks() and \ self.getNumIdleProcesses() == 0 and \ - len( self.__workingProcessList ) < self.__maxSize: + len( self.__workersDict ) < self.__maxSize: self.__spawnWorkingProcess() time.sleep( 0.1 ) @@ -572,7 +569,7 @@ def __killExceedingWorkingProcesses( self ): :param self: self reference """ self.__cleanDeadProcesses() - toKill = max( len( self.__workingProcessList ) - self.__maxSize, 0 ) + toKill = max( len( self.__workersDict ) - self.__maxSize, 0 ) while toKill: self.__killWorkingProcess() toKill = toKill - 1 @@ -588,19 +585,24 @@ def queueTask( self, task, blocking = True, usePoolCallbacks= False ): :param ProcessTask task: new task to execute :param bool blocking: flag to block if necessary and new empty slot is available (default = block) :param bool usePoolCallbacks: flag to trigger execution of pool callbacks (default = don't execute) - """ if not isinstance( task, ProcessTask ): raise TypeError( "Tasks added to the process pool must be ProcessTask instances" ) + if usePoolCallbacks and ( self.__poolCallback or self.__poolExceptionCallback ): + task.enablePoolCallbacks() + + self.__prListLock.acquire() try: - if usePoolCallbacks and ( self.__poolCallback or self.__poolExceptionCallback ): - task.enablePoolCallbacks() self.__pendingQueue.put( task, block = blocking ) except Queue.Full: + self.__prListLock.release() return S_ERROR( "Queue is full" ) + finally: + self.__prListLock.release() + self.__spawnNeededWorkingProcesses() # Throttle a bit to allow task state propagation - time.sleep( 0.01 ) + time.sleep( 0.1 ) return S_OK() def createAndQueueTask( self, @@ -697,18 +699,18 @@ def finalize( self, timeout = 10 ): :param self: self reference :param timeout: seconds to wait before killing """ - #Process all tasks + # Process all tasks self.processAllResults() - #Drain via bullets processes + # Drain via bullets processes self.__draining = True try: - bullets = len( self.__workingProcessList ) - self.__bulletCounter + bullets = len( self.__workersDict ) - self.__bulletCounter while bullets: self.__killWorkingProcess() bullets = bullets - 1 start = time.time() self.__cleanDeadProcesses() - while len( self.__workingProcessList ) > 0: + while len( self.__workersDict ) > 0: if timeout <= 0 or time.time() - start >= timeout: break time.sleep( 0.1 ) @@ -717,9 +719,10 @@ def finalize( self, timeout = 10 ): pass #self.__draining = False # terminate them as it should be done - for wp in self.__workingProcessList: - if wp.is_alive(): - wp.terminate() + for worker in self.__workersDict.values(): + if worker.is_alive(): + worker.terminate() + worker.join() self.__cleanDeadProcesses() # Kill 'em all!! self.__filicide() @@ -727,17 +730,13 @@ def finalize( self, timeout = 10 ): def __filicide( self ): """ Kill all children (processes :P) Kill'em all! ...and justice for all! """ - wpL = [ ( wp, 0 ) for wp in self.__workingProcessList ] - self.__workingProcessList = [] - while wpL: - wp, count = wpL.pop( 0 ) - if wp.pid == None: - if count > 5: - wp.terminate() - else: - wpL.append( ( wp, count + 1 ) ) - continue - os.kill( wp.pid, signal.SIGKILL ) + while self.__workersDict: + pid = self.__workersDict.keys().pop(0) + worker = self.__workersDict[pid] + if worker.is_alive(): + os.kill( worker.pid(), signal.SIGKILL ) + del self.__workersDict[pid] + def daemonize( self ): """ Make ProcessPool a finite being for opening and closing doors between chambers. diff --git a/Core/Utilities/ThreadSafe.py b/Core/Utilities/ThreadSafe.py index 849540ad345..b7588c67c3d 100755 --- a/Core/Utilities/ThreadSafe.py +++ b/Core/Utilities/ThreadSafe.py @@ -9,11 +9,10 @@ class Synchronizer: decorator making the call thread-safe""" def __init__( self, lockName = "", recursive = False ): + from DIRAC.Core.Utilities.LockRing import LockRing self.__lockName = lockName - if recursive: - self.__lock = threading.RLock() - else: - self.__lock = threading.Lock() + self.__lr = LockRing() + self.__lock = self.__lr.getLock( lockName, recursive = recursive ) def __call__( self, funcToCall ): def lockedFunc( *args, **kwargs ): @@ -27,10 +26,10 @@ def lockedFunc( *args, **kwargs ): print "UNLOCKING", self.__lockName self.__lock.release() return lockedFunc - + def lock(self): return self.__lock.acquire() - + def unlock(self): return self.__lock.release() @@ -40,7 +39,9 @@ class WORM: Write One - Read Many """ def __init__( self, maxReads = 10 ): - self.__lock = threading.Lock() + from DIRAC.Core.Utilities.LockRing import LockRing + self.__lr = LockRing() + self.__lock = self.__lr.getLock() self.__maxReads = maxReads self.__semaphore = threading.Semaphore( maxReads ) @@ -100,4 +101,4 @@ def __endReadZone( self ): End of danger zone. PRIVATE USE """ - self.__semaphore.release() \ No newline at end of file + self.__semaphore.release() diff --git a/Core/scripts/dirac-install-agent.py b/Core/scripts/dirac-install-agent.py index b710518d954..4398b6c03ae 100755 --- a/Core/scripts/dirac-install-agent.py +++ b/Core/scripts/dirac-install-agent.py @@ -23,7 +23,23 @@ def setOverwrite( opVal ): overwrite = True return S_OK() +module = '' +specialOptions = {} +def setModule( optVal ): + global specialOptions,module + specialOptions['Module'] = optVal + module = value + return S_OK() + +def setSpecialOption( optVal ): + global specialOptions + option,value = optVal.split('=') + specialOptions[option] = value + return S_OK() + Script.registerSwitch( "w", "overwrite", "Overwrite the configuration in the global CS", setOverwrite ) +Script.registerSwitch( "m:", "module=", "Python module name for the agent code", setModule ) +Script.registerSwitch( "p:", "parameter=", "Special agent option ", setSpecialOption ) Script.setUsageMessage( '\n'.join( [ __doc__.split( '\n' )[1], 'Usage:', ' %s [option|cfgfile] ... System Agent|System/Agent' % Script.scriptName, @@ -43,11 +59,13 @@ def setOverwrite( opVal ): agent = args[1] result = InstallTools.addDefaultOptionsToCS( gConfig, 'agent', system, agent, - getCSExtensions(), overwrite = overwrite ) + getCSExtensions(), + specialOptions=specialOptions, + overwrite = overwrite ) if not result['OK']: print "ERROR:", result['Message'] else: - result = InstallTools.installComponent( 'agent', system, agent, getCSExtensions() ) + result = InstallTools.installComponent( 'agent', system, agent, getCSExtensions(), module ) if not result['OK']: print "ERROR:", result['Message'] else: diff --git a/Core/scripts/dirac-install-executor.py b/Core/scripts/dirac-install-executor.py new file mode 100644 index 00000000000..ea199019b7c --- /dev/null +++ b/Core/scripts/dirac-install-executor.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +######################################################################## +# $HeadURL$ +# File : dirac-install-executor +# Author : Ricardo Graciani +######################################################################## +""" +Do the initial installation and configuration of a DIRAC service +""" +__RCSID__ = "$Id$" +# +from DIRAC.Core.Utilities import InstallTools +from DIRAC.ConfigurationSystem.Client.Helpers import getCSExtensions +# +from DIRAC import gConfig +InstallTools.exitOnError = True +# +from DIRAC.Core.Base import Script + +overwrite = False +def setOverwrite( opVal ): + global overwrite + overwrite = True + return S_OK() + +module = '' +specialOptions = {} +def setModule( optVal ): + global specialOptions,module + specialOptions['Module'] = optVal + module = value + return S_OK() + +def setSpecialOption( optVal ): + global specialOptions + option,value = optVal.split('=') + specialOptions[option] = value + return S_OK() + +Script.registerSwitch( "w", "overwrite", "Overwrite the configuration in the global CS", setOverwrite ) +Script.registerSwitch( "m:", "module=", "Python module name for the executor code", setModule ) +Script.registerSwitch( "p:", "parameter=", "Special executor option ", setSpecialOption ) +Script.setUsageMessage( '\n'.join( [ __doc__.split( '\n' )[1], + 'Usage:', + ' %s [option|cfgfile] ... System Service|System/Service' % Script.scriptName, + 'Arguments:', + ' System: Name of the DIRAC system (ie: WorkloadManagement)', + ' Service: Name of the DIRAC service (ie: Matcher)'] ) ) + +Script.parseCommandLine() +args = Script.getPositionalArgs() + +if len( args ) == 1: + args = args[0].split( '/' ) + +if len( args ) != 2: + Script.showHelp() + exit( -1 ) +# +system = args[0] +service = args[1] + +result = InstallTools.addDefaultOptionsToCS( gConfig, 'executor', system, service, + getCSExtensions(), + specialOptions=specialOptions, + overwrite = overwrite ) +if not result['OK']: + print "ERROR:", result['Message'] +else: + result = InstallTools.installComponent( 'service', system, service, getCSExtensions(), module ) + if not result['OK']: + print "ERROR:", result['Message'] + else: + print "Successfully installed executor %s in %s system" % ( service, system ) diff --git a/Core/scripts/dirac-install-service.py b/Core/scripts/dirac-install-service.py index 913fc97cf4c..cb265abd45b 100644 --- a/Core/scripts/dirac-install-service.py +++ b/Core/scripts/dirac-install-service.py @@ -23,7 +23,23 @@ def setOverwrite( opVal ): overwrite = True return S_OK() +module = '' +specialOptions = {} +def setModule( optVal ): + global specialOptions,module + specialOptions['Module'] = optVal + module = value + return S_OK() + +def setSpecialOption( optVal ): + global specialOptions + option,value = optVal.split('=') + specialOptions[option] = value + return S_OK() + Script.registerSwitch( "w", "overwrite", "Overwrite the configuration in the global CS", setOverwrite ) +Script.registerSwitch( "m:", "module=", "Python module name for the service code", setModule ) +Script.registerSwitch( "p:", "parameter=", "Special service option ", setSpecialOption ) Script.setUsageMessage( '\n'.join( [ __doc__.split( '\n' )[1], 'Usage:', ' %s [option|cfgfile] ... System Service|System/Service' % Script.scriptName, @@ -45,11 +61,13 @@ def setOverwrite( opVal ): service = args[1] result = InstallTools.addDefaultOptionsToCS( gConfig, 'service', system, service, - getCSExtensions(), overwrite = overwrite ) + getCSExtensions(), + specialOptions=specialOptions, + overwrite = overwrite ) if not result['OK']: print "ERROR:", result['Message'] else: - result = InstallTools.installComponent( 'service', system, service, getCSExtensions() ) + result = InstallTools.installComponent( 'service', system, service, getCSExtensions(), module ) if not result['OK']: print "ERROR:", result['Message'] else: diff --git a/DataManagementSystem/Agent/TransferAgent.py b/DataManagementSystem/Agent/TransferAgent.py index 46065c8f6f2..d4c84ee90b7 100755 --- a/DataManagementSystem/Agent/TransferAgent.py +++ b/DataManagementSystem/Agent/TransferAgent.py @@ -598,13 +598,6 @@ def executeFTS( self, requestDict ): ownerGroup["Value"] ) ) return S_OK( False ) - ## check request owner - #ownerDN = requestObj.getAttribute( "OwnerDN" ) - #if ownerDN["OK"] and ownerDN["Value"]: - # self.log.info("excuteFTS: request %s has its owner %s, can't use FTS" % ( requestDict["requestName"], - # ownerDN["Value"] ) ) - # return S_OK( False ) - ## check operation res = requestObj.getNumSubRequests( "transfer" ) if not res["OK"]: @@ -679,7 +672,7 @@ def schedule( self, requestDict ): requestDict = { "requestString" : str, "requestName" : str, "sourceServer" : str, - "executionOrder" : list, + "executionOrder" : int, "jobID" : int, "requestObj" : RequestContainer } @@ -695,14 +688,22 @@ def schedule( self, requestDict ): self.log.error( "schedule: Failed to get number of 'transfer' subrequests", res["Message"] ) return S_ERROR( "schedule: Failed to get number of 'transfer' subrequests" ) numberRequests = res["Value"] - self.log.info( "schedule: '%s' found with %s 'transfer' subrequest(s)" % ( requestName, - numberRequests ) ) + self.log.info( "schedule: request '%s'has got %s 'transfer' subrequest(s)" % ( requestName, + numberRequests ) ) for iSubRequest in range( numberRequests ): - self.log.info( "schedule: treating subrequest %s from '%s'." % ( iSubRequest, + self.log.info( "schedule: treating subrequest %s from '%s'" % ( iSubRequest, requestName ) ) subAttrs = requestObj.getSubRequestAttributes( iSubRequest, "transfer" )["Value"] + subRequestStatus = subAttrs["Status"] + #executionOrder = int(subAttrs["ExecutionOrder"]) if "ExecutionOrder" in subAttrs else 0 + #if executionOrder > requestDict["executionOrder"]: + # self.info.warn("schedule: skipping %s subrequest, executionOrder %s > request's executionOrder " % ( iSubRequest, + # executionOrder, + # requestDict["executionOrder"] ) ) + # continue + if subRequestStatus != "Waiting" : ## sub-request is already in terminal state self.log.info( "schedule: subrequest %s status is '%s', it won't be executed" % ( iSubRequest, @@ -792,8 +793,7 @@ def scheduleFiles( self, requestObj, index, subAttrs ): :param RequestContainer requestObj: request being processed :param dict subAttrs: subrequest's attributes """ - self.log.info( "scheduleFiles: *** FTS scheduling ***") - self.log.info( "scheduleFiles: processing subrequest %s" % index ) + self.log.info( "scheduleFiles: FTS scheduling, processing subrequest %s" % index ) ## get source SE sourceSE = subAttrs["SourceSE"] if subAttrs["SourceSE"] not in ( None, "None", "" ) else None ## get target SEs, no matter what's a type we need a list @@ -808,7 +808,7 @@ def scheduleFiles( self, requestObj, index, subAttrs ): if not subRequestFiles["OK"]: return subRequestFiles subRequestFiles = subRequestFiles["Value"] - self.log.info( "scheduleFiles: found %s files" % len( subRequestFiles ) ) + self.log.debug( "scheduleFiles: found %s files" % len( subRequestFiles ) ) ## collect not done LFNS notDoneLFNs = [] for subRequestFile in subRequestFiles: @@ -817,7 +817,7 @@ def scheduleFiles( self, requestObj, index, subAttrs ): notDoneLFNs.append( subRequestFile["LFN"] ) ## get subrequest files - self.log.info( "scheduleFiles: obtaining 'Waiting' files for %d subrequest" % index ) + self.log.debug( "scheduleFiles: obtaining 'Waiting' files for %d subrequest" % index ) files = self.collectFiles( requestObj, index, status = "Waiting" ) if not files["OK"]: self.log.debug("scheduleFiles: failed to get 'Waiting' files from subrequest", files["Message"] ) @@ -826,7 +826,7 @@ def scheduleFiles( self, requestObj, index, subAttrs ): waitingFiles, replicas, metadata = files["Value"] if not waitingFiles: - self.log.info("scheduleFiles: not 'Waiting' files found in this subrequest" ) + self.log.debug("scheduleFiles: not 'Waiting' files found in this subrequest" ) return S_OK( requestObj ) if not replicas or not metadata: @@ -1002,8 +1002,7 @@ def registerFiles( self, requestObj, index ): :param RequestContainer requestObj: request being processed :param dict subAttrs: subrequest's attributes """ - self.log.info( "registerFiles: *** failover registration *** ") - self.log.info( "registerFiles: obtaining all files in %d subrequest" % index ) + self.log.info( "registerFiles: failover registration, processing %s subrequest" % index ) subRequestFiles = requestObj.getSubRequestFiles( index, "transfer" ) if not subRequestFiles["OK"]: return subRequestFiles @@ -1169,7 +1168,7 @@ def determineReplicationTree( self, sourceSE, targetSEs, replicas, size, strateg self.sigma = float(strategy.split("_")[1]) self.log.debug("determineReplicationTree: new sigma %s" % self.sigma ) except ValueError: - self.log.warn("determineRepliactionTree: can't set new sigma value from '%s'" % strategy ) + self.log.warn("determineReplicationTree: can't set new sigma value from '%s'" % strategy ) if reStrategy.pattern in [ "MinimiseTotalWait", "DynamicThroughput" ]: replicasToUse = replicas.keys() if sourceSE == None else [ sourceSE ] tree = self.strategyDispatcher[ reStrategy ].__call__( replicasToUse, targetSEs ) diff --git a/DataManagementSystem/private/RequestAgentBase.py b/DataManagementSystem/private/RequestAgentBase.py index 5dfeb2ab1f5..243ac237637 100644 --- a/DataManagementSystem/private/RequestAgentBase.py +++ b/DataManagementSystem/private/RequestAgentBase.py @@ -135,6 +135,7 @@ def resetRequests( self ): :param self: self reference """ + self.info("resetRequest: will put %s back requests" % len(self.__requestHolder) ) for requestName, requestTuple in self.__requestHolder.items(): requestString, requestServer = requestTuple reset = self.requestClient().updateRequest( requestName, requestString, requestServer ) @@ -321,6 +322,8 @@ def execute( self ): requestDict = requestDict["Value"] requestDict["configPath"] = self.__configPath taskID = requestDict["requestName"] + self.log.info( "processPool tasks idle = %s working = %s" % ( self.processPool().getNumIdleProcesses(), + self.processPool().getNumWorkingProcesses() ) ) while True: if not self.processPool().getFreeSlots(): self.log.info("No free slots available in processPool, will wait a second to proceed...") @@ -342,17 +345,17 @@ def execute( self ): ## task created, a little time kick to proceed time.sleep( 0.1 ) break + return S_OK() def finalize( self ): - """ clean ending of one cycle execution + """ clean ending of maxcycle execution :param self: self reference """ ## finalize all processing if self.hasProcessPool(): - self.processPool().processAllResults() self.processPool().finalize() ## reset failover requests for further processing self.resetRequests() diff --git a/FrameworkSystem/Client/SystemAdministratorClientCLI.py b/FrameworkSystem/Client/SystemAdministratorClientCLI.py index 87e0c770014..a0e21cd5006 100644 --- a/FrameworkSystem/Client/SystemAdministratorClientCLI.py +++ b/FrameworkSystem/Client/SystemAdministratorClientCLI.py @@ -195,27 +195,29 @@ def do_show( self, args ): if not result['OK']: self.__errMsg( result['Message'] ) else: + fields = ["System",'Name','Module','Type','Setup','Installed','Runit','Uptime','PID'] + records = [] rDict = result['Value'] - print - print " System", ' ' * 20, 'Name', ' ' * 15, 'Type', ' ' * 13, 'Setup Installed Runit Uptime PID' - print '-' * 116 for compType in rDict: for system in rDict[compType]: for component in rDict[compType][system]: + record = [] if rDict[compType][system][component]['Installed']: - print system.ljust( 28 ), component.ljust( 28 ), compType.lower()[:-1].ljust( 7 ), + module = str( rDict[compType][system][component]['Module'] ) + record += [ system,component,module,compType.lower()[:-1]] if rDict[compType][system][component]['Setup']: - print 'SetUp'.rjust( 12 ), + record += ['Setup'] else: - print 'NotSetup'.rjust( 12 ), + record += ['NotSetup'] if rDict[compType][system][component]['Installed']: - print 'Installed'.rjust( 12 ), + record += ['Installed'] else: - print 'NotInstalled'.rjust( 12 ), - print str( rDict[compType][system][component]['RunitStatus'] ).ljust( 7 ), - print str( rDict[compType][system][component]['Timeup'] ).rjust( 7 ), - print str( rDict[compType][system][component]['PID'] ).rjust( 8 ), - print + record += ['NotInstalled'] + record += [str( rDict[compType][system][component]['RunitStatus'] )] + record += [str( rDict[compType][system][component]['Timeup'] )] + record += [str( rDict[compType][system][component]['PID'] )] + records.append(record) + printTable(fields,records) elif option == 'database' or option == 'databases': client = SystemAdministratorClient( self.host, self.port ) if not InstallTools.mysqlPassword: @@ -407,13 +409,26 @@ def do_install( self, args ): self.__errMsg( result['Message'] ) return print "Database %s from %s/%s installed successfully" % ( database, extension, system ) - elif option == "service" or option == "agent": + elif option in ["service","agent","executor"] : if len( argss ) < 2: print self.do_install.__doc__ return system = argss[0] - component = argss[1] + del argss[0] + component = argss[0] + del argss[0] + + specialOptions = {} + module = '' + for i in range(len(argss)): + if argss[i] == "-m": + specialOptions['Module'] = argss[i+1] + module = argss[i+1] + if argss[i] == "-p": + opt,value = argss[i+1].split('=') + specialOptions[opt] = value + client = SystemAdministratorClient( self.host, self.port ) # First need to update the CS # result = client.addDefaultOptionsToCS( option, system, component ) @@ -423,12 +438,13 @@ def do_install( self, args ): self.__errMsg( result['Message'] ) return hostSetup = result['Value']['Setup'] - result = InstallTools.addDefaultOptionsToCS( gConfig, option, system, component, getCSExtensions(), hostSetup ) + result = InstallTools.addDefaultOptionsToCS( gConfig, option, system, component, + getCSExtensions(), hostSetup, specialOptions ) if not result['OK']: self.__errMsg( result['Message'] ) return # Then we can install and start the component - result = client.setupComponent( option, system, component ) + result = client.setupComponent( option, system, component, module ) if not result['OK']: self.__errMsg( result['Message'] ) return diff --git a/FrameworkSystem/Service/SystemAdministratorHandler.py b/FrameworkSystem/Service/SystemAdministratorHandler.py index 15e97e9bc1e..4c723e7565b 100644 --- a/FrameworkSystem/Service/SystemAdministratorHandler.py +++ b/FrameworkSystem/Service/SystemAdministratorHandler.py @@ -61,7 +61,19 @@ def export_getOverallStatus( self ): """ Get the complete status information for the components in the given list """ - return InstallTools.getOverallStatus( getCSExtensions() ) + result = InstallTools.getOverallStatus( getCSExtensions() ) + if not result['OK']: + return result + statusDict = result['Value'] + for compType in statusDict: + for system in statusDict[compType]: + for component in statusDict[compType][system]: + result = InstallTools.getComponentModule( gConfig,system,component,compType ) + if not result['OK']: + statusDict[compType][system][component]['Module'] = "Unknown" + else: + statusDict[compType][system][component]['Module'] = result['Value'] + return S_OK(statusDict) types_getStartupComponentStatus = [ ListType ] def export_getStartupComponentStatus( self, componentTupleList ): @@ -71,17 +83,17 @@ def export_getStartupComponentStatus( self, componentTupleList ): return InstallTools.getStartupComponentStatus( componentTupleList ) types_installComponent = [ StringTypes, StringTypes, StringTypes ] - def export_installComponent( self, componentType, system, component ): + def export_installComponent( self, componentType, system, component, componentModule='' ): """ Install runit directory for the specified component """ - return InstallTools.installComponent( componentType, system, component, getCSExtensions() ) + return InstallTools.installComponent( componentType, system, component, getCSExtensions(), componentModule ) types_setupComponent = [ StringTypes, StringTypes, StringTypes ] - def export_setupComponent( self, componentType, system, component ): + def export_setupComponent( self, componentType, system, component, componentModule='' ): """ Setup the specified component for running with the runsvdir daemon It implies installComponent """ - return InstallTools.setupComponent( componentType, system, component, getCSExtensions() ) + return InstallTools.setupComponent( componentType, system, component, getCSExtensions(), componentModule ) types_addDefaultOptionsToComponentCfg = [ StringTypes, StringTypes ] def export_addDefaultOptionsToComponentCfg( self, componentType, system, component ): diff --git a/Resources/Catalog/FileCatalogFactory.py b/Resources/Catalog/FileCatalogFactory.py index 55d07ee2c49..f1c74c7d4ba 100644 --- a/Resources/Catalog/FileCatalogFactory.py +++ b/Resources/Catalog/FileCatalogFactory.py @@ -64,7 +64,7 @@ def createCatalog( self, catalogName ): errStr = "Failed to instantiate catalog plug in" gLogger.error( errStr, moduleName ) return S_ERROR( errStr ) - self.log.info('Loaded module %sClient from %s' % ( catalogType, moduleRootPath ) ) + self.log.debug('Loaded module %sClient from %s' % ( catalogType, moduleRootPath ) ) return S_OK( catalog ) except Exception, x: errStr = "Failed to instantiate %s()" % ( moduleName ) @@ -73,4 +73,4 @@ def createCatalog( self, catalogName ): # Catalog module was not loaded return S_ERROR('No suitable client found for %s' % catalogName) - \ No newline at end of file + diff --git a/Resources/Computing/ComputingElementFactory.py b/Resources/Computing/ComputingElementFactory.py index ac4cd166f07..e2b06968b0b 100755 --- a/Resources/Computing/ComputingElementFactory.py +++ b/Resources/Computing/ComputingElementFactory.py @@ -28,6 +28,7 @@ def getCE(self, ceType='', ceName='', ceParametersDict={}): """This method returns the CE instance corresponding to the supplied CEUniqueID. If no corresponding CE is available, this is indicated. """ + self.log.verbose('Creating CE of %s type with the name %s' % (ceType,ceName) ) ceTypeLocal = ceType if not ceTypeLocal: ceTypeLocal = self.ceType @@ -35,7 +36,7 @@ def getCE(self, ceType='', ceName='', ceParametersDict={}): if not ceNameLocal: ceNameLocal = self.ceType ceConfigDict = getCEConfigDict( ceNameLocal ) - self.log.info('CEConfigDict',ceConfigDict) + self.log.verbose('CEConfigDict',ceConfigDict) if 'CEType' in ceConfigDict: ceTypeLocal = ceConfigDict['CEType'] if not ceTypeLocal: diff --git a/Resources/Computing/SSHComputingElement.py b/Resources/Computing/SSHComputingElement.py index ec92f9c9cb5..078fb066785 100644 --- a/Resources/Computing/SSHComputingElement.py +++ b/Resources/Computing/SSHComputingElement.py @@ -47,16 +47,15 @@ def __ssh_call( self, command, timeout ): ssh_newkey = 'Are you sure you want to continue connecting' child = pexpect.spawn( command, timeout = timeout ) - i = child.expect( [pexpect.TIMEOUT, ssh_newkey, pexpect.EOF, 'password: '] ) + i = child.expect( [pexpect.TIMEOUT, ssh_newkey, pexpect.EOF, 'password: ', 'Password: '] ) if i == 0: # Timeout return S_OK( ( -1, child.before, 'SSH login failed' ) ) elif i == 1: # SSH does not have the public key. Just accept it. child.sendline ( 'yes' ) - child.expect ( 'password: ' ) - i = child.expect( [pexpect.TIMEOUT, 'password: '] ) + i = child.expect( [pexpect.TIMEOUT, 'password: ', 'Password: '] ) if i == 0: # Timeout return S_OK( ( -1, child.before + child.after, 'SSH login failed' ) ) - elif i == 1: + elif i in [1,2]: child.sendline( password ) child.expect( pexpect.EOF ) return S_OK( ( 0, child.before, '' ) ) @@ -220,7 +219,7 @@ def submitJob( self, executableFile, proxy, numberOfJobs = 1 ): result = ssh.scpCall( 10, submitFile, '%s/%s' % ( self.executableArea, os.path.basename( submitFile ) ) ) # submit submitFile to the batch system executablePath = '%s/%s' % ( self.executableArea, os.path.basename( submitFile ) ) - cmd = "chdir %(execArea)s; chmod +x %(executable)s; %(executable)s 1>&2 > %(executable)s.out &" % \ + cmd = "chmod +x %(executable)s; %(executable)s 1>&2 > %(executable)s.out &" % \ {'numberOfJobs': numberOfJobs, 'executable': executablePath, 'execArea': self.executableArea} self.log.verbose( 'CE submission command: %s' % ( cmd ) ) @@ -232,7 +231,7 @@ def submitJob( self, executableFile, proxy, numberOfJobs = 1 ): self.log.debug( result ) return S_ERROR( result['Value'] ) else: - self.log.debug( 'Torque CE result OK' ) + self.log.debug( 'SSH CE result OK' ) batchIDList = result['Value'][1].strip().replace( '\r', '' ).split( '\n' ) diff --git a/Resources/Computing/pexpect.py b/Resources/Computing/pexpect.py new file mode 100644 index 00000000000..67c6389faa1 --- /dev/null +++ b/Resources/Computing/pexpect.py @@ -0,0 +1,1845 @@ +"""Pexpect is a Python module for spawning child applications and controlling +them automatically. Pexpect can be used for automating interactive applications +such as ssh, ftp, passwd, telnet, etc. It can be used to a automate setup +scripts for duplicating software package installations on different servers. It +can be used for automated software testing. Pexpect is in the spirit of Don +Libes' Expect, but Pexpect is pure Python. Other Expect-like modules for Python +require TCL and Expect or require C extensions to be compiled. Pexpect does not +use C, Expect, or TCL extensions. It should work on any platform that supports +the standard Python pty module. The Pexpect interface focuses on ease of use so +that simple tasks are easy. + +There are two main interfaces to Pexpect -- the function, run() and the class, +spawn. You can call the run() function to execute a command and return the +output. This is a handy replacement for os.system(). + +For example:: + + pexpect.run('ls -la') + +The more powerful interface is the spawn class. You can use this to spawn an +external child command and then interact with the child by sending lines and +expecting responses. + +For example:: + + child = pexpect.spawn('scp foo myname@host.example.com:.') + child.expect ('Password:') + child.sendline (mypassword) + +This works even for commands that ask for passwords or other input outside of +the normal stdio streams. + +Credits: Noah Spurrier, Richard Holden, Marco Molteni, Kimberley Burchett, +Robert Stone, Hartmut Goebel, Chad Schroeder, Erick Tryzelaar, Dave Kirby, Ids +vander Molen, George Todd, Noel Taylor, Nicolas D. Cesar, Alexander Gattin, +Geoffrey Marshall, Francisco Lourenco, Glen Mabey, Karthik Gurusamy, Fernando +Perez, Corey Minyard, Jon Cohen, Guillaume Chazarain, Andrew Ryan, Nick +Craig-Wood, Andrew Stone, Jorgen Grahn (Let me know if I forgot anyone.) + +Free, open source, and all that good stuff. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +Pexpect Copyright (c) 2008 Noah Spurrier +http://pexpect.sourceforge.net/ + +$Id: pexpect.py 507 2007-12-27 02:40:52Z noah $ +""" + +try: + import os, sys, time + import select + import string + import re + import struct + import resource + import types + import pty + import tty + import termios + import fcntl + import errno + import traceback + import signal +except ImportError, e: + raise ImportError (str(e) + """ + +A critical module was not found. Probably this operating system does not +support it. Pexpect is intended for UNIX-like operating systems.""") + +__version__ = '2.3' +__revision__ = '$Revision: 399 $' +__all__ = ['ExceptionPexpect', 'EOF', 'TIMEOUT', 'spawn', 'run', 'which', + 'split_command_line', '__version__', '__revision__'] + +# Exception classes used by this module. +class ExceptionPexpect(Exception): + + """Base class for all exceptions raised by this module. + """ + + def __init__(self, value): + + self.value = value + + def __str__(self): + + return str(self.value) + + def get_trace(self): + + """This returns an abbreviated stack trace with lines that only concern + the caller. In other words, the stack trace inside the Pexpect module + is not included. """ + + tblist = traceback.extract_tb(sys.exc_info()[2]) + #tblist = filter(self.__filter_not_pexpect, tblist) + tblist = [item for item in tblist if self.__filter_not_pexpect(item)] + tblist = traceback.format_list(tblist) + return ''.join(tblist) + + def __filter_not_pexpect(self, trace_list_item): + + """This returns True if list item 0 the string 'pexpect.py' in it. """ + + if trace_list_item[0].find('pexpect.py') == -1: + return True + else: + return False + +class EOF(ExceptionPexpect): + + """Raised when EOF is read from a child. This usually means the child has exited.""" + +class TIMEOUT(ExceptionPexpect): + + """Raised when a read time exceeds the timeout. """ + +##class TIMEOUT_PATTERN(TIMEOUT): +## """Raised when the pattern match time exceeds the timeout. +## This is different than a read TIMEOUT because the child process may +## give output, thus never give a TIMEOUT, but the output +## may never match a pattern. +## """ +##class MAXBUFFER(ExceptionPexpect): +## """Raised when a scan buffer fills before matching an expected pattern.""" + +def run (command, timeout=-1, withexitstatus=False, events=None, extra_args=None, logfile=None, cwd=None, env=None): + + """ + This function runs the given command; waits for it to finish; then + returns all output as a string. STDERR is included in output. If the full + path to the command is not given then the path is searched. + + Note that lines are terminated by CR/LF (\\r\\n) combination even on + UNIX-like systems because this is the standard for pseudo ttys. If you set + 'withexitstatus' to true, then run will return a tuple of (command_output, + exitstatus). If 'withexitstatus' is false then this returns just + command_output. + + The run() function can often be used instead of creating a spawn instance. + For example, the following code uses spawn:: + + from pexpect import * + child = spawn('scp foo myname@host.example.com:.') + child.expect ('(?i)password') + child.sendline (mypassword) + + The previous code can be replace with the following:: + + from pexpect import * + run ('scp foo myname@host.example.com:.', events={'(?i)password': mypassword}) + + Examples + ======== + + Start the apache daemon on the local machine:: + + from pexpect import * + run ("/usr/local/apache/bin/apachectl start") + + Check in a file using SVN:: + + from pexpect import * + run ("svn ci -m 'automatic commit' my_file.py") + + Run a command and capture exit status:: + + from pexpect import * + (command_output, exitstatus) = run ('ls -l /bin', withexitstatus=1) + + Tricky Examples + =============== + + The following will run SSH and execute 'ls -l' on the remote machine. The + password 'secret' will be sent if the '(?i)password' pattern is ever seen:: + + run ("ssh username@machine.example.com 'ls -l'", events={'(?i)password':'secret\\n'}) + + This will start mencoder to rip a video from DVD. This will also display + progress ticks every 5 seconds as it runs. For example:: + + from pexpect import * + def print_ticks(d): + print d['event_count'], + run ("mencoder dvd://1 -o video.avi -oac copy -ovc copy", events={TIMEOUT:print_ticks}, timeout=5) + + The 'events' argument should be a dictionary of patterns and responses. + Whenever one of the patterns is seen in the command out run() will send the + associated response string. Note that you should put newlines in your + string if Enter is necessary. The responses may also contain callback + functions. Any callback is function that takes a dictionary as an argument. + The dictionary contains all the locals from the run() function, so you can + access the child spawn object or any other variable defined in run() + (event_count, child, and extra_args are the most useful). A callback may + return True to stop the current run process otherwise run() continues until + the next event. A callback may also return a string which will be sent to + the child. 'extra_args' is not used by directly run(). It provides a way to + pass data to a callback function through run() through the locals + dictionary passed to a callback. """ + + if timeout == -1: + child = spawn(command, maxread=2000, logfile=logfile, cwd=cwd, env=env) + else: + child = spawn(command, timeout=timeout, maxread=2000, logfile=logfile, cwd=cwd, env=env) + if events is not None: + patterns = events.keys() + responses = events.values() + else: + patterns=None # We assume that EOF or TIMEOUT will save us. + responses=None + child_result_list = [] + event_count = 0 + while 1: + try: + index = child.expect (patterns) + if type(child.after) in types.StringTypes: + child_result_list.append(child.before + child.after) + else: # child.after may have been a TIMEOUT or EOF, so don't cat those. + child_result_list.append(child.before) + if type(responses[index]) in types.StringTypes: + child.send(responses[index]) + elif type(responses[index]) is types.FunctionType: + callback_result = responses[index](locals()) + sys.stdout.flush() + if type(callback_result) in types.StringTypes: + child.send(callback_result) + elif callback_result: + break + else: + raise TypeError ('The callback must be a string or function type.') + event_count = event_count + 1 + except TIMEOUT, e: + child_result_list.append(child.before) + break + except EOF, e: + child_result_list.append(child.before) + break + child_result = ''.join(child_result_list) + if withexitstatus: + child.close() + return (child_result, child.exitstatus) + else: + return child_result + +class spawn (object): + + """This is the main class interface for Pexpect. Use this class to start + and control child applications. """ + + def __init__(self, command, args=[], timeout=30, maxread=2000, searchwindowsize=None, logfile=None, cwd=None, env=None): + + """This is the constructor. The command parameter may be a string that + includes a command and any arguments to the command. For example:: + + child = pexpect.spawn ('/usr/bin/ftp') + child = pexpect.spawn ('/usr/bin/ssh user@example.com') + child = pexpect.spawn ('ls -latr /tmp') + + You may also construct it with a list of arguments like so:: + + child = pexpect.spawn ('/usr/bin/ftp', []) + child = pexpect.spawn ('/usr/bin/ssh', ['user@example.com']) + child = pexpect.spawn ('ls', ['-latr', '/tmp']) + + After this the child application will be created and will be ready to + talk to. For normal use, see expect() and send() and sendline(). + + Remember that Pexpect does NOT interpret shell meta characters such as + redirect, pipe, or wild cards (>, |, or *). This is a common mistake. + If you want to run a command and pipe it through another command then + you must also start a shell. For example:: + + child = pexpect.spawn('/bin/bash -c "ls -l | grep LOG > log_list.txt"') + child.expect(pexpect.EOF) + + The second form of spawn (where you pass a list of arguments) is useful + in situations where you wish to spawn a command and pass it its own + argument list. This can make syntax more clear. For example, the + following is equivalent to the previous example:: + + shell_cmd = 'ls -l | grep LOG > log_list.txt' + child = pexpect.spawn('/bin/bash', ['-c', shell_cmd]) + child.expect(pexpect.EOF) + + The maxread attribute sets the read buffer size. This is maximum number + of bytes that Pexpect will try to read from a TTY at one time. Setting + the maxread size to 1 will turn off buffering. Setting the maxread + value higher may help performance in cases where large amounts of + output are read back from the child. This feature is useful in + conjunction with searchwindowsize. + + The searchwindowsize attribute sets the how far back in the incomming + seach buffer Pexpect will search for pattern matches. Every time + Pexpect reads some data from the child it will append the data to the + incomming buffer. The default is to search from the beginning of the + imcomming buffer each time new data is read from the child. But this is + very inefficient if you are running a command that generates a large + amount of data where you want to match The searchwindowsize does not + effect the size of the incomming data buffer. You will still have + access to the full buffer after expect() returns. + + The logfile member turns on or off logging. All input and output will + be copied to the given file object. Set logfile to None to stop + logging. This is the default. Set logfile to sys.stdout to echo + everything to standard output. The logfile is flushed after each write. + + Example log input and output to a file:: + + child = pexpect.spawn('some_command') + fout = file('mylog.txt','w') + child.logfile = fout + + Example log to stdout:: + + child = pexpect.spawn('some_command') + child.logfile = sys.stdout + + The logfile_read and logfile_send members can be used to separately log + the input from the child and output sent to the child. Sometimes you + don't want to see everything you write to the child. You only want to + log what the child sends back. For example:: + + child = pexpect.spawn('some_command') + child.logfile_read = sys.stdout + + To separately log output sent to the child use logfile_send:: + + self.logfile_send = fout + + The delaybeforesend helps overcome a weird behavior that many users + were experiencing. The typical problem was that a user would expect() a + "Password:" prompt and then immediately call sendline() to send the + password. The user would then see that their password was echoed back + to them. Passwords don't normally echo. The problem is caused by the + fact that most applications print out the "Password" prompt and then + turn off stdin echo, but if you send your password before the + application turned off echo, then you get your password echoed. + Normally this wouldn't be a problem when interacting with a human at a + real keyboard. If you introduce a slight delay just before writing then + this seems to clear up the problem. This was such a common problem for + many users that I decided that the default pexpect behavior should be + to sleep just before writing to the child application. 1/20th of a + second (50 ms) seems to be enough to clear up the problem. You can set + delaybeforesend to 0 to return to the old behavior. Most Linux machines + don't like this to be below 0.03. I don't know why. + + Note that spawn is clever about finding commands on your path. + It uses the same logic that "which" uses to find executables. + + If you wish to get the exit status of the child you must call the + close() method. The exit or signal status of the child will be stored + in self.exitstatus or self.signalstatus. If the child exited normally + then exitstatus will store the exit return code and signalstatus will + be None. If the child was terminated abnormally with a signal then + signalstatus will store the signal value and exitstatus will be None. + If you need more detail you can also read the self.status member which + stores the status returned by os.waitpid. You can interpret this using + os.WIFEXITED/os.WEXITSTATUS or os.WIFSIGNALED/os.TERMSIG. """ + + self.STDIN_FILENO = pty.STDIN_FILENO + self.STDOUT_FILENO = pty.STDOUT_FILENO + self.STDERR_FILENO = pty.STDERR_FILENO + self.stdin = sys.stdin + self.stdout = sys.stdout + self.stderr = sys.stderr + + self.searcher = None + self.ignorecase = False + self.before = None + self.after = None + self.match = None + self.match_index = None + self.terminated = True + self.exitstatus = None + self.signalstatus = None + self.status = None # status returned by os.waitpid + self.flag_eof = False + self.pid = None + self.child_fd = -1 # initially closed + self.timeout = timeout + self.delimiter = EOF + self.logfile = logfile + self.logfile_read = None # input from child (read_nonblocking) + self.logfile_send = None # output to send (send, sendline) + self.maxread = maxread # max bytes to read at one time into buffer + self.buffer = '' # This is the read buffer. See maxread. + self.searchwindowsize = searchwindowsize # Anything before searchwindowsize point is preserved, but not searched. + # Most Linux machines don't like delaybeforesend to be below 0.03 (30 ms). + self.delaybeforesend = 0.05 # Sets sleep time used just before sending data to child. Time in seconds. + self.delayafterclose = 0.1 # Sets delay in close() method to allow kernel time to update process status. Time in seconds. + self.delayafterterminate = 0.1 # Sets delay in terminate() method to allow kernel time to update process status. Time in seconds. + self.softspace = False # File-like object. + self.name = '<' + repr(self) + '>' # File-like object. + self.encoding = None # File-like object. + self.closed = True # File-like object. + self.cwd = cwd + self.env = env + self.__irix_hack = (sys.platform.lower().find('irix')>=0) # This flags if we are running on irix + # Solaris uses internal __fork_pty(). All others use pty.fork(). + if (sys.platform.lower().find('solaris')>=0) or (sys.platform.lower().find('sunos5')>=0): + self.use_native_pty_fork = False + else: + self.use_native_pty_fork = True + + + # allow dummy instances for subclasses that may not use command or args. + if command is None: + self.command = None + self.args = None + self.name = '' + else: + self._spawn (command, args) + + def __del__(self): + + """This makes sure that no system resources are left open. Python only + garbage collects Python objects. OS file descriptors are not Python + objects, so they must be handled explicitly. If the child file + descriptor was opened outside of this class (passed to the constructor) + then this does not close it. """ + + if not self.closed: + # It is possible for __del__ methods to execute during the + # teardown of the Python VM itself. Thus self.close() may + # trigger an exception because os.close may be None. + # -- Fernando Perez + try: + self.close() + except AttributeError: + pass + + def __str__(self): + + """This returns a human-readable string that represents the state of + the object. """ + + s = [] + s.append(repr(self)) + s.append('version: ' + __version__ + ' (' + __revision__ + ')') + s.append('command: ' + str(self.command)) + s.append('args: ' + str(self.args)) + s.append('searcher: ' + str(self.searcher)) + s.append('buffer (last 100 chars): ' + str(self.buffer)[-100:]) + s.append('before (last 100 chars): ' + str(self.before)[-100:]) + s.append('after: ' + str(self.after)) + s.append('match: ' + str(self.match)) + s.append('match_index: ' + str(self.match_index)) + s.append('exitstatus: ' + str(self.exitstatus)) + s.append('flag_eof: ' + str(self.flag_eof)) + s.append('pid: ' + str(self.pid)) + s.append('child_fd: ' + str(self.child_fd)) + s.append('closed: ' + str(self.closed)) + s.append('timeout: ' + str(self.timeout)) + s.append('delimiter: ' + str(self.delimiter)) + s.append('logfile: ' + str(self.logfile)) + s.append('logfile_read: ' + str(self.logfile_read)) + s.append('logfile_send: ' + str(self.logfile_send)) + s.append('maxread: ' + str(self.maxread)) + s.append('ignorecase: ' + str(self.ignorecase)) + s.append('searchwindowsize: ' + str(self.searchwindowsize)) + s.append('delaybeforesend: ' + str(self.delaybeforesend)) + s.append('delayafterclose: ' + str(self.delayafterclose)) + s.append('delayafterterminate: ' + str(self.delayafterterminate)) + return '\n'.join(s) + + def _spawn(self,command,args=[]): + + """This starts the given command in a child process. This does all the + fork/exec type of stuff for a pty. This is called by __init__. If args + is empty then command will be parsed (split on spaces) and args will be + set to parsed arguments. """ + + # The pid and child_fd of this object get set by this method. + # Note that it is difficult for this method to fail. + # You cannot detect if the child process cannot start. + # So the only way you can tell if the child process started + # or not is to try to read from the file descriptor. If you get + # EOF immediately then it means that the child is already dead. + # That may not necessarily be bad because you may haved spawned a child + # that performs some task; creates no stdout output; and then dies. + + # If command is an int type then it may represent a file descriptor. + if type(command) == type(0): + raise ExceptionPexpect ('Command is an int type. If this is a file descriptor then maybe you want to use fdpexpect.fdspawn which takes an existing file descriptor instead of a command string.') + + if type (args) != type([]): + raise TypeError ('The argument, args, must be a list.') + + if args == []: + self.args = split_command_line(command) + self.command = self.args[0] + else: + self.args = args[:] # work with a copy + self.args.insert (0, command) + self.command = command + + command_with_path = which(self.command) + if command_with_path is None: + raise ExceptionPexpect ('The command was not found or was not executable: %s.' % self.command) + self.command = command_with_path + self.args[0] = self.command + + self.name = '<' + ' '.join (self.args) + '>' + + assert self.pid is None, 'The pid member should be None.' + assert self.command is not None, 'The command member should not be None.' + + if self.use_native_pty_fork: + try: + self.pid, self.child_fd = pty.fork() + except OSError, e: + raise ExceptionPexpect('Error! pty.fork() failed: ' + str(e)) + else: # Use internal __fork_pty + self.pid, self.child_fd = self.__fork_pty() + + if self.pid == 0: # Child + try: + self.child_fd = sys.stdout.fileno() # used by setwinsize() + self.setwinsize(24, 80) + except: + # Some platforms do not like setwinsize (Cygwin). + # This will cause problem when running applications that + # are very picky about window size. + # This is a serious limitation, but not a show stopper. + pass + # Do not allow child to inherit open file descriptors from parent. + max_fd = resource.getrlimit(resource.RLIMIT_NOFILE)[0] + for i in range (3, max_fd): + try: + os.close (i) + except OSError: + pass + + # I don't know why this works, but ignoring SIGHUP fixes a + # problem when trying to start a Java daemon with sudo + # (specifically, Tomcat). + signal.signal(signal.SIGHUP, signal.SIG_IGN) + + if self.cwd is not None: + os.chdir(self.cwd) + if self.env is None: + os.execv(self.command, self.args) + else: + os.execvpe(self.command, self.args, self.env) + + # Parent + self.terminated = False + self.closed = False + + def __fork_pty(self): + + """This implements a substitute for the forkpty system call. This + should be more portable than the pty.fork() function. Specifically, + this should work on Solaris. + + Modified 10.06.05 by Geoff Marshall: Implemented __fork_pty() method to + resolve the issue with Python's pty.fork() not supporting Solaris, + particularly ssh. Based on patch to posixmodule.c authored by Noah + Spurrier:: + + http://mail.python.org/pipermail/python-dev/2003-May/035281.html + + """ + + parent_fd, child_fd = os.openpty() + if parent_fd < 0 or child_fd < 0: + raise ExceptionPexpect, "Error! Could not open pty with os.openpty()." + + pid = os.fork() + if pid < 0: + raise ExceptionPexpect, "Error! Failed os.fork()." + elif pid == 0: + # Child. + os.close(parent_fd) + self.__pty_make_controlling_tty(child_fd) + + os.dup2(child_fd, 0) + os.dup2(child_fd, 1) + os.dup2(child_fd, 2) + + if child_fd > 2: + os.close(child_fd) + else: + # Parent. + os.close(child_fd) + + return pid, parent_fd + + def __pty_make_controlling_tty(self, tty_fd): + + """This makes the pseudo-terminal the controlling tty. This should be + more portable than the pty.fork() function. Specifically, this should + work on Solaris. """ + + child_name = os.ttyname(tty_fd) + + # Disconnect from controlling tty if still connected. + fd = os.open("/dev/tty", os.O_RDWR | os.O_NOCTTY); + if fd >= 0: + os.close(fd) + + os.setsid() + + # Verify we are disconnected from controlling tty + try: + fd = os.open("/dev/tty", os.O_RDWR | os.O_NOCTTY); + if fd >= 0: + os.close(fd) + raise ExceptionPexpect, "Error! We are not disconnected from a controlling tty." + except: + # Good! We are disconnected from a controlling tty. + pass + + # Verify we can open child pty. + fd = os.open(child_name, os.O_RDWR); + if fd < 0: + raise ExceptionPexpect, "Error! Could not open child pty, " + child_name + else: + os.close(fd) + + # Verify we now have a controlling tty. + fd = os.open("/dev/tty", os.O_WRONLY) + if fd < 0: + raise ExceptionPexpect, "Error! Could not open controlling tty, /dev/tty" + else: + os.close(fd) + + def fileno (self): # File-like object. + + """This returns the file descriptor of the pty for the child. + """ + + return self.child_fd + + def close (self, force=True): # File-like object. + + """This closes the connection with the child application. Note that + calling close() more than once is valid. This emulates standard Python + behavior with files. Set force to True if you want to make sure that + the child is terminated (SIGKILL is sent if the child ignores SIGHUP + and SIGINT). """ + + if not self.closed: + self.flush() + os.close (self.child_fd) + time.sleep(self.delayafterclose) # Give kernel time to update process status. + if self.isalive(): + if not self.terminate(force): + raise ExceptionPexpect ('close() could not terminate the child using terminate()') + self.child_fd = -1 + self.closed = True + #self.pid = None + + def flush (self): # File-like object. + + """This does nothing. It is here to support the interface for a + File-like object. """ + + pass + + def isatty (self): # File-like object. + + """This returns True if the file descriptor is open and connected to a + tty(-like) device, else False. """ + + return os.isatty(self.child_fd) + + def waitnoecho (self, timeout=-1): + + """This waits until the terminal ECHO flag is set False. This returns + True if the echo mode is off. This returns False if the ECHO flag was + not set False before the timeout. This can be used to detect when the + child is waiting for a password. Usually a child application will turn + off echo mode when it is waiting for the user to enter a password. For + example, instead of expecting the "password:" prompt you can wait for + the child to set ECHO off:: + + p = pexpect.spawn ('ssh user@example.com') + p.waitnoecho() + p.sendline(mypassword) + + If timeout is None then this method to block forever until ECHO flag is + False. + + """ + + if timeout == -1: + timeout = self.timeout + if timeout is not None: + end_time = time.time() + timeout + while True: + if not self.getecho(): + return True + if timeout < 0 and timeout is not None: + return False + if timeout is not None: + timeout = end_time - time.time() + time.sleep(0.1) + + def getecho (self): + + """This returns the terminal echo mode. This returns True if echo is + on or False if echo is off. Child applications that are expecting you + to enter a password often set ECHO False. See waitnoecho(). """ + + attr = termios.tcgetattr(self.child_fd) + if attr[3] & termios.ECHO: + return True + return False + + def setecho (self, state): + + """This sets the terminal echo mode on or off. Note that anything the + child sent before the echo will be lost, so you should be sure that + your input buffer is empty before you call setecho(). For example, the + following will work as expected:: + + p = pexpect.spawn('cat') + p.sendline ('1234') # We will see this twice (once from tty echo and again from cat). + p.expect (['1234']) + p.expect (['1234']) + p.setecho(False) # Turn off tty echo + p.sendline ('abcd') # We will set this only once (echoed by cat). + p.sendline ('wxyz') # We will set this only once (echoed by cat) + p.expect (['abcd']) + p.expect (['wxyz']) + + The following WILL NOT WORK because the lines sent before the setecho + will be lost:: + + p = pexpect.spawn('cat') + p.sendline ('1234') # We will see this twice (once from tty echo and again from cat). + p.setecho(False) # Turn off tty echo + p.sendline ('abcd') # We will set this only once (echoed by cat). + p.sendline ('wxyz') # We will set this only once (echoed by cat) + p.expect (['1234']) + p.expect (['1234']) + p.expect (['abcd']) + p.expect (['wxyz']) + """ + + self.child_fd + attr = termios.tcgetattr(self.child_fd) + if state: + attr[3] = attr[3] | termios.ECHO + else: + attr[3] = attr[3] & ~termios.ECHO + # I tried TCSADRAIN and TCSAFLUSH, but these were inconsistent + # and blocked on some platforms. TCSADRAIN is probably ideal if it worked. + termios.tcsetattr(self.child_fd, termios.TCSANOW, attr) + + def read_nonblocking (self, size = 1, timeout = -1): + + """This reads at most size characters from the child application. It + includes a timeout. If the read does not complete within the timeout + period then a TIMEOUT exception is raised. If the end of file is read + then an EOF exception will be raised. If a log file was set using + setlog() then all data will also be written to the log file. + + If timeout is None then the read may block indefinitely. If timeout is -1 + then the self.timeout value is used. If timeout is 0 then the child is + polled and if there was no data immediately ready then this will raise + a TIMEOUT exception. + + The timeout refers only to the amount of time to read at least one + character. This is not effected by the 'size' parameter, so if you call + read_nonblocking(size=100, timeout=30) and only one character is + available right away then one character will be returned immediately. + It will not wait for 30 seconds for another 99 characters to come in. + + This is a wrapper around os.read(). It uses select.select() to + implement the timeout. """ + + if self.closed: + raise ValueError ('I/O operation on closed file in read_nonblocking().') + + if timeout == -1: + timeout = self.timeout + + # Note that some systems such as Solaris do not give an EOF when + # the child dies. In fact, you can still try to read + # from the child_fd -- it will block forever or until TIMEOUT. + # For this case, I test isalive() before doing any reading. + # If isalive() is false, then I pretend that this is the same as EOF. + if not self.isalive(): + r,w,e = self.__select([self.child_fd], [], [], 0) # timeout of 0 means "poll" + if not r: + self.flag_eof = True + raise EOF ('End Of File (EOF) in read_nonblocking(). Braindead platform.') + elif self.__irix_hack: + # This is a hack for Irix. It seems that Irix requires a long delay before checking isalive. + # This adds a 2 second delay, but only when the child is terminated. + r, w, e = self.__select([self.child_fd], [], [], 2) + if not r and not self.isalive(): + self.flag_eof = True + raise EOF ('End Of File (EOF) in read_nonblocking(). Pokey platform.') + + r,w,e = self.__select([self.child_fd], [], [], timeout) + + if not r: + if not self.isalive(): + # Some platforms, such as Irix, will claim that their processes are alive; + # then timeout on the select; and then finally admit that they are not alive. + self.flag_eof = True + raise EOF ('End of File (EOF) in read_nonblocking(). Very pokey platform.') + else: + raise TIMEOUT ('Timeout exceeded in read_nonblocking().') + + if self.child_fd in r: + try: + s = os.read(self.child_fd, size) + except OSError, e: # Linux does this + self.flag_eof = True + raise EOF ('End Of File (EOF) in read_nonblocking(). Exception style platform.') + if s == '': # BSD style + self.flag_eof = True + raise EOF ('End Of File (EOF) in read_nonblocking(). Empty string style platform.') + + if self.logfile is not None: + self.logfile.write (s) + self.logfile.flush() + if self.logfile_read is not None: + self.logfile_read.write (s) + self.logfile_read.flush() + + return s + + raise ExceptionPexpect ('Reached an unexpected state in read_nonblocking().') + + def read (self, size = -1): # File-like object. + + """This reads at most "size" bytes from the file (less if the read hits + EOF before obtaining size bytes). If the size argument is negative or + omitted, read all data until EOF is reached. The bytes are returned as + a string object. An empty string is returned when EOF is encountered + immediately. """ + + if size == 0: + return '' + if size < 0: + self.expect (self.delimiter) # delimiter default is EOF + return self.before + + # I could have done this more directly by not using expect(), but + # I deliberately decided to couple read() to expect() so that + # I would catch any bugs early and ensure consistant behavior. + # It's a little less efficient, but there is less for me to + # worry about if I have to later modify read() or expect(). + # Note, it's OK if size==-1 in the regex. That just means it + # will never match anything in which case we stop only on EOF. + cre = re.compile('.{%d}' % size, re.DOTALL) + index = self.expect ([cre, self.delimiter]) # delimiter default is EOF + if index == 0: + return self.after ### self.before should be ''. Should I assert this? + return self.before + + def readline (self, size = -1): # File-like object. + + """This reads and returns one entire line. A trailing newline is kept + in the string, but may be absent when a file ends with an incomplete + line. Note: This readline() looks for a \\r\\n pair even on UNIX + because this is what the pseudo tty device returns. So contrary to what + you may expect you will receive the newline as \\r\\n. An empty string + is returned when EOF is hit immediately. Currently, the size argument is + mostly ignored, so this behavior is not standard for a file-like + object. If size is 0 then an empty string is returned. """ + + if size == 0: + return '' + index = self.expect (['\r\n', self.delimiter]) # delimiter default is EOF + if index == 0: + return self.before + '\r\n' + else: + return self.before + + def __iter__ (self): # File-like object. + + """This is to support iterators over a file-like object. + """ + + return self + + def next (self): # File-like object. + + """This is to support iterators over a file-like object. + """ + + result = self.readline() + if result == "": + raise StopIteration + return result + + def readlines (self, sizehint = -1): # File-like object. + + """This reads until EOF using readline() and returns a list containing + the lines thus read. The optional "sizehint" argument is ignored. """ + + lines = [] + while True: + line = self.readline() + if not line: + break + lines.append(line) + return lines + + def write(self, s): # File-like object. + + """This is similar to send() except that there is no return value. + """ + + self.send (s) + + def writelines (self, sequence): # File-like object. + + """This calls write() for each element in the sequence. The sequence + can be any iterable object producing strings, typically a list of + strings. This does not add line separators There is no return value. + """ + + for s in sequence: + self.write (s) + + def send(self, s): + + """This sends a string to the child process. This returns the number of + bytes written. If a log file was set then the data is also written to + the log. """ + + time.sleep(self.delaybeforesend) + if self.logfile is not None: + self.logfile.write (s) + self.logfile.flush() + if self.logfile_send is not None: + self.logfile_send.write (s) + self.logfile_send.flush() + c = os.write(self.child_fd, s) + return c + + def sendline(self, s=''): + + """This is like send(), but it adds a line feed (os.linesep). This + returns the number of bytes written. """ + + n = self.send(s) + n = n + self.send (os.linesep) + return n + + def sendcontrol(self, char): + + """This sends a control character to the child such as Ctrl-C or + Ctrl-D. For example, to send a Ctrl-G (ASCII 7):: + + child.sendcontrol('g') + + See also, sendintr() and sendeof(). + """ + + char = char.lower() + a = ord(char) + if a>=97 and a<=122: + a = a - ord('a') + 1 + return self.send (chr(a)) + d = {'@':0, '`':0, + '[':27, '{':27, + '\\':28, '|':28, + ']':29, '}': 29, + '^':30, '~':30, + '_':31, + '?':127} + if char not in d: + return 0 + return self.send (chr(d[char])) + + def sendeof(self): + + """This sends an EOF to the child. This sends a character which causes + the pending parent output buffer to be sent to the waiting child + program without waiting for end-of-line. If it is the first character + of the line, the read() in the user program returns 0, which signifies + end-of-file. This means to work as expected a sendeof() has to be + called at the beginning of a line. This method does not send a newline. + It is the responsibility of the caller to ensure the eof is sent at the + beginning of a line. """ + + ### Hmmm... how do I send an EOF? + ###C if ((m = write(pty, *buf, p - *buf)) < 0) + ###C return (errno == EWOULDBLOCK) ? n : -1; + #fd = sys.stdin.fileno() + #old = termios.tcgetattr(fd) # remember current state + #attr = termios.tcgetattr(fd) + #attr[3] = attr[3] | termios.ICANON # ICANON must be set to recognize EOF + #try: # use try/finally to ensure state gets restored + # termios.tcsetattr(fd, termios.TCSADRAIN, attr) + # if hasattr(termios, 'CEOF'): + # os.write (self.child_fd, '%c' % termios.CEOF) + # else: + # # Silly platform does not define CEOF so assume CTRL-D + # os.write (self.child_fd, '%c' % 4) + #finally: # restore state + # termios.tcsetattr(fd, termios.TCSADRAIN, old) + if hasattr(termios, 'VEOF'): + char = termios.tcgetattr(self.child_fd)[6][termios.VEOF] + else: + # platform does not define VEOF so assume CTRL-D + char = chr(4) + self.send(char) + + def sendintr(self): + + """This sends a SIGINT to the child. It does not require + the SIGINT to be the first character on a line. """ + + if hasattr(termios, 'VINTR'): + char = termios.tcgetattr(self.child_fd)[6][termios.VINTR] + else: + # platform does not define VINTR so assume CTRL-C + char = chr(3) + self.send (char) + + def eof (self): + + """This returns True if the EOF exception was ever raised. + """ + + return self.flag_eof + + def terminate(self, force=False): + + """This forces a child process to terminate. It starts nicely with + SIGHUP and SIGINT. If "force" is True then moves onto SIGKILL. This + returns True if the child was terminated. This returns False if the + child could not be terminated. """ + + if not self.isalive(): + return True + try: + self.kill(signal.SIGHUP) + time.sleep(self.delayafterterminate) + if not self.isalive(): + return True + self.kill(signal.SIGCONT) + time.sleep(self.delayafterterminate) + if not self.isalive(): + return True + self.kill(signal.SIGINT) + time.sleep(self.delayafterterminate) + if not self.isalive(): + return True + if force: + self.kill(signal.SIGKILL) + time.sleep(self.delayafterterminate) + if not self.isalive(): + return True + else: + return False + return False + except OSError, e: + # I think there are kernel timing issues that sometimes cause + # this to happen. I think isalive() reports True, but the + # process is dead to the kernel. + # Make one last attempt to see if the kernel is up to date. + time.sleep(self.delayafterterminate) + if not self.isalive(): + return True + else: + return False + + def wait(self): + + """This waits until the child exits. This is a blocking call. This will + not read any data from the child, so this will block forever if the + child has unread output and has terminated. In other words, the child + may have printed output then called exit(); but, technically, the child + is still alive until its output is read. """ + + if self.isalive(): + pid, status = os.waitpid(self.pid, 0) + else: + raise ExceptionPexpect ('Cannot wait for dead child process.') + self.exitstatus = os.WEXITSTATUS(status) + if os.WIFEXITED (status): + self.status = status + self.exitstatus = os.WEXITSTATUS(status) + self.signalstatus = None + self.terminated = True + elif os.WIFSIGNALED (status): + self.status = status + self.exitstatus = None + self.signalstatus = os.WTERMSIG(status) + self.terminated = True + elif os.WIFSTOPPED (status): + raise ExceptionPexpect ('Wait was called for a child process that is stopped. This is not supported. Is some other process attempting job control with our child pid?') + return self.exitstatus + + def isalive(self): + + """This tests if the child process is running or not. This is + non-blocking. If the child was terminated then this will read the + exitstatus or signalstatus of the child. This returns True if the child + process appears to be running or False if not. It can take literally + SECONDS for Solaris to return the right status. """ + + if self.terminated: + return False + + if self.flag_eof: + # This is for Linux, which requires the blocking form of waitpid to get + # status of a defunct process. This is super-lame. The flag_eof would have + # been set in read_nonblocking(), so this should be safe. + waitpid_options = 0 + else: + waitpid_options = os.WNOHANG + + try: + pid, status = os.waitpid(self.pid, waitpid_options) + except OSError, e: # No child processes + if e[0] == errno.ECHILD: + raise ExceptionPexpect ('isalive() encountered condition where "terminated" is 0, but there was no child process. Did someone else call waitpid() on our process?') + else: + raise e + + # I have to do this twice for Solaris. I can't even believe that I figured this out... + # If waitpid() returns 0 it means that no child process wishes to + # report, and the value of status is undefined. + if pid == 0: + try: + pid, status = os.waitpid(self.pid, waitpid_options) ### os.WNOHANG) # Solaris! + except OSError, e: # This should never happen... + if e[0] == errno.ECHILD: + raise ExceptionPexpect ('isalive() encountered condition that should never happen. There was no child process. Did someone else call waitpid() on our process?') + else: + raise e + + # If pid is still 0 after two calls to waitpid() then + # the process really is alive. This seems to work on all platforms, except + # for Irix which seems to require a blocking call on waitpid or select, so I let read_nonblocking + # take care of this situation (unfortunately, this requires waiting through the timeout). + if pid == 0: + return True + + if pid == 0: + return True + + if os.WIFEXITED (status): + self.status = status + self.exitstatus = os.WEXITSTATUS(status) + self.signalstatus = None + self.terminated = True + elif os.WIFSIGNALED (status): + self.status = status + self.exitstatus = None + self.signalstatus = os.WTERMSIG(status) + self.terminated = True + elif os.WIFSTOPPED (status): + raise ExceptionPexpect ('isalive() encountered condition where child process is stopped. This is not supported. Is some other process attempting job control with our child pid?') + return False + + def kill(self, sig): + + """This sends the given signal to the child application. In keeping + with UNIX tradition it has a misleading name. It does not necessarily + kill the child unless you send the right signal. """ + + # Same as os.kill, but the pid is given for you. + if self.isalive(): + os.kill(self.pid, sig) + + def compile_pattern_list(self, patterns): + + """This compiles a pattern-string or a list of pattern-strings. + Patterns must be a StringType, EOF, TIMEOUT, SRE_Pattern, or a list of + those. Patterns may also be None which results in an empty list (you + might do this if waiting for an EOF or TIMEOUT condition without + expecting any pattern). + + This is used by expect() when calling expect_list(). Thus expect() is + nothing more than:: + + cpl = self.compile_pattern_list(pl) + return self.expect_list(cpl, timeout) + + If you are using expect() within a loop it may be more + efficient to compile the patterns first and then call expect_list(). + This avoid calls in a loop to compile_pattern_list():: + + cpl = self.compile_pattern_list(my_pattern) + while some_condition: + ... + i = self.expect_list(clp, timeout) + ... + """ + + if patterns is None: + return [] + if type(patterns) is not types.ListType: + patterns = [patterns] + + compile_flags = re.DOTALL # Allow dot to match \n + if self.ignorecase: + compile_flags = compile_flags | re.IGNORECASE + compiled_pattern_list = [] + for p in patterns: + if type(p) in types.StringTypes: + compiled_pattern_list.append(re.compile(p, compile_flags)) + elif p is EOF: + compiled_pattern_list.append(EOF) + elif p is TIMEOUT: + compiled_pattern_list.append(TIMEOUT) + elif type(p) is type(re.compile('')): + compiled_pattern_list.append(p) + else: + raise TypeError ('Argument must be one of StringTypes, EOF, TIMEOUT, SRE_Pattern, or a list of those type. %s' % str(type(p))) + + return compiled_pattern_list + + def expect(self, pattern, timeout = -1, searchwindowsize=None): + + """This seeks through the stream until a pattern is matched. The + pattern is overloaded and may take several types. The pattern can be a + StringType, EOF, a compiled re, or a list of any of those types. + Strings will be compiled to re types. This returns the index into the + pattern list. If the pattern was not a list this returns index 0 on a + successful match. This may raise exceptions for EOF or TIMEOUT. To + avoid the EOF or TIMEOUT exceptions add EOF or TIMEOUT to the pattern + list. That will cause expect to match an EOF or TIMEOUT condition + instead of raising an exception. + + If you pass a list of patterns and more than one matches, the first match + in the stream is chosen. If more than one pattern matches at that point, + the leftmost in the pattern list is chosen. For example:: + + # the input is 'foobar' + index = p.expect (['bar', 'foo', 'foobar']) + # returns 1 ('foo') even though 'foobar' is a "better" match + + Please note, however, that buffering can affect this behavior, since + input arrives in unpredictable chunks. For example:: + + # the input is 'foobar' + index = p.expect (['foobar', 'foo']) + # returns 0 ('foobar') if all input is available at once, + # but returs 1 ('foo') if parts of the final 'bar' arrive late + + After a match is found the instance attributes 'before', 'after' and + 'match' will be set. You can see all the data read before the match in + 'before'. You can see the data that was matched in 'after'. The + re.MatchObject used in the re match will be in 'match'. If an error + occurred then 'before' will be set to all the data read so far and + 'after' and 'match' will be None. + + If timeout is -1 then timeout will be set to the self.timeout value. + + A list entry may be EOF or TIMEOUT instead of a string. This will + catch these exceptions and return the index of the list entry instead + of raising the exception. The attribute 'after' will be set to the + exception type. The attribute 'match' will be None. This allows you to + write code like this:: + + index = p.expect (['good', 'bad', pexpect.EOF, pexpect.TIMEOUT]) + if index == 0: + do_something() + elif index == 1: + do_something_else() + elif index == 2: + do_some_other_thing() + elif index == 3: + do_something_completely_different() + + instead of code like this:: + + try: + index = p.expect (['good', 'bad']) + if index == 0: + do_something() + elif index == 1: + do_something_else() + except EOF: + do_some_other_thing() + except TIMEOUT: + do_something_completely_different() + + These two forms are equivalent. It all depends on what you want. You + can also just expect the EOF if you are waiting for all output of a + child to finish. For example:: + + p = pexpect.spawn('/bin/ls') + p.expect (pexpect.EOF) + print p.before + + If you are trying to optimize for speed then see expect_list(). + """ + + compiled_pattern_list = self.compile_pattern_list(pattern) + return self.expect_list(compiled_pattern_list, timeout, searchwindowsize) + + def expect_list(self, pattern_list, timeout = -1, searchwindowsize = -1): + + """This takes a list of compiled regular expressions and returns the + index into the pattern_list that matched the child output. The list may + also contain EOF or TIMEOUT (which are not compiled regular + expressions). This method is similar to the expect() method except that + expect_list() does not recompile the pattern list on every call. This + may help if you are trying to optimize for speed, otherwise just use + the expect() method. This is called by expect(). If timeout==-1 then + the self.timeout value is used. If searchwindowsize==-1 then the + self.searchwindowsize value is used. """ + + return self.expect_loop(searcher_re(pattern_list), timeout, searchwindowsize) + + def expect_exact(self, pattern_list, timeout = -1, searchwindowsize = -1): + + """This is similar to expect(), but uses plain string matching instead + of compiled regular expressions in 'pattern_list'. The 'pattern_list' + may be a string; a list or other sequence of strings; or TIMEOUT and + EOF. + + This call might be faster than expect() for two reasons: string + searching is faster than RE matching and it is possible to limit the + search to just the end of the input buffer. + + This method is also useful when you don't want to have to worry about + escaping regular expression characters that you want to match.""" + + if type(pattern_list) in types.StringTypes or pattern_list in (TIMEOUT, EOF): + pattern_list = [pattern_list] + return self.expect_loop(searcher_string(pattern_list), timeout, searchwindowsize) + + def expect_loop(self, searcher, timeout = -1, searchwindowsize = -1): + + """This is the common loop used inside expect. The 'searcher' should be + an instance of searcher_re or searcher_string, which describes how and what + to search for in the input. + + See expect() for other arguments, return value and exceptions. """ + + self.searcher = searcher + + if timeout == -1: + timeout = self.timeout + if timeout is not None: + end_time = time.time() + timeout + if searchwindowsize == -1: + searchwindowsize = self.searchwindowsize + + try: + incoming = self.buffer + freshlen = len(incoming) + while True: # Keep reading until exception or return. + index = searcher.search(incoming, freshlen, searchwindowsize) + if index >= 0: + self.buffer = incoming[searcher.end : ] + self.before = incoming[ : searcher.start] + self.after = incoming[searcher.start : searcher.end] + self.match = searcher.match + self.match_index = index + return self.match_index + # No match at this point + if timeout < 0 and timeout is not None: + raise TIMEOUT ('Timeout exceeded in expect_any().') + # Still have time left, so read more data + c = self.read_nonblocking (self.maxread, timeout) + freshlen = len(c) + time.sleep (0.0001) + incoming = incoming + c + if timeout is not None: + timeout = end_time - time.time() + except EOF, e: + self.buffer = '' + self.before = incoming + self.after = EOF + index = searcher.eof_index + if index >= 0: + self.match = EOF + self.match_index = index + return self.match_index + else: + self.match = None + self.match_index = None + raise EOF (str(e) + '\n' + str(self)) + except TIMEOUT, e: + self.buffer = incoming + self.before = incoming + self.after = TIMEOUT + index = searcher.timeout_index + if index >= 0: + self.match = TIMEOUT + self.match_index = index + return self.match_index + else: + self.match = None + self.match_index = None + raise TIMEOUT (str(e) + '\n' + str(self)) + except: + self.before = incoming + self.after = None + self.match = None + self.match_index = None + raise + + def getwinsize(self): + + """This returns the terminal window size of the child tty. The return + value is a tuple of (rows, cols). """ + + TIOCGWINSZ = getattr(termios, 'TIOCGWINSZ', 1074295912L) + s = struct.pack('HHHH', 0, 0, 0, 0) + x = fcntl.ioctl(self.fileno(), TIOCGWINSZ, s) + return struct.unpack('HHHH', x)[0:2] + + def setwinsize(self, r, c): + + """This sets the terminal window size of the child tty. This will cause + a SIGWINCH signal to be sent to the child. This does not change the + physical window size. It changes the size reported to TTY-aware + applications like vi or curses -- applications that respond to the + SIGWINCH signal. """ + + # Check for buggy platforms. Some Python versions on some platforms + # (notably OSF1 Alpha and RedHat 7.1) truncate the value for + # termios.TIOCSWINSZ. It is not clear why this happens. + # These platforms don't seem to handle the signed int very well; + # yet other platforms like OpenBSD have a large negative value for + # TIOCSWINSZ and they don't have a truncate problem. + # Newer versions of Linux have totally different values for TIOCSWINSZ. + # Note that this fix is a hack. + TIOCSWINSZ = getattr(termios, 'TIOCSWINSZ', -2146929561) + if TIOCSWINSZ == 2148037735L: # L is not required in Python >= 2.2. + TIOCSWINSZ = -2146929561 # Same bits, but with sign. + # Note, assume ws_xpixel and ws_ypixel are zero. + s = struct.pack('HHHH', r, c, 0, 0) + fcntl.ioctl(self.fileno(), TIOCSWINSZ, s) + + def interact(self, escape_character = chr(29), input_filter = None, output_filter = None): + + """This gives control of the child process to the interactive user (the + human at the keyboard). Keystrokes are sent to the child process, and + the stdout and stderr output of the child process is printed. This + simply echos the child stdout and child stderr to the real stdout and + it echos the real stdin to the child stdin. When the user types the + escape_character this method will stop. The default for + escape_character is ^]. This should not be confused with ASCII 27 -- + the ESC character. ASCII 29 was chosen for historical merit because + this is the character used by 'telnet' as the escape character. The + escape_character will not be sent to the child process. + + You may pass in optional input and output filter functions. These + functions should take a string and return a string. The output_filter + will be passed all the output from the child process. The input_filter + will be passed all the keyboard input from the user. The input_filter + is run BEFORE the check for the escape_character. + + Note that if you change the window size of the parent the SIGWINCH + signal will not be passed through to the child. If you want the child + window size to change when the parent's window size changes then do + something like the following example:: + + import pexpect, struct, fcntl, termios, signal, sys + def sigwinch_passthrough (sig, data): + s = struct.pack("HHHH", 0, 0, 0, 0) + a = struct.unpack('hhhh', fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ , s)) + global p + p.setwinsize(a[0],a[1]) + p = pexpect.spawn('/bin/bash') # Note this is global and used in sigwinch_passthrough. + signal.signal(signal.SIGWINCH, sigwinch_passthrough) + p.interact() + """ + + # Flush the buffer. + self.stdout.write (self.buffer) + self.stdout.flush() + self.buffer = '' + mode = tty.tcgetattr(self.STDIN_FILENO) + tty.setraw(self.STDIN_FILENO) + try: + self.__interact_copy(escape_character, input_filter, output_filter) + finally: + tty.tcsetattr(self.STDIN_FILENO, tty.TCSAFLUSH, mode) + + def __interact_writen(self, fd, data): + + """This is used by the interact() method. + """ + + while data != '' and self.isalive(): + n = os.write(fd, data) + data = data[n:] + + def __interact_read(self, fd): + + """This is used by the interact() method. + """ + + return os.read(fd, 1000) + + def __interact_copy(self, escape_character = None, input_filter = None, output_filter = None): + + """This is used by the interact() method. + """ + + while self.isalive(): + r,w,e = self.__select([self.child_fd, self.STDIN_FILENO], [], []) + if self.child_fd in r: + data = self.__interact_read(self.child_fd) + if output_filter: data = output_filter(data) + if self.logfile is not None: + self.logfile.write (data) + self.logfile.flush() + os.write(self.STDOUT_FILENO, data) + if self.STDIN_FILENO in r: + data = self.__interact_read(self.STDIN_FILENO) + if input_filter: data = input_filter(data) + i = data.rfind(escape_character) + if i != -1: + data = data[:i] + self.__interact_writen(self.child_fd, data) + break + self.__interact_writen(self.child_fd, data) + + def __select (self, iwtd, owtd, ewtd, timeout=None): + + """This is a wrapper around select.select() that ignores signals. If + select.select raises a select.error exception and errno is an EINTR + error then it is ignored. Mainly this is used to ignore sigwinch + (terminal resize). """ + + # if select() is interrupted by a signal (errno==EINTR) then + # we loop back and enter the select() again. + if timeout is not None: + end_time = time.time() + timeout + while True: + try: + return select.select (iwtd, owtd, ewtd, timeout) + except select.error, e: + if e[0] == errno.EINTR: + # if we loop back we have to subtract the amount of time we already waited. + if timeout is not None: + timeout = end_time - time.time() + if timeout < 0: + return ([],[],[]) + else: # something else caused the select.error, so this really is an exception + raise + +############################################################################## +# The following methods are no longer supported or allowed. + + def setmaxread (self, maxread): + + """This method is no longer supported or allowed. I don't like getters + and setters without a good reason. """ + + raise ExceptionPexpect ('This method is no longer supported or allowed. Just assign a value to the maxread member variable.') + + def setlog (self, fileobject): + + """This method is no longer supported or allowed. + """ + + raise ExceptionPexpect ('This method is no longer supported or allowed. Just assign a value to the logfile member variable.') + +############################################################################## +# End of spawn class +############################################################################## + +class searcher_string (object): + + """This is a plain string search helper for the spawn.expect_any() method. + + Attributes: + + eof_index - index of EOF, or -1 + timeout_index - index of TIMEOUT, or -1 + + After a successful match by the search() method the following attributes + are available: + + start - index into the buffer, first byte of match + end - index into the buffer, first byte after match + match - the matching string itself + """ + + def __init__(self, strings): + + """This creates an instance of searcher_string. This argument 'strings' + may be a list; a sequence of strings; or the EOF or TIMEOUT types. """ + + self.eof_index = -1 + self.timeout_index = -1 + self._strings = [] + for n, s in zip(range(len(strings)), strings): + if s is EOF: + self.eof_index = n + continue + if s is TIMEOUT: + self.timeout_index = n + continue + self._strings.append((n, s)) + + def __str__(self): + + """This returns a human-readable string that represents the state of + the object.""" + + ss = [ (ns[0],' %d: "%s"' % ns) for ns in self._strings ] + ss.append((-1,'searcher_string:')) + if self.eof_index >= 0: + ss.append ((self.eof_index,' %d: EOF' % self.eof_index)) + if self.timeout_index >= 0: + ss.append ((self.timeout_index,' %d: TIMEOUT' % self.timeout_index)) + ss.sort() + ss = zip(*ss)[1] + return '\n'.join(ss) + + def search(self, buffer, freshlen, searchwindowsize=None): + + """This searches 'buffer' for the first occurence of one of the search + strings. 'freshlen' must indicate the number of bytes at the end of + 'buffer' which have not been searched before. It helps to avoid + searching the same, possibly big, buffer over and over again. + + See class spawn for the 'searchwindowsize' argument. + + If there is a match this returns the index of that string, and sets + 'start', 'end' and 'match'. Otherwise, this returns -1. """ + + absurd_match = len(buffer) + first_match = absurd_match + + # 'freshlen' helps a lot here. Further optimizations could + # possibly include: + # + # using something like the Boyer-Moore Fast String Searching + # Algorithm; pre-compiling the search through a list of + # strings into something that can scan the input once to + # search for all N strings; realize that if we search for + # ['bar', 'baz'] and the input is '...foo' we need not bother + # rescanning until we've read three more bytes. + # + # Sadly, I don't know enough about this interesting topic. /grahn + + for index, s in self._strings: + if searchwindowsize is None: + # the match, if any, can only be in the fresh data, + # or at the very end of the old data + offset = -(freshlen+len(s)) + else: + # better obey searchwindowsize + offset = -searchwindowsize + n = buffer.find(s, offset) + if n >= 0 and n < first_match: + first_match = n + best_index, best_match = index, s + if first_match == absurd_match: + return -1 + self.match = best_match + self.start = first_match + self.end = self.start + len(self.match) + return best_index + +class searcher_re (object): + + """This is regular expression string search helper for the + spawn.expect_any() method. + + Attributes: + + eof_index - index of EOF, or -1 + timeout_index - index of TIMEOUT, or -1 + + After a successful match by the search() method the following attributes + are available: + + start - index into the buffer, first byte of match + end - index into the buffer, first byte after match + match - the re.match object returned by a succesful re.search + + """ + + def __init__(self, patterns): + + """This creates an instance that searches for 'patterns' Where + 'patterns' may be a list or other sequence of compiled regular + expressions, or the EOF or TIMEOUT types.""" + + self.eof_index = -1 + self.timeout_index = -1 + self._searches = [] + for n, s in zip(range(len(patterns)), patterns): + if s is EOF: + self.eof_index = n + continue + if s is TIMEOUT: + self.timeout_index = n + continue + self._searches.append((n, s)) + + def __str__(self): + + """This returns a human-readable string that represents the state of + the object.""" + + ss = [ (n,' %d: re.compile("%s")' % (n,str(s.pattern))) for n,s in self._searches] + ss.append((-1,'searcher_re:')) + if self.eof_index >= 0: + ss.append ((self.eof_index,' %d: EOF' % self.eof_index)) + if self.timeout_index >= 0: + ss.append ((self.timeout_index,' %d: TIMEOUT' % self.timeout_index)) + ss.sort() + ss = zip(*ss)[1] + return '\n'.join(ss) + + def search(self, buffer, freshlen, searchwindowsize=None): + + """This searches 'buffer' for the first occurence of one of the regular + expressions. 'freshlen' must indicate the number of bytes at the end of + 'buffer' which have not been searched before. + + See class spawn for the 'searchwindowsize' argument. + + If there is a match this returns the index of that string, and sets + 'start', 'end' and 'match'. Otherwise, returns -1.""" + + absurd_match = len(buffer) + first_match = absurd_match + # 'freshlen' doesn't help here -- we cannot predict the + # length of a match, and the re module provides no help. + if searchwindowsize is None: + searchstart = 0 + else: + searchstart = max(0, len(buffer)-searchwindowsize) + for index, s in self._searches: + match = s.search(buffer, searchstart) + if match is None: + continue + n = match.start() + if n < first_match: + first_match = n + the_match = match + best_index = index + if first_match == absurd_match: + return -1 + self.start = first_match + self.match = the_match + self.end = self.match.end() + return best_index + +def which (filename): + + """This takes a given filename; tries to find it in the environment path; + then checks if it is executable. This returns the full path to the filename + if found and executable. Otherwise this returns None.""" + + # Special case where filename already contains a path. + if os.path.dirname(filename) != '': + if os.access (filename, os.X_OK): + return filename + + if not os.environ.has_key('PATH') or os.environ['PATH'] == '': + p = os.defpath + else: + p = os.environ['PATH'] + + # Oddly enough this was the one line that made Pexpect + # incompatible with Python 1.5.2. + #pathlist = p.split (os.pathsep) + pathlist = string.split (p, os.pathsep) + + for path in pathlist: + f = os.path.join(path, filename) + if os.access(f, os.X_OK): + return f + return None + +def split_command_line(command_line): + + """This splits a command line into a list of arguments. It splits arguments + on spaces, but handles embedded quotes, doublequotes, and escaped + characters. It's impossible to do this with a regular expression, so I + wrote a little state machine to parse the command line. """ + + arg_list = [] + arg = '' + + # Constants to name the states we can be in. + state_basic = 0 + state_esc = 1 + state_singlequote = 2 + state_doublequote = 3 + state_whitespace = 4 # The state of consuming whitespace between commands. + state = state_basic + + for c in command_line: + if state == state_basic or state == state_whitespace: + if c == '\\': # Escape the next character + state = state_esc + elif c == r"'": # Handle single quote + state = state_singlequote + elif c == r'"': # Handle double quote + state = state_doublequote + elif c.isspace(): + # Add arg to arg_list if we aren't in the middle of whitespace. + if state == state_whitespace: + None # Do nothing. + else: + arg_list.append(arg) + arg = '' + state = state_whitespace + else: + arg = arg + c + state = state_basic + elif state == state_esc: + arg = arg + c + state = state_basic + elif state == state_singlequote: + if c == r"'": + state = state_basic + else: + arg = arg + c + elif state == state_doublequote: + if c == r'"': + state = state_basic + else: + arg = arg + c + + if arg != '': + arg_list.append(arg) + return arg_list + +# vi:ts=4:sw=4:expandtab:ft=python: diff --git a/WorkloadManagementSystem/Agent/JobAgent.py b/WorkloadManagementSystem/Agent/JobAgent.py index ce5c03565a8..1731823964b 100755 --- a/WorkloadManagementSystem/Agent/JobAgent.py +++ b/WorkloadManagementSystem/Agent/JobAgent.py @@ -357,9 +357,6 @@ def __checkInstallSoftware( self, jobID, jobParams, resourceParams ): self.__report( jobID, 'Matched', 'Installing Software' ) softwareDist = jobParams['SoftwareDistModule'] - #HACK: Delete when svn repo is in production! - softwareDist = softwareDist.replace( "DIRAC.LHCbSystem.", "LHCbDIRAC.Core." ) - #END OF HACK self.log.verbose( 'Found VO Software Distribution module: %s' % ( softwareDist ) ) argumentsDict = {'Job':jobParams, 'CE':resourceParams} moduleFactory = ModuleFactory() diff --git a/WorkloadManagementSystem/Agent/SiteDirector.py b/WorkloadManagementSystem/Agent/SiteDirector.py index 3978e4b5296..d4945fdeb43 100644 --- a/WorkloadManagementSystem/Agent/SiteDirector.py +++ b/WorkloadManagementSystem/Agent/SiteDirector.py @@ -8,7 +8,7 @@ """ from DIRAC.Core.Base.AgentModule import AgentModule -from DIRAC.ConfigurationSystem.Client.Helpers import getCSExtensions, getVO, Registry, Operations +from DIRAC.ConfigurationSystem.Client.Helpers import CSGlobals, getVO, Registry, Operations, Resources from DIRAC.Resources.Computing.ComputingElementFactory import ComputingElementFactory from DIRAC.WorkloadManagementSystem.Client.ServerUtils import pilotAgentsDB, taskQueueDB, jobDB from DIRAC.WorkloadManagementSystem.Service.WMSUtilities import getGridEnv @@ -52,12 +52,28 @@ def initialize( self ): def beginExecution( self ): self.gridEnv = self.am_getOption( "GridEnv", getGridEnv() ) - self.genericPilotDN = self.am_getOption( 'GenericPilotDN', 'Unknown' ) - self.genericPilotGroup = self.am_getOption( 'GenericPilotGroup', 'Unknown' ) + self.vo = self.am_getOption( "Community", '' ) + + # Choose the group for which pilots will be submitted. This is a hack until + # we will be able to match pilots to VOs. + self.group = '' + if self.vo: + result = Registry.getGroupsForVO( self.vo ) + if not result['OK']: + return result + for group in result['Value']: + if 'NormalUser' in Registry.getPropertiesForGroup( group ): + self.group = group + break + + self.operations = Operations.Operations( vo=self.vo ) + self.genericPilotDN = self.operations.getValue( '/Pilot/GenericPilotDN', 'Unknown' ) + self.genericPilotGroup = self.operations.getValue( '/Pilot/GenericPilotGroup', 'Unknown' ) self.pilot = DIRAC_PILOT self.install = DIRAC_INSTALL self.workingDirectory = self.am_getOption( 'WorkDirectory' ) self.maxQueueLength = self.am_getOption( 'MaxQueueLength', 86400 * 3 ) + self.pilotLogLevel = self.am_getOption( 'PilotLogLevel', 'INFO' ) # Flags self.updateStatus = self.am_getOption( 'UpdatePilotStatus', True ) @@ -65,14 +81,36 @@ def beginExecution( self ): self.sendAccounting = self.am_getOption( 'SendPilotAccounting', True ) # Get the site description dictionary - siteNames = self.am_getOption( 'Site', [] ) - if not siteNames: - siteName = gConfig.getValue( '/DIRAC/Site', 'Unknown' ) - if siteName == 'Unknown': - return S_OK( 'No site specified for the SiteDirector' ) - else: - siteNames = [siteName] - self.siteNames = siteNames + siteNames = None + if not self.am_getOption( 'Site', 'Any' ).lower() == "any": + siteNames = self.am_getOption( 'Site', [] ) + ceTypes = None + if not self.am_getOption( 'CETypes', 'Any' ).lower() == "any": + ceTypes = self.am_getOption( 'CETypes', [] ) + ces = None + if not self.am_getOption( 'CEs', 'Any' ).lower() == "any": + ces = self.am_getOption( 'CEs', [] ) + + result = Resources.getQueues( community=self.vo, + siteList=siteNames, + ceList=ces, + ceTypeList=ceTypes, + mode='Direct') + if not result['OK']: + return result + + resourceDict = result['Value'] + result = self.getQueues(resourceDict) + if not result['OK']: + return result + + #if not siteNames: + # siteName = gConfig.getValue( '/DIRAC/Site', 'Unknown' ) + # if siteName == 'Unknown': + # return S_OK( 'No site specified for the SiteDirector' ) + # else: + # siteNames = [siteName] + #self.siteNames = siteNames if self.updateStatus: self.log.always( 'Pilot status update requested' ) @@ -81,21 +119,14 @@ def beginExecution( self ): if self.sendAccounting: self.log.always( 'Pilot accounting sending requested' ) - self.log.always( 'Site:', self.siteNames ) - ceTypes = self.am_getOption( 'CETypes', [] ) - if ceTypes: - self.log.always( 'CETypes:', ceTypes ) - ces = self.am_getOption( 'CEs', [] ) - if ceTypes: - self.log.always( 'CEs:', ces ) + self.log.always( 'Sites:', siteNames ) + self.log.always( 'CETypes:', ceTypes ) + self.log.always( 'CEs:', ces ) self.log.always( 'GenericPilotDN:', self.genericPilotDN ) self.log.always( 'GenericPilotGroup:', self.genericPilotGroup ) self.localhost = socket.getfqdn() self.proxy = '' - result = self.getQueues() - if not result['OK']: - return result if self.queueDict: self.log.always( "Agent will serve queues:" ) @@ -106,55 +137,23 @@ def beginExecution( self ): return S_OK() - def getQueues( self ): + def getQueues( self, resourceDict ): """ Get the list of relevant CEs and their descriptions """ self.queueDict = {} ceFactory = ComputingElementFactory() - ceTypes = self.am_getOption( 'CETypes', [] ) - ceConfList = self.am_getOption( 'CEs', [] ) - - for siteName in self.siteNames: - # Look up CE definitions in the site CS description - ceList = [] - gridType = siteName.split( '.' )[0] - result = gConfig.getSections( '/Resources/Sites/%s/%s/CEs' % ( gridType, siteName ) ) - if not result['OK']: - return S_ERROR( 'Failed to look up the CS for the site %s CEs' % siteName ) - if not result['Value']: - return S_ERROR( 'No CEs found for site %s' % siteName ) - ceTotalList = result['Value'] - for ce in ceTotalList: - if ( ceConfList and ce in ceConfList ) or not ceConfList: - ceType = gConfig.getValue( '/Resources/Sites/%s/%s/CEs/%s/CEType' % ( gridType, siteName, ce ), 'Unknown' ) - result = gConfig.getOptionsDict( '/Resources/Sites/%s/%s/CEs/%s' % ( gridType, siteName, ce ) ) - if not result['OK']: - return S_ERROR( 'Failed to look up the CS for ce %s' % ce ) - ceDict = result['Value'] - if "SubmissionMode" in ceDict and ceDict['SubmissionMode'].lower() == "direct": - if ceType in ceTypes: - ceList.append( ( ce, ceType, ceDict ) ) - - for ce, ceType, ceDict in ceList: - section = '/Resources/Sites/%s/%s/CEs/%s/Queues' % ( gridType, siteName, ce ) - result = gConfig.getSections( section ) - if not result['OK']: - return S_ERROR( 'Failed to look up the CS for queues' ) - if not result['Value']: - return S_ERROR( 'No Queues found for site %s, ce %s' % ( siteName, ce ) ) - - queues = result['Value'] - for queue in queues: - result = gConfig.getOptionsDict( '%s/%s' % ( section, queue ) ) - if not result['OK']: - return S_ERROR( 'Failed to look up the CS for ce,queue %s,%s' % ( ce, queue ) ) + for site in resourceDict: + for ce in resourceDict[site]: + ceDict = resourceDict[site][ce] + qDict = ceDict.pop('Queues') + for queue in qDict: queueName = '%s_%s' % ( ce, queue ) self.queueDict[queueName] = {} - self.queueDict[queueName]['ParametersDict'] = result['Value'] + self.queueDict[queueName]['ParametersDict'] = qDict[queue] self.queueDict[queueName]['ParametersDict']['Queue'] = queue - self.queueDict[queueName]['ParametersDict']['Site'] = siteName + self.queueDict[queueName]['ParametersDict']['Site'] = site self.queueDict[queueName]['ParametersDict']['GridEnv'] = self.gridEnv self.queueDict[queueName]['ParametersDict']['Setup'] = gConfig.getValue( '/DIRAC/Setup', 'unknown' ) # Evaluate the CPU limit of the queue according to the Glue convention @@ -172,17 +171,17 @@ def getQueues( self ): if not os.path.exists( qwDir ): os.makedirs( qwDir ) self.queueDict[queueName]['ParametersDict']['WorkingDirectory'] = qwDir - queueDict = dict( ceDict ) - queueDict.update( self.queueDict[queueName]['ParametersDict'] ) + ceQueueDict = dict( ceDict ) + ceQueueDict.update( self.queueDict[queueName]['ParametersDict'] ) result = ceFactory.getCE( ceName = ce, - ceType = ceType, - ceParametersDict = queueDict ) + ceType = ceDict['CEType'], + ceParametersDict = ceQueueDict ) if not result['OK']: return result self.queueDict[queueName]['CE'] = result['Value'] self.queueDict[queueName]['CEName'] = ce - self.queueDict[queueName]['CEType'] = ceType - self.queueDict[queueName]['Site'] = siteName + self.queueDict[queueName]['CEType'] = ceDict['CEType'] + self.queueDict[queueName]['Site'] = site self.queueDict[queueName]['QueueName'] = queue result = self.queueDict[queueName]['CE'].isValid() if not result['OK']: @@ -217,6 +216,21 @@ def submitJobs( self ): """ Go through defined computing elements and submit jobs if necessary """ + # Check that there is some work at all + setup = CSGlobals.getSetup() + tqDict = { 'Setup':setup, + 'CPUTime': 9999999 } + if self.vo: + tqDict['Community'] = self.vo + if self.group: + tqDict['OwnerGroup'] = self.group + result = taskQueueDB.matchAndGetTaskQueue( tqDict ) + if not result[ 'OK' ]: + return result + if not result['Value']: + self.log.verbose('No Waiting jobs suitable for the director') + return S_OK() + # Check if the site is allowed in the mask result = jobDB.getSiteMask() if not result['OK']: @@ -234,7 +248,8 @@ def submitJobs( self ): if 'CPUTime' in self.queueDict[queue]['ParametersDict'] : queueCPUTime = int( self.queueDict[queue]['ParametersDict']['CPUTime'] ) else: - return S_ERROR( 'CPU time limit is not specified for queue %s' % queue ) + self.log.warn( 'CPU time limit is not specified for queue %s, skipping...' % queue ) + continue if queueCPUTime > self.maxQueueLength: queueCPUTime = self.maxQueueLength @@ -261,6 +276,10 @@ def submitJobs( self ): self.log.info( 'Site not in the mask %s' % siteName ) self.log.info( 'Removing "Site" from matching Dict' ) del ceDict[ 'Site' ] + if self.vo: + ceDict['Community'] = self.vo + if self.group: + ceDict['OwnerGroup'] = self.group result = taskQueueDB.getMatchingTaskQueues( ceDict ) @@ -405,12 +424,13 @@ def __getPilotOptions( self, queue, pilotsToSubmit ): pilotOptions.append( '-M %s' % 5 ) # Debug - pilotOptions.append( '-d' ) + if self.pilotLogLevel.lower() == 'debug': + pilotOptions.append( '-d' ) # CS Servers csServers = gConfig.getValue( "/DIRAC/Configuration/Servers", [] ) pilotOptions.append( '-C %s' % ",".join( csServers ) ) # DIRAC Extensions - extensionsList = getCSExtensions() + extensionsList = CSGlobals.getCSExtensions() if extensionsList: pilotOptions.append( '-e %s' % ",".join( extensionsList ) ) # Requested CPU time @@ -435,6 +455,9 @@ def __getPilotOptions( self, queue, pilotsToSubmit ): if 'CPUNormalizationFactor' in queueDict: pilotOptions.append( "-o '/LocalSite/CPUNormalizationFactor=%s'" % queueDict['CPUNormalizationFactor'] ) + if self.group: + pilotOptions.append( '-G %s' % self.group ) + self.log.verbose( "pilotOptions: ", ' '.join( pilotOptions ) ) return pilotOptions diff --git a/WorkloadManagementSystem/private/gLitePilotDirector.py b/WorkloadManagementSystem/private/gLitePilotDirector.py index 6bd4895be90..10272842e5f 100644 --- a/WorkloadManagementSystem/private/gLitePilotDirector.py +++ b/WorkloadManagementSystem/private/gLitePilotDirector.py @@ -181,10 +181,10 @@ def _getChildrenReferences( self, proxy, parentReference, taskQueueID ): if not ret['OK']: self.log.error( 'Failed to execute Job Status', ret['Message'] ) - return False + return [] if ret['Value'][0] != 0: self.log.error( 'Error executing Job Status:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) ) - return False + return [] self.log.info( 'Job Status Execution Time: %.2f' % ( time.time() - start ) ) stdout = ret['Value'][1]