def stop(self, graceful=True):
    """\
    Close the listening sockets and stop every worker.

    :attr graceful: boolean. If True (the default) the workers are sent
    SIGTERM, otherwise SIGQUIT. The method then polls ``self.WORKERS``
    until it is empty or ``self.cfg.graceful_timeout`` seconds have
    passed, and finally sends SIGKILL through ``kill_workers``.
    """
    # Sockets are unlinked only when both reexec_pid and master_pid are 0
    # and self.systemd is falsy.
    no_other_master = self.reexec_pid == self.master_pid == 0
    should_unlink = no_other_master and not self.systemd
    sock.close_sockets(self.LISTENERS, should_unlink)
    self.LISTENERS = []

    first_signal = signal.SIGTERM if graceful else signal.SIGQUIT

    # The deadline is fixed before the first signal is delivered.
    deadline = time.time() + self.cfg.graceful_timeout

    # instruct the workers to exit
    self.kill_workers(first_signal)

    # wait until the graceful timeout
    while self.WORKERS and time.time() < deadline:
        time.sleep(0.1)

    # Whatever is still registered gets the non-ignorable signal.
    self.kill_workers(signal.SIGKILL)
# Example Python source code that uses SIGKILL
def murder_workers(self):
    """\
    Kill unused/idle workers

    Does nothing when ``self.timeout`` is falsy. Otherwise every worker
    whose ``tmp.last_update()`` is more than ``self.timeout`` seconds old
    is signalled: SIGABRT (plus a critical log line) the first time, and
    SIGKILL once the worker is already flagged as ``aborted``.
    """
    if not self.timeout:
        return

    # Iterate over a snapshot of the worker table.
    for pid, worker in list(self.WORKERS.items()):
        try:
            recently_seen = (
                time.time() - worker.tmp.last_update() <= self.timeout
            )
        except (OSError, ValueError):
            # The heartbeat could not be read; skip this worker.
            continue
        if recently_seen:
            continue

        if worker.aborted:
            # Already asked to abort on an earlier pass: escalate.
            self.kill_worker(pid, signal.SIGKILL)
            continue

        self.log.critical("WORKER TIMEOUT (pid:%s)", pid)
        worker.aborted = True
        self.kill_worker(pid, signal.SIGABRT)
def close(self):
    """Close any open window.

    Note that this only works with non-blocking methods.

    The child held in ``self._process`` is sent SIGINT and given one
    second to exit. If it is still running after that it is sent SIGKILL
    and then waited on, so the killed child is reaped rather than left
    behind as a zombie. ``self._process`` is reset to ``None`` afterwards.
    Nothing happens when ``self._process`` is falsy.
    """
    process = self._process
    if not process:
        return

    # Be nice first.
    process.send_signal(signal.SIGINT)
    # If it doesn't close itself promptly, be brutal.
    try:
        process.wait(timeout=1)
    except subprocess.TimeoutExpired:
        process.send_signal(signal.SIGKILL)
        # Collect the exit status of the killed child.
        process.wait()

    # Clean up.
    self._process = None
def close(self, force=True):  # File-like object.
    """Close the connection with the child application.

    Calling close() more than once is valid; once ``self.closed`` is set
    the call is a no-op. This emulates standard Python behavior with
    files. Set force to True if you want to make sure that the child is
    terminated (SIGKILL is sent if the child ignores SIGHUP and SIGINT).

    Raises ExceptionPexpect when the child is still alive after the
    descriptor has been closed and ``terminate(force)`` reports failure.
    """
    if self.closed:
        return

    self.flush()
    os.close(self.child_fd)
    # Give kernel time to update process status.
    time.sleep(self.delayafterclose)

    if self.isalive() and not self.terminate(force):
        raise ExceptionPexpect('close() could not terminate the child using terminate()')

    # Mark the descriptor as gone only after the child has been dealt with.
    self.child_fd = -1
    self.closed = True
    #self.pid = None
def terminate_process_and_children(self, name):
    """
    Recursively terminate all children of
    respective process

    The process is looked up as ``self.jobs[name]['process']``; every
    descendant reported by psutil is sent SIGKILL. The job's own process
    is not signalled here. An unknown job name is reported on stdout and
    the call returns without doing anything else.

    @args:
        name: Name of the job
    """
    if name not in self.jobs:
        # Format the name into the message and stop: there is no process
        # entry to look up for an unknown job.
        print("[%s] does not exist as a process!" % name)
        return

    ppid = self.jobs[name]['process'].pid
    try:
        children = psutil.Process(ppid).children(recursive=True)
    except psutil.NoSuchProcess:
        # The parent is already gone; nothing to enumerate.
        return

    for proc in children:
        l.debug(proc)
        try:
            proc.send_signal(signal.SIGKILL)
        except (psutil.Error, OSError):
            # Best effort: the child may have exited already or may not
            # be ours to signal.
            pass
def test_wait_timeout_0(self):
    """Check ``Process.wait(0)`` before and after the process is killed.

    While the subprocess is running, ``wait(0)`` must raise
    ``psutil.TimeoutExpired``. After ``kill()`` the test retries
    ``wait(0)`` for up to two seconds and then checks the returned code
    (``signal.SIGKILL`` when ``POSIX`` is true, otherwise 0) and that the
    process is no longer running.
    """
    sproc = get_test_subprocess()
    p = psutil.Process(sproc.pid)
    self.assertRaises(psutil.TimeoutExpired, p.wait, 0)

    p.kill()
    deadline = time.time() + 2
    while True:
        try:
            exit_code = p.wait(0)
            break
        except psutil.TimeoutExpired:
            # Keep polling until the two second budget is used up, then
            # let the timeout propagate as a test error.
            if time.time() >= deadline:
                raise

    expected_code = signal.SIGKILL if POSIX else 0
    self.assertEqual(exit_code, expected_code)
    self.assertFalse(p.is_running())
def kill_last_pid():
    """Kill the last pid. See:
    https://github.com/google/clusterfuzz-tools/issues/299

    Reads a pid from LAST_PID_FILE, sends SIGKILL to the process group
    with that id, and always removes the file afterwards. Every failure
    (missing file, unparsable content, group already gone) is ignored.
    """
    # We have found that, when invoking `sv stop python-daemon`, the process
    # in call() isn't killed. Therefore, we need to explicitly kill it and
    # all of its children.
    #
    # We hope that pid recycling is not that fast.
    try:
        with open(LAST_PID_FILE, 'r') as f:
            pid = int(f.read().strip())
        # killpg(0) targets the caller's own process group and POSIX leaves
        # pgrp <= 1 undefined, so never forward such a value from the file.
        if pid > 1:
            os.killpg(pid, signal.SIGKILL)
    except Exception:  # pylint: disable=broad-except
        # Best effort, but let KeyboardInterrupt/SystemExit through.
        pass
    finally:
        try:
            os.remove(LAST_PID_FILE)
        except OSError:
            pass
def test_fail(self):
    """Test failing to kill."""
    # Four killpg calls that all return None without raising.
    self.mock.killpg.side_effect = [None] * 4

    with self.assertRaises(error.KillProcessFailedError) as cm:
        common.kill(self.proc)

    self.assertEqual(
        '`cmd` (pid=1234) cannot be killed.',
        cm.exception.message)

    # Expected order: SIGTERM twice, then SIGKILL twice, with a
    # three second sleep after each signal.
    expected_signals = [
        signal.SIGTERM, signal.SIGTERM, signal.SIGKILL, signal.SIGKILL]
    self.assert_exact_calls(
        self.mock.killpg,
        [mock.call(1234, signum) for signum in expected_signals])
    self.assert_exact_calls(
        self.mock.sleep, [mock.call(3) for _ in expected_signals])
def kill(proc):
    """Kill a process multiple times.

    Sends SIGTERM twice and then SIGKILL twice to the process group whose
    id is ``proc.pid``, sleeping three seconds after each signal. An
    OSError whose errno equals NO_SUCH_PROCESS_ERRNO ends the sequence
    quietly; any other OSError propagates. If all four signals are sent
    without such an error, KillProcessFailedError is raised.

    See: https://github.com/google/clusterfuzz-tools/pull/301"""
    escalation = (
        signal.SIGTERM, signal.SIGTERM, signal.SIGKILL, signal.SIGKILL)
    try:
        for signum in escalation:
            logger.debug('Killing pid=%s with %s', proc.pid, signum)
            # Process leader id is the group id.
            os.killpg(proc.pid, signum)
            # Wait for any shutdown stacktrace to be dumped.
            time.sleep(3)
        # killpg never failed, so the group was still there every time.
        raise error.KillProcessFailedError(proc.args, proc.pid)
    except OSError as e:
        if e.errno == NO_SUCH_PROCESS_ERRNO:
            return
        raise
def abort(self):
    """
    Abort the executing command

    Sets the ``_abort`` event. When a pid has been recorded in
    ``self._pid`` the process is sent SIGKILL (failures are ignored) and
    the caller then waits up to ten seconds in ``join``. Without a pid no
    signal is sent, which also avoids passing 0 or None to ``kill``.
    """
    self._abort.set()
    if not self._pid:
        return

    try:
        kill(self._pid, signal.SIGKILL)
    except Exception:
        # Best effort (e.g. the process already exited). Unlike a bare
        # except this lets KeyboardInterrupt/SystemExit propagate.
        pass
    self.join(timeout=10.0)
def execute(cmd, timeout=60, **kwargs):
    """
    Executes the given shell command

    Args:
        cmd: List of command arguments. A plain string is wrapped as
            ``["bash", "-c", cmd]``.
        timeout: maximum alloted time for the command
        **kwargs: passes to LocalShell.spawn
    Returns:
        An execution result.
    Raises:
        NoSuchCommandError, RunProcessError, FileNotFoundError
        TimeoutError: the process was still running after ``timeout``
            seconds; it is sent SIGKILL before the error is raised.
    """
    # Local import: only ``time`` (the function) is known to be in scope.
    from time import sleep

    poll_interval = 0.01

    shell = LocalShell()

    #It is unlikely that someone actually intends to supply
    #a string based on how spur works.
    if isinstance(cmd, str):
        cmd = ["bash", "-c", cmd]

    process = shell.spawn(cmd, store_pid=True, **kwargs)
    start_time = time()

    while process.is_running():
        if time() - start_time > timeout:
            process.send_signal(SIGKILL)
            raise TimeoutError(cmd, timeout)
        # Yield the CPU between polls instead of spinning.
        sleep(poll_interval)

    return process.wait_for_result()
def execute(self, *args):
    """Run ``ExecutableDevice.execute``, optionally crashing first.

    When ``self.crashEnabled`` is true the current process sends itself
    SIGKILL before the base implementation would be called.
    """
    if self.crashEnabled:
        own_pid = os.getpid()
        os.kill(own_pid, signal.SIGKILL)
    return ExecutableDevice.execute(self, *args)
def killChildProcesses(parentPid):
    """Send SIGKILL to every descendant of ``parentPid`` and wait on them.

    For each pid returned by ``getChildren(parentPid)`` the function first
    recurses into that child, then kills the child itself. After all
    children have been signalled, each one is waited on with
    ``os.waitpid``. OSError from either call is ignored.
    """
    def _ignoring_oserror(syscall, *args):
        # Run one OS call and drop any OSError it raises.
        try:
            syscall(*args)
        except OSError:
            pass

    children = getChildren(parentPid)

    for child in children:
        # A child's own descendants are handled before the child itself.
        killChildProcesses(child)
        _ignoring_oserror(os.kill, child, signal.SIGKILL)

    for child in children:
        _ignoring_oserror(os.waitpid, child, 0)
# create a class for consuming events
def killChildProcesses(parentPid):
    """Kill the whole process tree below ``parentPid``.

    The pids come from ``getChildren(parentPid)``. Each child's subtree is
    killed recursively before the child receives SIGKILL; a second pass
    then calls ``os.waitpid`` on every child. OSError is ignored in both
    passes.
    """
    descendants = getChildren(parentPid)

    # First pass: signal, grandchildren before children.
    for childPid in descendants:
        killChildProcesses(childPid)
        try:
            os.kill(childPid, signal.SIGKILL)
        except OSError:
            pass

    # Second pass: wait on each direct child.
    for childPid in descendants:
        try:
            os.waitpid(childPid, 0)
        except OSError:
            pass
def test_ReRegDevMgrDuplicate(self):
    """Launch a second DeviceManager after the first one was SIGKILLed.

    The first node is launched, checked, and terminated with SIGKILL.
    A second node is then launched and the test checks that the domain
    still lists exactly one DeviceManager, that it has one registered
    device, and that its label is "BasicTestDeviceSameDevMgrId_node".
    """
    first_dcd = "/nodes/test_BasicTestDevice_node/DeviceManager.dcd.xml"
    second_dcd = "/nodes/test_BasicTestDeviceSameDevMgrId_node/DeviceManager.dcd.xml"

    def check_single_registration(dev_mgr):
        # NOTE These assert check must be kept in-line with the DeviceManager.dcd.xml
        self.assertEqual(len(self._domMgr._get_deviceManagers()), 1)
        self.assertEqual(len(dev_mgr._get_registeredDevices()), 1)

    # These two nodes use the same identifier, but have different names to distinguish them
    devmgr_nb, devMgr = self.launchDeviceManager(first_dcd)
    self.assertNotEqual(devMgr, None)
    check_single_registration(devMgr)

    # Kill the first node without giving it a chance to shut down.
    self.terminateChild(devmgr_nb, signals=(signal.SIGKILL,))
    self.assertNotEqual(devmgr_nb.poll(), None)

    devmgr_nb, devMgr = self.launchDeviceManager(second_dcd)
    self.assertNotEqual(devMgr, None)
    check_single_registration(devMgr)

    # Verify that the second DeviceManager is no longer alive,
    # This is REDHAWK specific, the spec would have let this go without
    # giving the user clear warning that something was wrong
    self.assertEqual(len(self._domMgr._get_deviceManagers()), 1)
    devMgr = self._domMgr._get_deviceManagers()[0]
    # If the second one won, it would be DeviceManager2
    self.assertEqual(devMgr._get_label(), "BasicTestDeviceSameDevMgrId_node")
    self.assertEqual(len(devMgr._get_registeredDevices()), 1)
def _get_identifier(self):
    """Return the value stored in ``self._identifier``."""
    identifier = self._identifier
    return identifier
# if SIGKILL is used (simulating a nodeBooter unexpected abort)
# the next attempt to communicate with the domain manager will
# throw a COMM_FAILURE because the connection died unexpectedly
# Clients that hold references to the DomainManager should
# include code similar to that below
def test_DeviceManagerDisappear(self):
    """Restart the domain manager after it and the device manager die.

    The domain manager is killed with SIGKILL and the device manager with
    SIGTERM. A new domain manager is launched on the same endpoint and
    database; both the new and the original reference must exist and
    report the same identifier, with no device managers and one
    application factory.
    """
    expected_id = 'DCE:5f52f645-110f-4142-8cc9-4d9316ddd958'

    def launch_domain():
        return self.launchDomainManager(endpoint="giop:tcp::5679", dbURI=self._dbfile)

    def kill_and_wait(child, signum, failure):
        # Signal the child and fail the test if it does not terminate.
        os.kill(child.pid, signum)
        if not self.waitTermination(child):
            self.fail(failure)

    self._nb_domMgr, self._domMgr = launch_domain()
    self._nb_devMgr, devMgr = self.launchDeviceManager("/nodes/test_BasicTestDevice_node/DeviceManager.dcd.xml")

    self.assertEqual(len(self._domMgr._get_applicationFactories()), 0)
    self.assertEqual(len(self._domMgr._get_applications()), 0)

    self._domMgr.installApplication("/waveforms/CommandWrapper/CommandWrapper.sad.xml")
    self.assertEqual(len(self._domMgr._get_applicationFactories()), 1)
    self.assertEqual(len(self._domMgr._get_applications()), 0)

    # Ensure the expected device is available
    self.assertNotEqual(devMgr, None)
    self.assertEqual(len(devMgr._get_registeredDevices()), 1)
    # The fetched device is not used afterwards; the lookup itself is kept.
    device = devMgr._get_registeredDevices()[0]

    # Kill the domainMgr and device manager
    kill_and_wait(self._nb_domMgr, signal.SIGKILL, "Domain Manager Failed to Die")
    kill_and_wait(self._nb_devMgr, signal.SIGTERM, "Device Manager Failed to Die")

    # Start the domainMgr again
    self._nb_domMgr, newDomMgr = launch_domain()

    # Verify our client reference still is valid
    self.assertEqual(False, newDomMgr._non_existent())
    self.assertEqual(newDomMgr._get_identifier(), expected_id)
    self.assertEqual(self._domMgr._get_identifier(), expected_id)
    self.assertEqual(False, self._domMgr._non_existent())
    self.assertEqual(len(self._domMgr._get_deviceManagers()), 0)
    self.assertEqual(len(self._domMgr._get_applicationFactories()), 1)
def test_ServicesRestored(self):
    """Check that services are still usable after a domain manager crash.

    Two device manager nodes are launched, the domain manager is killed
    with SIGKILL and relaunched on the same endpoint and database. The
    new domain manager must list two device managers and must be able to
    create the PortConnectServiceName application, whose ``runTest(1, [])``
    has to return a non-empty result.
    """
    def launch_domain():
        return self.launchDomainManager(endpoint="giop:tcp::5679", dbURI=self._dbfile)

    domBooter, domMgr = launch_domain()
    devBooter, devMgr = self.launchDeviceManager("/nodes/test_PortTestDevice_node/DeviceManager.dcd.xml")
    svcBooter, svcMgr = self.launchDeviceManager("/nodes/test_BasicService_node/DeviceManager.dcd.xml")

    # Make sure that the service node is up before killing the domain manager
    # NOTE(review): this poll has no upper bound on how long it waits.
    while len(svcMgr._get_registeredServices()) != 1:
        time.sleep(0.1)

    # Forcibly terminate the domain manager to simulate a crash
    os.kill(domBooter.pid, signal.SIGKILL)
    terminated = self.waitTermination(domBooter)
    if not terminated:
        self.fail("DomainManager failed to die")

    # Restart the domain manager
    domBooter, domMgr = launch_domain()

    # Check that the domain manager reconnected to the device managers.
    self.assertEqual(len(domMgr._get_deviceManagers()), 2)

    # Install the PortConnectServiceName application and try to create an
    # instance to verify that the domain manager is still aware of the services
    # that had previously been registered.
    domMgr.installApplication("/waveforms/PortConnectServiceName/PortConnectServiceName.sad.xml")
    self.assertEqual(len(domMgr._get_applicationFactories()), 1)
    appFact = domMgr._get_applicationFactories()[0]
    try:
        app = appFact.create(appFact._get_name(), [], [])
    except CF.ApplicationFactory.CreateApplicationError:
        self.fail("Unable to create application with service connection")

    # The BasicService provides a PropertySet interface, so verify that some
    # properties are returned from the service test of the PortTest component.
    testResults = app.runTest(1, [])
    self.assertNotEqual(len(testResults), 0)
def test_ApplicationUsesDevice(self):
    """Check that a usesdevice allocation survives a domain manager restart.

    After the application is created, the 'simple_alloc' property of the
    device labelled 'SADUsesDevice_1' must read 8. The domain manager is
    stopped with SIGTERM and relaunched; the value must still be 8, and
    must read 10 once the application found on the new domain manager has
    been released.
    """
    def check_simple_alloc(expected):
        # Query 'simple_alloc' on the SADUsesDevice_1 device and compare.
        # NOTE(review): the assertion is assumed to sit inside the label
        # check, so nothing is asserted if no device has that label.
        prop = CF.DataType(id='simple_alloc', value=any.to_any(None))
        for dev in devMgr._get_registeredDevices():
            if dev._get_label() == 'SADUsesDevice_1':
                allocRes = dev.query([prop])
                self.assertEqual(allocRes[0].value.value(), expected)

    self._nb_domMgr, self._domMgr = self.launchDomainManager(endpoint="giop:tcp::5679", dbURI=self._dbfile)
    self._nb_devMgr, devMgr = self.launchDeviceManager("/nodes/test_SADUsesDevice/DeviceManager.dcd.xml")

    self._domMgr.installApplication("/waveforms/SADUsesDeviceWave/SADUsesDeviceWaveExternalSimple.sad.xml")
    appFact = self._domMgr._get_applicationFactories()[0]
    app = appFact.create(appFact._get_name(), [], [])

    # Make sure that the allocation was made to the device
    check_simple_alloc(8)

    # Kill the domainMgr
    os.kill(self._nb_domMgr.pid, signal.SIGTERM)

    # TODO if SIGKILL is used (simulating a nodeBooter unexpected abort,
    # the IOR and the newly spawned domain manager do not work
    if not self.waitTermination(self._nb_domMgr):
        self.fail("Domain Manager Failed to Die")

    # Start the domainMgr again
    self._nb_domMgr, newDomMgr = self.launchDomainManager(endpoint="giop:tcp::5679", dbURI=self._dbfile)

    # Capacity still allocated to device
    check_simple_alloc(8)

    # Release app to free up device capacity to make sure usesdevicecapacties was properly restored
    newApp = newDomMgr._get_applications()[0]
    newApp.releaseObject()
    check_simple_alloc(10)
def test_ApplicationStartOrder(self):
    """Check that component started state survives a domain manager restart.

    The application is created and started, the domain manager is stopped
    with SIGTERM and relaunched, and the components must still report
    started. The application found on the new domain manager is then
    stopped and started again, checking the components' state each time.
    """
    def check_all_started(expected):
        # Every component seen before the restart must report `expected`.
        for c in comps:
            self.assertEqual(c.componentObject._get_started(), expected)

    self._nb_domMgr, self._domMgr = self.launchDomainManager(endpoint="giop:tcp::5679", dbURI=self._dbfile)
    self._nb_devMgr, devMgr = self.launchDeviceManager("/nodes/test_BasicTestDevice_node/DeviceManager.dcd.xml")

    self._domMgr.installApplication("/waveforms/CommandWrapperStartOrderTests/CommandWrapperWithOrder.sad.xml")
    appFact = self._domMgr._get_applicationFactories()[0]
    app = appFact.create(appFact._get_name(), [], [])
    app.start()

    comps = app._get_registeredComponents()
    check_all_started(True)

    # Kill the domainMgr
    os.kill(self._nb_domMgr.pid, signal.SIGTERM)

    # TODO if SIGKILL is used (simulating a nodeBooter unexpected abort,
    # the IOR and the newly spawned domain manager do not work
    if not self.waitTermination(self._nb_domMgr):
        self.fail("Domain Manager Failed to Die")

    # Start the domainMgr again
    self._nb_domMgr, newDomMgr = self.launchDomainManager(endpoint="giop:tcp::5679", dbURI=self._dbfile)

    # Components should all still be started
    check_all_started(True)

    # Stop application to make sure that start order Resource variables were recovered properly
    app = newDomMgr._get_applications()[0]
    app.stop()
    check_all_started(False)

    # Start components to make sure that start also works
    app.start()
    check_all_started(True)