问题描述:

In the following code, I am trying to fork a process for running a command and then capture back the result when the child subprocess has exited.

At the end, a loop polls a global variable to wait for the child process to finish, so that the parent process does not exit before the child, while the command itself still runs without blocking the parent. The code works fine about 9 times out of 10, but once in a while it fails with the error shown below.

The error occurs when it appears that subprocess.Popen has returned None, but I am not sure why that would happen randomly.

Can somebody please help me figure out what is going wrong here?

Machine details

[root@1-0-0-9 /]# uname -a

Linux 1-0-0-9 3.10.0-229.el7.x86_64 #1 SMP Thu Jan 29 18:37:38 EST 2015 x86_64 x86_64 x86_64 GNU/Linux

Code:

#!/usr/bin/env python

import os

import subprocess

import signal

import time

flag = False

# Runs a command in a child process and collects its output from a SIGCHLD
# handler. (Indentation of this listing was lost; the methods below belong to
# the class.)
# NOTE(review): this is the version that fails intermittently. The handler can
# run before run_command() has stored the Popen object, which is the
# AttributeError shown in the failing traceback.
class Utils(object):

def __init__(self):

# Despite the name, this later holds the Popen object, not a pid.
self.child_pid = None

# The handler is installed before any child exists, so it can fire at any
# point from here on, including while Popen() is still constructing.
signal.signal(signal.SIGCHLD, self.sigchld_handler)

def sigchld_handler(self, *args):

print "handling SIGCHLD"

# Still None if the signal arrives before the assignment in run_command()
# has completed.
p = self.child_pid

# communicate() reads the child's output until EOF and waits for it to exit;
# [0] is stdout (stderr is merged into it by run_command).
stdout_val = p.communicate()[0]

# retcode is assigned but never read; the print below uses p.returncode.
retcode = p.returncode

print p.returncode, stdout_val.strip()

self.child_pid = None

global flag

# Tells the busy-wait loop at the bottom of the script that the child is done.
flag = False

# timeout is accepted but not used anywhere in this method.
def run_command(self, cmnd, env=None, cwd=None, timeout=0):

global flag

# Set before the child is started so the final wait loop sees it as running.
flag = True

# Plain whitespace split: quoted arguments containing spaces are not supported.
cmnd = cmnd.split()

# The assignment happens only after Popen() returns; a SIGCHLD delivered
# during the call finds self.child_pid still None in the handler.
self.child_pid =subprocess.Popen(cmnd, stdin=None, bufsize=-1, env=env,

stdout=subprocess.PIPE, stderr=subprocess.STDOUT,

close_fds=True, cwd=cwd, preexec_fn=os.setsid)

print "Invoked child process " , self.child_pid.pid

# Driver: start the command, do some unrelated work, then wait for the
# SIGCHLD handler to clear the global flag.
print "Running command .."

# The Utils instance is not kept in a variable; the handler registered in
# __init__ holds a reference to its bound method.
Utils().run_command("ls -lrt")

# Stand-in for other work done while the child runs; prints every 1000th i.
for i in xrange(10000):

if not i % 1000:

print i

# NOTE(review): indentation was lost in this listing; the sample output shows a
# single "False" at the end, which suggests this runs once after the loop.
print flag

i = 0

# Busy-wait until sigchld_handler sets flag to False, so the parent does not
# exit before the child has been handled.
while flag:

i = i + 1

Correct (Desired) Output:

Running command ..

Invoked child process 9703

1000

2000

3000

4000

5000

handling SIGCHLD

0 total 52

drwxr-xr-x. 2 root root 6 Mar 13 2014 srv

drwxr-xr-x. 2 root root 6 Mar 13 2014 mnt

drwxr-xr-x. 2 root root 6 Mar 13 2014 media

drwxr-xr-x. 2 root root 6 Mar 13 2014 home

lrwxrwxrwx. 1 root root 7 Jan 9 2016 bin -> usr/bin

lrwxrwxrwx. 1 root root 9 Jan 9 2016 lib64 -> usr/lib64

lrwxrwxrwx. 1 root root 7 Jan 9 2016 lib -> usr/lib

lrwxrwxrwx. 1 root root 8 Jan 9 2016 sbin -> usr/sbin

drwxr-xr-x. 13 root root 4096 Jan 9 2016 usr

drwxr-xr-x. 4 root root 28 Nov 18 16:03 opt

dr-xr-xr-x. 4 root root 4096 Nov 18 16:06 boot

dr-xr-xr-x 178 root root 0 Nov 22 21:53 proc

dr-xr-xr-x 13 root root 0 Nov 22 21:53 sys

drwxr-xr-x. 22 root root 4096 Nov 22 21:53 var

drwxr-xr-x 19 root root 3060 Nov 22 21:53 dev

drwxr-xr-x. 124 root root 8192 Nov 22 21:53 etc

dr-xr-x---. 8 root root 4096 Nov 22 21:53 root

-rw-r--r-- 1 root root 573 Nov 22 22:15 a.py

-rw-r--r-- 1 root root 1108 Nov 22 22:15 cmnd.py

-rw-r--r-- 1 root root 1800 Nov 22 22:15 fork.py

-rw-r--r-- 1 root root 1368 Nov 22 22:15 ipc_pipe.py

-rw-r--r-- 1 root root 491 Nov 22 22:15 threads.py

drwxr-xr-x 35 root root 1000 Nov 22 22:35 run

drwxrwxrwt. 8 root root 4096 Nov 22 22:35 tmp

6000

7000

8000

9000

False

Error (Failing case):

Running command ..

handling SIGCHLD

handling SIGCHLD

handling SIGCHLD

Traceback (most recent call last):

File "cmnd.py", line 37, in <module>

Utils().run_command("ls -lrt")

File "cmnd.py", line 33, in run_command

close_fds=True, cwd=cwd, preexec_fn=os.setsid)

File "/usr/lib64/python2.7/subprocess.py", line 711, in __init__

errread, errwrite)

File "/usr/lib64/python2.7/subprocess.py", line 1296, in _execute_child

data = _eintr_retry_call(os.read, errpipe_read, 1048576)

File "/usr/lib64/python2.7/subprocess.py", line 478, in _eintr_retry_call

return func(*args)

File "cmnd.py", line 19, in sigchld_handler

stdout_val = p.communicate()[0]

AttributeError: 'NoneType' object has no attribute 'communicate'

网友答案:

I was able to duplicate the NoneType error and it is clearly a race condition. For proof, I imported traceback and added print traceback.print_stack(args[1]) to the signal handler. The stack trace shows that Popen was still waiting on os.fdopen when the signal arrived and self.child_pid was not yet assigned.

Running command ..
handling SIGCHLD
  File "c.py", line 39, in <module>
    Utils().run_command("ls -lrt")
  File "c.py", line 35, in run_command
    close_fds=True, cwd=cwd, preexec_fn=os.setsid)
  File "/usr/lib/python2.7/subprocess.py", line 740, in __init__
    self.stdout = os.fdopen(c2pread, 'rb', bufsize)
None
Traceback (most recent call last):
  File "c.py", line 39, in <module>
    Utils().run_command("ls -lrt")
  File "c.py", line 35, in run_command
    close_fds=True, cwd=cwd, preexec_fn=os.setsid)
  File "/usr/lib/python2.7/subprocess.py", line 740, in __init__
    self.stdout = os.fdopen(c2pread, 'rb', bufsize)
  File "c.py", line 21, in sigchld_handler
    stdout_val = p.communicate()[0]
AttributeError: 'NoneType' object has no attribute 'communicate'

There is no good way to solve this problem with signals that I can think of. But there are other problems with your code, such as the potential for deadlock if the child process stdout or stderr pipes fill. Instead of signals, you could use a background thread to call Popen.communicate and use the poll and wait methods to see if the process is complete.

#!/usr/bin/env python

import os
import subprocess
import time
import threading

flag = False
class Utils(object):
    """Run a command in a child process and collect its output on a thread.

    A background thread calls ``Popen.communicate()`` so the child's output
    pipe is drained while the caller carries on with other work. The caller
    checks progress with ``poll()`` and collects the result with ``wait()``;
    the captured output is then available as ``out`` and ``err``.
    """

    def __init__(self):
        # Popen object for the child; None until run_command() is called.
        self.child = None
        # Reader thread running communicate(); reset to None by wait().
        self._thread = None
        # Captured output. Defined up front so that reading them before the
        # reader thread has finished yields None rather than AttributeError.
        self.out = None
        self.err = None

    def run_command(self, cmnd, env=None, cwd=None, timeout=0):
        """Start *cmnd* in a new session and begin collecting its output.

        Args:
            cmnd: Command line as a whitespace-separated string (split with
                ``str.split``, so quoting is not interpreted), or an already
                split sequence of arguments.
            env: Environment mapping passed to ``subprocess.Popen``.
            cwd: Working directory passed to ``subprocess.Popen``.
            timeout: Accepted for interface compatibility; not used.

        Returns:
            This object, so the call can be chained.
        """
        # The module-level flag is set here as before; nothing in this class
        # clears it again.
        global flag
        flag = True

        argv = cmnd.split() if hasattr(cmnd, "split") else list(cmnd)

        # Forget any result left over from a previous command.
        self.out = None
        self.err = None

        # stderr is merged into stdout, so ``err`` stays None after the run.
        self.child = subprocess.Popen(argv, stdin=None, bufsize=-1, env=env,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT,
                                      close_fds=True, cwd=cwd,
                                      preexec_fn=os.setsid)
        self._thread = threading.Thread(target=self._communicate_thread)
        self._thread.start()
        # Single-argument form prints the same bytes on Python 2 and 3.
        print("Invoked child process  %d" % self.child.pid)
        return self

    def _communicate_thread(self):
        """Thread body: drain the child's output and wait for it to exit."""
        self.out, self.err = self.child.communicate()

    def poll(self):
        """Return the exit status, or None while the command is running.

        While the reader thread is alive the command is reported as still
        running and the Popen object is left alone: communicate() is waiting
        on the child in that thread, and on Python 2.7 a second thread
        reaping the same pid can leave a bogus return code of 0 behind.
        A non-None result therefore also means ``out`` is complete.
        """
        reader = self._thread
        if reader is not None and reader.is_alive():
            return None
        return self.child.poll()

    def wait(self):
        """Block until the command has finished and return its exit status.

        The reader thread is joined first, so ``out``/``err`` are populated
        and the exit status has already been recorded by communicate() by the
        time the Popen object is asked for it.
        """
        reader = self._thread
        if reader is not None:
            reader.join()
            self._thread = None
        return self.child.wait()

# Driver: start the command, then poll for completion instead of blocking.
print("Running command ..")
cmd = Utils().run_command("ls -lrt")
while True:
    # Poll once per iteration so the status printed is the status acted on.
    status = cmd.poll()
    print("poll %s" % (status,))
    if status is not None:
        break
    time.sleep(.1)

# wait() also joins the reader thread, so out/err are complete afterwards.
print("done %s" % (cmd.wait(),))
print(cmd.out)
print(cmd.err)
相关阅读:
Top