I want to run many processes in parallel with the ability to read their stdout at any time. How should I do it? Do I need to run a thread for each
subprocess.Popen() call, or what?
You can do it in a single thread.
Suppose you have a script that prints lines at random times:
#!/usr/bin/env python
# file: child.py
# Test child: prints 10 lines at random intervals, tagged with its
# command-line id and pid, flushing after each line so the parent sees
# output immediately.
import os
import random
import sys
import time

for i in range(10):
    # BUG FIX: the original called int(sys.argv) on the whole argv list,
    # which raises TypeError; the numeric id is the first argument.
    print("%2d %s %s" % (int(sys.argv[1]), os.getpid(), i))
    sys.stdout.flush()  # defeat block-buffering when stdout is a pipe
    time.sleep(random.random())  # random delay in [0, 1) seconds
#!/usr/bin/env python from __future__ import print_function from select import select from subprocess import Popen, PIPE # start several subprocesses processes = [Popen(['./child.py', str(i)], stdout=PIPE, bufsize=1, close_fds=True, universal_newlines=True) for i in range(5)] # read output timeout = 0.1 # seconds while processes: # remove finished processes from the list (O(N**2)) for p in processes[:]: if p.poll() is not None: # process ended print(p.stdout.read(), end='') # read the rest p.stdout.close() processes.remove(p) # wait until there is something to read rlist = select([p.stdout for p in processes], ,, timeout) # read a line from each process that has output ready for f in rlist: print(f.readline(), end='') #NOTE: it can block
A more portable solution (that should work on Windows, Linux, OSX) can use reader threads for each process, see Non-blocking read on a subprocess.PIPE in python.
os.pipe()-based solution that works on Unix and Windows:
#!/usr/bin/env python from __future__ import print_function import io import os import sys from subprocess import Popen ON_POSIX = 'posix' in sys.builtin_module_names # create a pipe to get data input_fd, output_fd = os.pipe() # start several subprocesses processes = [Popen([sys.executable, 'child.py', str(i)], stdout=output_fd, close_fds=ON_POSIX) # close input_fd in children for i in range(5)] os.close(output_fd) # close unused end of the pipe # read output line by line as soon as it is available with io.open(input_fd, 'r', buffering=1) as file: for line in file: print(line, end='') # for p in processes: p.wait()
You can also collect stdout from multiple subprocesses concurrently using Twisted:
#!/usr/bin/env python import sys from twisted.internet import protocol, reactor class ProcessProtocol(protocol.ProcessProtocol): def outReceived(self, data): print data, # received chunk of stdout from child def processEnded(self, status): global nprocesses nprocesses -= 1 if nprocesses == 0: # all processes ended reactor.stop() # start subprocesses nprocesses = 5 for _ in xrange(nprocesses): reactor.spawnProcess(ProcessProtocol(), sys.executable, args=[sys.executable, 'child.py'], usePTY=True) # can change how child buffers stdout reactor.run()
You don’t need to run a thread for each process. You can peek at the
stdout streams for each process without blocking on them, and only read from them if they have data available to read.
You do have to be careful not to accidentally block on them, though, if you’re not intending to.
You can poll
process.poll() until the process finishes, and run other stuff concurrently in the meantime:
# Poll a child process in a loop, doing other work between checks, then
# collect its output once it has exited.
import time
import sys
from subprocess import Popen, PIPE


def ex1() -> None:
    """Spawn a slow shell command and busy-poll until it completes."""
    cmd = 'sleep 2.1 && echo "happy friday"'
    child = Popen(cmd, shell=True, stderr=PIPE, stdout=PIPE)

    # poll() returns None while the child is still running
    while child.poll() is None:
        # do stuff here
        print('waiting')
        time.sleep(0.05)

    stdout_data, _ = child.communicate()
    print(stdout_data, file=sys.stderr)
    sys.stderr.flush()

    # child has exited, so poll() now reports its return code
    assert child.poll() == 0


ex1()