diff --git a/README.Rmd b/README.Rmd index dcd3c7c..7ef9a5e 100644 --- a/README.Rmd +++ b/README.Rmd @@ -29,7 +29,23 @@ import ezpq # `ezpq`: an easy parallel queueing system. -Read this on [GitHub](https://github.com/dm3ll3n/ezpq) or [my site](https://www.donaldmellenbruch.com/project/ezpq/). +> Read this on [GitHub](https://github.com/dm3ll3n/ezpq) or [my site](https://www.donaldmellenbruch.com/project/ezpq/). + +## How to get it + +Install from [PyPI](https://pypi.org/project/ezpq/) with: + +```python +pip install ezpq +``` + +Optional packages: + +```python +pip install pandas # required for plots +pip install plotnine # required for plots +pip install tqdm # required for progress bars +``` ## Overview @@ -59,22 +75,6 @@ The queueing system uses `multiprocessing.Process` by default and can also run j * Built-in logging to CSV. * Customizable visualizations of queue operations. -## How to get it - -Install from [PyPI](https://pypi.org/project/ezpq/) with: - -```python -pip install ezpq -``` - -Optional packages: - -```python -pip install pandas # required for plots -pip install plotnine # required for plots -pip install tqdm # required for progress bars -``` - ## Quickstart Suppose you wanted to speed up the following code, which runs 60 operations that take anywhere from 0s to 2s. With an average job time of ~1s, this operation should take ~60s. @@ -311,6 +311,44 @@ with ezpq.Queue(6) as Q: ![](docs/imgs/tqdm_map.gif) +### starmap + +`starmap` is similar to `map`, but operates on a list of lists, with each nested list being unpacked as arguments to the function. + +```{python, echo=TRUE} +def my_pow(x, k): + return '{}^{} = {}'.format(x, k, x**k) + +# list of lists to iterate over. 
+args_list = [[x, x%4] # (x, k) + for x in range(100)] + +# starmap +with ezpq.Queue(10) as Q: + output = Q.starmap(my_pow, iterable=args_list) + +[x['output'] for x in output[:10]] +``` + +### starmapkw + +Same as `starmap`, but operates on a list of *dicts* to be expanded as kwargs to the function. + +```{python, echo=TRUE} +def my_pow(x, k): + return '{}^{} = {}'.format(x, k, x**k) + +# list of dicts to iterate over. +kwargs_list = [{ 'x':x, 'k':x%4 } # (x, k) + for x in range(100)] + +# starmapkw +with ezpq.Queue(10) as Q: + output = Q.starmapkw(my_pow, iterable=kwargs_list) + +[x['output'] for x in output[:10]] +``` + ### dispose The queueing operations performed by `ezpq.Queue` are performed on a periodic basis. By default, the `poll` parameter for a Queue is `0.1` seconds. This "pulse" thread will continue firing until the Queue is disposed of. @@ -336,7 +374,7 @@ In the above graphic, notice how same-colored bars never overlap. These bars rep ### Lane Error Handling -You may want to short-circuit a synchronous lane if a job in the lane fails. You can do this by specifying `skip_on_lane_error=True` when putting a job in the queue. If specified and the preceding job has a non-zero exit code, this job will not be run. +You may want to short-circuit a synchronous lane if a job in the lane fails. You can do this by specifying `stop_on_lane_error=True` when putting a job in the queue. If specified and the preceding job has a non-zero exit code, this job will not be run. ```{python, echo=TRUE} def reciprocal(x): diff --git a/README.md b/README.md index 620c409..52fe341 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,42 @@ # `ezpq`: an easy parallel queueing system. 
-* [`ezpq`: an easy parallel queueing system.](#ezpq-an-easy-parallel-queueing-system) - * [Overview](#overview) - * [Features](#features) - * [How to get it](#how-to-get-it) - * [Quickstart](#quickstart) - * [ezpq.Queue](#ezpqqueue) - * [ezpq.Job](#ezpqjob) - * [put](#put) - * [size](#size) - * [wait](#wait) - * [get](#get) - * [collect](#collect) - * [map](#map) - * [dispose](#dispose) - * [Synchronous Lanes](#synchronous-lanes) - * [Lane Error Handling](#lane-error-handling) - * [ezpq.Plot](#ezpqplot) - * [More Examples](#more-examples) - -Read this on [GitHub](https://github.com/dm3ll3n/ezpq) or [my -site](https://www.donaldmellenbruch.com/project/ezpq/). +> Read this on [GitHub](https://github.com/dm3ll3n/ezpq) or [my site](https://www.donaldmellenbruch.com/project/ezpq/). + +- [How to get it](#how-to-get-it) +- [Overview](#overview) +- [Features](#features) +- [Quickstart](#quickstart) +- [ezpq.Queue](#ezpq.queue) +- [ezpq.Job](#ezpq.job) + - [put](#put) + - [size](#size) + - [wait](#wait) + - [get](#get) + - [collect](#collect) + - [map](#map) + - [starmap](#starmap) + - [startmapkw](#startmapkw) + - [dispose](#dispose) +- [Synchronous Lanes](#synchronous-lanes) + - [Lane Error Handling](#lane-error-handling) +- [ezpq.Plot](#ezpq.plot) +- [More Examples](#more-examples) + +## How to get it + +Install from [PyPI](https://pypi.org/project/ezpq/) with: + +``` python +pip install ezpq +``` + +Optional packages: + +``` python +pip install pandas # required for plots +pip install plotnine # required for plots +pip install tqdm # required for progress bars +``` ## Overview @@ -53,22 +69,6 @@ also run jobs with `threading.Thread`. - Built-in logging to CSV. - Customizable visualizations of queue operations. 
-## How to get it - -Install from [PyPI](https://pypi.org/project/ezpq/) with: - -``` python -pip install ezpq -``` - -Optional packages: - -``` python -pip install pandas # required for plots -pip install plotnine # required for plots -pip install tqdm # required for progress bars -``` - ## Quickstart Suppose you wanted to speed up the following code, which runs 60 @@ -143,7 +143,7 @@ print( output[0] ) ## {'args': [0], ## 'callback': None, ## 'cancelled': False, - ## 'ended': datetime.datetime(2019, 2, 18, 20, 21, 0, 902915), + ## 'ended': datetime.datetime(2019, 3, 13, 0, 48, 52, 811248), ## 'exception': None, ## 'exitcode': 0, ## 'function': 'random_sleep', @@ -153,11 +153,11 @@ print( output[0] ) ## 'name': 1, ## 'output': 1.3444218515250481, ## 'priority': 100, - ## 'processed': datetime.datetime(2019, 2, 18, 20, 21, 0, 955396), - ## 'qid': 'f4717edb', - ## 'runtime': 1.3515939712524414, - ## 'started': datetime.datetime(2019, 2, 18, 20, 20, 59, 551321), - ## 'submitted': datetime.datetime(2019, 2, 18, 20, 20, 59, 446199), + ## 'processed': datetime.datetime(2019, 3, 13, 0, 48, 52, 867387), + ## 'qid': '13318d36', + ## 'runtime': 1.3500409126281738, + ## 'started': datetime.datetime(2019, 3, 13, 0, 48, 51, 461207), + ## 'submitted': datetime.datetime(2019, 3, 13, 0, 48, 51, 357405), ## 'timeout': 0} Easily convert output to a `pandas` dataframe: @@ -169,11 +169,11 @@ print( df.head()[['id', 'output', 'runtime', 'exitcode']] ) ``` ## id output runtime exitcode - ## 0 1 1.344422 1.351594 0 - ## 1 2 0.634364 0.640723 0 - ## 2 3 1.456034 1.461620 0 - ## 3 4 0.737965 0.743645 0 - ## 4 5 0.736048 0.742260 0 + ## 0 1 1.344422 1.350041 0 + ## 1 2 0.634364 0.638938 0 + ## 2 3 1.456034 1.459830 0 + ## 3 4 0.737965 0.741742 0 + ## 4 5 0.736048 0.739848 0 Use `ezpq.Plot` to generate a Gannt chart of the job timings. 
@@ -335,9 +335,9 @@ with ezpq.Queue(6) as Q: ## 'Total: 60; Waiting: 31; Working: 6; Completed: 23' ## 'Total: 60; Waiting: 24; Working: 6; Completed: 30' ## 'Total: 60; Waiting: 17; Working: 6; Completed: 37' - ## 'Total: 60; Waiting: 12; Working: 6; Completed: 42' + ## 'Total: 60; Waiting: 11; Working: 6; Completed: 43' ## 'Total: 60; Waiting: 6; Working: 6; Completed: 48' - ## 'Total: 60; Waiting: 1; Working: 6; Completed: 53' + ## 'Total: 60; Waiting: 0; Working: 5; Completed: 55' ## 'Total: 60; Waiting: 0; Working: 1; Completed: 59' ## 'Total: 60; Waiting: 0; Working: 0; Completed: 60' @@ -407,6 +407,42 @@ call. Include `show_progress=True` to get output `tqdm` progress bar. ![](docs/imgs/tqdm_map.gif) +### starmap + +`starmap` is similar to `map`, but operates on a list of lists, with +each nested list being unpacked as arguments to the function. + +``` python +def my_pow(x, k): + return '{}^{} = {}'.format(x, k, x**k) +# list of lists to iterate over. +args_list = [[x, x%4] # (x, k) + for x in range(100)] +# starmap +with ezpq.Queue(10) as Q: + output = Q.starmap(my_pow, iterable=args_list) + +[x['output'] for x in output[:10]] +``` + +### startmapkw + +Same as `starmap`, but operates on a list of *dicts* to be expanded as +kwargs to the function. + +``` python +def my_pow(x, k): + return '{}^{} = {}'.format(x, k, x**k) +# list of dicts to iterate over. +kwargs_list = [{ 'x':x, 'k':x%4 } # (x, k) + for x in range(100)] +# starmapkw +with ezpq.Queue(10) as Q: + output = Q.starmapkw(my_pow, iterable=kwargs_list) + +[x['output'] for x in output[:10]] +``` + ### dispose The queueing operations performed by `ezpq.Queue` are performed on a @@ -435,7 +471,7 @@ synchronously. ### Lane Error Handling You may want to short-circuit a synchronous lane if a job in the lane -fails. You can do this by specifying `skip_on_lane_error=True` when +fails. You can do this by specifying `stop_on_lane_error=True` when putting a job in the queue. 
If specified and the preceding job has a non-zero exit code, this job will not be run. diff --git a/README.rst b/README.rst index b80439e..ed8fed5 100644 --- a/README.rst +++ b/README.rst @@ -1,14 +1,15 @@ ``ezpq``: an easy parallel queueing system. =========================================== -- ```ezpq``: an easy parallel queueing - system. <#ezpq-an-easy-parallel-queueing-system>`__ + Read this on `GitHub `__ or `my + site `__. + +- `How to get it <#how-to-get-it>`__ - `Overview <#overview>`__ - `Features <#features>`__ -- `How to get it <#how-to-get-it>`__ - `Quickstart <#quickstart>`__ -- `ezpq.Queue <#ezpqqueue>`__ -- `ezpq.Job <#ezpqjob>`__ +- `ezpq.Queue <#ezpq.queue>`__ +- `ezpq.Job <#ezpq.job>`__ - `put <#put>`__ - `size <#size>`__ @@ -16,17 +17,33 @@ - `get <#get>`__ - `collect <#collect>`__ - `map <#map>`__ + - `starmap <#starmap>`__ + - `startmapkw <#startmapkw>`__ - `dispose <#dispose>`__ - `Synchronous Lanes <#synchronous-lanes>`__ - `Lane Error Handling <#lane-error-handling>`__ -- `ezpq.Plot <#ezpqplot>`__ +- `ezpq.Plot <#ezpq.plot>`__ - `More Examples <#more-examples>`__ -Read this on `GitHub `__ or `my -site `__. +How to get it +------------- + +Install from `PyPI `__ with: + +.. code:: python + + pip install ezpq + +Optional packages: + +.. code:: python + + pip install pandas # required for plots + pip install plotnine # required for plots + pip install tqdm # required for progress bars Overview -------- @@ -40,8 +57,7 @@ Overview The queueing system uses ``multiprocessing.Process`` by default and can also run jobs with ``threading.Thread``. -.. figure:: docs/imgs/ezpq.png - :alt: +|image0| Features -------- @@ -62,23 +78,6 @@ Features - Built-in logging to CSV. - Customizable visualizations of queue operations. -How to get it -------------- - -Install from `PyPI `__ with: - -.. code:: python - - pip install ezpq - -Optional packages: - -.. 
code:: python - - pip install pandas # required for plots - pip install plotnine # required for plots - pip install tqdm # required for progress bars - Quickstart ---------- @@ -88,24 +87,24 @@ operations that take anywhere from 0s to 2s. With an average job time of .. code:: python - import time - import random - def random_sleep(x): - random.seed(x) - n = random.uniform(0.5, 1.5) - time.sleep(n) - return n + import time + import random + def random_sleep(x): + random.seed(x) + n = random.uniform(0.5, 1.5) + time.sleep(n) + return n .. code:: python - start = time.time() - output = [random_sleep(x) for x in range(60)] - end = time.time() - print('> Runtime: ' + str(end - start)) + start = time.time() + output = [random_sleep(x) for x in range(60)] + end = time.time() + print('> Runtime: ' + str(end - start)) :: - ## '> Runtime: 58.932034969329834' + ## '> Runtime: 58.932034969329834' Here is the function ran in parallel with an ``ezpq`` Queue of 6 workers. Thus, the runtime of the above operation will be reduced from @@ -113,26 +112,26 @@ workers. Thus, the runtime of the above operation will be reduced from .. code:: python - import time - import random - import ezpq - start = time.time() - with ezpq.Queue(6) as Q: - output = Q.map(random_sleep, range(60)) - end = time.time() - print('> Runtime: ' + str(end - start)) + import time + import random + import ezpq + start = time.time() + with ezpq.Queue(6) as Q: + output = Q.map(random_sleep, range(60)) + end = time.time() + print('> Runtime: ' + str(end - start)) Here is the same scenario, using the ``@ezpq.Queue`` decorator. .. 
code:: python - @ezpq.Queue(6) - def random_sleep(x): - random.seed(x) - n = random.uniform(0.5, 1.5) - time.sleep(n) - return n - output = random_sleep(iterable=range(60)) + @ezpq.Queue(6) + def random_sleep(x): + random.seed(x) + n = random.uniform(0.5, 1.5) + time.sleep(n) + return n + output = random_sleep(iterable=range(60)) While ``map()`` and the decorator are useful for quick-n-simple parallization, the essential functions of an ``ezpq`` Queue include @@ -140,67 +139,66 @@ parallization, the essential functions of an ``ezpq`` Queue include .. code:: python - with ezpq.Queue(6) as Q: - for x in range(60): - Q.put(random_sleep, args=x) - Q.wait() - output = Q.collect() + with ezpq.Queue(6) as Q: + for x in range(60): + Q.put(random_sleep, args=x) + Q.wait() + output = Q.collect() The output is a list of dicts containing verbose information about each job, along with its output, and exit code. .. code:: python - print( output[0] ) + print( output[0] ) :: - ## {'args': [0], - ## 'callback': None, - ## 'cancelled': False, - ## 'ended': datetime.datetime(2019, 2, 18, 20, 21, 0, 902915), - ## 'exception': None, - ## 'exitcode': 0, - ## 'function': 'random_sleep', - ## 'id': 1, - ## 'kwargs': None, - ## 'lane': None, - ## 'name': 1, - ## 'output': 1.3444218515250481, - ## 'priority': 100, - ## 'processed': datetime.datetime(2019, 2, 18, 20, 21, 0, 955396), - ## 'qid': 'f4717edb', - ## 'runtime': 1.3515939712524414, - ## 'started': datetime.datetime(2019, 2, 18, 20, 20, 59, 551321), - ## 'submitted': datetime.datetime(2019, 2, 18, 20, 20, 59, 446199), - ## 'timeout': 0} + ## {'args': [0], + ## 'callback': None, + ## 'cancelled': False, + ## 'ended': datetime.datetime(2019, 3, 13, 0, 48, 52, 811248), + ## 'exception': None, + ## 'exitcode': 0, + ## 'function': 'random_sleep', + ## 'id': 1, + ## 'kwargs': None, + ## 'lane': None, + ## 'name': 1, + ## 'output': 1.3444218515250481, + ## 'priority': 100, + ## 'processed': datetime.datetime(2019, 3, 13, 0, 48, 52, 
867387), + ## 'qid': '13318d36', + ## 'runtime': 1.3500409126281738, + ## 'started': datetime.datetime(2019, 3, 13, 0, 48, 51, 461207), + ## 'submitted': datetime.datetime(2019, 3, 13, 0, 48, 51, 357405), + ## 'timeout': 0} Easily convert output to a ``pandas`` dataframe: .. code:: python - import pandas as pd - df = pd.DataFrame(output) - print( df.head()[['id', 'output', 'runtime', 'exitcode']] ) + import pandas as pd + df = pd.DataFrame(output) + print( df.head()[['id', 'output', 'runtime', 'exitcode']] ) :: - ## id output runtime exitcode - ## 0 1 1.344422 1.351594 0 - ## 1 2 0.634364 0.640723 0 - ## 2 3 1.456034 1.461620 0 - ## 3 4 0.737965 0.743645 0 - ## 4 5 0.736048 0.742260 0 + ## id output runtime exitcode + ## 0 1 1.344422 1.350041 0 + ## 1 2 0.634364 0.638938 0 + ## 2 3 1.456034 1.459830 0 + ## 3 4 0.737965 0.741742 0 + ## 4 5 0.736048 0.739848 0 Use ``ezpq.Plot`` to generate a Gannt chart of the job timings. .. code:: python - plt = ezpq.Plot(output).build(show_legend=False) - plt.save('docs/imgs/quickstart.png') + plt = ezpq.Plot(output).build(show_legend=False) + plt.save('docs/imgs/quickstart.png') -.. figure:: docs/imgs/quickstart.png - :alt: +|image1| ezpq.Queue ---------- @@ -217,44 +215,44 @@ The ``Queue`` class implements the queueing system, which is itself a :: - ## Help on function __init__ in module ezpq.Queue: - ## - ## __init__(self, n_workers=8, max_size=0, job_runner=, auto_remove=False, auto_start=True, auto_stop=False, callback=None, log_file=None, poll=0.1, show_progress=False, qid=None) - ## Implements a parallel queueing system. - ## - ## Args: - ## n_workers: the max number of concurrent jobs. - ## - Accepts: int - ## - Default: cpu_count() - ## max_size: when > 0, will throw an exception the number of enqueued jobs exceeds this value. Otherwise, no limit. - ## - Accepts: int - ## - Default: 0 (unlimited) - ## job_runner: the class to use to invoke new jobs. 
- ## - Accepts: multiprocessing.Process, threading.Thread - ## - Default: multiprocessing.Process - ## auto_remove: controls whether jobs are discarded of after completion. - ## - Accepts: bool - ## - Default: False - ## auto_start: controls whether the queue system "pulse" is started upon instantiation (default), or manually. - ## - Accepts: bool - ## - Default: True - ## auto_stop: controls whether the queue system "pulse" stops itself after all jobs are complete. - ## - Accepts: bool - ## - Default: False - ## callback: optional function to execute synchronously immediately after a job completes. - ## - Accepts: function object - ## - Default: None - ## log_file: if file path is specified, job data is written to this path in CSV format. - ## - Accepts: str - ## - Default: None - ## poll: controls the pulse frequency; the amount of time slept between operations. - ## - Accepts: float - ## - Default: 0.1 - ## - ## Returns: - ## ezpq.Queue object. - ## - ## None + ## Help on function __init__ in module ezpq.Queue: + ## + ## __init__(self, n_workers=8, max_size=0, job_runner=, auto_remove=False, auto_start=True, auto_stop=False, callback=None, log_file=None, poll=0.1, show_progress=False, qid=None) + ## Implements a parallel queueing system. + ## + ## Args: + ## n_workers: the max number of concurrent jobs. + ## - Accepts: int + ## - Default: cpu_count() + ## max_size: when > 0, will throw an exception the number of enqueued jobs exceeds this value. Otherwise, no limit. + ## - Accepts: int + ## - Default: 0 (unlimited) + ## job_runner: the class to use to invoke new jobs. + ## - Accepts: multiprocessing.Process, threading.Thread + ## - Default: multiprocessing.Process + ## auto_remove: controls whether jobs are discarded of after completion. + ## - Accepts: bool + ## - Default: False + ## auto_start: controls whether the queue system "pulse" is started upon instantiation (default), or manually. 
+ ## - Accepts: bool + ## - Default: True + ## auto_stop: controls whether the queue system "pulse" stops itself after all jobs are complete. + ## - Accepts: bool + ## - Default: False + ## callback: optional function to execute synchronously immediately after a job completes. + ## - Accepts: function object + ## - Default: None + ## log_file: if file path is specified, job data is written to this path in CSV format. + ## - Accepts: str + ## - Default: None + ## poll: controls the pulse frequency; the amount of time slept between operations. + ## - Accepts: float + ## - Default: 0.1 + ## + ## Returns: + ## ezpq.Queue object. + ## + ## None ezpq.Job -------- @@ -264,50 +262,49 @@ queue with a call to ``submit()``. :: - ## Help on function __init__ in module ezpq.Job: - ## - ## __init__(self, function, args=None, kwargs=None, name=None, priority=100, lane=None, timeout=0, suppress_errors=False, stop_on_lane_error=False) - ## Defines what to run within a `ezpq.Queue`, and how to run it. - ## - ## Args: - ## function: the function to run. - ## - Accepts: function object - ## args: optional positional arguments to pass to the function. - ## - Accepts: list, tuple - ## - Default: None - ## kwargs: optional keyword arguments to pass to the function. - ## - Accepts: dict - ## - Default: None - ## name: optional name to give to the job. Does not have to be unique. - ## - Accepts: str - ## - Default: None; assumes same name as job id. - ## priority: priority value to assign. Lower values get processed sooner. - ## - Accepts: int - ## - Default: 100 - ## lane: a sequential lane to place the job in. if it does not already exist, it will be created. - ## - Accepts: int, str; any hashable object - ## - Default: None; no lanes. - ## timeout: When > 0, if this value (in seconds) is exceeded, the job is terminated. Otherwise, no limit enforced. 
- ## - Accepts: float - ## - Default: 0 (unlimited) - ## - ## Returns: - ## ezpq.Job object - ## - ## None + ## Help on function __init__ in module ezpq.Job: + ## + ## __init__(self, function, args=None, kwargs=None, name=None, priority=100, lane=None, timeout=0, suppress_errors=False, stop_on_lane_error=False) + ## Defines what to run within a `ezpq.Queue`, and how to run it. + ## + ## Args: + ## function: the function to run. + ## - Accepts: function object + ## args: optional positional arguments to pass to the function. + ## - Accepts: list, tuple + ## - Default: None + ## kwargs: optional keyword arguments to pass to the function. + ## - Accepts: dict + ## - Default: None + ## name: optional name to give to the job. Does not have to be unique. + ## - Accepts: str + ## - Default: None; assumes same name as job id. + ## priority: priority value to assign. Lower values get processed sooner. + ## - Accepts: int + ## - Default: 100 + ## lane: a sequential lane to place the job in. if it does not already exist, it will be created. + ## - Accepts: int, str; any hashable object + ## - Default: None; no lanes. + ## timeout: When > 0, if this value (in seconds) is exceeded, the job is terminated. Otherwise, no limit enforced. + ## - Accepts: float + ## - Default: 0 (unlimited) + ## + ## Returns: + ## ezpq.Job object + ## + ## None .. code:: python - with ezpq.Queue(6) as Q: - for x in range(60): - priority = x % 2 # give even numbers higher priority. - job = ezpq.Job(random_sleep, args=x, priority=priority) - Q.submit(job) - Q.wait() - output = Q.collect() + with ezpq.Queue(6) as Q: + for x in range(60): + priority = x % 2 # give even numbers higher priority. + job = ezpq.Job(random_sleep, args=x, priority=priority) + Q.submit(job) + Q.wait() + output = Q.collect() -.. figure:: docs/imgs/submit.png - :alt: +|image2| put ~~~ @@ -317,11 +314,11 @@ All of its arguments are passed to ``ezpq.Job()``. .. 
code:: python - with ezpq.Queue(6) as Q: - for x in range(60): - Q.put(random_sleep, args=x) - Q.wait() - output = Q.collect() + with ezpq.Queue(6) as Q: + for x in range(60): + Q.put(random_sleep, args=x) + Q.wait() + output = Q.collect() size ~~~~ @@ -334,42 +331,42 @@ corresponding queue(s) will be counted. For example: .. code:: python - def print_sizes(Q): - msg = 'Total: {0}; Waiting: {1}; Working: {2}; Completed: {3}'.format( - Q.size(), - Q.size(waiting=True), - Q.size(working=True), - Q.size(completed=True) - ) - print(msg) + def print_sizes(Q): + msg = 'Total: {0}; Waiting: {1}; Working: {2}; Completed: {3}'.format( + Q.size(), + Q.size(waiting=True), + Q.size(working=True), + Q.size(completed=True) + ) + print(msg) .. code:: python - with ezpq.Queue(6) as Q: - # enqueue jobs - for x in range(60): - Q.put(random_sleep, x) - # repeatedly print sizes until complete. - while Q.size(waiting=True, working=True): - print_sizes(Q) - time.sleep(1) - print_sizes(Q) + with ezpq.Queue(6) as Q: + # enqueue jobs + for x in range(60): + Q.put(random_sleep, x) + # repeatedly print sizes until complete. 
+ while Q.size(waiting=True, working=True): + print_sizes(Q) + time.sleep(1) + print_sizes(Q) :: - ## 'Total: 60; Waiting: 60; Working: 0; Completed: 0' - ## 'Total: 60; Waiting: 51; Working: 6; Completed: 3' - ## 'Total: 60; Waiting: 46; Working: 6; Completed: 8' - ## 'Total: 60; Waiting: 39; Working: 6; Completed: 15' - ## 'Total: 60; Waiting: 34; Working: 6; Completed: 20' - ## 'Total: 60; Waiting: 31; Working: 6; Completed: 23' - ## 'Total: 60; Waiting: 24; Working: 6; Completed: 30' - ## 'Total: 60; Waiting: 17; Working: 6; Completed: 37' - ## 'Total: 60; Waiting: 12; Working: 6; Completed: 42' - ## 'Total: 60; Waiting: 6; Working: 6; Completed: 48' - ## 'Total: 60; Waiting: 1; Working: 6; Completed: 53' - ## 'Total: 60; Waiting: 0; Working: 1; Completed: 59' - ## 'Total: 60; Waiting: 0; Working: 0; Completed: 60' + ## 'Total: 60; Waiting: 60; Working: 0; Completed: 0' + ## 'Total: 60; Waiting: 51; Working: 6; Completed: 3' + ## 'Total: 60; Waiting: 46; Working: 6; Completed: 8' + ## 'Total: 60; Waiting: 39; Working: 6; Completed: 15' + ## 'Total: 60; Waiting: 34; Working: 6; Completed: 20' + ## 'Total: 60; Waiting: 31; Working: 6; Completed: 23' + ## 'Total: 60; Waiting: 24; Working: 6; Completed: 30' + ## 'Total: 60; Waiting: 17; Working: 6; Completed: 37' + ## 'Total: 60; Waiting: 11; Working: 6; Completed: 43' + ## 'Total: 60; Waiting: 6; Working: 6; Completed: 48' + ## 'Total: 60; Waiting: 0; Working: 5; Completed: 55' + ## 'Total: 60; Waiting: 0; Working: 1; Completed: 59' + ## 'Total: 60; Waiting: 0; Working: 0; Completed: 60' wait ~~~~ @@ -384,8 +381,7 @@ completed jobs. New in v0.2.0, include ``show_progress=True`` to show a progress bar while waiting. This is equivalent to a call to ``waitpb()``. -.. figure:: docs/imgs/tqdm.gif - :alt: +|image3| get ~~~ @@ -398,16 +394,16 @@ the timeout is exceeded, ``None`` is returned. .. 
code:: python - with ezpq.Queue(6) as Q: - n_inputs = 60 - output = [None] * n_inputs - # enqueue jobs - for x in range(n_inputs): - Q.put(random_sleep, args=x) - - # repeatedly `get()` until queue is empty. - for i in range(n_inputs): - output[i] = Q.get(wait=True) + with ezpq.Queue(6) as Q: + n_inputs = 60 + output = [None] * n_inputs + # enqueue jobs + for x in range(n_inputs): + Q.put(random_sleep, args=x) + + # repeatedly `get()` until queue is empty. + for i in range(n_inputs): + output[i] = Q.get(wait=True) collect ~~~~~~~ @@ -419,22 +415,22 @@ before ``collect()`` if desired. .. code:: python - with ezpq.Queue(6) as Q: - # enqueue jobs - for x in range(60): - Q.put(random_sleep, x) - # wait and collect all jobs - print('Queue size before: {0}'.format(Q.size())) - Q.wait() - output = Q.collect() - print('Queue size after: {0}'.format(Q.size())) - print('Output size: {0}'.format(len(output))) + with ezpq.Queue(6) as Q: + # enqueue jobs + for x in range(60): + Q.put(random_sleep, x) + # wait and collect all jobs + print('Queue size before: {0}'.format(Q.size())) + Q.wait() + output = Q.collect() + print('Queue size after: {0}'.format(Q.size())) + print('Output size: {0}'.format(len(output))) :: - ## 'Queue size before: 60' - ## 'Queue size after: 0' - ## 'Output size: 60' + ## 'Queue size before: 60' + ## 'Queue size after: 0' + ## 'Output size: 60' map ~~~ @@ -443,8 +439,45 @@ map one call. Include ``show_progress=True`` to get output ``tqdm`` progress bar. -.. figure:: docs/imgs/tqdm_map.gif - :alt: +|image4| + +starmap +~~~~~~~ + +``starmap`` is similar to ``map``, but operates on a list of lists, with +each nested list being unpacked as arguments to the function. + +.. code:: python + + def my_pow(x, k): + return '{}^{} = {}'.format(x, k, x**k) + # list of lists to iterate over. 
+ args_list = [[x, x%4] # (x, k) + for x in range(100)] + # starmap + with ezpq.Queue(10) as Q: + output = Q.starmap(my_pow, iterable=args_list) + + [x['output'] for x in output[:10]] + +startmapkw +~~~~~~~~~~ + +Same as ``starmap``, but operates on a list of *dicts* to be expanded +as kwargs to the function. + +.. code:: python + + def my_pow(x, k): + return '{}^{} = {}'.format(x, k, x**k) + # list of dicts to iterate over. + kwargs_list = [{ 'x':x, 'k':x%4 } # (x, k) + for x in range(100)] + # starmapkw + with ezpq.Queue(10) as Q: + output = Q.starmapkw(my_pow, iterable=kwargs_list) + + [x['output'] for x in output[:10]] dispose ~~~~~~~ @@ -467,8 +500,7 @@ to execute them in sequence. All that is required is an arbitrary lane name/id passed to the ``lane`` parameter of ``put``. Empty lanes are automatically removed. -.. figure:: docs/imgs/lanes.gif - :alt: +|image5| In the above graphic, notice how same-colored bars never overlap. These bars represent jobs that are in the same lane, which executed @@ -478,29 +510,28 @@ Lane Error Handling ~~~~~~~~~~~~~~~~~~~ You may want to short-circuit a synchronous lane if a job in the lane -fails. You can do this by specifying ``skip_on_lane_error=True`` when +fails. You can do this by specifying ``stop_on_lane_error=True`` when putting a job in the queue. If specified and the preceding job has a non-zero exit code, this job will not be run. .. code:: python - def reciprocal(x): - time.sleep(0.1) # slow things down - return 1/x # will throw DivideByZero exception + def reciprocal(x): + time.sleep(0.1) # slow things down + return 1/x # will throw DivideByZero exception .. 
code:: python - import random - with ezpq.Queue(6) as Q: - for i in range(100): - Q.put(reciprocal, random.randint(0, 10), lane=i%5, suppress_errors=True, stop_on_lane_error=True) - Q.wait() - output = Q.collect() - plt = ezpq.Plot(output).build(facet_by='lane', color_by='exitcode', color_pal=['red', 'blue']) - plt.save('docs/imgs/lane_error.png') + import random + with ezpq.Queue(6) as Q: + for i in range(100): + Q.put(reciprocal, random.randint(0, 10), lane=i%5, suppress_errors=True, stop_on_lane_error=True) + Q.wait() + output = Q.collect() + plt = ezpq.Plot(output).build(facet_by='lane', color_by='exitcode', color_pal=['red', 'blue']) + plt.save('docs/imgs/lane_error.png') -.. figure:: docs/imgs/lane_error.png - :alt: +|image6| ezpq.Plot --------- @@ -515,54 +546,53 @@ coloring, to faceting, :: - ## Help on function build in module ezpq.Plot: - ## - ## build(self, color_by='qid', facet_by='qid', facet_scale='fixed', show_legend=True, bar_width=1, title=None, color_pal=None, theme='bw') - ## Produces a plot based on the data and options provided to a `ezpq.Plot()` object. - ## - ## Args: - ## color_by: controls the column to use for coloring the bars. - ## - Accepts: one of 'qid', 'priority', 'lane', 'cancelled', 'exitcode', 'name', 'output' - ## - Default: 'qid' - ## facet_by: controls the column to use for facetting the plot. - ## - Accepts: one of 'qid', 'priority', 'lane', 'cancelled', 'exitcode', 'name', 'output' - ## - Default: 'qid' - ## facet_scale: controls the scale of the x/y axis across facets. - ## - Accepts: one of 'fixed', 'free', 'free_x', 'free_y' - ## - Default: 'fixed' - ## show_legend: controls whether the legend is drawn. - ## - Accepts: bool - ## - Default: True - ## bar_width: controls the bar width - ## - Accepts: float - ## - Default: 1 - ## title: optional title to be drawn above the plot. 
- ## - Accepts: str, None - ## - Default: None - ## theme: - ## - Accepts: 'bw', 'classic', 'gray', 'grey', 'seaborn', '538', 'dark', 'matplotlib', 'minimal', 'xkcd', 'light' - ## - Default: 'bw' - ## Returns: - ## The plot produced from plotnine.ggplot(). - ## - ## None + ## Help on function build in module ezpq.Plot: + ## + ## build(self, color_by='qid', facet_by='qid', facet_scale='fixed', show_legend=True, bar_width=1, title=None, color_pal=None, theme='bw') + ## Produces a plot based on the data and options provided to a `ezpq.Plot()` object. + ## + ## Args: + ## color_by: controls the column to use for coloring the bars. + ## - Accepts: one of 'qid', 'priority', 'lane', 'cancelled', 'exitcode', 'name', 'output' + ## - Default: 'qid' + ## facet_by: controls the column to use for facetting the plot. + ## - Accepts: one of 'qid', 'priority', 'lane', 'cancelled', 'exitcode', 'name', 'output' + ## - Default: 'qid' + ## facet_scale: controls the scale of the x/y axis across facets. + ## - Accepts: one of 'fixed', 'free', 'free_x', 'free_y' + ## - Default: 'fixed' + ## show_legend: controls whether the legend is drawn. + ## - Accepts: bool + ## - Default: True + ## bar_width: controls the bar width + ## - Accepts: float + ## - Default: 1 + ## title: optional title to be drawn above the plot. + ## - Accepts: str, None + ## - Default: None + ## theme: + ## - Accepts: 'bw', 'classic', 'gray', 'grey', 'seaborn', '538', 'dark', 'matplotlib', 'minimal', 'xkcd', 'light' + ## - Default: 'bw' + ## Returns: + ## The plot produced from plotnine.ggplot(). + ## + ## None .. code:: python - with ezpq.Queue(6) as Q: - for x in range(60): - lane = x % 5 - Q.put(random_sleep, x, timeout=1, lane=lane) - Q.wait() - output = Q.collect() + with ezpq.Queue(6) as Q: + for x in range(60): + lane = x % 5 + Q.put(random_sleep, x, timeout=1, lane=lane) + Q.wait() + output = Q.collect() .. 
code:: python - plt = ezpq.Plot(output).build(facet_by='lane', show_legend=False) - plt.save('docs/imgs/lanes2.png') + plt = ezpq.Plot(output).build(facet_by='lane', show_legend=False) + plt.save('docs/imgs/lanes2.png') -.. figure:: docs/imgs/lanes2.png - :alt: +|image7| Each horizontal bar represents an independent job id. The start of the gray bar indicates when the job entered the queuing system. The start of @@ -577,3 +607,13 @@ More Examples Many more examples can be found in `docs/examples.ipynb `__. + +.. |image0| image:: docs/imgs/ezpq.png +.. |image1| image:: docs/imgs/quickstart.png +.. |image2| image:: docs/imgs/submit.png +.. |image3| image:: docs/imgs/tqdm.gif +.. |image4| image:: docs/imgs/tqdm_map.gif +.. |image5| image:: docs/imgs/lanes.gif +.. |image6| image:: docs/imgs/lane_error.png +.. |image7| image:: docs/imgs/lanes2.png + diff --git a/docs/examples.ipynb b/docs/examples.ipynb index 79d1613..7cf4c33 100644 --- a/docs/examples.ipynb +++ b/docs/examples.ipynb @@ -1,2878 +1,2931 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# `ezpq` Examples" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "if os.path.exists('../ezpq/__init__.py'):\n", - " sys.path.insert(0, '../')\n", - "import ezpq" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [], - "source": [ - "import os\n", - "import time\n", - "import random\n", - "import logging as log\n", - "import multiprocessing as mp\n", - "import threading\n", - "logger = log.getLogger()\n", - 
"logger.setLevel(log.INFO)\n", - "# logger.setLevel(log.DEBUG) # uncomment to view verbose output" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from datetime import datetime" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [], - "source": [ - "# Context manager to time functions\n", - "class MyTimer():\n", - " '''https://www.blog.pythonlibrary.org/2016/05/24/python-101-an-intro-to-benchmarking-your-code/'''\n", - " def __init__(self):\n", - " self.start = time.time()\n", - "\n", - " def __enter__(self):\n", - " return self\n", - "\n", - " def __exit__(self, exc_type, exc_val, exc_tb):\n", - " end = time.time()\n", - " runtime = round(end - self.start, 3)\n", - " msg = 'Runtime: {time}'\n", - " print(msg.format(time=runtime))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Elementary example\n", - "\n", - "I'll start with a step-by-step example to illustrate the basic operations. The code below is what we want to optimize. It simply loops 10 times, sleeping 1 second each time, for a total of 10 seconds." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "source": [ - "```python\n", - "for _ in range(10):\n", - " time.sleep(1)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create the Queue with *n* workers." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "Q = ezpq.Queue(n_workers=5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Put jobs in the Queue." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "for _ in range(10):\n", - " Q.put(time.sleep, 1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "size() will count all jobs in the waiting, working, and completed queues." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "10" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Q.size()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a list to store job data (runtime and output)." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "results = list()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`get()` is used to pop the highest priority job from the *completed* queue. The result must be stored or it will be lost.\n", - "\n", - "When no output is available and `wait` is true, the operation will wait for the next job to finish." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "done = Q.get(wait=True) # get() returns a dict\n", - "\n", - "results.append(done) # append dictionary to results list." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Count remaining jobs after the operation above." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "9" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Q.size()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`collect()` is used to repeatedly `get()` all completed jobs.\n", - "\n", - "`collect()` does not support the `wait` parameter;\n", - "\n", - "Instead, wait for all running and queued jobs to complete with `wait()` before calling `collect()`." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "Q.wait()\n", - "\n", - "done_list = Q.collect() # collect() returns a list\n", - "\n", - "results.extend(done_list) # extend result list " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Verify no jobs remain." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Q.size()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Dispose of the `ezpq`. This will ensure all jobs are stopped and data is discarded." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "Q.dispose()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, inspect a single job result." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'qid': '50630fdf',\n", - " 'id': 1,\n", - " 'name': 1,\n", - " 'priority': 100,\n", - " 'lane': None,\n", - " 'timeout': 0,\n", - " 'function': 'sleep',\n", - " 'args': [1],\n", - " 'kwargs': None,\n", - " 'submitted': datetime.datetime(2019, 2, 18, 20, 20, 17, 663244),\n", - " 'started': datetime.datetime(2019, 2, 18, 20, 20, 17, 758352),\n", - " 'ended': datetime.datetime(2019, 2, 18, 20, 20, 18, 763252),\n", - " 'processed': datetime.datetime(2019, 2, 18, 20, 20, 18, 860747),\n", - " 'exitcode': 0,\n", - " 'cancelled': False,\n", - " 'runtime': 1.0048999786376953,\n", - " 'output': None,\n", - " 'exception': None,\n", - " 'callback': None}" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a pandas dataframe for a grid view of the results." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
argscallbackcancelledendedexceptionexitcodefunctionidkwargslanenameoutputpriorityprocessedqidruntimestartedsubmittedtimeout
0[1]NoneFalse2019-02-18 20:20:18.763252None0sleep1NoneNone1None1002019-02-18 20:20:18.86074750630fdf1.0049002019-02-18 20:20:17.7583522019-02-18 20:20:17.6632440
1[1]NoneFalse2019-02-18 20:20:18.767614None0sleep2NoneNone2None1002019-02-18 20:20:18.86113350630fdf1.0042432019-02-18 20:20:17.7633712019-02-18 20:20:17.6632740
2[1]NoneFalse2019-02-18 20:20:18.773731None0sleep3NoneNone3None1002019-02-18 20:20:18.86137250630fdf1.0048082019-02-18 20:20:17.7689232019-02-18 20:20:17.6633010
3[1]NoneFalse2019-02-18 20:20:18.778162None0sleep4NoneNone4None1002019-02-18 20:20:18.86159450630fdf1.0046132019-02-18 20:20:17.7735502019-02-18 20:20:17.6633300
4[1]NoneFalse2019-02-18 20:20:18.801069None0sleep5NoneNone5None1002019-02-18 20:20:18.86189450630fdf1.0061492019-02-18 20:20:17.7949202019-02-18 20:20:17.6633510
\n", - "
" - ], - "text/plain": [ - " args callback cancelled ended exception exitcode \\\n", - "0 [1] None False 2019-02-18 20:20:18.763252 None 0 \n", - "1 [1] None False 2019-02-18 20:20:18.767614 None 0 \n", - "2 [1] None False 2019-02-18 20:20:18.773731 None 0 \n", - "3 [1] None False 2019-02-18 20:20:18.778162 None 0 \n", - "4 [1] None False 2019-02-18 20:20:18.801069 None 0 \n", - "\n", - " function id kwargs lane name output priority processed \\\n", - "0 sleep 1 None None 1 None 100 2019-02-18 20:20:18.860747 \n", - "1 sleep 2 None None 2 None 100 2019-02-18 20:20:18.861133 \n", - "2 sleep 3 None None 3 None 100 2019-02-18 20:20:18.861372 \n", - "3 sleep 4 None None 4 None 100 2019-02-18 20:20:18.861594 \n", - "4 sleep 5 None None 5 None 100 2019-02-18 20:20:18.861894 \n", - "\n", - " qid runtime started submitted \\\n", - "0 50630fdf 1.004900 2019-02-18 20:20:17.758352 2019-02-18 20:20:17.663244 \n", - "1 50630fdf 1.004243 2019-02-18 20:20:17.763371 2019-02-18 20:20:17.663274 \n", - "2 50630fdf 1.004808 2019-02-18 20:20:17.768923 2019-02-18 20:20:17.663301 \n", - "3 50630fdf 1.004613 2019-02-18 20:20:17.773550 2019-02-18 20:20:17.663330 \n", - "4 50630fdf 1.006149 2019-02-18 20:20:17.794920 2019-02-18 20:20:17.663351 \n", - "\n", - " timeout \n", - "0 0 \n", - "1 0 \n", - "2 0 \n", - "3 0 \n", - "4 0 " - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame(results)\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Lastly, visualize the steps." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ezpq.Plot(results).build()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the chart above, the colored lines overlap because the jobs were run in parallel. Contrast that with the following example using serial processing." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Practical Examples" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Shared parameters; adjust as desired." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [], - "source": [ - "n_workers = 10 # number of workers to process the queue.\n", - "n_inputs = 50 # number of inputs to the queue.\n", - "job_time = 1 # number of seconds each input will 'sleep'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f3ad832275004ee0bb2d0388a5992673", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
argscallbackcancelledendedexceptionexitcodefunctionidkwargslanenameoutputpriorityprocessedqidruntimestartedsubmittedtimeout
0[1]NoneFalse2019-02-18 20:20:22.972163None0sleep1NoneNone1None1002019-02-18 20:20:23.0695543bd621c41.0045212019-02-18 20:20:21.9676422019-02-18 20:20:21.8624320
1[1]NoneFalse2019-02-18 20:20:22.978533None0sleep2NoneNone2None1002019-02-18 20:20:23.0699943bd621c41.0045482019-02-18 20:20:21.9739852019-02-18 20:20:21.8624700
2[1]NoneFalse2019-02-18 20:20:22.986742None0sleep3NoneNone3None1002019-02-18 20:20:23.0703163bd621c41.0053692019-02-18 20:20:21.9813732019-02-18 20:20:21.8624910
3[1]NoneFalse2019-02-18 20:20:22.994291None0sleep4NoneNone4None1002019-02-18 20:20:23.0705783bd621c41.0052362019-02-18 20:20:21.9890552019-02-18 20:20:21.8625050
4[1]NoneFalse2019-02-18 20:20:22.999577None0sleep5NoneNone5None1002019-02-18 20:20:23.0708433bd621c41.0036402019-02-18 20:20:21.9959372019-02-18 20:20:21.8625320
\n", - "
" - ], - "text/plain": [ - " args callback cancelled ended exception exitcode \\\n", - "0 [1] None False 2019-02-18 20:20:22.972163 None 0 \n", - "1 [1] None False 2019-02-18 20:20:22.978533 None 0 \n", - "2 [1] None False 2019-02-18 20:20:22.986742 None 0 \n", - "3 [1] None False 2019-02-18 20:20:22.994291 None 0 \n", - "4 [1] None False 2019-02-18 20:20:22.999577 None 0 \n", - "\n", - " function id kwargs lane name output priority processed \\\n", - "0 sleep 1 None None 1 None 100 2019-02-18 20:20:23.069554 \n", - "1 sleep 2 None None 2 None 100 2019-02-18 20:20:23.069994 \n", - "2 sleep 3 None None 3 None 100 2019-02-18 20:20:23.070316 \n", - "3 sleep 4 None None 4 None 100 2019-02-18 20:20:23.070578 \n", - "4 sleep 5 None None 5 None 100 2019-02-18 20:20:23.070843 \n", - "\n", - " qid runtime started submitted \\\n", - "0 3bd621c4 1.004521 2019-02-18 20:20:21.967642 2019-02-18 20:20:21.862432 \n", - "1 3bd621c4 1.004548 2019-02-18 20:20:21.973985 2019-02-18 20:20:21.862470 \n", - "2 3bd621c4 1.005369 2019-02-18 20:20:21.981373 2019-02-18 20:20:21.862491 \n", - "3 3bd621c4 1.005236 2019-02-18 20:20:21.989055 2019-02-18 20:20:21.862505 \n", - "4 3bd621c4 1.003640 2019-02-18 20:20:21.995937 2019-02-18 20:20:21.862532 \n", - "\n", - " timeout \n", - "0 0 \n", - "1 0 \n", - "2 0 \n", - "3 0 \n", - "4 0 " - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "with ezpq.Queue(n_workers=n_workers) as Q: # create an ezpq.Queue() with 10 workers. 
\n", - " for i in range(n_inputs): # create 50 jobs, each sleeping for 1 second.\n", - " Q.put(function=time.sleep, args=job_time)\n", - " \n", - " Q.waitpb() # wait for all jobs to complete.\n", - " \n", - " results = Q.collect() # collect completed jobs from queue.\n", - " \n", - "df = pd.DataFrame(results)\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:706: UserWarning: Saving 6.4 x 4.8 in image.\n", - " from_inches(height, units), units))\n", - "/home/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:707: UserWarning: Filename: imgs/50x1x10.png\n", - " warn('Filename: {}'.format(filename))\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "title='{} inputs, {}s job time, {} workers (parallel).'.format(n_inputs, job_time, n_workers)\n", - "\n", - "plt = ezpq.Plot(results).build(title=title)\n", - "\n", - "plt.save('imgs/{}x{}x{}.png'.format(n_inputs, job_time, n_workers))\n", - "plt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Job Priority" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Jobs are prioritized by (1) custom priority and (2) arrival time. By using a priority queue for both pending and completed jobs, you can be sure that higher priority jobs are executed and retrieved sooner.\n", - "\n", - "The default priority value is 100. Lower values take higher priority." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6d9dfdca9a5948b996aa7ff469e7703a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:706: UserWarning: Saving 6.4 x 4.8 in image.\n", - " from_inches(height, units), units))\n", - "/home/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:707: UserWarning: Filename: imgs/100x1x10_priority.png\n", - " warn('Filename: {}'.format(filename))\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "with ezpq.Queue(n_workers=n_workers) as Q:\n", - "\n", - " for i in range(n_inputs):\n", - " Q.put(function=time.sleep, args=job_time,\n", - " priority=int((i+1) % 10 != 0)+1) # prioritize numbers divisible by ten.\n", - "\n", - " Q.waitpb()\n", - "\n", - " results = Q.collect()\n", - "\n", - "title = '{} inputs, {}s job time, {} workers (prioritized).'.format(n_inputs, job_time, n_workers)\n", - "\n", - "plt = ezpq.Plot(results).build(title=title, color_by='priority')\n", - "\n", - "plt.save('imgs/100x1x10_priority.png')\n", - "plt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Threads\n", - "\n", - "By default, `ezpq.Queue` uses `multiprocessing.Process` to spawn new jobs. However, you can use `threading.Thread` instead by specifying `job_runner=Thread` when creating the Queue. Some of the differences between processes and threads are explained [here](https://medium.com/@bfortuner/python-multithreading-vs-multiprocessing-73072ce5600b).\n", - "\n", - "> Note that, inherently, `Thread` loses the ability to enforce a timeout, to kill jobs, or to obtain exit codes.\n", - "\n", - "Aside from that, there's no difference in how you interact with ezpq.\n", - "\n", - "In addition to using Threads, the example below varies the job time between 0-2 seconds." 
- ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ecb60e878f194caf8f1e9a9d1aa2dfd5", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:706: UserWarning: Saving 6.4 x 4.8 in image.\n", - " from_inches(height, units), units))\n", - "/home/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:707: UserWarning: Filename: imgs/100xNx10.png\n", - " warn('Filename: {}'.format(filename))\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from threading import Thread\n", - "\n", - "with ezpq.Queue(n_workers=n_workers, job_runner=Thread) as Q:\n", - "\n", - " for i in range(n_inputs):\n", - " sleep_sec = random.randint(0, job_time*2000) / 1000 # sleep for random time between 0-2s (avg=1s).\n", - " \n", - " Q.put(function=time.sleep, args=sleep_sec)\n", - "\n", - " Q.waitpb()\n", - "\n", - " results = Q.collect()\n", - "\n", - "title = '{} inputs, 0-2s job time, {} workers (threads).'.format(n_inputs, job_time, n_workers)\n", - "plt = ezpq.Plot(results).build(title=title)\n", - "\n", - "plt.save('imgs/100xNx10.png')\n", - "plt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Timeout\n", - "\n", - "> Recall that you can only use timeout when using processes, not threads." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [], - "source": [ - "n_inputs = 50\n", - "job_time = 1" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4361a5fe28944ba784565f1febad695f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:706: UserWarning: Saving 6.4 x 4.8 in image.\n", - " from_inches(height, units), units))\n", - 
"/home/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:707: UserWarning: Filename: imgs/50xNx10_timeouts.png\n", - " warn('Filename: {}'.format(filename))\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "with ezpq.Queue(n_workers=n_workers) as Q:\n", - "\n", - " for i in range(n_inputs):\n", - " sleep_sec = random.randint(0, job_time*2000) / 1000 # sleep between 0s and 2s (avg=1s).\n", - "\n", - " Q.put(function=time.sleep, args=sleep_sec,\n", - " timeout=job_time*1.8) # 1.8 < 2; timeout will occur on ~10% of jobs.\n", - "\n", - " Q.waitpb()\n", - "\n", - " results = Q.collect()\n", - " \n", - "title = '{} inputs, 0-2s job time, {} workers (timeout=1.8s).'.format(n_inputs, job_time, n_workers)\n", - "\n", - "plt = ezpq.Plot(results).build(color_by='cancelled',\n", - " color_pal=['blue', 'red'],\n", - " title=title)\n", - "\n", - "plt.save('imgs/{}xNx{}_timeouts.png'.format(n_inputs, n_workers))\n", - "plt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Logging\n", - "\n", - "Writing to a log is as simple as adding `log_file='/path/to/file.csv'`. \n", - "\n", - "The example below illustrates a simply real-world use case. The goal is to obtain the total count and size of files within various folders. One process will be spawned to enumerate each folder. By doing so in parallel, the entire operation only takes as long as the single-most long operation." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Define the function to enumerate the files." 
- ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [], - "source": [ - "import os, fnmatch\n", - "def enum_files(path, pattern):\n", - " '''https://stackoverflow.com/a/1724723'''\n", - " n, size = 0, 0\n", - " for root, dirs, files in os.walk(path, followlinks=True):\n", - " for name in files:\n", - " if pattern is None or fnmatch.fnmatch(name, pattern):\n", - " n += 1\n", - " size += os.path.getsize( os.path.join(root, name) )\n", - " \n", - " return (n, size)\n", - "\n", - "root_dir = os.environ['HOME']\n", - "\n", - "include_dirs = [os.path.join(root_dir, x)\n", - " for x in ['bin', 'Desktop', 'Documents', 'Downloads', 'Music', 'Pictures']]" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "log_file = 'ezpq_log.csv'\n", - "\n", - "# first, remove existing log file.\n", - "if os.path.exists(log_file):\n", - " os.unlink(log_file)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Each result will be written to a CSV file and discarded. Afterward, we'll read the CSV back in and draw a plot.\n", - "\n", - "Notice there is no call to `get()` or `collect()`. Because `auto_remove=True`, jobs are discarded; there is no completed queue to pull from." 
- ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bbd4261aefaf473ebef5582808ccf030", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=6), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Process 3:\n", - "Traceback (most recent call last):\n", - " File \"/usr/lib/python3.6/multiprocessing/process.py\", line 258, in _bootstrap\n", - " self.run()\n", - " File \"/usr/lib/python3.6/multiprocessing/process.py\", line 93, in run\n", - " self._target(*self._args, **self._kwargs)\n", - " File \"../ezpq/Queue.py\", line 587, in _job_wrap\n", - " raise ex_obj\n", - " File \"../ezpq/Queue.py\", line 578, in _job_wrap\n", - " out = _job.function(*args, **kwargs)\n", - " File \"\", line 9, in enum_files\n", - " size += os.path.getsize( os.path.join(root, name) )\n", - " File \"/usr/lib/python3.6/genericpath.py\", line 50, in getsize\n", - " return os.stat(filename).st_size\n", - "FileNotFoundError: [Errno 2] No such file or directory: '/home/donald/Documents/Dropbox/pwsh/pskafka/kafkacat-1.4.0RC1/tmp-bootstrap/usr/local/lib/librdkafka++.so'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "with ezpq.Queue(n_workers = len(include_dirs),\n", - " log_file=log_file, auto_remove=True) as Q:\n", - "\n", - " for path in include_dirs:\n", - " Q.put(name=path,\n", - " function=enum_files,\n", - " kwargs={'path': path, 'pattern': '*'})\n", - "\n", - " Q.waitpb()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Read the CSV file back in with pandas." 
- ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
qididnameprioritylanetimeoutfunctionargskwargssubmittedstartedendedprocessedexitcodecancelledruntimeoutputexceptioncallback
0141e2d041/home/donald/bin100NaN0enum_filesNaN{'path': '/home/donald/bin', 'pattern': '*'}2019-02-18 20:21:14.7915652019-02-18 20:21:14.8975352019-02-18 20:21:14.9015912019-02-18 20:21:14.9937350False0.004056(3, 83860646)NaNNaN
1141e2d042/home/donald/Desktop100NaN0enum_filesNaN{'path': '/home/donald/Desktop', 'pattern': '*'}2019-02-18 20:21:14.7915872019-02-18 20:21:14.9038322019-02-18 20:21:14.9089472019-02-18 20:21:14.9947980False0.005115(17, 1162796)NaNNaN
2141e2d045/home/donald/Music100NaN0enum_filesNaN{'path': '/home/donald/Music', 'pattern': '*'}2019-02-18 20:21:14.7916202019-02-18 20:21:14.9263632019-02-18 20:21:14.9303862019-02-18 20:21:14.9952480False0.004023(0, 0)NaNNaN
3141e2d043/home/donald/Documents100NaN0enum_filesNaN{'path': '/home/donald/Documents', 'pattern': ...2019-02-18 20:21:14.7915962019-02-18 20:21:14.9106652019-02-18 20:21:15.0018432019-02-18 20:21:15.0933781False0.091178NaNTraceback (most recent call last):\\n File \".....NaN
4141e2d044/home/donald/Downloads100NaN0enum_filesNaN{'path': '/home/donald/Downloads', 'pattern': ...2019-02-18 20:21:14.7916072019-02-18 20:21:14.9176412019-02-18 20:21:15.0712682019-02-18 20:21:15.0938540False0.153627(4523, 6313058280)NaNNaN
\n", - "
" - ], - "text/plain": [ - " qid id name priority lane timeout function \\\n", - "0 141e2d04 1 /home/donald/bin 100 NaN 0 enum_files \n", - "1 141e2d04 2 /home/donald/Desktop 100 NaN 0 enum_files \n", - "2 141e2d04 5 /home/donald/Music 100 NaN 0 enum_files \n", - "3 141e2d04 3 /home/donald/Documents 100 NaN 0 enum_files \n", - "4 141e2d04 4 /home/donald/Downloads 100 NaN 0 enum_files \n", - "\n", - " args kwargs \\\n", - "0 NaN {'path': '/home/donald/bin', 'pattern': '*'} \n", - "1 NaN {'path': '/home/donald/Desktop', 'pattern': '*'} \n", - "2 NaN {'path': '/home/donald/Music', 'pattern': '*'} \n", - "3 NaN {'path': '/home/donald/Documents', 'pattern': ... \n", - "4 NaN {'path': '/home/donald/Downloads', 'pattern': ... \n", - "\n", - " submitted started \\\n", - "0 2019-02-18 20:21:14.791565 2019-02-18 20:21:14.897535 \n", - "1 2019-02-18 20:21:14.791587 2019-02-18 20:21:14.903832 \n", - "2 2019-02-18 20:21:14.791620 2019-02-18 20:21:14.926363 \n", - "3 2019-02-18 20:21:14.791596 2019-02-18 20:21:14.910665 \n", - "4 2019-02-18 20:21:14.791607 2019-02-18 20:21:14.917641 \n", - "\n", - " ended processed exitcode cancelled \\\n", - "0 2019-02-18 20:21:14.901591 2019-02-18 20:21:14.993735 0 False \n", - "1 2019-02-18 20:21:14.908947 2019-02-18 20:21:14.994798 0 False \n", - "2 2019-02-18 20:21:14.930386 2019-02-18 20:21:14.995248 0 False \n", - "3 2019-02-18 20:21:15.001843 2019-02-18 20:21:15.093378 1 False \n", - "4 2019-02-18 20:21:15.071268 2019-02-18 20:21:15.093854 0 False \n", - "\n", - " runtime output \\\n", - "0 0.004056 (3, 83860646) \n", - "1 0.005115 (17, 1162796) \n", - "2 0.004023 (0, 0) \n", - "3 0.091178 NaN \n", - "4 0.153627 (4523, 6313058280) \n", - "\n", - " exception callback \n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 Traceback (most recent call last):\\n File \"..... 
NaN \n", - "4 NaN NaN " - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "date_cols = ['submitted', 'started', 'ended', 'processed']\n", - "\n", - "results\n", - "\n", - "df = pd.read_csv(log_file, parse_dates=date_cols)\n", - "\n", - "os.unlink(log_file)\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# ezpq.Plot requires a dictionary of job data records.\n", - "records = df.to_dict(orient='records')\n", - "\n", - "plt = ezpq.Plot(records).build(color_by='name', bar_width=5)\n", - "\n", - "# plt.save('...')\n", - "plt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Callback\n", - "\n", - "If instantiated with a callback, `ezpq.Queue` will pass a dictionary of job data as the single argument to the callback function. It does this synchronously, so the callback must not take too much time." - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [], - "source": [ - "def slow_callback(x):\n", - " time.sleep(0.5)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "7302cd202fc24d008dc238022747528e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "with ezpq.Queue(n_workers=n_workers, callback=slow_callback, poll=1) as Q:\n", - " for _ in range(n_inputs):\n", - " Q.put(time.sleep, 1)\n", - " Q.waitpb()\n", - " results = Q.collect()" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "inputHidden": false, - "outputHidden": false - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ezpq.Plot(results).build(show_legend=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Error Handling" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "def reciprocal(x):\n", - " time.sleep(1)\n", - " return 1/x\n", - "\n", - "with ezpq.Queue(n_workers=5, job_runner=threading.Thread) as Q:\n", - " for _ in range(20):\n", - " Q.put(reciprocal, random.randint(0, 5), suppress_errors=True)\n", - " Q.wait()\n", - " output = Q.collect()" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'qid': '873bf141',\n", - " 'id': 5,\n", - " 'name': 5,\n", - " 'priority': 100,\n", - " 'lane': None,\n", - " 'timeout': 0,\n", - " 'function': 'reciprocal',\n", - " 'args': [0],\n", - " 'kwargs': None,\n", - " 'submitted': datetime.datetime(2019, 2, 18, 20, 21, 55, 726351),\n", - " 'started': datetime.datetime(2019, 2, 18, 20, 21, 55, 827401),\n", - " 'ended': datetime.datetime(2019, 2, 18, 20, 21, 56, 828921),\n", - " 'processed': datetime.datetime(2019, 2, 18, 20, 21, 56, 830606),\n", - " 'exitcode': -1,\n", - " 'cancelled': False,\n", - " 'runtime': 1.0015199184417725,\n", - " 'output': None,\n", - " 'exception': 'Traceback (most recent call last):\\n File \"../ezpq/Queue.py\", line 578, in _job_wrap\\n out = _job.function(*args, **kwargs)\\n File \"\", line 3, in reciprocal\\n return 1/x\\nZeroDivisionError: division by zero\\n',\n", - " 'callback': None},\n", - " {'qid': '873bf141',\n", - " 'id': 6,\n", - " 'name': 6,\n", - " 'priority': 100,\n", - " 'lane': None,\n", - " 'timeout': 0,\n", - " 'function': 'reciprocal',\n", - " 'args': [0],\n", - " 'kwargs': None,\n", - " 'submitted': 
datetime.datetime(2019, 2, 18, 20, 21, 55, 726363),\n", - " 'started': datetime.datetime(2019, 2, 18, 20, 21, 56, 830782),\n", - " 'ended': datetime.datetime(2019, 2, 18, 20, 21, 57, 832082),\n", - " 'processed': datetime.datetime(2019, 2, 18, 20, 21, 57, 834327),\n", - " 'exitcode': -1,\n", - " 'cancelled': False,\n", - " 'runtime': 1.0012998580932617,\n", - " 'output': None,\n", - " 'exception': 'Traceback (most recent call last):\\n File \"../ezpq/Queue.py\", line 578, in _job_wrap\\n out = _job.function(*args, **kwargs)\\n File \"\", line 3, in reciprocal\\n return 1/x\\nZeroDivisionError: division by zero\\n',\n", - " 'callback': None},\n", - " {'qid': '873bf141',\n", - " 'id': 15,\n", - " 'name': 15,\n", - " 'priority': 100,\n", - " 'lane': None,\n", - " 'timeout': 0,\n", - " 'function': 'reciprocal',\n", - " 'args': [0],\n", - " 'kwargs': None,\n", - " 'submitted': datetime.datetime(2019, 2, 18, 20, 21, 55, 726513),\n", - " 'started': datetime.datetime(2019, 2, 18, 20, 21, 57, 835738),\n", - " 'ended': datetime.datetime(2019, 2, 18, 20, 21, 58, 836210),\n", - " 'processed': datetime.datetime(2019, 2, 18, 20, 21, 58, 838107),\n", - " 'exitcode': -1,\n", - " 'cancelled': False,\n", - " 'runtime': 1.000471830368042,\n", - " 'output': None,\n", - " 'exception': 'Traceback (most recent call last):\\n File \"../ezpq/Queue.py\", line 578, in _job_wrap\\n out = _job.function(*args, **kwargs)\\n File \"\", line 3, in reciprocal\\n return 1/x\\nZeroDivisionError: division by zero\\n',\n", - " 'callback': None},\n", - " {'qid': '873bf141',\n", - " 'id': 16,\n", - " 'name': 16,\n", - " 'priority': 100,\n", - " 'lane': None,\n", - " 'timeout': 0,\n", - " 'function': 'reciprocal',\n", - " 'args': [0],\n", - " 'kwargs': None,\n", - " 'submitted': datetime.datetime(2019, 2, 18, 20, 21, 55, 726524),\n", - " 'started': datetime.datetime(2019, 2, 18, 20, 21, 58, 838259),\n", - " 'ended': datetime.datetime(2019, 2, 18, 20, 21, 59, 839682),\n", - " 'processed': 
datetime.datetime(2019, 2, 18, 20, 21, 59, 841945),\n", - " 'exitcode': -1,\n", - " 'cancelled': False,\n", - " 'runtime': 1.0014238357543945,\n", - " 'output': None,\n", - " 'exception': 'Traceback (most recent call last):\\n File \"../ezpq/Queue.py\", line 578, in _job_wrap\\n out = _job.function(*args, **kwargs)\\n File \"\", line 3, in reciprocal\\n return 1/x\\nZeroDivisionError: division by zero\\n',\n", - " 'callback': None}]" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[x for x in output if x['exception'] is not None]" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "# for x in output:\n", - "# if x['exception'] is not None:\n", - "# x['exitcode'] = 1\n", - "# else:\n", - "# x['exitcode'] = 0" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ezpq.Plot(output).build(color_by='exitcode', color_pal=['red', 'green'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Shared Objects" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "def fibonacci(shared):\n", - " shared_clone = tuple(shared)\n", - " shared[0] = shared[1]\n", - " shared[1] = sum(shared_clone)\n", - " return shared[1]" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[1,\n", - " 2,\n", - " 3,\n", - " 5,\n", - " 8,\n", - " 13,\n", - " 21,\n", - " 34,\n", - " 55,\n", - " 89,\n", - " 144,\n", - " 233,\n", - " 377,\n", - " 610,\n", - " 987,\n", - " 1597,\n", - " 2584,\n", - " 4181,\n", - " 6765,\n", - " 10946]" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import multiprocessing as mp\n", - "mgr = mp.Manager()\n", - "shared_list = mgr.list([0,1])\n", - "\n", - "with ezpq.Queue(n_workers=10) as Q:\n", - " for _ in range(20):\n", - " Q.put(fibonacci, args=shared_list)\n", - " Q.wait()\n", - " output = Q.collect()\n", - "\n", - "mgr.shutdown()\n", - " \n", - "[x['output'] for x in output]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# New in v0.2.0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## `tqdm` Integration" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Add `show_progress=True` to `ezpq.Queue()`; this works for `map()` and the `@decorator`" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "with ezpq.Queue(5, show_progress=True) as Q:\n", - " 
output = Q.map(time.sleep, range(5))" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f21ab39826574cd0b41bd14d4fdbb18e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=5), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "@ezpq.Queue(n_workers=5, show_progress=True)\n", - "def time_sleep(x):\n", - " time.sleep(x)\n", - "\n", - "output = time_sleep(range(5))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or add `show_progress=True` to `ezpq.map()`" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cc51eb73e523441f836b1840dc76551c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=5), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "with ezpq.Queue(5) as Q:\n", - " output = Q.map(time.sleep, range(5), show_progress=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or add `show_progress=True` to `ezpq.Queue.wait()`" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "beaf4092a03d406dac81d506653c5e39", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=5), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - 
} - ], - "source": [ - "with ezpq.Queue(5) as Q:\n", - " for i in range(5):\n", - " Q.put(time.sleep, i)\n", - " Q.wait(show_progress=True)\n", - " output = Q.collect()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or just call `ezpq.Queue.waitpb()` in place of `ezpq.Queue.wait()`." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "from random import random" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f27ed6344a8f4d03b692affa5817615b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "with ezpq.Queue(5) as Q:\n", - " for i in range(50):\n", - " Q.put(time.sleep, random())\n", - " Q.waitpb()\n", - " output = Q.collect()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## map" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3422853d24e74e37ac7c31e3a1c638b9", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/plain": [ - "['0^2 = 0',\n", - " '1^2 = 1',\n", - " '2^2 = 4',\n", - " '3^2 = 9',\n", - " '4^2 = 16',\n", - " '5^2 = 25',\n", - " 
'6^2 = 36',\n", - " '7^2 = 49',\n", - " '8^2 = 64',\n", - " '9^2 = 81']" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def my_pow(x, k):\n", - " return '{}^{} = {}'.format(x, k, x**k)\n", - "\n", - "with ezpq.Queue(10, show_progress=True) as Q:\n", - " output = Q.map(my_pow, iterable=range(1000), args=2, show_progress=True)\n", - " \n", - "[x['output'] for x in output[:10]]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Synchronous Lanes" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "jobs = {\n", - " 'Lane 1': [1,2,3],\n", - " 'Lane 2': [4,5,6],\n", - " 'Lane 3': [7,8,9],\n", - " 'Lane 4': [10,11,12],\n", - " 'Lane 5': [13,14,15],\n", - " 'Lane 6': [16,17,18]\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b1a67e1276c14d6986ef0f00225bb8ec", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=18), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "with ezpq.Queue(n_workers=3) 
as Q:\n", - " for lane in jobs.keys():\n", - " for job in jobs[lane]:\n", - " Q.put(time.sleep, args=1,\n", - " lane=lane, name='Job '+str(job))\n", - " Q.waitpb()\n", - " output = Q.collect()" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ezpq.Plot(output).build(color_by='lane',\n", - " color_pal=['blue', 'red', 'green',\n", - " 'purple', 'orange', 'brown'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set the lane based on an expression." 
- ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1e19f7d5676243a4a50b31e872bbe0b7", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=20), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# `ezpq` Examples" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "%matplotlib inline" + ], + "outputs": [], + "execution_count": 1, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "import sys\n", + "import os" + ], + "outputs": [], + "execution_count": 2, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "if os.path.exists('../ezpq/__init__.py'):\n", + " sys.path.insert(0, '../')\n", + "import ezpq" + ], + "outputs": [], + "execution_count": 3, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import time\n", + "import random\n", + "import logging as log\n", + "import multiprocessing as mp\n", + "import threading\n", + "logger = log.getLogger()\n", + "logger.setLevel(log.INFO)\n", + "# logger.setLevel(log.DEBUG) # uncomment to view verbose output" + ], + "outputs": [], + "execution_count": 4, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "from datetime import datetime" + ], + "outputs": [], + "execution_count": 5, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "# Context manager to time functions\n", + "class MyTimer():\n", + " '''https://www.blog.pythonlibrary.org/2016/05/24/python-101-an-intro-to-benchmarking-your-code/'''\n", + " def __init__(self):\n", + " self.start = time.time()\n", + "\n", + " def __enter__(self):\n", + " return self\n", + "\n", + " def __exit__(self, exc_type, exc_val, exc_tb):\n", + " end = time.time()\n", + " runtime = round(end - self.start, 3)\n", + " msg = 'Runtime: {time}'\n", + " print(msg.format(time=runtime))" + ], + "outputs": [], + 
"execution_count": 6, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Elementary example\n", + "\nI'll start with a step-by-step example to illustrate the basic operations. The code below is what we want to optimize. It simply loops 10 times, sleeping 1 second each time, for a total of 10 seconds." + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "```python\n", + "for _ in range(10):\n", + " time.sleep(1)\n", + "```" + ], + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "Create the Queue with *n* workers." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "Q = ezpq.Queue(n_workers=5)" + ], + "outputs": [], + "execution_count": 7, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Put jobs in the Queue." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "for _ in range(10):\n", + " Q.put(time.sleep, 1)" + ], + "outputs": [], + "execution_count": 8, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "size() will count all jobs in the waiting, working, and completed queues." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "Q.size()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 9, + "data": { + "text/plain": [ + "10" + ] + }, + "metadata": {} + } + ], + "execution_count": 9, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Create a list to store job data (runtime and output)." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "results = list()" + ], + "outputs": [], + "execution_count": 10, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "`get()` is used to pop the highest priority job from the *completed* queue. 
The result must be stored or it will be lost.\n", + "\nWhen no output is available and `wait` is true, the operation will wait for the next job to finish." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "done = Q.get(wait=True) # get() returns a dict\n", + "\nresults.append(done) # append dictionary to results list." + ], + "outputs": [], + "execution_count": 11, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Count remaining jobs after the operation above." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "Q.size()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 12, + "data": { + "text/plain": [ + "9" + ] + }, + "metadata": {} + } + ], + "execution_count": 12, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "`collect()` is used to repeatedly `get()` all completed jobs.\n", + "\n", + "`collect()` does not support the `wait` parameter;\n", + "\nInstead, wait for all running and queued jobs to complete with `wait()` before calling `collect()`." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "Q.wait()\n", + "\n", + "done_list = Q.collect() # collect() returns a list\n", + "\nresults.extend(done_list) # extend result list " + ], + "outputs": [], + "execution_count": 13, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Verify no jobs remain." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "Q.size()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 14, + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {} + } + ], + "execution_count": 14, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Dispose of the `ezpq`. This will ensure all jobs are stopped and data is discarded." 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "Q.dispose()" + ], + "outputs": [], + "execution_count": 15, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Now, inspect a single job result." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "results[0]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 16, + "data": { + "text/plain": [ + "{'qid': '9030a545',\n", + " 'id': 1,\n", + " 'name': 1,\n", + " 'priority': 100,\n", + " 'lane': None,\n", + " 'timeout': 0,\n", + " 'function': 'sleep',\n", + " 'args': [1],\n", + " 'kwargs': None,\n", + " 'submitted': datetime.datetime(2019, 3, 12, 23, 54, 52, 228484),\n", + " 'started': datetime.datetime(2019, 3, 12, 23, 54, 52, 324976),\n", + " 'ended': datetime.datetime(2019, 3, 12, 23, 54, 53, 332575),\n", + " 'processed': datetime.datetime(2019, 3, 12, 23, 54, 53, 368588),\n", + " 'exitcode': 0,\n", + " 'cancelled': False,\n", + " 'runtime': 1.007598876953125,\n", + " 'output': None,\n", + " 'exception': None,\n", + " 'callback': None}" + ] + }, + "metadata": {} + } + ], + "execution_count": 16, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Create a pandas dataframe for a grid view of the results." 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "df = pd.DataFrame(results)\n", + "\ndf.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 17, + "data": { + "text/plain": [ + " args callback cancelled ended exception exitcode \\\n", + "0 [1] None False 2019-03-12 23:54:53.332575 None 0 \n", + "1 [1] None False 2019-03-12 23:54:53.336425 None 0 \n", + "2 [1] None False 2019-03-12 23:54:53.341192 None 0 \n", + "3 [1] None False 2019-03-12 23:54:53.345096 None 0 \n", + "4 [1] None False 2019-03-12 23:54:53.350268 None 0 \n", + "\n", + " function id kwargs lane name output priority processed \\\n", + "0 sleep 1 None None 1 None 100 2019-03-12 23:54:53.368588 \n", + "1 sleep 2 None None 2 None 100 2019-03-12 23:54:53.369051 \n", + "2 sleep 3 None None 3 None 100 2019-03-12 23:54:53.369197 \n", + "3 sleep 4 None None 4 None 100 2019-03-12 23:54:53.369314 \n", + "4 sleep 5 None None 5 None 100 2019-03-12 23:54:53.454563 \n", + "\n", + " qid runtime started submitted \\\n", + "0 9030a545 1.007599 2019-03-12 23:54:52.324976 2019-03-12 23:54:52.228484 \n", + "1 9030a545 1.007510 2019-03-12 23:54:52.328915 2019-03-12 23:54:52.228505 \n", + "2 9030a545 1.007855 2019-03-12 23:54:52.333337 2019-03-12 23:54:52.228516 \n", + "3 9030a545 1.007642 2019-03-12 23:54:52.337454 2019-03-12 23:54:52.228524 \n", + "4 9030a545 1.008313 2019-03-12 23:54:52.341955 2019-03-12 23:54:52.228532 \n", + "\n", + " timeout \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
argscallbackcancelledendedexceptionexitcodefunctionidkwargslanenameoutputpriorityprocessedqidruntimestartedsubmittedtimeout
0[1]NoneFalse2019-03-12 23:54:53.332575None0sleep1NoneNone1None1002019-03-12 23:54:53.3685889030a5451.0075992019-03-12 23:54:52.3249762019-03-12 23:54:52.2284840
1[1]NoneFalse2019-03-12 23:54:53.336425None0sleep2NoneNone2None1002019-03-12 23:54:53.3690519030a5451.0075102019-03-12 23:54:52.3289152019-03-12 23:54:52.2285050
2[1]NoneFalse2019-03-12 23:54:53.341192None0sleep3NoneNone3None1002019-03-12 23:54:53.3691979030a5451.0078552019-03-12 23:54:52.3333372019-03-12 23:54:52.2285160
3[1]NoneFalse2019-03-12 23:54:53.345096None0sleep4NoneNone4None1002019-03-12 23:54:53.3693149030a5451.0076422019-03-12 23:54:52.3374542019-03-12 23:54:52.2285240
4[1]NoneFalse2019-03-12 23:54:53.350268None0sleep5NoneNone5None1002019-03-12 23:54:53.4545639030a5451.0083132019-03-12 23:54:52.3419552019-03-12 23:54:52.2285320
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 17, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Lastly, visualize the steps." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "ezpq.Plot(results).build()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 18, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 18, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "In the chart above, the colored lines overlap because the jobs were run in parallel. Contrast that with the following example using serial processing." + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Practical Examples" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Shared parameters; adjust as desired." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "n_workers = 10 # number of workers to process the queue.\n", + "n_inputs = 50 # number of inputs to the queue.\n", + "job_time = 1 # number of seconds each input will 'sleep'" + ], + "outputs": [], + "execution_count": 19, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "---" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(n_workers=n_workers) as Q: # create an ezpq.Queue() with 10 workers. 
\n", + " for i in range(n_inputs): # create 50 jobs, each sleeping for 1 second.\n", + " Q.put(function=time.sleep, args=job_time)\n", + " \n", + " Q.waitpb() # wait for all jobs to complete.\n", + " \n", + " results = Q.collect() # collect completed jobs from queue.\n", + " \n", + "df = pd.DataFrame(results)\n", + "\ndf.head()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "88b0a8ca0cca4b14be29c36c6ebd7ed8" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + }, + { + "output_type": "execute_result", + "execution_count": 20, + "data": { + "text/plain": [ + " args callback cancelled ended exception exitcode \\\n", + "0 [1] None False 2019-03-12 23:54:57.786251 None 0 \n", + "1 [1] None False 2019-03-12 23:54:57.791527 None 0 \n", + "2 [1] None False 2019-03-12 23:54:57.796455 None 0 \n", + "3 [1] None False 2019-03-12 23:54:57.800084 None 0 \n", + "4 [1] None False 2019-03-12 23:54:57.805892 None 0 \n", + "\n", + " function id kwargs lane name output priority processed \\\n", + "0 sleep 1 None None 1 None 100 2019-03-12 23:54:57.813994 \n", + "1 sleep 2 None None 2 None 100 2019-03-12 23:54:57.814404 \n", + "2 sleep 3 None None 3 None 100 2019-03-12 23:54:57.814531 \n", + "3 sleep 4 None None 4 None 100 2019-03-12 23:54:57.814645 \n", + "4 sleep 5 None None 5 None 100 2019-03-12 23:54:57.814780 \n", + "\n", + " qid runtime started submitted \\\n", + "0 163e44b7 1.007813 2019-03-12 23:54:56.778438 2019-03-12 23:54:56.670451 \n", + "1 163e44b7 1.008554 2019-03-12 23:54:56.782973 2019-03-12 23:54:56.670491 \n", + "2 163e44b7 1.008746 2019-03-12 23:54:56.787709 2019-03-12 23:54:56.670502 \n", + "3 163e44b7 1.007728 2019-03-12 23:54:56.792356 2019-03-12 23:54:56.670517 \n", + "4 163e44b7 1.008828 
2019-03-12 23:54:56.797064 2019-03-12 23:54:56.670532 \n", + "\n", + " timeout \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
argscallbackcancelledendedexceptionexitcodefunctionidkwargslanenameoutputpriorityprocessedqidruntimestartedsubmittedtimeout
0[1]NoneFalse2019-03-12 23:54:57.786251None0sleep1NoneNone1None1002019-03-12 23:54:57.813994163e44b71.0078132019-03-12 23:54:56.7784382019-03-12 23:54:56.6704510
1[1]NoneFalse2019-03-12 23:54:57.791527None0sleep2NoneNone2None1002019-03-12 23:54:57.814404163e44b71.0085542019-03-12 23:54:56.7829732019-03-12 23:54:56.6704910
2[1]NoneFalse2019-03-12 23:54:57.796455None0sleep3NoneNone3None1002019-03-12 23:54:57.814531163e44b71.0087462019-03-12 23:54:56.7877092019-03-12 23:54:56.6705020
3[1]NoneFalse2019-03-12 23:54:57.800084None0sleep4NoneNone4None1002019-03-12 23:54:57.814645163e44b71.0077282019-03-12 23:54:56.7923562019-03-12 23:54:56.6705170
4[1]NoneFalse2019-03-12 23:54:57.805892None0sleep5NoneNone5None1002019-03-12 23:54:57.814780163e44b71.0088282019-03-12 23:54:56.7970642019-03-12 23:54:56.6705320
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 20, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "title='{} inputs, {}s job time, {} workers (parallel).'.format(n_inputs, job_time, n_workers)\n", + "\n", + "plt = ezpq.Plot(results).build(title=title)\n", + "\n", + "plt.save('imgs/{}x{}x{}.png'.format(n_inputs, job_time, n_workers))\n", + "plt" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Users/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:706: UserWarning: Saving 6.4 x 4.8 in image.\n", + " from_inches(height, units), units))\n", + "/Users/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:707: UserWarning: Filename: imgs/50x1x10.png\n", + " warn('Filename: {}'.format(filename))\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 21, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 21, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Job Priority" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Jobs are prioritized by (1) custom priority and (2) arrival time. By using a priority queue for both pending and completed jobs, you can be sure that higher priority jobs are executed and retrieved sooner.\n", + "\nThe default priority value is 100. Lower values take higher priority." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(n_workers=n_workers) as Q:\n", + "\n", + " for i in range(n_inputs):\n", + " Q.put(function=time.sleep, args=job_time,\n", + " priority=int((i+1) % 10 != 0)+1) # prioritize numbers divisible by ten.\n", + "\n", + " Q.waitpb()\n", + "\n", + " results = Q.collect()\n", + "\n", + "title = '{} inputs, {}s job time, {} workers (prioritized).'.format(n_inputs, job_time, n_workers)\n", + "\n", + "plt = ezpq.Plot(results).build(title=title, color_by='priority')\n", + "\n", + "plt.save('imgs/100x1x10_priority.png')\n", + "plt" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "b253d5779858467a8cbb325db0e9db4a" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Users/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:706: UserWarning: Saving 6.4 x 4.8 in image.\n", + " from_inches(height, units), units))\n", + 
"/Users/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:707: UserWarning: Filename: imgs/100x1x10_priority.png\n", + " warn('Filename: {}'.format(filename))\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 22, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 22, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Threads\n", + "\n", + "By default, `ezpq.Queue` uses `multiprocessing.Process` to spawn new jobs. However, you can use `threading.Thread` instead by specifying `job_runner=Thread` when creating the Queue. Some of the differences between processes and threads are explained [here](https://medium.com/@bfortuner/python-multithreading-vs-multiprocessing-73072ce5600b).\n", + "\n", + "> Note that, inherently, `Thread` loses the ability to enforce a timeout, to kill jobs, or to obtain exit codes.\n", + "\n", + "Aside from that, there's no difference in how you interact with ezpq.\n", + "\nIn addition to using Threads, the example below varies the job time between 0-2 seconds." 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "from threading import Thread\n", + "\n", + "with ezpq.Queue(n_workers=n_workers, job_runner=Thread) as Q:\n", + "\n", + " for i in range(n_inputs):\n", + " sleep_sec = random.randint(0, job_time*2000) / 1000 # sleep for random time between 0-2s (avg=1s).\n", + " \n", + " Q.put(function=time.sleep, args=sleep_sec)\n", + "\n", + " Q.waitpb()\n", + "\n", + " results = Q.collect()\n", + "\n", + "title = '{} inputs, 0-2s job time, {} workers (threads).'.format(n_inputs, job_time, n_workers)\n", + "plt = ezpq.Plot(results).build(title=title)\n", + "\n", + "plt.save('imgs/100xNx10.png')\n", + "plt" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "a0346d73f9394f4bb70f040a4b35d9e9" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Users/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:706: UserWarning: Saving 6.4 x 4.8 in image.\n", + " from_inches(height, units), units))\n", + "/Users/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:707: UserWarning: Filename: imgs/100xNx10.png\n", + " warn('Filename: {}'.format(filename))\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 23, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 23, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Timeout\n", + "\n> Recall that you can only use timeout when using processes, not threads." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "n_inputs = 50\n", + "job_time = 1" + ], + "outputs": [], + "execution_count": 24, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(n_workers=n_workers) as Q:\n", + "\n", + " for i in range(n_inputs):\n", + " sleep_sec = random.randint(0, job_time*2000) / 1000 # sleep between 0s and 2s (avg=1s).\n", + "\n", + " Q.put(function=time.sleep, args=sleep_sec,\n", + " timeout=job_time*1.8) # 1.8 < 2; timeout will occur on ~10% of jobs.\n", + "\n", + " Q.waitpb()\n", + "\n", + " results = Q.collect()\n", + " \n", + "title = '{} inputs, 0-2s job time, {} workers (timeout=1.8s).'.format(n_inputs, job_time, n_workers)\n", + "\n", + "plt = ezpq.Plot(results).build(color_by='cancelled',\n", + " color_pal=['blue', 'red'],\n", + " title=title)\n", + "\n", + "plt.save('imgs/{}xNx{}_timeouts.png'.format(n_inputs, n_workers))\n", + "plt" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "41eb98b8762841f7a0b16b2cadb4d807" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/Users/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:706: UserWarning: Saving 6.4 
x 4.8 in image.\n", + " from_inches(height, units), units))\n", + "/Users/donald/envs/mypy36/lib/python3.6/site-packages/plotnine/ggplot.py:707: UserWarning: Filename: imgs/50xNx10_timeouts.png\n", + " warn('Filename: {}'.format(filename))\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 25, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 25, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Logging\n", + "\n", + "Writing to a log is as simple as adding `log_file='/path/to/file.csv'`. \n", + "\nThe example below illustrates a simple real-world use case. The goal is to obtain the total count and size of files within various folders. One process will be spawned to enumerate each folder. By doing so in parallel, the entire operation only takes as long as the single longest operation." + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Define the function to enumerate the files." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "import os, fnmatch\n", + "def enum_files(path, pattern):\n", + " '''https://stackoverflow.com/a/1724723'''\n", + " n, size = 0, 0\n", + " for root, dirs, files in os.walk(path, followlinks=True):\n", + " for name in files:\n", + " if pattern is None or fnmatch.fnmatch(name, pattern):\n", + " n += 1\n", + " size += os.path.getsize( os.path.join(root, name) )\n", + " \n", + " return (n, size)\n", + "\n", + "root_dir = os.environ['HOME']\n", + "\n", + "include_dirs = [os.path.join(root_dir, x)\n", + " for x in ['bin', 'Desktop', 'Documents', 'Downloads', 'Music', 'Pictures']]" + ], + "outputs": [], + "execution_count": 26, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "log_file = 'ezpq_log.csv'\n", + "\n", + "# first, remove existing log file.\n", + "if os.path.exists(log_file):\n", + " os.unlink(log_file)" + ], + "outputs": [], + "execution_count": 27, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Each result will be written to a CSV file and discarded. 
Afterward, we'll read the CSV back in and draw a plot.\n", + "\nNotice there is no call to `get()` or `collect()`. Because `auto_remove=True`, jobs are discarded; there is no completed queue to pull from." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(n_workers = len(include_dirs),\n", + " log_file=log_file, auto_remove=True) as Q:\n", + "\n", + " for path in include_dirs:\n", + " Q.put(name=path,\n", + " function=enum_files,\n", + " kwargs={'path': path, 'pattern': '*'})\n", + "\n Q.waitpb()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=6), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "60e13de27e9049fb8cde7c89c03cdba0" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Process 1:\n", + "Traceback (most recent call last):\n", + " File \"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py\", line 258, in _bootstrap\n", + " self.run()\n", + " File \"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py\", line 93, in run\n", + " self._target(*self._args, **self._kwargs)\n", + " File \"../ezpq/Queue.py\", line 587, in _job_wrap\n", + " raise ex_obj\n", + " File \"../ezpq/Queue.py\", line 578, in _job_wrap\n", + " out = _job.function(*args, **kwargs)\n", + " File \"\", line 9, in enum_files\n", + " size += os.path.getsize( os.path.join(root, name) )\n", + " File \"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/genericpath.py\", line 50, in getsize\n", + " return os.stat(filename).st_size\n", + "FileNotFoundError: [Errno 2] No such file or directory: '/Users/donald/bin/kudu/build/latest/bin/hive-home'\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "execution_count": 28, + 
"metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "Read the CSV file back in with pandas." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "date_cols = ['submitted', 'started', 'ended', 'processed']\n", + "\n", + "results\n", + "\n", + "df = pd.read_csv(log_file, parse_dates=date_cols)\n", + "\n", + "os.unlink(log_file)\n", + "\ndf.head()" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 29, + "data": { + "text/plain": [ + " qid id name priority lane timeout function \\\n", + "0 2db4adcb 2 /Users/donald/Desktop 100 NaN 0 enum_files \n", + "1 2db4adcb 5 /Users/donald/Music 100 NaN 0 enum_files \n", + "2 2db4adcb 6 /Users/donald/Pictures 100 NaN 0 enum_files \n", + "3 2db4adcb 3 /Users/donald/Documents 100 NaN 0 enum_files \n", + "4 2db4adcb 4 /Users/donald/Downloads 100 NaN 0 enum_files \n", + "\n", + " args kwargs \\\n", + "0 NaN {'path': '/Users/donald/Desktop', 'pattern': '*'} \n", + "1 NaN {'path': '/Users/donald/Music', 'pattern': '*'} \n", + "2 NaN {'path': '/Users/donald/Pictures', 'pattern': ... \n", + "3 NaN {'path': '/Users/donald/Documents', 'pattern':... \n", + "4 NaN {'path': '/Users/donald/Downloads', 'pattern':... 
\n", + "\n", + " submitted started \\\n", + "0 2019-03-12 23:55:52.432877 2019-03-12 23:55:52.542806 \n", + "1 2019-03-12 23:55:52.432944 2019-03-12 23:55:52.561550 \n", + "2 2019-03-12 23:55:52.432960 2019-03-12 23:55:52.567884 \n", + "3 2019-03-12 23:55:52.432895 2019-03-12 23:55:52.548391 \n", + "4 2019-03-12 23:55:52.432928 2019-03-12 23:55:52.555055 \n", + "\n", + " ended processed exitcode cancelled \\\n", + "0 2019-03-12 23:55:52.555101 2019-03-12 23:55:52.637417 0 False \n", + "1 2019-03-12 23:55:52.584082 2019-03-12 23:55:52.639257 0 False \n", + "2 2019-03-12 23:55:52.583016 2019-03-12 23:55:52.639970 0 False \n", + "3 2019-03-12 23:55:52.659204 2019-03-12 23:55:52.736533 0 False \n", + "4 2019-03-12 23:55:52.927651 2019-03-12 23:55:52.940752 0 False \n", + "\n", + " runtime output exception callback \n", + "0 0.012295 (8, 2223032) NaN NaN \n", + "1 0.022532 (57, 3311151) NaN NaN \n", + "2 0.015132 (4, 6338) NaN NaN \n", + "3 0.110813 (2037, 13147879277) NaN NaN \n", + "4 0.372596 (7541, 762292776) NaN NaN " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
qididnameprioritylanetimeoutfunctionargskwargssubmittedstartedendedprocessedexitcodecancelledruntimeoutputexceptioncallback
02db4adcb2/Users/donald/Desktop100NaN0enum_filesNaN{'path': '/Users/donald/Desktop', 'pattern': '*'}2019-03-12 23:55:52.4328772019-03-12 23:55:52.5428062019-03-12 23:55:52.5551012019-03-12 23:55:52.6374170False0.012295(8, 2223032)NaNNaN
12db4adcb5/Users/donald/Music100NaN0enum_filesNaN{'path': '/Users/donald/Music', 'pattern': '*'}2019-03-12 23:55:52.4329442019-03-12 23:55:52.5615502019-03-12 23:55:52.5840822019-03-12 23:55:52.6392570False0.022532(57, 3311151)NaNNaN
22db4adcb6/Users/donald/Pictures100NaN0enum_filesNaN{'path': '/Users/donald/Pictures', 'pattern': ...2019-03-12 23:55:52.4329602019-03-12 23:55:52.5678842019-03-12 23:55:52.5830162019-03-12 23:55:52.6399700False0.015132(4, 6338)NaNNaN
32db4adcb3/Users/donald/Documents100NaN0enum_filesNaN{'path': '/Users/donald/Documents', 'pattern':...2019-03-12 23:55:52.4328952019-03-12 23:55:52.5483912019-03-12 23:55:52.6592042019-03-12 23:55:52.7365330False0.110813(2037, 13147879277)NaNNaN
42db4adcb4/Users/donald/Downloads100NaN0enum_filesNaN{'path': '/Users/donald/Downloads', 'pattern':...2019-03-12 23:55:52.4329282019-03-12 23:55:52.5550552019-03-12 23:55:52.9276512019-03-12 23:55:52.9407520False0.372596(7541, 762292776)NaNNaN
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 29, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "# ezpq.Plot requires a dictionary of job data records.\n", + "records = df.to_dict(orient='records')\n", + "\n", + "plt = ezpq.Plot(records).build(color_by='name', bar_width=5)\n", + "\n", + "# plt.save('...')\n", + "plt" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 30, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 30, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Callback\n", + "\nIf instantiated with a callback, `ezpq.Queue` will pass a dictionary of job data as the single argument to the callback function. It does this synchronously, so the callback must not take too much time." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "def slow_callback(x):\n", + " time.sleep(0.5)" + ], + "outputs": [], + "execution_count": 31, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(n_workers=n_workers, callback=slow_callback, poll=1) as Q:\n", + " for _ in range(n_inputs):\n", + " Q.put(time.sleep, 1)\n", + " Q.waitpb()\n", + " results = Q.collect()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "42eddf20694643e4882d46c520208800" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "execution_count": 32, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "ezpq.Plot(results).build(show_legend=False)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 33, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 33, + "metadata": { + "inputHidden": false, + "outputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Error Handling" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "def reciprocal(x):\n", + " time.sleep(1)\n", + " return 1/x\n", + "\n", + "with ezpq.Queue(n_workers=5, job_runner=threading.Thread) as Q:\n", + " for _ in range(20):\n", + " Q.put(reciprocal, random.randint(0, 5), suppress_errors=True)\n", + " Q.wait()\n", + " output = Q.collect()" + ], + "outputs": [], + "execution_count": 34, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "[x for x in output if x['exception'] is not None]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 35, + "data": { + "text/plain": [ + "[{'qid': '0cb0d2fb',\n", + " 'id': 9,\n", + " 'name': 9,\n", + " 'priority': 100,\n", + " 'lane': None,\n", + " 'timeout': 0,\n", + " 'function': 'reciprocal',\n", + " 'args': [0],\n", + " 'kwargs': None,\n", + " 'submitted': datetime.datetime(2019, 3, 12, 23, 56, 34, 672407),\n", + " 'started': datetime.datetime(2019, 3, 12, 23, 56, 35, 799073),\n", + " 'ended': datetime.datetime(2019, 3, 12, 23, 56, 36, 800570),\n", + " 'processed': datetime.datetime(2019, 3, 12, 23, 56, 36, 830566),\n", + " 'exitcode': -1,\n", + " 'cancelled': False,\n", + " 'runtime': 1.0014970302581787,\n", + " 'output': None,\n", + " 'exception': 'Traceback (most recent call last):\\n File \"../ezpq/Queue.py\", line 578, in _job_wrap\\n out = _job.function(*args, **kwargs)\\n File \"\", line 3, in reciprocal\\n return 1/x\\nZeroDivisionError: division by zero\\n',\n", + " 'callback': None},\n", + " {'qid': '0cb0d2fb',\n", + " 'id': 20,\n", + " 'name': 20,\n", + " 'priority': 100,\n", + " 'lane': None,\n", + " 
'timeout': 0,\n", + " 'function': 'reciprocal',\n", + " 'args': [0],\n", + " 'kwargs': None,\n", + " 'submitted': datetime.datetime(2019, 3, 12, 23, 56, 34, 672626),\n", + " 'started': datetime.datetime(2019, 3, 12, 23, 56, 37, 858694),\n", + " 'ended': datetime.datetime(2019, 3, 12, 23, 56, 38, 862628),\n", + " 'processed': datetime.datetime(2019, 3, 12, 23, 56, 38, 883271),\n", + " 'exitcode': -1,\n", + " 'cancelled': False,\n", + " 'runtime': 1.0039341449737549,\n", + " 'output': None,\n", + " 'exception': 'Traceback (most recent call last):\\n File \"../ezpq/Queue.py\", line 578, in _job_wrap\\n out = _job.function(*args, **kwargs)\\n File \"\", line 3, in reciprocal\\n return 1/x\\nZeroDivisionError: division by zero\\n',\n", + " 'callback': None}]" + ] + }, + "metadata": {} + } + ], + "execution_count": 35, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "# for x in output:\n", + "# if x['exception'] is not None:\n", + "# x['exitcode'] = 1\n", + "# else:\n", + "# x['exitcode'] = 0" + ], + "outputs": [], + "execution_count": 36, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "ezpq.Plot(output).build(color_by='exitcode', color_pal=['red', 'green'])" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 37, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 37, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Shared Objects" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "def fibonacci(shared):\n", + " shared_clone = tuple(shared)\n", + " shared[0] = shared[1]\n", + " shared[1] = sum(shared_clone)\n", + " return shared[1]" + ], + "outputs": [], + "execution_count": 38, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "import multiprocessing as mp\n", + "mgr = mp.Manager()\n", + "shared_list = mgr.list([0,1])\n", + "\n", + "with ezpq.Queue(n_workers=10) as Q:\n", + " for _ in range(20):\n", + " Q.put(fibonacci, args=shared_list)\n", + " Q.wait()\n", + " output = Q.collect()\n", + "\n", + "mgr.shutdown()\n", + " \n", + "[x['output'] for x in output]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 39, + "data": { + "text/plain": [ + "[1,\n", + " 2,\n", + " 3,\n", + " 5,\n", + " 8,\n", + " 13,\n", + " 21,\n", + " 34,\n", + " 55,\n", + " 89,\n", + " 144,\n", + " 233,\n", + " 377,\n", + " 610,\n", + " 987,\n", + " 1597,\n", + " 2584,\n", + " 4181,\n", + " 6765,\n", + " 10946]" + ] + }, + "metadata": {} + } + ], + "execution_count": 39, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "# New in v0.2.0" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## `tqdm` Integration" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Add `show_progress=True` to `ezpq.Queue()`; this works for `map()` and the `@decorator`" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(5, show_progress=True) as Q:\n", + " output = Q.map(time.sleep, range(5))" + ], + "outputs": [], + "execution_count": 40, + "metadata": {} + }, + { + 
"cell_type": "code", + "source": [ + "@ezpq.Queue(n_workers=5, show_progress=True)\n", + "def time_sleep(x):\n", + " time.sleep(x)\n", + "\noutput = time_sleep(range(5))" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=5), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "cd81600d87fa4ed6aab9a529d6479101" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "execution_count": 41, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Or add `show_progress=True` to `ezpq.map()`" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(5) as Q:\n", + " output = Q.map(time.sleep, range(5), show_progress=True)" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=5), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "b30a962d637c4201802ad0eacaa1fc05" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "execution_count": 42, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Or add `show_progress=True` to `ezpq.Queue.wait()`" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(5) as Q:\n", + " for i in range(5):\n", + " Q.put(time.sleep, i)\n", + " Q.wait(show_progress=True)\n", + " output = Q.collect()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=5), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "5a40789060c74495b10e6d26da9d6e1a" + } + }, + "metadata": {} + }, + { + 
"output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "execution_count": 43, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Or just call `ezpq.Queue.waitpb()` in place of `ezpq.Queue.wait()`." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "from random import random" + ], + "outputs": [], + "execution_count": 44, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(5) as Q:\n", + " for i in range(50):\n", + " Q.put(time.sleep, random())\n", + " Q.waitpb()\n", + " output = Q.collect()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=50), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "1c9a2e6991c34205acc58566cb6f0733" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "execution_count": 45, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## map" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "def my_pow(x, k):\n", + " return '{}^{} = {}'.format(x, k, x**k)\n", + "\n\n", + "with ezpq.Queue(10) as Q:\n", + " # raise all elements of `iterable` to power 2.\n", + " output = Q.map(my_pow, iterable=range(100), args=2)\n", + " \n", + "[x['output'] for x in output[:10]]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 46, + "data": { + "text/plain": [ + "['0^2 = 0',\n", + " '1^2 = 1',\n", + " '2^2 = 4',\n", + " '3^2 = 9',\n", + " '4^2 = 16',\n", + " '5^2 = 25',\n", + " '6^2 = 36',\n", + " '7^2 = 49',\n", + " '8^2 = 64',\n", + " '9^2 = 81']" + ] + }, + "metadata": {} + } + ], + "execution_count": 46, + "metadata": { + 
"scrolled": true + } + }, + { + "cell_type": "markdown", + "source": [ + "## starmap\n", + "\niterate over a list of lists." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "# list of lists to iterate over.\n", + "args_list = [( x, x%4 ) # (x, k)\n", + " for x in range(100)]\n", + "\nargs_list[:10]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 47, + "data": { + "text/plain": [ + "[(0, 0),\n", + " (1, 1),\n", + " (2, 2),\n", + " (3, 3),\n", + " (4, 0),\n", + " (5, 1),\n", + " (6, 2),\n", + " (7, 3),\n", + " (8, 0),\n", + " (9, 1)]" + ] + }, + "metadata": {} + } + ], + "execution_count": 47, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(10) as Q:\n", + " output = Q.starmap(my_pow, iterable=args_list)\n", + " \n", + "[x['output'] for x in output[:10]]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 48, + "data": { + "text/plain": [ + "['0^0 = 1',\n", + " '1^1 = 1',\n", + " '2^2 = 4',\n", + " '3^3 = 27',\n", + " '4^0 = 1',\n", + " '5^1 = 5',\n", + " '6^2 = 36',\n", + " '7^3 = 343',\n", + " '8^0 = 1',\n", + " '9^1 = 9']" + ] + }, + "metadata": {} + } + ], + "execution_count": 48, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## starmapkw\n", + "\n`starmap`, but with a list of dicts." 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "# list of dicts to iterate over.\n", + "kwargs_list = [{ 'x':x, 'k':x%4 } # (x, k)\n", + " for x in range(100)]\n", + "\nkwargs_list[:10]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 49, + "data": { + "text/plain": [ + "[{'x': 0, 'k': 0},\n", + " {'x': 1, 'k': 1},\n", + " {'x': 2, 'k': 2},\n", + " {'x': 3, 'k': 3},\n", + " {'x': 4, 'k': 0},\n", + " {'x': 5, 'k': 1},\n", + " {'x': 6, 'k': 2},\n", + " {'x': 7, 'k': 3},\n", + " {'x': 8, 'k': 0},\n", + " {'x': 9, 'k': 1}]" + ] + }, + "metadata": {} + } + ], + "execution_count": 49, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(10) as Q:\n", + " output = Q.starmapkw(my_pow, iterable=kwargs_list)\n", + "\n[x['output'] for x in output[:10]]" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 50, + "data": { + "text/plain": [ + "['0^0 = 1',\n", + " '1^1 = 1',\n", + " '2^2 = 4',\n", + " '3^3 = 27',\n", + " '4^0 = 1',\n", + " '5^1 = 5',\n", + " '6^2 = 36',\n", + " '7^3 = 343',\n", + " '8^0 = 1',\n", + " '9^1 = 9']" + ] + }, + "metadata": {} + } + ], + "execution_count": 50, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Synchronous Lanes" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "jobs = {\n", + " 'Lane 1': [1,2,3],\n", + " 'Lane 2': [4,5,6],\n", + " 'Lane 3': [7,8,9],\n", + " 'Lane 4': [10,11,12],\n", + " 'Lane 5': [13,14,15],\n", + " 'Lane 6': [16,17,18]\n", + "}" + ], + "outputs": [], + "execution_count": 51, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(n_workers=3) as Q:\n", + " for lane in jobs.keys():\n", + " for job in jobs[lane]:\n", + " 
Q.put(time.sleep, args=1,\n", + " lane=lane, name='Job '+str(job))\n", + " Q.waitpb()\n", + " output = Q.collect()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=18), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "062e5be2c86940c1b84bafedffaac745" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "execution_count": 52, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "ezpq.Plot(output).build(color_by='lane',\n", + " color_pal=['blue', 'red', 'green',\n", + " 'purple', 'orange', 'brown'])" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 53, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 53, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Set the lane based on an expression." 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "with ezpq.Queue(5) as Q:\n", + " for i in range(20):\n", + " Q.put(time.sleep, random(),\n", + " lane = i % 5)\n", + " \n", + " Q.waitpb()\n", + " output = Q.collect()\n", + " \n", + "ezpq.Plot(output).build(color_by='lane', theme='dark')" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=20), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "aa04c73090ab446da549715c23327b07" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 54, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 54, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + 
"source": [ + "## Plot Facetting" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "all_output = list()\n", + "\n", + "for qid in [1, 2, 3]:\n", + " with ezpq.Queue(5, qid='queue_' + str(qid)) as Q:\n", + " for i in range(20):\n", + " lane = i % 5\n", + " Q.put(time.sleep, args=1,\n", + " lane=lane, name='Job '+str(job))\n", + " Q.waitpb()\n", + " all_output.extend( Q.collect() )" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=20), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "0ef8fe4c9f9a4a53b45d4a56166bb9ea" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=20), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "020ac7a17bbb441eacc238278285007d" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=20), HTML(value='')))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "99151c04267f47bf9d179566af90ab4e" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "execution_count": 55, + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "ezpq.Plot(all_output).build(facet_by='qid',\n", + " color_by='lane',\n", + " color_pal=['blue', 'orange', 'green',\n", + " 'red', 'purple'])" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 56, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 56, + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Plot Themes & Colors" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "ezpq.Plot(all_output).build(facet_by='qid', theme='dark',\n", + " color_by='lane',\n", + " color_pal=['cyan', 'orange', 'lime',\n", + " 'red', 'yellow'])" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "execution_count": 57, + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "execution_count": 57, + "metadata": {} } - ], - "source": [ - "with ezpq.Queue(5) as Q:\n", - " for i in range(20):\n", - " Q.put(time.sleep, random(),\n", - " lane = i % 5)\n", - " \n", - " Q.waitpb()\n", - " output = Q.collect()\n", - " \n", - "ezpq.Plot(output).build(color_by='lane', theme='dark')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Plot Facetting" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "821725d7a29e487a874721c49a446d6b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=20), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6bb1082eb6bf4205848e512f9d7244f2", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=20), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c3077386a89d4e6a856dd06a19ab70c6", - "version_major": 2, - "version_minor": 0 + ], + "metadata": { + "kernel_info": { + "name": "mypy36" + }, + "kernelspec": { + "name": "mypy36", + "language": "python", + "display_name": "mypy36" + }, + "language_info": { + "name": "python", + "version": "3.6.6", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=20), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "all_output = list()\n", - "\n", - "for qid in [1, 2, 3]:\n", - " with ezpq.Queue(5, 
qid='queue_' + str(qid)) as Q:\n", - " for i in range(20):\n", - " lane = i % 5\n", - " Q.put(time.sleep, args=1,\n", - " lane=lane, name='Job '+str(job))\n", - " Q.waitpb()\n", - " all_output.extend( Q.collect() )" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ezpq.Plot(all_output).build(facet_by='qid',\n", - " color_by='lane',\n", - " color_pal=['blue', 'orange', 'green',\n", - " 'red', 'purple'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Plot Themes & Colors" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "nteract": { + "version": "0.12.3" } - ], - "source": [ - "ezpq.Plot(all_output).build(facet_by='qid', theme='dark',\n", - " color_by='lane',\n", - " color_pal=['cyan', 'orange', 'lime',\n", - " 'red', 'yellow'])" - ] - } - ], - "metadata": { - "kernel_info": { - "name": "mypy36" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.7" }, - "nteract": { - "version": "0.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/ezpq/Queue.py b/ezpq/Queue.py index 7d60796..4d84fdd 100644 --- a/ezpq/Queue.py +++ b/ezpq/Queue.py @@ -685,6 +685,40 @@ def map(self, function, iterable, args=None, kwargs=None, timeout=0, show_progre return self.collect() + def starmap(self, function, iterable, args=None, kwargs=None, timeout=0, show_progress=False): + + assert hasattr(iterable, '__iter__') + + if args is None: + args = [] + elif not isinstance(args, list): + args = list(args) + + for x in iterable: + job = ezpq.Job(function=function, args=list(x) + args, kwargs=kwargs, timeout=timeout) + self.submit(job) + + self.wait(show_progress=show_progress) + + return self.collect() + + def starmapkw(self, function, iterable, args=None, kwargs=None, timeout=0, show_progress=False): + + assert hasattr(iterable, '__iter__') + + if kwargs is None: + kwargs = {} + elif not isinstance(kwargs, dict): + kwargs = dict(kwargs) + + for x in 
iterable: + job = ezpq.Job(function=function, args=args, kwargs={**x, **kwargs}, timeout=timeout) + self.submit(job) + + self.wait(show_progress=show_progress) + + return self.collect() + def get(self, wait=False, poll=0, timeout=0): """Pops the highest priority item from the completed queue. diff --git a/ezpq/__init__.py b/ezpq/__init__.py index 9accef2..0796f9d 100644 --- a/ezpq/__init__.py +++ b/ezpq/__init__.py @@ -1,5 +1,5 @@ -__version__ = "0.2.1" +__version__ = "0.2.2" from .Job import Job from .Queue import Queue -from .Plot import Plot \ No newline at end of file +from .Plot import Plot diff --git a/setup.py b/setup.py index 2bfa21c..a112649 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name="ezpq", - version="0.2.1", + version="0.2.2", #download_url = 'https://github.com/dm3ll3n/ezpq/releases/download/0.2.0/ezpq-0.2.0.tar.gz', description="an easy parallel queueing system", long_description=README, diff --git a/test/test_processes.py b/test/test_processes.py index 87f35fb..e305507 100644 --- a/test/test_processes.py +++ b/test/test_processes.py @@ -22,11 +22,11 @@ def setUp(self): def test_priority(self): self.Q._stop() - + for i,x in enumerate(self.input): self.Q.put(function=return_me, args=x, priority=-i) # should result in reversed inputs. 
- + self.Q.start() self.Q.wait() @@ -34,13 +34,27 @@ def test_priority(self): self.assertEqual(tuple(reversed(self.input)), out_list) - def test_map(self): + def test_map(self): job_data = self.Q.map(function=return_me, iterable=self.input) out_list = tuple(job['output'] for job in job_data) self.assertEqual(self.input, out_list) + def test_starmap(self): + job_data = self.Q.starmap(function=return_me, iterable=[[x] for x in self.input]) + + out_list = tuple(job['output'] for job in job_data) + + self.assertEqual(self.input, out_list) + + def test_starmapkw(self): + job_data = self.Q.starmapkw(function=return_me, iterable=[{'x': x} for x in self.input]) + + out_list = tuple(job['output'] for job in job_data) + + self.assertEqual(self.input, out_list) + def test_lanes(self): for i, x in enumerate(self.input): self.Q.put(function=return_me, args=x, diff --git a/test/test_threads.py b/test/test_threads.py index a0839ed..b729e67 100644 --- a/test/test_threads.py +++ b/test/test_threads.py @@ -22,11 +22,11 @@ def setUp(self): def test_priority(self): self.Q._stop() - + for i,x in enumerate(self.input): self.Q.put(function=return_me, args=x, priority=-i) # should result in reversed inputs. 
- + self.Q.start() self.Q.wait() @@ -34,13 +34,27 @@ def test_priority(self): self.assertEqual(tuple(reversed(self.input)), out_list) - def test_map(self): + def test_map(self): job_data = self.Q.map(function=return_me, iterable=self.input) out_list = tuple(job['output'] for job in job_data) self.assertEqual(self.input, out_list) + def test_starmap(self): + job_data = self.Q.starmap(function=return_me, iterable=[[x] for x in self.input]) + + out_list = tuple(job['output'] for job in job_data) + + self.assertEqual(self.input, out_list) + + def test_starmapkw(self): + job_data = self.Q.starmapkw(function=return_me, iterable=[{'x': x} for x in self.input]) + + out_list = tuple(job['output'] for job in job_data) + + self.assertEqual(self.input, out_list) + def test_lanes(self): for i, x in enumerate(self.input): self.Q.put(function=return_me, args=x,