"""
This file shall host the shared algorithms

If it gets too dense, we could consider splitting it into a library import
|_ algorithm/
  |_ __init__.py
  |_ a.py
  |_ b.py
"""

from typing import List, Sequence, Any


def chunk_split(dataset: Sequence[Any], number_of_chunks: int) -> List[List[Any]]:
    """
    Splits a dataset into balanced, contiguous chunks

    If the size of the dataset is not fully divisible by the number of chunks,
    the leading chunks each receive one extra item, like this
    > `[ [n+1], [n+1], [n+1], [n], [n], [n] ]`

    Every chunk is returned as a new `list`, regardless of the concrete
    sequence type passed in (list, tuple, range, ...), and the original
    ordering of the items is preserved.


    Parameters
    ----------
    :param dataset: This should be the dataset you want to split into chunks
    :param number_of_chunks: This should be the number of chunks to split into; must be at least 1


    Returns
    -------
    :returns list[list[Any]]: The split dataset, containing exactly `number_of_chunks` non-empty chunks

    Raises
    ------
    AssertionError: The number of chunks specified is smaller than 1
    AssertionError: The number of chunks specified is larger than the dataset size
    """
    length = len(dataset)

    # Raised explicitly instead of via `assert` so the validation is not
    # stripped when Python runs with -O. AssertionError is kept as the
    # exception type because that is what this function documents.
    if number_of_chunks < 1:
        # Zero would divide by zero below; a negative count would produce
        # negative chunk lengths and never reach the end of the dataset.
        raise AssertionError('The number of chunks specified must be at least 1')
    if length < number_of_chunks:
        raise AssertionError('The number of chunks specified is larger than the dataset size')

    # `overflow` is how many leading chunks must take one extra item.
    chunk_size, overflow = divmod(length, number_of_chunks)

    split: List[List[Any]] = []
    start = 0
    for index in range(number_of_chunks):
        stop = start + chunk_size + (1 if index < overflow else 0)
        # list(...) guarantees the declared return type even for tuple/range input.
        split.append(list(dataset[start:stop]))
        start = stop

    return split
src/thread/utils/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/thread/utils/__init__.py b/src/thread/utils/__init__.py index 2906842..16e9e45 100644 --- a/src/thread/utils/__init__.py +++ b/src/thread/utils/__init__.py @@ -4,3 +4,7 @@ from .logging_config import ColorLogger from .config import Settings + +from . import ( + algorithm, +) From 5657f4a182bef2345f28992ee258057178d40114 Mon Sep 17 00:00:00 2001 From: AlexNg Date: Tue, 12 Dec 2023 13:29:52 +0800 Subject: [PATCH 3/5] + Chunk splitting integration --- src/thread/thread.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/thread/thread.py b/src/thread/thread.py index 5562683..6c0512e 100644 --- a/src/thread/thread.py +++ b/src/thread/thread.py @@ -3,9 +3,9 @@ import signal import threading -import numpy from . import exceptions from .utils.config import Settings +from .utils.algorithm import chunk_split from functools import wraps from typing import ( @@ -509,10 +509,10 @@ def start(self) -> None: name_format = self.overflow_kwargs.get('name') and self.overflow_kwargs['name'] + '%s' self.overflow_kwargs = { i: v for i,v in self.overflow_kwargs.items() if i != 'name' and i != 'args' } - for i, data_chunk in enumerate(numpy.array_split(self.dataset, max_threads)): + for i, data_chunk in enumerate(chunk_split(self.dataset, max_threads)): chunk_thread = Thread( target = self.function, - args = [i, data_chunk.tolist(), *parsed_args, *self.overflow_args], + args = [i, data_chunk, *parsed_args, *self.overflow_args], name = name_format and name_format % i or None, **self.overflow_kwargs ) From 61eb61550511e5e8a881a145c367e14b29808fd2 Mon Sep 17 00:00:00 2001 From: AlexNg Date: Tue, 12 Dec 2023 13:40:21 +0800 Subject: [PATCH 4/5] + Pure python integration --- tests/test_parallelprocessing.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_parallelprocessing.py b/tests/test_parallelprocessing.py index 8686b59..53b68ac 100644 --- 
a/tests/test_parallelprocessing.py +++ b/tests/test_parallelprocessing.py @@ -1,5 +1,4 @@ import time -import numpy import pytest from src.thread import ParallelProcessing, exceptions @@ -19,7 +18,7 @@ def _dummy_raiseException(x: Exception, delay: float = 0): # >>>>>>>>>> General Use <<<<<<<<<< # def test_threadsScaleDown(): """This test is for testing if threads scale down `max_threads` when the dataset is lesser than the thread count""" - dataset = numpy.arange(0, 2).tolist() + dataset = list(range(0, 2)) new = ParallelProcessing( function = _dummy_dataProcessor, dataset = dataset, @@ -32,7 +31,7 @@ def test_threadsScaleDown(): def test_threadsProcessing(): """This test is for testing if threads correctly order data in the `dataset` arrangement""" - dataset = numpy.arange(0, 500).tolist() + dataset = list(range(0, 500)) new = ParallelProcessing( function = _dummy_dataProcessor, dataset = dataset, @@ -48,7 +47,7 @@ def test_threadsProcessing(): # >>>>>>>>>> Raising Exceptions <<<<<<<<<< # def test_raises_StillRunningError(): """This test should raise ThreadStillRunningError""" - dataset = numpy.arange(0, 8).tolist() + dataset = list(range(0, 8)) new = ParallelProcessing( function = _dummy_dataProcessor, dataset = dataset, From 8fd7c0782086235e299ed228900bfa759040e888 Mon Sep 17 00:00:00 2001 From: AlexNg Date: Tue, 12 Dec 2023 13:42:44 +0800 Subject: [PATCH 5/5] - Removed numpy and typing-extensions --- poetry.lock | 47 +---------------------------------------------- pyproject.toml | 2 -- 2 files changed, 1 insertion(+), 48 deletions(-) diff --git a/poetry.lock b/poetry.lock index 50261dc..68df6df 100644 --- a/poetry.lock +++ b/poetry.lock @@ -149,51 +149,6 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] -[[package]] -name = "numpy" -version = "1.26.2" -description = "Fundamental package for array computing in Python" -optional = false -python-versions = ">=3.9" -files = [ - {file = 
"numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f"}, - {file = "numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440"}, - {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75"}, - {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00"}, - {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe"}, - {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523"}, - {file = "numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9"}, - {file = "numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919"}, - {file = "numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841"}, - {file = "numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1"}, - {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a"}, - {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b"}, - {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7"}, - {file = 
"numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8"}, - {file = "numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186"}, - {file = "numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d"}, - {file = "numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0"}, - {file = "numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75"}, - {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7"}, - {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6"}, - {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6"}, - {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec"}, - {file = "numpy-1.26.2-cp312-cp312-win32.whl", hash = "sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167"}, - {file = "numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e"}, - {file = "numpy-1.26.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4eb8df4bf8d3d90d091e0146f6c28492b0be84da3e409ebef54349f71ed271ef"}, - {file = "numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a13860fdcd95de7cf58bd6f8bc5a5ef81c0b0625eb2c9a783948847abbef2c2"}, - {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:64308ebc366a8ed63fd0bf426b6a9468060962f1a4339ab1074c228fa6ade8e3"}, - {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf8aab04a2c0e859da118f0b38617e5ee65d75b83795055fb66c0d5e9e9b818"}, - {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d73a3abcac238250091b11caef9ad12413dab01669511779bc9b29261dd50210"}, - {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b361d369fc7e5e1714cf827b731ca32bff8d411212fccd29ad98ad622449cc36"}, - {file = "numpy-1.26.2-cp39-cp39-win32.whl", hash = "sha256:bd3f0091e845164a20bd5a326860c840fe2af79fa12e0469a12768a3ec578d80"}, - {file = "numpy-1.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2beef57fb031dcc0dc8fa4fe297a742027b954949cabb52a2a376c144e5e6060"}, - {file = "numpy-1.26.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1cc3d5029a30fb5f06704ad6b23b35e11309491c999838c31f124fee32107c79"}, - {file = "numpy-1.26.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94cc3c222bb9fb5a12e334d0479b97bb2df446fbe622b470928f5284ffca3f8d"}, - {file = "numpy-1.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe6b44fb8fcdf7eda4ef4461b97b3f63c466b27ab151bec2366db8b197387841"}, - {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"}, -] - [[package]] name = "packaging" version = "23.2" @@ -361,4 +316,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "a2958d3bed5f28eddd1a0fea932d032a0df6fa534edb29d0094f6a5f6b8bd12f" +content-hash = "43238a4a9b3c3d7aa608a6cbd6a95dd47889b7edfeb392db46529c2389c9c120" diff --git a/pyproject.toml b/pyproject.toml index f61a664..5df4095 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,8 +25,6 @@ thread = "thread.__main__:app" [tool.poetry.dependencies] python = "^3.9" -numpy = "^1.26.2" -typing-extensions = "^4.8.0" typer = {extras = ["all"], version = "^0.9.0"}