From 97cf3819035486628b2dcc2ad03e3b427fbf8046 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Thu, 19 Mar 2020 11:34:54 -0700 Subject: [PATCH] feat: add pure python implementation (#20) * feat: add pure-python implmentation * feat: extract most of checksum to shared base class * chore: move setup.py static settings to setup.cfg * Update src/crc32c/_checksum.py Co-Authored-By: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> * fix: move trove classifier to beta, add python_requires Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> --- setup.cfg | 44 ++++++++++++++ setup.py | 52 ++++++----------- src/crc32c/__init__.py | 116 ++++++------------------------------- src/crc32c/_checksum.py | 86 ++++++++++++++++++++++++++++ src/crc32c/cffi.py | 72 +++++++++++++++++++++++ src/crc32c/python.py | 124 ++++++++++++++++++++++++++++++++++++++++ tests/test___init__.py | 37 ++++++++---- 7 files changed, 389 insertions(+), 142 deletions(-) create mode 100644 setup.cfg create mode 100644 src/crc32c/_checksum.py create mode 100644 src/crc32c/cffi.py create mode 100644 src/crc32c/python.py diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..ac71ac5c --- /dev/null +++ b/setup.cfg @@ -0,0 +1,44 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def main(build_cffi=True):
    """Invoke ``setuptools.setup`` for the ``crc32c`` package.

    Only the dynamic arguments are passed here; the static metadata is
    declared in ``setup.cfg``.

    Args:
        build_cffi (bool): When true (the default), declare the CFFI
            dependency and register the CFFI builder module so the C
            extension is compiled. When false, no CFFI requirement or
            module is declared and only the pure-python code is installed.
    """
    build_path = os.path.join("src", "crc32c_build.py")
    builder = "{}:FFIBUILDER".format(build_path)
    cffi_dep = "cffi >= 1.0.0"

    # The same requirement is needed both to build and to run the extension.
    cffi_requirements = [cffi_dep] if build_cffi else []
    cffi_modules = [builder] if build_cffi else []

    setuptools.setup(
        packages=["crc32c"],
        package_dir={"": "src"},
        package_data={"crc32c": [os.path.join(_EXTRA_DLL, _DLL_FILENAME)]},
        setup_requires=list(cffi_requirements),
        cffi_modules=cffi_modules,
        install_requires=list(cffi_requirements),
        cmdclass={"build_ext": BuildExtWithDLL},
    )


if __name__ == "__main__":
    import warnings

    try:
        main()
    except SystemExit as exc:
        # ``SystemExit`` with no code (or 0) is a clean exit, not a build
        # failure: propagate it instead of running setup a second time.
        # ``KeyboardInterrupt`` is not a ``SystemExit`` and propagates on
        # its own.
        if exc.code in (None, 0):
            raise
        # If installation fails, it is likely a compilation error with CFFI
        # Try to install again.
        warnings.warn(
            "Compiling the CFFI Extension crc32c has failed. Only a pure "
            "python implementation will be usable."
        )
        main(build_cffi=False)
import warnings

_SLOW_CRC32C_WARNING = (
    "As the c extension couldn't be imported, `google-crc32c` is using a "
    "pure python implementation that is significantly slower. If possible, "
    "please configure a c build environment and compile the extension"
)

# If available, default to CFFI Implementation, otherwise, use pure python.
try:
    from crc32c import cffi as _crc32c
except ImportError:
    from crc32c import python as _crc32c

    # ``warnings.warn`` takes the message first and the category second.
    # Passing them the other way round raises ``TypeError`` and would make
    # the package unimportable exactly when the fallback is needed.
    warnings.warn(_SLOW_CRC32C_WARNING, RuntimeWarning)
    implementation = "python"
else:
    implementation = "cffi"

# Re-export the selected backend's API at package level.
extend = _crc32c.extend
value = _crc32c.value

Checksum = _crc32c.Checksum

__all__ = ["extend", "value", "Checksum", "implementation"]
import struct


class CommonChecksum(object):
    """Hashlib-alike helper for CRC32C operations.

    This class should not be used directly: subclasses must provide an
    :meth:`update` implementation that folds data into ``self._crc``.

    Args:
        initial_value (Optional[bytes]): the initial chunk of data from
            which the CRC32C checksum is computed. Defaults to b''.
    """

    def __init__(self, initial_value=b""):
        self._crc = 0
        # Skip the call for the default so that the base class itself can be
        # instantiated without hitting the abstract ``update``.
        if initial_value != b"":
            self.update(initial_value)

    def update(self, data):
        """Update the checksum with a new chunk of data.

        Args:
            data (Optional[bytes]): a chunk of data used to extend
                the CRC32C checksum.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        # ``NotImplemented`` is a comparison sentinel and is not callable;
        # ``NotImplementedError`` is the exception for abstract methods.
        raise NotImplementedError()

    def digest(self):
        """Big-endian order, per RFC 4960.

        See: https://cloud.google.com/storage/docs/json_api/v1/objects#crc32c

        Returns:
            bytes: A four-byte digest string.
        """
        return struct.pack(">L", self._crc)

    def hexdigest(self):
        """Like :meth:`digest` except returns as a bytestring of double length.

        Returns:
            bytes: An eight-byte digest string, containing only hex digits.
        """
        return "{:08x}".format(self._crc).encode("ascii")

    def copy(self):
        """Create another checksum with the same CRC32C value.

        Returns:
            Checksum: the new instance, of the same class as ``self``.
        """
        clone = self.__class__()
        clone._crc = self._crc
        return clone

    def consume(self, stream, chunksize):
        """Consume chunks from a stream, extending our CRC32 checksum.

        Args:
            stream (BinaryIO): the stream to consume.
            chunksize (int): the size of the read to perform

        Returns:
            Generator[bytes, None, None]: Iterable of the chunks read from the
            stream.
        """
        while True:
            chunk = stream.read(chunksize)
            # An empty read marks the end of the stream.
            if not chunk:
                break
            self.update(chunk)
            yield chunk
import struct

# NOTE: ``__config__`` **must** be the first import because it (may)
# modify the search path used to locate shared libraries.
import crc32c.__config__
import crc32c._crc32c_cffi
from crc32c._checksum import CommonChecksum


def extend(crc, chunk):
    """Fold a further chunk of data into an existing CRC checksum.

    Args:
        crc (int): An existing CRC check sum.
        chunk (Union[bytes, List[int], Tuple[int]]): A new chunk of data.
            Intended to be a byte string or similar.

    Returns:
        int: New CRC checksum computed by extending existing CRC
        with ``chunk``.
    """
    native = crc32c._crc32c_cffi.lib
    return native.crc32c_extend(crc, chunk, len(chunk))


def value(chunk):
    """Return the CRC checksum of a single chunk of data.

    Args:
        chunk (Union[bytes, List[int], Tuple[int]]): A new chunk of data.
            Intended to be a byte string or similar.

    Returns:
        int: New CRC checksum computed for ``chunk``.
    """
    native = crc32c._crc32c_cffi.lib
    return native.crc32c_value(chunk, len(chunk))


class Checksum(CommonChecksum):
    """Hashlib-alike CRC32C helper that delegates to this module's functions.

    Args:
        initial_value (Optional[bytes]): the initial chunk of data from
            which the CRC32C checksum is computed. Defaults to b''.
    """

    __slots__ = ("_crc",)

    def __init__(self, initial_value=b""):
        # Seed the running checksum straight from the initial chunk.
        seed = value(initial_value)
        self._crc = seed

    def update(self, chunk):
        """Extend the running checksum with another chunk of data.

        Args:
            chunk (Optional[bytes]): a chunk of data used to extend
                the CRC32C checksum.
        """
        extended = extend(self._crc, chunk)
        self._crc = extended
import array
import struct

from crc32c._checksum import CommonChecksum


def extend(crc, chunk):
    """Update an existing CRC checksum with new chunk of data.

    Args:
        crc (int): An existing CRC check sum.
        chunk (Union[bytes, List[int], Tuple[int]]): A new chunk of data.
            Intended to be a byte string or similar.

    Returns:
        int: New CRC checksum computed by extending existing CRC
        with ``chunk``.
    """
    c = Checksum()
    c._crc = crc
    c.update(chunk)
    return c._crc


def value(chunk):
    """Compute a CRC checksum for a chunk of data.

    Args:
        chunk (Union[bytes, List[int], Tuple[int]]): A new chunk of data.
            Intended to be a byte string or similar.

    Returns:
        int: New CRC checksum computed for ``chunk``.
    """
    c = Checksum()
    c.update(chunk)
    return c._crc


class Checksum(CommonChecksum):
    """Hashlib-alike helper for CRC32C operations (pure-python, table driven).

    Args:
        initial_value (Optional[bytes]): the initial chunk of data from
            which the CRC32C checksum is computed. Defaults to b''.
    """

    def __init__(self, initial_value=b""):
        self._crc = 0
        if initial_value != b"":
            self.update(initial_value)

    def update(self, data):
        """Update the checksum with a new chunk of data.

        Args:
            data (Optional[bytes]): a chunk of data used to extend
                the CRC32C checksum. Anything accepted by
                ``array.array("B", data)`` works; an ``array.array`` with
                one-byte items is iterated as-is without copying.
        """
        if isinstance(data, array.array) and data.itemsize == 1:
            buffer = data
        else:
            buffer = array.array("B", data)

        # Keep the running value and the table in locals for the per-byte
        # loop, and store the result once at the end.
        table = _TABLE
        crc = self._crc ^ 0xFFFFFFFF
        for b in buffer:
            crc = table[(b ^ crc) & 0xFF] ^ ((crc >> 8) & 0xFFFFFFFF)
        self._crc = crc ^ 0xFFFFFFFF


# Byte-indexed lookup table consumed by ``Checksum.update``.
# fmt:off
_TABLE = [
    0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8,
    0xd4ca64eb, 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3,
    0xac78bf27, 0x5e133c24, 0x105ec76f, 0xe235446c, 0xf165b798, 0x30e349b, 0xd7c45070,
    0x25afd373, 0x36ff2087, 0xc494a384, 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
    0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, 0x20bd8ede, 0xd2d60ddd, 0xc186fe29,
    0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x61c6936, 0xf477ea35, 0xaa64d611, 0x580f5512,
    0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, 0x30e349b1,
    0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x5125dad, 0x1642ae59, 0xe4292d5a,
    0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696,
    0x6ef07595, 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0,
    0x67dafa54, 0x95b17957, 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0xc38d26c,
    0xfe53516f, 0xed03a29b, 0x1f682198, 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
    0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 0xdbfc821c, 0x2997011f, 0x3ac7f2eb,
    0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0xf36e6f7, 0x61c69362, 0x93ad1061,
    0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, 0xeb1fcbad,
    0x197448ae, 0xa24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
    0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5,
    0xa55230e6, 0xfb410cc2, 0x92a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de,
    0xdde0eb2a, 0x2f8b6829, 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67,
    0xb7072f64, 0xa457dc90, 0x563c5f93, 0x82f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
    0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, 0x92a8fc17, 0x60c37f14, 0x73938ce0,
    0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, 0x1871a4d8, 0xea1a27db,
    0xf94ad42f, 0xb21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 0xa24bb5a6,
    0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
    0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0xe330a81,
    0xfc588982, 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5,
    0x94b49521, 0x66df1622, 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19,
    0xd3d3e1a, 0x1e6dcdee, 0xec064eed, 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
    0x417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, 0x49547e0b, 0xbb3ffd08, 0xa86f0efc,
    0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 0xd3d3e1ab, 0x21b862a8,
    0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x7198540, 0x590ab964,
    0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
    0xe330a81a, 0x115b2b19, 0x20bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2,
    0x37faccf1, 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9,
    0x4f48173d, 0xbd23943e, 0xf36e6f75, 0x105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a,
    0xc69f7b69, 0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
    0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351
]
-193,27 +192,43 @@ def test_value(chunk, expected): assert crc32c.value(chunk) == expected +def pytest_generate_tests(metafunc): + if "_crc32c" in metafunc.fixturenames: + metafunc.parametrize("_crc32c", ["python", "cffi"], indirect=True) + +@pytest.fixture +def _crc32c(request): + if request.param == "python": + from crc32c import python + return python + elif request.param == "cffi": + from crc32c import cffi + return cffi + else: + raise ValueError("invalid internal test config") + + class TestChecksum(object): @staticmethod - def test_ctor_defaults(): + def test_ctor_defaults(_crc32c): helper = crc32c.Checksum() assert helper._crc == 0 @staticmethod - def test_ctor_explicit(): + def test_ctor_explicit(_crc32c): chunk = b"DEADBEEF" helper = crc32c.Checksum(chunk) assert helper._crc == crc32c.value(chunk) @staticmethod - def test_update(): + def test_update(_crc32c): chunk = b"DEADBEEF" helper = crc32c.Checksum() helper.update(chunk) assert helper._crc == crc32c.value(chunk) @staticmethod - def test_update_w_multiple_chunks(): + def test_update_w_multiple_chunks(_crc32c): helper = crc32c.Checksum() for index in itertools.islice(range(ISCSI_LENGTH), 0, None, 7): @@ -223,29 +238,29 @@ def test_update_w_multiple_chunks(): assert helper._crc == ISCSI_CRC @staticmethod - def test_digest_zero(): + def test_digest_zero(_crc32c): helper = crc32c.Checksum() assert helper.digest() == b"\x00" * 4 @staticmethod - def test_digest_nonzero(): + def test_digest_nonzero(_crc32c): helper = crc32c.Checksum() helper._crc = 0x01020304 assert helper.digest() == b"\x01\x02\x03\x04" @staticmethod - def test_hexdigest_zero(): + def test_hexdigest_zero(_crc32c): helper = crc32c.Checksum() assert helper.hexdigest() == b"00" * 4 @staticmethod - def test_hexdigest_nonzero(): + def test_hexdigest_nonzero(_crc32c): helper = crc32c.Checksum() helper._crc = 0x091A3B2C assert helper.hexdigest() == b"091a3b2c" @staticmethod - def test_copy(): + def test_copy(_crc32c): chunk = b"DEADBEEF" helper = 
crc32c.Checksum(chunk) clone = helper.copy() @@ -255,7 +270,7 @@ def test_copy(): @staticmethod @pytest.mark.parametrize("chunksize", [1, 3, 5, 7, 11, 13, ISCSI_LENGTH]) - def test_consume_stream(chunksize): + def test_consume_stream(_crc32c, chunksize): helper = crc32c.Checksum() expected = list(iscsi_chunks(chunksize)) stream = mock.Mock(spec=["read"])