Skip to content

Commit

Permalink
feat: add pure python implementation (#20)
Browse files Browse the repository at this point in the history
* feat: add pure-python implementation

* feat: extract most of checksum to shared base class

* chore: move setup.py static settings to setup.cfg

* Update src/crc32c/_checksum.py

Co-Authored-By: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com>

* fix: move trove classifier to beta, add python_requires


Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com>
  • Loading branch information
crwilcox and busunkim96 committed Mar 19, 2020
1 parent 8a238c3 commit 97cf381
Show file tree
Hide file tree
Showing 7 changed files with 389 additions and 142 deletions.
44 changes: 44 additions & 0 deletions setup.cfg
@@ -0,0 +1,44 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[metadata]
name = google-crc32c
version = 0.0.2
description = A python wrapper of the C library 'Google CRC32C'
url = https://github.com/googleapis/python-crc32c
long_description = file: README.md
long_description_content_type = text/markdown
author = Google LLC
author_email = googleapis-packages@google.com

license = Apache 2.0
platforms = Posix, MacOS X, Windows
# Continuation lines of a multi-line value must be indented so the parser
# attaches them to ``classifiers`` instead of reading them as new keys.
classifiers =
    Development Status :: 4 - Beta
    Intended Audience :: Developers
    License :: OSI Approved :: Apache Software License
    Operating System :: OS Independent
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3.5
    Programming Language :: Python :: 3.6
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8

[options]
zip_safe = True
python_requires = >=3.5

[options.extras_require]
testing = pytest

52 changes: 19 additions & 33 deletions setup.py
Expand Up @@ -18,7 +18,7 @@

import setuptools
import setuptools.command.build_ext

import warnings

_EXTRA_DLL = "extra-dll"
_DLL_FILENAME = "crc32c.dll"
Expand Down Expand Up @@ -46,48 +46,34 @@ def run(self):
return result


# NOTE(review): this span is a rendered diff whose +/- markers and indentation
# were stripped, so removed and added lines appear side by side (both
# ``def main():`` and ``def main(build_cffi=True):``, and two copies of the
# setup_requires / cffi_modules / install_requires arguments).
def main():
# Runs setuptools.setup() for the package. With build_cffi false, the cffi
# requirement and the cffi module builder are replaced by empty lists.
def main(build_cffi=True):
build_path = os.path.join("src", "crc32c_build.py")
builder = "{}:FFIBUILDER".format(build_path)
cffi_dep = "cffi >= 1.0.0"

with io.open("README.md", encoding="utf-8") as readme_file:
readme = readme_file.read()

setuptools.setup(
# NOTE(review): the static metadata arguments below presumably are the lines
# this commit moved into setup.cfg -- confirm against the real diff.
name="google-crc32c",
version="0.0.2",
description="A python wrapper of the C library 'Google CRC32C'",
long_description=readme,
long_description_content_type="text/markdown",
author="Google LLC",
# NOTE(review): "oogle.com" looks like a typo for "google.com"; setup.cfg in
# this same commit uses googleapis-packages@google.com.
author_email="googleapis-packages@oogle.com",
scripts=(),
url="https://github.com/googleapis/python-crc32c",
packages=["crc32c"],
package_dir={"": "src"},
license="Apache 2.0",
platforms="Posix; MacOS X; Windows",
package_data={"crc32c": [os.path.join(_EXTRA_DLL, _DLL_FILENAME)]},
zip_safe=True,
setup_requires=[cffi_dep],
cffi_modules=[builder],
install_requires=[cffi_dep],
classifiers=[
"Development Status :: 2 - Pre-Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
],
# The conditional forms below drop every cffi hook when build_cffi is false,
# which is how the pure-python-only install is produced.
setup_requires=[cffi_dep] if build_cffi else [],
cffi_modules=[builder] if build_cffi else [],
install_requires=[cffi_dep] if build_cffi else [],
cmdclass={"build_ext": BuildExtWithDLL},
extras_require={"testing": ["pytest"]},
)


# Script entry point: try a full build first, then fall back to a build
# without the CFFI extension if the first attempt exits.
if __name__ == "__main__":
# NOTE(review): the bare ``main()`` on the next line is presumably the
# pre-change line of the diff, replaced by the try/except that follows.
main()
# NOTE(review): ``sys`` is not referenced in the lines visible here -- confirm
# whether this import is needed.
import sys
try:
main()
except KeyboardInterrupt:
# A user interrupt is re-raised as-is; it never triggers the fallback build.
raise
except SystemExit:
# If installation fails, it is likely a compilation error with CFFI
# Try to install again.
warnings.warn(
"Compiling the CFFI Extension crc32c has failed. Only a pure "
"python implementation will be usable."
)
# Second attempt: run setup again with the CFFI hooks disabled.
main(build_cffi=False)

116 changes: 18 additions & 98 deletions src/crc32c/__init__.py
Expand Up @@ -12,106 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import struct
import warnings

# NOTE: ``__config__`` **must** be the first import because it (may)
# modify the search path used to locate shared libraries.
import crc32c.__config__
import crc32c._crc32c_cffi
# Message emitted when the compiled extension is missing and the package
# falls back to the pure-python implementation.
_SLOW_CRC32C_WARNING = (
    "As the c extension couldn't be imported, `google-crc32c` is using a "
    "pure python implementation that is significantly slower. If possible, "
    "please configure a c build environment and compile the extension"
)

# If available, default to CFFI Implementation, otherwise, use pure python.
try:
    from crc32c import cffi as _crc32c

    implementation = "cffi"
except ImportError:
    from crc32c import python as _crc32c

    # ``warnings.warn`` takes (message, category). The message must be the
    # constant itself, not its quoted name; the swapped arguments raised
    # ``TypeError`` and made the fallback unusable.
    warnings.warn(_SLOW_CRC32C_WARNING, RuntimeWarning)
    implementation = "python"

def extend(crc, chunk):
"""Update an existing CRC checksum with a new chunk of data.
extend = _crc32c.extend
value = _crc32c.value

Args:
crc (int): An existing CRC checksum.
chunk (Union[bytes, List[int], Tuple[int]]): A new chunk of data.
Intended to be a byte string or similar.
Checksum = _crc32c.Checksum

Returns:
int: New CRC checksum computed by extending existing CRC
with ``chunk``.
"""
return crc32c._crc32c_cffi.lib.crc32c_extend(crc, chunk, len(chunk))


def value(chunk):
"""Compute a CRC checksum for a chunk of data.
Args:
chunk (Union[bytes, List[int], Tuple[int]]): A new chunk of data.
Intended to be a byte string or similar.
Returns:
int: New CRC checksum computed for ``chunk``.
"""
return crc32c._crc32c_cffi.lib.crc32c_value(chunk, len(chunk))


class Checksum(object):
"""Hashlib-alike helper for CRC32C operations.
Args:
initial_value (Optional[bytes]): the initial chunk of data from
which the CRC32C checksum is computed. Defaults to b''.
"""

__slots__ = ("_crc",)

def __init__(self, initial_value=b""):
self._crc = value(initial_value)

def update(self, chunk):
"""Update the checksum with a new chunk of data.
Args:
chunk (Optional[bytes]): a chunk of data used to extend
the CRC32C checksum.
"""
self._crc = extend(self._crc, chunk)

def digest(self):
"""Big-endian order, per RFC 4960.
See: https://cloud.google.com/storage/docs/json_api/v1/objects#crc32c
Returns:
bytes: A four-byte digest string.
"""
return struct.pack(">L", self._crc)

def hexdigest(self):
"""Like :meth:`digest` except returns as a bytestring of double length.
Returns:
bytes: An eight-byte digest string, containing only hex digits.
"""
return "{:08x}".format(self._crc).encode("ascii")

def copy(self):
"""Create another checksum with the same CRC32C value.
Returns:
Checksum: the new instance.
"""
clone = self.__class__()
clone._crc = self._crc
return clone

def consume(self, stream, chunksize):
"""Consume chunks from a stream, extending our CRC32 checksum.
Args:
stream (BinaryIO): the stream to consume.
chunksize (int): the size of the read to perform
Returns:
Generator[bytes, None, None]: Iterable of the chunks read from the
stream.
"""
while True:
chunk = stream.read(chunksize)
if not chunk:
break
self._crc = extend(self._crc, chunk)
yield chunk
__all__ = ["extend", "value", "Checksum", "implementation"]
86 changes: 86 additions & 0 deletions src/crc32c/_checksum.py
@@ -0,0 +1,86 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import struct


class CommonChecksum(object):
    """Hashlib-alike helper for CRC32C operations.

    This class should not be used directly: subclasses must override
    :meth:`update` with a concrete CRC32C implementation.

    Args:
        initial_value (Optional[bytes]): the initial chunk of data from
            which the CRC32C checksum is computed. Defaults to b''.
    """

    def __init__(self, initial_value=b""):
        self._crc = 0
        # Skip the update for the empty default so the base class can be
        # constructed without a concrete ``update`` (``copy`` relies on it).
        if initial_value != b"":
            self.update(initial_value)

    def update(self, data):
        """Update the checksum with a new chunk of data.

        Args:
            data (Optional[bytes]): a chunk of data used to extend
                the CRC32C checksum.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        # ``NotImplemented`` is a non-callable sentinel for binary operators;
        # the exception for an abstract method is ``NotImplementedError``.
        raise NotImplementedError()

    def digest(self):
        """Big-endian order, per RFC 4960.

        See: https://cloud.google.com/storage/docs/json_api/v1/objects#crc32c

        Returns:
            bytes: A four-byte digest string.
        """
        return struct.pack(">L", self._crc)

    def hexdigest(self):
        """Like :meth:`digest` except returns as a bytestring of double length.

        Returns:
            bytes: An eight-byte digest string, containing only hex digits.
        """
        return "{:08x}".format(self._crc).encode("ascii")

    def copy(self):
        """Create another checksum with the same CRC32C value.

        Returns:
            Checksum: the new instance, of the same class as ``self``.
        """
        clone = self.__class__()
        clone._crc = self._crc
        return clone

    def consume(self, stream, chunksize):
        """Consume chunks from a stream, extending our CRC32 checksum.

        Args:
            stream (BinaryIO): the stream to consume.
            chunksize (int): the size of the read to perform

        Returns:
            Generator[bytes, None, None]: Iterable of the chunks read from the
            stream.
        """
        while True:
            chunk = stream.read(chunksize)
            # An empty read marks the end of the stream.
            if not chunk:
                break
            self.update(chunk)
            yield chunk

0 comments on commit 97cf381

Please sign in to comment.