From 9fa795d4008dc554de83dd9118095b2d0a2b0f9a Mon Sep 17 00:00:00 2001
From: "Nathan Voxland (Activeloop)" <151186252+nvoxland-al@users.noreply.github.com>
Date: Tue, 27 Feb 2024 14:38:41 +0000
Subject: [PATCH] Auto-convert numbers to strings when appending to a text dtype tensor (#2782)

Auto-convert numbers to strings when appending to a text dtype tensor
---
 deeplake/core/chunk/base_chunk.py            |  4 ++++
 deeplake/core/chunk/tests/test_base_chunk.py | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 deeplake/core/chunk/tests/test_base_chunk.py

diff --git a/deeplake/core/chunk/base_chunk.py b/deeplake/core/chunk/base_chunk.py
index cbceec6f9a..0f8d9143ea 100644
--- a/deeplake/core/chunk/base_chunk.py
+++ b/deeplake/core/chunk/base_chunk.py
@@ -1,3 +1,4 @@
+import numbers
 from abc import abstractmethod
 import struct
 import numpy as np
@@ -605,6 +606,9 @@ def is_empty_tensor(self):
         )
 
     def _text_sample_to_byte_string(self, sample):
+        if isinstance(sample, numbers.Number):
+            sample = str(sample)
+
         try:
             return str(sample.numpy().reshape(())).encode("utf-8")
         except AttributeError:
diff --git a/deeplake/core/chunk/tests/test_base_chunk.py b/deeplake/core/chunk/tests/test_base_chunk.py
new file mode 100644
index 0000000000..3cc2b7a8f9
--- /dev/null
+++ b/deeplake/core/chunk/tests/test_base_chunk.py
@@ -0,0 +1,19 @@
+import pytest
+
+from deeplake.core.chunk.uncompressed_chunk import UncompressedChunk
+from deeplake.core.meta import TensorMeta
+
+
+def test_text_sample_to_byte_string():
+    chunk = UncompressedChunk(
+        min_chunk_size=10,
+        max_chunk_size=1000,
+        tiling_threshold=1000,
+        tensor_meta=TensorMeta(),
+    )
+
+    assert chunk._text_sample_to_byte_string("test") == b"test"
+    assert chunk._text_sample_to_byte_string(3) == b"3"
+    assert chunk._text_sample_to_byte_string(3.5) == b"3.5"
+    assert chunk._text_sample_to_byte_string(None) == b""
+    assert chunk._text_sample_to_byte_string([]) == b""