
Bump libdeeplake version. #12226

GitHub Actions / JUnit Test Report failed Apr 26, 2024 in 0s

1608 tests run, 482 passed, 1125 skipped, 1 failed.

Annotations

Check failure on line 948 in deeplake/enterprise/test_pytorch.py

github-actions / JUnit Test Report

test_pytorch.test_pytorch_data_decode

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x98 in position 0: invalid start byte
Raw output
local_auth_ds = Dataset(path='./hub_pytest/test_pytorch/test_pytorch_data_decode', tensors=['generic', 'text', 'json', 'list', 'class_label', 'image'])
cat_path = '/home/runner/work/deeplake/deeplake/deeplake/tests/dummy_data/images/cat.jpeg'

    @requires_libdeeplake
    @requires_torch
    @pytest.mark.flaky
    @pytest.mark.slow
    def test_pytorch_data_decode(local_auth_ds, cat_path):
        with local_auth_ds as ds:
            ds.create_tensor("generic")
            for i in range(10):
                ds.generic.append(i)
            ds.create_tensor("text", htype="text")
            for i in range(10):
                ds.text.append(f"hello {i}")
            ds.create_tensor("json", htype="json")
            for i in range(10):
                ds.json.append({"x": i})
            ds.create_tensor("list", htype="list")
            for i in range(10):
                ds.list.append([i, i + 1])
            ds.create_tensor("class_label", htype="class_label")
            animals = [
                "cat",
                "dog",
                "bird",
                "fish",
                "horse",
                "cow",
                "pig",
                "sheep",
                "goat",
                "chicken",
            ]
            ds.class_label.extend(animals)
            ds.create_tensor("image", htype="image", sample_compression="jpeg")
            for i in range(10):
                ds.image.append(deeplake.read(cat_path))
    
        decode_method = {tensor: "data" for tensor in list(ds.tensors.keys())}
        ptds = (
            ds.dataloader()
            .transform(identity)
            .pytorch(decode_method=decode_method, collate_fn=identity_collate)
        )
>       for i, batch in enumerate(ptds):

deeplake/enterprise/test_pytorch.py:948: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/enterprise/dataloader.py:881: in __next__
    return next(self._iterator)
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/loader.py:156: in __next__
    return next(self._iterator)
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/single_process_iterator.py:80: in __next__
    return self.get_data()
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/single_process_iterator.py:117: in get_data
    batch = self._next_data()
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/indra/pytorch/single_process_iterator.py:102: in _next_data
    sample[tensor] = bytes_to_text(sample[tensor], "json")
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

buffer = b'\x98\x90\xda\xf1}U\x00\x00', htype = 'json'

    def bytes_to_text(buffer, htype):
        buffer = bytes(buffer)
        if htype == "json":
            arr = np.empty(1, dtype=object)
>           arr[0] = json.loads(bytes.decode(buffer), cls=HubJsonDecoder)
E           UnicodeDecodeError: 'utf-8' codec can't decode byte 0x98 in position 0: invalid start byte

deeplake/core/serialize.py:481: UnicodeDecodeError
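
For context, a minimal sketch (not deeplake or indra code) of the failure mechanism: bytes.decode() assumes UTF-8, so the 8-byte buffer handed to bytes_to_text fails before json.loads (or HubJsonDecoder) is ever reached. The buffer value and call pattern below are taken from the traceback above; whether the buffer is raw, undecoded chunk data rather than JSON text is an assumption.

    # Minimal reproduction sketch of the UnicodeDecodeError seen in bytes_to_text.
    import json

    buffer = bytes(b"\x98\x90\xda\xf1}U\x00\x00")  # buffer from the failing sample

    try:
        # Same call pattern as bytes_to_text(..., "json"); the custom decoder class
        # is omitted because decoding fails before json.loads runs.
        json.loads(bytes.decode(buffer))
    except UnicodeDecodeError as e:
        # 'utf-8' codec can't decode byte 0x98 in position 0: invalid start byte
        print(e)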