Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Direct support for embedded CBOR-in-CBOR #286

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
78 changes: 78 additions & 0 deletions cborg/src/Codec/CBOR/Decoding.hs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ module Codec.CBOR.Decoding
, decodeMapLenOrIndef -- :: Decoder s (Maybe Int)
, decodeBreakOr -- :: Decoder s Bool

-- ** Embedded CBOR data
-- $embedded-cbor
, decodeEmbeddedCBOR
, decodeTagEmbeddedCBOR
, decodeWithinBytes
, decodeBytesLen

-- ** Inspecting the token type
, peekTokenType -- :: Decoder s TokenType
, TokenType(..)
Expand Down Expand Up @@ -154,6 +161,7 @@ data DecodeAction s a
| ConsumeInt32 (Int# -> ST s (DecodeAction s a))
| ConsumeListLen (Int# -> ST s (DecodeAction s a))
| ConsumeMapLen (Int# -> ST s (DecodeAction s a))
| ConsumeBytesLen(Int# -> ST s (DecodeAction s a))
| ConsumeTag (Word# -> ST s (DecodeAction s a))

-- 64bit variants for 32bit machines
Expand Down Expand Up @@ -944,13 +952,83 @@ peekByteOffset = Decoder (\k -> return (PeekByteOffset (\off# -> k (I64# off#)))
-- > x <- decode
-- > !after <- peekByteOffset
--
-- @since 0.2.2.0
decodeWithByteSpan :: Decoder s a -> Decoder s (a, ByteOffset, ByteOffset)
decodeWithByteSpan da = do
!before <- peekByteOffset
x <- da
!after <- peekByteOffset
return (x, before, after)

--------------------------------------------------------------
-- Encoded CBOR Data Item, Tag 24, RFC 7049 section 2.4.4.1

-- $embedded-cbor
-- | Sometimes it is beneficial to carry an embedded CBOR data item that
-- is not meant to be decoded immediately at the time the enclosing data
-- item is being parsed. Tag 24 (CBOR data item) can be used to tag the
-- embedded byte string as a data item encoded in CBOR format.
--
-- This can also be used as an encoding trick to provide a length prefix for
-- the encoed bytes of a CBOR term.
--
-- See RFC 7049 section 2.4.4.1 .

-- | Decode an embedded CBOR data item. This is a bytes token that contains
-- further CBOR data.
--
-- @since 0.2.3.0
decodeEmbeddedCBOR :: Decoder s a -> Decoder s a
decodeEmbeddedCBOR da = do
decodeTagEmbeddedCBOR
decodeWithinBytes da

-- | Decode the tag for an embedded CBOR data item, tag 24.
--
-- This tag is used to indicate that the following bytes token contains further
-- data in CBOR format.
--
-- @since 0.2.3.0
decodeTagEmbeddedCBOR :: Decoder s ()
decodeTagEmbeddedCBOR = do
tag <- decodeTag
if tag == 24 then return ()
else fail "decodeTagEmbeddedCBOR: expected tag 24"

-- | Run a decoder on the contents of a bytes token. The decoder must consume
-- the whole contents exactly.
--
-- This is more efficient than decoding the bytes token and then running
-- another decoder on the bytes.
--
-- The trade-off however is that the size of the bytes token is not checked in
-- advance, so the inner decoder may consume too few or too many bytes. This
-- is checked afterwards however, so it will fail if there is a mismatch.
--
-- @since 0.2.3.0
decodeWithinBytes :: Decoder s a -> Decoder s a
decodeWithinBytes da = do
available <- decodeBytesLen
!before <- peekByteOffset
x <- da
!after <- peekByteOffset
let !consumed = after - before
if consumed == fromIntegral available
then return x
else fail $ "decodeWithinBytes: " ++ show available
++ " bytes available but " ++ show consumed ++ " consumed"

-- | This is an unsafe decoder primitive. It consumes and returns the length
-- prefix of a bytes token. This leaves the decoder at a byte offset that is
-- within a token. This is only useful to decode the body of a bytes token
-- as further CBOR. This pattern is captured by the safer higher level
-- functions 'decodeWithinBytes' and 'decodeEmbeddedCBOR'.
--
-- @since 0.2.3.0
decodeBytesLen :: Decoder s Int
decodeBytesLen = Decoder (\k -> return (ConsumeBytesLen (\n# -> k (I# n#))))
{-# INLINE decodeBytesLen #-}

{-
expectExactly :: Word -> Decoder (Word :#: s) s
expectExactly n = expectExactly_ n done
Expand Down
54 changes: 54 additions & 0 deletions cborg/src/Codec/CBOR/Encoding.hs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ module Codec.CBOR.Encoding
, encodeFloat -- :: Float -> Encoding
, encodeDouble -- :: Double -> Encoding
, encodePreEncoded -- :: B.ByteString -> Encoding

-- ** Embedded CBOR data
-- $embedded-cbor
, encodeEmbeddedCBOR
, encodeWithinBytes
, encodeTagEmbeddedCBOR
) where

#include "cbor.h"
Expand Down Expand Up @@ -130,6 +136,7 @@ data Tokens =

-- Special
| TkEncoded {-# UNPACK #-} !B.ByteString Tokens
| TkEmbedded {-# UNPACK #-} !Word Tokens Tokens

| TkEnd
deriving (Show,Eq)
Expand Down Expand Up @@ -367,3 +374,50 @@ encodeDouble = Encoding . TkFloat64
-- @since 0.2.2.0
encodePreEncoded :: B.ByteString -> Encoding
encodePreEncoded = Encoding . TkEncoded

--------------------------------------------------------------
-- Encoded CBOR Data Item, Tag 24, RFC 7049 section 2.4.4.1

-- $embedded-cbor
-- | Sometimes it is beneficial to carry an embedded CBOR data item that
-- is not meant to be decoded immediately at the time the enclosing data
-- item is being parsed. Tag 24 (CBOR data item) can be used to tag the
-- embedded byte string as a data item encoded in CBOR format.
--
-- This can also be used as an encoding trick to provide a length prefix for
-- the encoed bytes of a CBOR term.
--
-- See RFC 7049 section 2.4.4.1 .

-- | Encode an embedded CBOR data item. This is a bytes token that contains
-- further CBOR data. The given size must match the eventual size of the
-- embedded 'Encoding'.
--
encodeEmbeddedCBOR :: Word -> Encoding -> Encoding
encodeEmbeddedCBOR sz x =
encodeTagEmbeddedCBOR
<> encodeWithinBytes sz x

-- | Encode a bytes token where the contents is further CBOR encoded data.
-- The correct final size of the bytes token must be supplied up front.
--
-- This is more efficient than running another encoder to produce bytes and
-- then including the bytes token.
--
-- The trade-off however is that the size of the bytes token must be provided
-- but it cannot be checked in advance, so the inner encoding may result in too
-- few or too many bytes. This is checked afterwards however, so it will fail
-- if there is a mismatch.
--
encodeWithinBytes :: Word -> Encoding -> Encoding
encodeWithinBytes sz (Encoding enc) = Encoding (TkEmbedded sz (enc TkEnd))

-- | Encode the tag for an embedded CBOR data item, tag 24.
--
-- This tag is used to indicate that the following bytes token contains further
-- data in CBOR format.
--
-- @since 0.2.3.0
encodeTagEmbeddedCBOR :: Encoding
encodeTagEmbeddedCBOR = encodeTag 24

53 changes: 53 additions & 0 deletions cborg/src/Codec/CBOR/Read.hs
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,11 @@ go_fast !bs da@(ConsumeMapLen k) =
DecodeFailure -> go_fast_end bs da
DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs)

go_fast !bs da@(ConsumeBytesLen k) =
case tryConsumeBytesLen (BS.unsafeHead bs) bs of
DecodeFailure -> go_fast_end bs da
DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs)

go_fast !bs da@(ConsumeTag k) =
case tryConsumeTag (BS.unsafeHead bs) bs of
DecodeFailure -> go_fast_end bs da
Expand Down Expand Up @@ -825,6 +830,11 @@ go_fast_end !bs (ConsumeMapLen k) =
DecodeFailure -> return $! SlowFail bs "expected map len"
DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)

go_fast_end !bs (ConsumeBytesLen k) =
case tryConsumeBytesLen (BS.unsafeHead bs) bs of
DecodeFailure -> return $! SlowFail bs "expected bytes"
DecodedToken sz (I# n#) -> k n# >>= go_fast (BS.unsafeDrop sz bs)

go_fast_end !bs (ConsumeTag k) =
case tryConsumeTag (BS.unsafeHead bs) bs of
DecodeFailure -> return $! SlowFail bs "expected tag"
Expand Down Expand Up @@ -2093,6 +2103,49 @@ tryConsumeMapLenOrIndef hdr !bs = case word8ToWord hdr of
_ -> DecodeFailure


{-# INLINE tryConsumeBytesLen #-}
tryConsumeBytesLen :: Word8 -> ByteString -> DecodedToken Int
tryConsumeBytesLen hdr !bs = case word8ToWord hdr of

-- Bytes (type 2)
0x40 -> DecodedToken 1 0
0x41 -> DecodedToken 1 1
0x42 -> DecodedToken 1 2
0x43 -> DecodedToken 1 3
0x44 -> DecodedToken 1 4
0x45 -> DecodedToken 1 5
0x46 -> DecodedToken 1 6
0x47 -> DecodedToken 1 7
0x48 -> DecodedToken 1 8
0x49 -> DecodedToken 1 9
0x4a -> DecodedToken 1 10
0x4b -> DecodedToken 1 11
0x4c -> DecodedToken 1 12
0x4d -> DecodedToken 1 13
0x4e -> DecodedToken 1 14
0x4f -> DecodedToken 1 15
0x50 -> DecodedToken 1 16
0x51 -> DecodedToken 1 17
0x52 -> DecodedToken 1 18
0x53 -> DecodedToken 1 19
0x54 -> DecodedToken 1 20
0x55 -> DecodedToken 1 21
0x56 -> DecodedToken 1 22
0x57 -> DecodedToken 1 23
0x58 -> DecodedToken 2 (word8ToInt (eatTailWord8 bs))
0x59 -> DecodedToken 3 (word16ToInt (eatTailWord16 bs))
#if defined(ARCH_64bit)
0x5a -> DecodedToken 5 (word32ToInt (eatTailWord32 bs))
#else
0x5a -> case word32ToInt (eatTailWord32 bs) of
Just n -> DecodedToken 5 n
Nothing -> DecodeFailure
#endif
0x5b -> case word64ToInt (eatTailWord64 bs) of
Just n -> DecodedToken 9 n
Nothing -> DecodeFailure
_ -> DecodeFailure

{-# INLINE tryConsumeTag #-}
tryConsumeTag :: Word8 -> ByteString -> DecodedToken Word
tryConsumeTag hdr !bs = case word8ToWord hdr of
Expand Down
5 changes: 5 additions & 0 deletions cborg/src/Codec/CBOR/Write.hs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,11 @@ buildStep vs1 k (BI.BufferRange op0 ope0) =
(B.byteString x) (buildStep vs' k)
(BI.BufferRange op ope0)

TkEmbedded sz e vs'
-> PI.runB bytesLenMP sz op >>= \op' ->
buildStep e (buildStep vs' k)
(BI.BufferRange op' ope0)

TkEnd -> k (BI.BufferRange op ope0)

| otherwise = return $ BI.bufferFull bound op (buildStep vs k)
Expand Down