/
SignatureComputer.hs
117 lines (103 loc) · 4.69 KB
/
SignatureComputer.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
{-# LANGUAGE LambdaCase, ViewPatterns, RankNTypes #-}
-- | Streaming production of signature tables ('produceSignatureTable') and
-- calculation of the byte size such a table will have ('signatureTableSize'),
-- together with the block-size and hash-algorithm names needed to call them.
module SSync.SignatureComputer (
produceSignatureTable
, signatureTableSize
, BlockSize
, blockSize
, mkBlockSize
, blockSizeWord
, HashAlgorithm(..)
, hashForName
, nameForHash
) where
import Conduit
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Data.Ratio ((%))
import Data.Monoid ((<>), Sum(..))
import Data.Serialize.Put (runPut, putWord32be, putByteString)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8)
import Data.Word (Word32)
import SSync.Hash
import SSync.Util
import SSync.Util.Cereal
import SSync.Constants
import SSync.BlockSize
import qualified SSync.RollingChecksum as RC
-- | Look up a 'HashAlgorithm' by its textual name.
--
-- Delegates directly to 'forName'; the result is 'Nothing' exactly when
-- 'forName' returns 'Nothing' for the given name.
hashForName :: Text -> Maybe HashAlgorithm
hashForName algName = forName algName
-- | The textual name of a 'HashAlgorithm', as reported by 'name'.
nameForHash :: HashAlgorithm -> Text
nameForHash alg = name alg
-- | Forward every incoming chunk downstream unchanged while also feeding it
-- to 'updateS'.
--
-- The conduit runs with the supplied 'HashState' as its initial state and
-- returns the state left behind once upstream is exhausted.
produceAndHash :: (Monad m) => HashState -> ConduitM ByteString ByteString m HashState
produceAndHash initial =
  execStateC initial
    (awaitForever (\chunk -> updateS chunk >> yield chunk))
-- | Emit a string as a single chunk: one length byte followed by the UTF-8
-- encoding of the string.
--
-- NOTE(review): the byte length is narrowed to a single byte with
-- 'fromIntegral', so an encoding of 256 bytes or more would be written with a
-- wrapped-around length prefix. Every call in this file passes a hash
-- algorithm name.
produceShortString :: (Monad m) => String -> Producer m ByteString
produceShortString s = yield (lengthPrefix <> encoded)
  where
    encoded = encodeUtf8 (T.pack s)
    lengthPrefix = BS.singleton (fromIntegral (BS.length encoded))
-- | Emit a 'Word32' as a single chunk holding its 'putVarInt' serialisation.
produceVarInt :: (Monad m) => Word32 -> Producer m ByteString
produceVarInt n = yield (runPut (putVarInt n))
-- | Number of signatures grouped into one signature block for a given data
-- block size.
--
-- Each signature block represents as close to 1MB of source data as
-- possible, capped at 'maxSignatureBlockSize'. The block size is used as a
-- divisor, so it must be non-zero.
signatureBlockSizeForBlockSize :: Word32 -> Word32
signatureBlockSizeForBlockSize blockSz = min uncapped maxSignatureBlockSize
  where
    oneMegabyte :: Word32
    oneMegabyte = 1024 * 1024

    uncapped = 1 + oneMegabyte `div` blockSz
-- | Receives blocks of data, produces blocks of signatures.
--
-- Every incoming chunk is treated as one data block and contributes one
-- signature: the weak rolling checksum as a big-endian 32-bit word followed by
-- the strong hash digest. Signatures are buffered until @sigsPerBlock@ of them
-- have accumulated and are then emitted as a single chunk, prefixed by the
-- signature count written with 'putVarInt'. When upstream ends, whatever has
-- accumulated so far (possibly nothing) is emitted the same way as a final
-- chunk.
sigs :: (Monad m) => Word32 -> Word32 -> HashAlgorithm -> Conduit ByteString m ByteString
sigs blockSz sigsPerBlock hashAlg = collect 0 (return ())
  where
    -- Starting states reused for every block.
    weakSeed = RC.init blockSz
    strongSeed = initState hashAlg

    -- Emit the pending signatures as one chunk, prefixed by their count.
    flush count pending = yield . runPut $ putVarInt count >> pending

    collect count pending
      | count == sigsPerBlock = do
          flush count pending
          collect 0 (return ())
      | otherwise =
          await >>= maybe (flush count pending) absorb
      where
        -- Append the signature of one more data block and keep going.
        absorb block =
          let weak = RC.value (RC.forBlock weakSeed block)
              strong = digest (update strongSeed block)
           in collect (count + 1) (pending >> putWord32be weak >> putByteString strong)
-- | Produce the body of a signature table, without the outer checksum
-- framing added by 'produceSignatureTable'.
--
-- Output, in order: the block size (varint), the strong hash algorithm's name
-- (short string), the number of signatures per signature block (varint), and
-- then the signature blocks computed by 'sigs' over the input after it has
-- been passed through 'rechunk' with the block size.
produceSignatureTableUnframed :: (Monad m) => HashAlgorithm -> BlockSize -> Conduit ByteString m ByteString
produceSignatureTableUnframed strongHashAlg wrappedBlockSize = do
  produceVarInt blockSz
  produceShortString (T.unpack (name strongHashAlg))
  produceVarInt sigsPerBlock
  rechunk (fromIntegral blockSz) $= sigs blockSz sigsPerBlock strongHashAlg
  where
    blockSz = blockSizeWord wrappedBlockSize
    sigsPerBlock = signatureBlockSizeForBlockSize blockSz
-- | Produce a complete, framed signature table for the incoming data.
--
-- Output, in order: the checksum algorithm's name (short string), the
-- unframed signature table for the given strong hash algorithm and block
-- size, and finally the value returned by 'digestS' after every chunk of the
-- unframed table has been passed through 'produceAndHash'. The leading
-- algorithm name is emitted before hashing starts.
produceSignatureTable :: (Monad m) => HashAlgorithm -> HashAlgorithm -> BlockSize -> Conduit ByteString m ByteString
produceSignatureTable checksumAlg strongHashAlg blockSz = do
  produceShortString (T.unpack (name checksumAlg))
  checksum <-
    withHashT checksumAlg $ do
      withHashState' $ \st ->
        produceSignatureTableUnframed strongHashAlg blockSz $= produceAndHash st
      digestS
  yield checksum
-- | Returns the length (in bytes) of the signature table that would be
-- computed for a file of a given length using the given hash algorithms and
-- block size. This is useful for (e.g.) setting a @Content-Length@ header on
-- an HTTP message containing a signature table.
--
-- The result is the sum of three parts: the framing (checksum algorithm name,
-- table header and trailing checksum), all completely filled signature
-- blocks, and one trailing signature block that is always counted, even when
-- it holds zero signatures.
signatureTableSize :: HashAlgorithm -> HashAlgorithm -> BlockSize -> Integer -> Integer
signatureTableSize checksumAlg strongHashAlg wrappedBlockSize fileLen =
  framingBytes + fullBlocksBytes + trailingBlockBytes
  where
    blockSz = blockSizeWord wrappedBlockSize
    sigsPerBlockWord = signatureBlockSizeForBlockSize blockSz

    -- Total number of bytes a producer emits when run to completion.
    byteCount src =
      fromIntegral . getSum . runIdentity $ src $$ foldMapC (Sum . BS.length)

    -- The table header; this is the part that gets hashed below.
    hashedHeader = do
      produceVarInt blockSz
      produceShortString (T.unpack (name strongHashAlg))
      produceVarInt sigsPerBlockWord

    -- Everything except the signature blocks themselves. The size is obtained
    -- by actually producing these bytes and counting them.
    framing = do
      produceShortString (T.unpack (name checksumAlg))
      checksum <-
        withHashT checksumAlg $ do
          withHashState' $ \st -> hashedHeader $= produceAndHash st
          digestS
      yield checksum

    framingBytes :: Integer
    framingBytes = byteCount framing

    sigsPerBlock, totalSigs, fullBlocks, leftoverSigs, sigBytes :: Integer
    sigsPerBlock = fromIntegral sigsPerBlockWord
    -- One signature per (possibly partial) data block of the file.
    totalSigs = ceiling (fileLen % fromIntegral blockSz)
    fullBlocks = totalSigs `div` sigsPerBlock
    leftoverSigs = totalSigs `rem` sigsPerBlock
    -- 4 bytes plus the strong digest per signature.
    sigBytes = fromIntegral (4 + digestSize strongHashAlg)

    -- Encoded size of a count when written by 'produceVarInt'.
    varIntBytes :: Integer -> Integer
    varIntBytes i = byteCount (produceVarInt (fromIntegral i))

    fullBlocksBytes, trailingBlockBytes :: Integer
    fullBlocksBytes = fullBlocks * (varIntBytes sigsPerBlock + sigsPerBlock * sigBytes)
    trailingBlockBytes = varIntBytes leftoverSigs + leftoverSigs * sigBytes