Skip to content

Commit c44cee0

Browse files
committed
cleanup for bistream
1 parent bf8ad02 commit c44cee0

File tree

6 files changed

+108
-64
lines changed

6 files changed

+108
-64
lines changed

data-bitcode.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ library
4545
, binary >= 0.8
4646
, containers >= 0.5
4747
, base16-bytestring
48+
, transformers
4849
default-language: Haskell2010
4950

5051
benchmark writer

src/Data/BitCode.hs

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
{-# LANGUAGE DeriveGeneric #-}
55
module Data.BitCode where
66

7+
8+
import Prelude hiding (fromEnum, toEnum)
9+
import qualified Prelude as P
10+
711
import Data.Word (Word32, Word64)
812
import Data.Maybe (catMaybes)
913
import Data.Bits (FiniteBits, finiteBitSize, countLeadingZeros)
@@ -18,8 +22,8 @@ type Bit = Bool
1822
type Bits = [Bool]
1923

2024
-- * BitCode
21-
type BlockId = Int
22-
type Code = Int
25+
type BlockId = Word64
26+
type Code = Word64
2327

2428
-- * Source location
2529
type Loc = (Int, Int) -- Words, Bits
@@ -44,8 +48,8 @@ data Op = Lit !Val -- [1,vbr8:val]
4448
instance Binary Op
4549

4650
-- | The Fields contained in an abbreviated record can be one of the following.
47-
data Field = Vbr !Int !Val
48-
| Fix !Int !Val
51+
data Field = Vbr !Word64 !Val
52+
| Fix !Word64 !Val
4953
| Len !Val
5054
| Chr !Char
5155
| W64 !Val -- Literal values. These are not bein emitted.
@@ -67,7 +71,7 @@ data BitCode
6771
-- Layout: [1,vbr8:id,vbr4:newabbrevlen,<align32bits>,32bit:blocklen,<blocklen * words>,0,<align32bits>]
6872
-- 1 and 0 are vbr(current abbrev len); starting with 2 at the top level.
6973
= Block { blockId :: !BlockId -- ^ id
70-
, blockAbbrevLen :: !Int -- ^ abbrev len
74+
, blockAbbrevLen :: !Word64 -- ^ abbrev len
7175
, blockBody :: ![BitCode] -- ^ body
7276
}
7377
-- | A abbreviation definition record. Layout: [2,vbr5:#ops,op0,op1,...]
@@ -100,7 +104,13 @@ data NBitCode
100104

101105
instance Binary NBitCode
102106

103-
idOrCode :: NBitCode -> Int
107+
toEnum :: Enum a => Word64 -> a
108+
toEnum = P.toEnum . fromIntegral
109+
110+
fromEnum :: Enum a => a -> Word64
111+
fromEnum = fromIntegral . P.fromEnum
112+
113+
idOrCode :: NBitCode -> Word64
104114
idOrCode (NBlock i _) = i
105115
idOrCode (NRec i _) = i
106116

@@ -138,9 +148,9 @@ bitWidth x = finiteBitSize x - countLeadingZeros x
138148
denormalize :: NBitCode -> BitCode
139149
denormalize (NBlock id bs) = let bs' = map denormalize bs
140150
ids = map idOrCode bs
141-
abbrevWidth = if ids == []
142-
then 0
143-
else max 2 (bitWidth (maximum ids))
151+
abbrevWidth = fromIntegral $ if null ids
152+
then 2
153+
else max 2 (bitWidth (maximum ids))
144154
in Block id abbrevWidth (map denormalize bs)
145155
denormalize (NRec c vs) = UnabbrevRecord (fromIntegral c) vs
146156

src/Data/BitCode/Reader/FromBits.hs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
{-# LANGUAGE LambdaCase #-}
33
module Data.BitCode.Reader.FromBits where
44

5+
import Prelude hiding (fromEnum, toEnum)
6+
57
import Data.Word (Word8, Word64)
68
import Control.Applicative ((<|>))
79
import Control.Monad (replicateM)
@@ -36,7 +38,7 @@ instance FromBits Op where
3638

3739
instance FromBits Char where
3840
parse = decodeChar6 <$> parseFixed 6
39-
where decodeChar6 :: Int -> Char
41+
where decodeChar6 :: Word64 -> Char
4042
decodeChar6 63 = '_'
4143
decodeChar6 62 = '.'
4244
decodeChar6 c | 0 <= c && c < 26 = toEnum $ c + fromEnum 'a'
@@ -70,10 +72,10 @@ parseBlock n abbrevs = parseLocated (parseSubBlock n <|> parseUnabbrevRecord n <
7072
readFixed newWidth (fromEnum END_BLOCK)
7173
skipTo32bits
7274
if id == 0 then processBlockInfo blocks else return ()
73-
return $ Block id newWidth blocks
75+
return $ Block id (fromIntegral newWidth) blocks
7476
where processBlockInfo :: [BitCode] -> BitCodeReader ()
7577
processBlockInfo = go 0
76-
where go :: Int -> [BitCode] -> BitCodeReader ()
78+
where go :: Word64 -> [BitCode] -> BitCodeReader ()
7779
go _ [] = pure ()
7880
go id (Located _ r:bs) = go id (r:bs) -- ignore Located blocks, and just recurse to the contained block.
7981
go _ ((UnabbrevRecord 1 [id]):bs) = go (fromIntegral id) bs

src/Data/BitCode/Writer/Combinators.hs

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,51 @@
1-
{-# OPTIONS_GHC -fprof-auto #-}
1+
{-# OPTIONS_GHC -fprof-auto #-}
2+
{-# LANGUAGE BangPatterns #-}
23
module Data.BitCode.Writer.Combinators where
34

5+
import Prelude hiding (fromEnum, toEnum)
6+
47
import Data.BitCode
58
import Data.BitCode.Writer.Monad
69
import Data.Word (Word8, Word32)
7-
import Data.Bits (FiniteBits, testBit, popCount, shift)
10+
import Data.Bits (FiniteBits, testBit, popCount, shift, finiteBitSize, countLeadingZeros, setBit, (.|.), (.&.), zeroBits)
11+
12+
import Debug.Trace
13+
14+
highBit :: FiniteBits a => a -> Int
15+
highBit x = finiteBitSize x - countLeadingZeros x
816

917
-- * BitCode Functions
1018
emitBit :: Bit -> BitCodeWriter ()
1119
emitBit = tell . pure
1220
emitBits :: Int -> Bool -> BitCodeWriter ()
13-
emitBits n = tell . replicate n
14-
emitFixed :: (FiniteBits a) => Int -> a -> BitCodeWriter ()
15-
emitFixed n x = tell [testBit x i | i <- [0..(n-1)]]
16-
emitVBR :: (FiniteBits a) => Int -> a -> BitCodeWriter ()
17-
emitVBR n x = do
18-
emitFixed (n-1) x
19-
let tail = shift x (-n+1)
20-
in if popCount tail > 0
21-
then emitBit True >> emitVBR n tail
22-
else emitBit False
21+
emitBits n b = tell $! replicate n b
22+
emitFixed :: (Show a, FiniteBits a) => Int -> a -> BitCodeWriter ()
23+
emitFixed !n !x = tell $! [testBit x i | i <- [0..(n-1)]]
24+
-- [WARN] This assums it can "emitFixed" the smae type it is
25+
-- feed into. However we will use a few more bits and thus
26+
-- may overflow.
27+
emitVBR :: (Num a, Show a, FiniteBits a) => Int -> a -> BitCodeWriter ()
28+
emitVBR !n !x = emitFixed (n*chunks) $ go size x
29+
where size = highBit x
30+
n' = n - 1 -- chunks are of size n-1 + follow bit.
31+
chunks = case size `divMod` n' of
32+
(0,0) -> 1
33+
(d,0) -> d
34+
(d,_) -> d+1
35+
chunkMask :: (Num a, FiniteBits a) => a
36+
chunkMask = (setBit 0 n') - 1
37+
followBit :: FiniteBits a => a
38+
followBit = (setBit zeroBits n')
39+
go :: (Num a, FiniteBits a) => Int -> a -> a
40+
go m x | m < n = x .&. chunkMask
41+
| otherwise = shift (go (m-n') (shift x (-n'))) n .|. ((x .&. chunkMask) .|. followBit)
42+
43+
-- emitFixed (n-1) x
44+
-- let tail = shift x (-n+1)
45+
-- in if popCount tail > 0
46+
-- then emitBit True >> emitVBR n tail
47+
-- else emitBit False
48+
2349

2450
-- * Utility BitCode Functions
2551
emitWord8 :: Word8 -> BitCodeWriter ()

src/Data/BitCode/Writer/Monad.hs

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
{-# OPTIONS_GHC -fprof-auto #-}
1+
{-# OPTIONS_GHC -fprof-auto #-}
2+
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
23
{-# LANGUAGE FlexibleInstances #-}
34
{-# LANGUAGE UndecidableInstances #-}
5+
{-# LANGUAGE BangPatterns #-}
46
module Data.BitCode.Writer.Monad
57
( BitCodeWriter
68
, evalBitCodeWriter, execBitCodeWriter
@@ -24,21 +26,36 @@ import Data.ByteString.Base16 (encode)
2426

2527
import Data.BitCode
2628

29+
import Control.Monad.Trans.State
30+
2731
import Data.Sequence (Seq(..), fromList)
2832
import Data.Foldable (toList)
2933

34+
import GHC.Stack (HasCallStack)
35+
3036
-- * BitCode
31-
data BitC = BitC { _words :: Int, _bits :: Int, _body :: Seq Bool } deriving (Show)
37+
data BitC = BitC { _words :: Int
38+
, _bits :: Int
39+
, _body :: (Seq Bool)
40+
} deriving (Show)
3241

3342
instance Monoid BitC where
3443
mempty = BitC 0 0 mempty
35-
(BitC w l bs) `mappend` (BitC w' l' bs') = BitC (w+w' + ((l+l') `div` 32)) ((l+l') `mod` 32) (bs `mappend` bs')
44+
mappend !(BitC w l bs) !(BitC w' l' bs') = let (!d,!m) = (l+l') `divMod` 32 in BitC (w+w' + d) m (bs `mappend` bs')
3645

3746
-- * BitCode Writer Monad (with access to the current state)
38-
newtype BitCodeWriter a = BitCode { unBitCode :: BitC -> (a, BitC) }
47+
newtype BitCodeWriter a = BitCode { unBitCode :: State BitC a }
48+
deriving (Functor, Applicative, Monad)
49+
50+
modifyBits :: HasCallStack => (BitC -> BitC) -> BitCodeWriter ()
51+
modifyBits = BitCode . modify
52+
modifyBits' :: HasCallStack => (BitC -> BitC) -> BitCodeWriter ()
53+
modifyBits' = BitCode . modify'
54+
getsBits :: HasCallStack => (BitC -> a) -> BitCodeWriter a
55+
getsBits = BitCode . gets
3956

4057
runBitCodeWriter :: BitCodeWriter a -> (a, BitC)
41-
runBitCodeWriter = flip unBitCode mempty
58+
runBitCodeWriter = flip runState mempty . unBitCode
4259

4360
evalBitCodeWriter :: BitCodeWriter a -> a
4461
evalBitCodeWriter = fst . runBitCodeWriter
@@ -49,40 +66,25 @@ execBitCodeWriter = toList . _body . snd . runBitCodeWriter
4966
class ToBits a where
5067
emit :: a -> BitCodeWriter ()
5168

52-
-- * Functor
53-
instance Functor BitCodeWriter where
54-
fmap f m = BitCode $ \b -> let (a, b') = unBitCode m b
55-
in (f a, b')
56-
-- * Applicative
57-
instance Applicative BitCodeWriter where
58-
pure a = BitCode $ \b -> (a, b)
59-
60-
m <*> n = BitCode $ \b ->
61-
let (f, b') = unBitCode m b
62-
(x, b'') = unBitCode n b'
63-
in (f x, b' `mappend` b'')
64-
65-
-- * Monad
66-
instance Monad BitCodeWriter where
67-
m >>= n = BitCode $ \b -> let (a, b') = unBitCode m b in unBitCode (n a) b'
68-
69+
{-
6970
-- * Monoid
7071
instance Monoid (BitCodeWriter ()) where
7172
mempty = pure ()
7273
m `mappend` n = BitCode $ \b ->
7374
let (_, a) = unBitCode m b
7475
(_, b) = unBitCode n b
7576
in ((), a `mappend` b)
77+
-}
7678

7779
-- * Low Level BitCode Functions (These need to know about BitCode and BitC)
7880
tell :: Bits -> BitCodeWriter ()
79-
tell bs = BitCode $ \b -> ((), b `mappend` b')
80-
where n = length bs
81-
b' = BitC (n `div` 32) (n `mod` 32) (fromList bs)
81+
tell bs = modifyBits' (\b -> b `mappend` b')
82+
where !n = length bs
83+
!b' = BitC (n `div` 32) (n `mod` 32) (fromList bs)
8284

8385
-- | Get the number of words and bits in the stream.
8486
ask :: BitCodeWriter (Int, Int)
85-
ask = BitCode $ \b@(BitC ws bs _) -> ((ws,bs),b)
87+
ask = getsBits (\(BitC ws bs _) -> (ws,bs))
8688

8789
-- * Utility
8890

src/Data/BitCode/Writer/ToBits.hs

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
{-# OPTIONS_GHC -fprof-auto #-}
22
{-# LANGUAGE TupleSections #-}
33
{-# LANGUAGE FlexibleInstances #-}
4+
{-# LANGUAGE BangPatterns #-}
45
module Data.BitCode.Writer.ToBits where
56

7+
import Prelude hiding (toEnum, fromEnum)
8+
69
import Data.Word (Word8)
710

811
import Data.BitCode
@@ -13,25 +16,25 @@ import Data.BitCode.IDs.FixedAbbrev
1316

1417
-- * ToBits instances
1518
instance ToBits EncVal where
16-
emit (Fixed v) = emitFixed 3 (1 :: Word8) >> emitVBR 5 v
17-
emit (VBR v) = emitFixed 3 (2 :: Word8) >> emitVBR 5 v
18-
emit Arr = emitFixed 3 (3 :: Word8)
19-
emit Char6 = emitFixed 3 (4 :: Word8)
20-
emit Blob = emitFixed 3 (5 :: Word8)
19+
emit !(Fixed v) = emitFixed 3 (1 :: Word8) >> emitVBR 5 v
20+
emit !(VBR v) = emitFixed 3 (2 :: Word8) >> emitVBR 5 v
21+
emit !Arr = emitFixed 3 (3 :: Word8)
22+
emit !Char6 = emitFixed 3 (4 :: Word8)
23+
emit !Blob = emitFixed 3 (5 :: Word8)
2124

2225
instance ToBits Op where
23-
emit (Lit v) = emitBit True >> emit (W64 v)
24-
emit (Enc e) = emitBit False >> emit e
26+
emit !(Lit v) = emitBit True >> emit (W64 v)
27+
emit !(Enc e) = emitBit False >> emit e
2528

2629
instance ToBits [Op] where
2730
emit = mapM_ emit
2831

2932
instance ToBits Field where
30-
emit (Vbr n v) = emitVBR n v
31-
emit (Fix n v) = emitFixed n v
32-
emit (Chr c) = emitChar6 c
33-
emit (Len n) = emitVBR 6 n
34-
emit (W64 w) = emitVBR 8 w
33+
emit !(Vbr n v) = emitVBR (fromIntegral n) v
34+
emit !(Fix n v) = emitFixed (fromIntegral n) v
35+
emit !(Chr c) = emitChar6 c
36+
emit !(Len n) = emitVBR 6 n
37+
emit !(W64 w) = emitVBR 8 w
3538

3639
instance ToBits (Int, BitCode) where
3740
emit (width, (Located _ block)) = emit (width, block)
@@ -44,8 +47,8 @@ instance ToBits (Int, BitCode) where
4447
-- as the body may not be multiple of 32
4548
-- bits.
4649
putSized $ do
47-
mapM_ (emit . (len,)) body
48-
emitVBR len $ fromEnum END_BLOCK
50+
mapM_ (emit . (fromIntegral len :: Int,)) body
51+
emitVBR (fromIntegral len) $ fromEnum END_BLOCK
4952
align
5053
where putSized :: BitCodeWriter () -> BitCodeWriter ()
5154
putSized b = size b >>= emitWord32 >> b

0 commit comments

Comments
 (0)