Skip to content

Commit 9ed3a58

Browse files
committed
cleanup
1 parent c5e7f32 commit 9ed3a58

File tree

3 files changed

+353
-0
lines changed

3 files changed

+353
-0
lines changed

README.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Data.Bitstream
2+
3+
*This is an alternative implementation to the `[Bool]` heavy solution in
4+
[Data.BitCode](https://github.com/angerman/data-bitcode).*
5+
6+
`Data.Bitstream` is a LLVM bitcode serializer. The module takes bitcode
7+
records and turns them into the bitcode bitstream, which is then written
8+
to disk as bytes.
9+
10+
## Bitcode
11+
12+
LLVM Bitcode is a series of bits emitted into a stream. This does not
13+
need to align with bytes. Notably bytes on disk usually store their least
14+
significant bit last, while it would come first in bitcode. Example:
15+
16+
| byte | `de` | `c0` | `17` | `0b` |
17+
| bits on disk | `11011110` | `1100000` | `00010111` | `00001011` |
18+
| bits in stream | `01111011` | `0000011` | `11101000` | `00001011` |
19+
20+
However flipping the bytes in the `ByteString` prior to writing to disk
21+
seems rather cheap (~16M/s) (see `bench.html`).
22+
23+
## Building
24+
25+
With [Data.BitCode](https://github.com/angerman/data-bitcode) and
26+
[Data.BitCode.LLVM](https://gihtub.com/angerman/data-bitcode-llvm), checked
27+
out in the same directory as
28+
[Data.Bistream](https://github.com/angerman/data-bitstream), this module should
29+
be buildable with `cabal new-build` (see also the `cabal.project` file).
30+
31+
## Testing
32+
33+
`cabal new-test` should run a series of tests, trying to produce bitcode and
34+
ensure some basic validity of the `Bitstream`.
35+
36+
## Benchmarking
37+
38+
`cabal new-bench` should try to do some rather expensive, yet common bit
39+
concatinations on `Bitstream` and the underlying `Stream` data types. The
40+
benchmark has two parts. One is a synthetic which tries some simple
41+
concatination. And the other one is a real world benchmark which attempts
42+
to build the object file for `main = putStrLn 'Hello World'` by taking the
43+
serialized bitcode records for said program and runs them through the
44+
serializer.
45+
46+
## Profiling
47+
48+
The module also contains the `test` executable. Which takes a serialized
49+
bitcode record file as input and runs the `Bitstream` serializer over it.
50+
`cabal new-build --enable-profiling` should produce the profiled binary.
51+
52+
```
53+
$(find dist-newstyle -name "test" -type f) bench/data/Main.bcbin +RTS -P
54+
```
55+
56+
should produce the `test.prof` profile.
57+
58+
*I like [viewprof](https://hackage.haskell.org/package/viewprof) a lot for
59+
viewing haskell profiles*

test/BitcodeSpec.hs

Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
{-# OPTIONS_GHC -Wno-name-shadowing -Wno-orphans #-}
2+
{-# LANGUAGE OverloadedStrings, BinaryLiterals, FlexibleInstances, RecursiveDo #-}
3+
4+
module BitcodeSpec where
5+
6+
import Prelude hiding (words)
7+
8+
import Test.Tasty.Hspec
9+
10+
import Data.BitCode
11+
import Data.BitCode.Writer
12+
import Data.BitCode.Writer.Monad
13+
14+
import Data.Bits (zeroBits, setBit)
15+
import Data.String (IsString(..))
16+
import Data.Word (Word8)
17+
18+
-- import Data.BitCode.LLVM
19+
-- import Data.BitCode.LLVM.ToBitCode (ToNBitCode(..))
20+
-- import Data.BitCode.LLVM.Codes.Identification (Epoch(..))
21+
22+
-- | Helper function for the IsString instances
23+
bit :: Int -> Char -> Word8 -> Word8
24+
bit n '1' = flip setBit n
25+
bit _ '0' = id
26+
bit _ b = error $ "bit must be 0 or 1; " ++ b:" given."
27+
28+
isBinary :: String -> Bool
29+
isBinary = all (flip elem ['0','1'])
30+
31+
instance IsString Buff where
32+
fromString s | length s > 8 = error $ "cannot create buffer from " ++ s ++ "; more than eight bits"
33+
| not . isBinary $ s = error $ "cannot create buffer from " ++ s ++ "; elements must be 0 or 1"
34+
| otherwise = mkBuff (length s) (foldl f zeroBits (indexed s))
35+
where indexed :: [a] -> [(Int, a)]
36+
indexed = zip [0..]
37+
f :: Word8 -> (Int, Char) -> Word8
38+
f w (n, '1') = setBit w n
39+
f w _ = w
40+
41+
42+
instance IsString Word8 where
43+
fromString s | length s /= 8 = error $ "cannot create Word8 from " ++ s ++ "; must be 8 bits"
44+
| not . isBinary $ s = error $ "cannot create Word8 from " ++ s ++ "; elements must be 0 or 1"
45+
| otherwise = let (b0:b1:b2:b3:b4:b5:b6:b7:[]) = s
46+
in bit 0 b0 . bit 1 b1 . bit 2 b2 . bit 3 b3
47+
. bit 4 b4 . bit 5 b5 . bit 6 b6 . bit 7 b7 $ zeroBits
48+
49+
instance {-# OVERLAPS #-} IsString [Word8] where
50+
fromString = fromString' . filter (/= ' ')
51+
where fromString' :: String -> [Word8]
52+
fromString' s | (length s `mod` 8) /= 0 = error $ "cannot create [Word8] from " ++ s ++ "; must be multiple of 8 bits"
53+
| not . isBinary $ s = error $ "cannot create [Word8] from " ++ s ++ "; elements must be 0 or 1"
54+
| otherwise = go s
55+
56+
go :: String -> [Word8]
57+
go [] = []
58+
go (b0:b1:b2:b3:b4:b5:b6:b7:rest) = let word = bit 0 b0 . bit 1 b1 . bit 2 b2 . bit 3 b3
59+
. bit 4 b4 . bit 5 b5 . bit 6 b6 . bit 7 b7 $ zeroBits
60+
in word : go rest
61+
go s = error $ "cannot creates [Word8] from " ++ s ++ "; must be multiple of 8 chars."
62+
63+
instance IsString (Stream [] a) where
64+
fromString = fromString' . filter (/= ' ')
65+
where fromString' :: String -> Stream [] a
66+
fromString' s | not . isBinary $ s = error $ "cannot create List Stream from " ++ s ++ "; elements must be 0 or 1"
67+
| otherwise = let (ws, buff) = go s in S ws buff (length s)
68+
go :: String -> ([Word8], Buff)
69+
go s = let l = 8 * (length s `div` 8) in
70+
(words $ take l s, b $ drop l s)
71+
72+
-- type helper.
73+
b :: String -> Buff
74+
b = fromString
75+
76+
w :: String -> Word8
77+
w = fromString
78+
79+
words :: String -> [Word8]
80+
words = fromString
81+
82+
ls :: String -> Stream [] Word8
83+
ls = fromString
84+
85+
86+
-- * Specifications
87+
88+
spec_helper :: Spec
89+
spec_helper = do
90+
describe "Buff" $ do
91+
it "has an IsString instance" $ do
92+
"0" `shouldBe` (mkBuff 1 0b00000000)
93+
"1" `shouldBe` (mkBuff 1 0b00000001)
94+
"11" `shouldBe` (mkBuff 2 0b00000011)
95+
"10100101" `shouldBe` (mkBuff 8 0b10100101)
96+
"10101010" `shouldBe` (mkBuff 8 0b01010101)
97+
98+
describe "Word8" $ do
99+
it "has an IsString instance" $ do
100+
w "00000000" `shouldBe` 0
101+
w "10000000" `shouldBe` 1
102+
w "11000000" `shouldBe` 3
103+
w "00000001" `shouldBe` 128
104+
105+
describe "List Stream" $ do
106+
it "has an IsString isntance" $ do
107+
ls "" `shouldBe` (S [] nullBuff 0)
108+
ls "1" `shouldBe` (S [] (mkBuff 1 0b00000001) 1)
109+
ls "10101010 101" `shouldBe` (S [0b01010101] (mkBuff 3 0b00000101) 11)
110+
111+
spec_buff :: Spec
112+
spec_buff = do
113+
describe "Buff" $ do
114+
it "should add" $ do
115+
nullBuff `addBuff` nullBuff
116+
`shouldBe` (Nothing, nullBuff)
117+
nullBuff `addBuff` (mkBuff 4 0b00000101)
118+
`shouldBe` (Nothing, mkBuff 4 0b00000101)
119+
(mkBuff 1 0b00000001) `addBuff` (mkBuff 1 0b00000001)
120+
`shouldBe` (Nothing, mkBuff 2 0b00000011)
121+
(mkBuff 4 0b00000101) `addBuff` (mkBuff 4 0b00000101)
122+
`shouldBe` (Just 0b01010101, nullBuff)
123+
(mkBuff 6 0b00010101) `addBuff` (mkBuff 4 0b00001010)
124+
`shouldBe` (Just 0b10010101, (mkBuff 2 0b00000010))
125+
(mkBuff 6 0b00010101) `addBuff` (mkBuff 7 0b01010101)
126+
`shouldBe` (Just 0b01010101, (mkBuff 5 0b00010101))
127+
128+
spec_stream :: Spec
129+
spec_stream = do
130+
describe "Stream" $ do
131+
it "should be a monoid" $ do
132+
(S [] nullBuff 0 `mappend` S [] (mkBuff 4 0b10100000) 4)
133+
`shouldBe` (S [] (mkBuff 4 0b10100000) 4)
134+
(S [1] nullBuff 8 `mappend` S [2] (mkBuff 4 0b10100000) 12)
135+
`shouldBe` (S [1,2] (mkBuff 4 0b10100000) 20)
136+
(S [] (mkBuff 4 0b00001010) 4 `mappend` S [] (mkBuff 4 0b00000101) 4)
137+
`shouldBe` (S [0b01011010] nullBuff 8)
138+
139+
-- 101 + 10101010 101
140+
-- = 10110101 010101
141+
(S [] (mkBuff 3 0b00000101) 3) `mappend` (S [0b01010101] (mkBuff 3 0b00000101) 11)
142+
`shouldBe` (S [0b10101101] (mkBuff 6 0b00101010) 14)
143+
144+
ls "101" `mappend` (ls "10101010 101") `shouldBe` (ls "10110101 010101")
145+
146+
-- 01010101 101 + 10101010 11001100 000111
147+
-- = 01010101 10110101 01011001 10000011 1
148+
149+
ls "01010101 101" `mappend` (ls "10101010 11001100 000111")
150+
`shouldBe` (ls "01010101 10110101 01011001 10000011 1")
151+
152+
(S [0b10101010] (mkBuff 3 0b00000101) 11) `mappend` (S [0b01010101, 0b00110011] (mkBuff 6 0b00111000) 22)
153+
`shouldBe` (S [0b10101010,0b10101101,0b10011010,0b11000001] (mkBuff 1 0b00000001) 33)
154+
155+
spec_bitstream :: Spec
156+
spec_bitstream = do
157+
describe "Bitstream" $ do
158+
it "should track location" $ do
159+
evalBitstream 0 (loc) `shouldBe` 0
160+
evalBitstream 0 (emitBit True >> loc) `shouldBe` 1
161+
evalBitstream 0 (emitBit True >> emitBit False >> loc) `shouldBe` 2
162+
evalBitstream 0 (bitstream [] (Buff 1 1) 1 >> bitstream [] (Buff 6 0) 6 >> loc) `shouldBe` 7
163+
evalBitstream 0 (emitBit True >> alignWord8 >> loc) `shouldBe` 8
164+
165+
it "should produce word aligned results" $ do
166+
execBitstream 0 (pure ()) `shouldBe` []
167+
execBitstream 0 (emitBit False) `shouldBe` [0b00000000]
168+
execBitstream 0 (emitBit True) `shouldBe` [0b00000001]
169+
execBitstream 0 (emitBit True >> emitBit False >> emitBit True) `shouldBe` [0b00000101]
170+
171+
it "should produce the proper darwin header" $ do
172+
execBitstream 0 (withHeader True (pure ())) `shouldBe`
173+
[ 0xde, 0xc0, 0x17, 0x0b -- 0x0b17c0de header
174+
, 0x00, 0x00, 0x00, 0x00 -- version: 0
175+
, 0x14, 0x00, 0x00, 0x00 -- offset: 20
176+
, 0x04, 0x00, 0x00, 0x00 -- body length: 4 (llvmheader)
177+
, 0x07, 0x00, 0x00, 0x01 -- cpu type: ABI64 | X86
178+
, 0x42, 0x43, 0xc0, 0xde -- LLVM header. "BC" 0x0de
179+
]
180+
it "should be able to emit a fixed number of bits" $ do
181+
execBitstream 0 (emitFixed 6 0) `shouldBe` [0x00]
182+
execBitstream 0 (emitFixed 6 1) `shouldBe` (words "10000000")
183+
execBitstream 0 (emitFixed 6 2) `shouldBe` (words "01000000")
184+
execBitstream 0 (emitFixed 6 1 >> emitFixed 6 2)
185+
`shouldBe` (words "100000 010000 0000")
186+
187+
it "should be able to emit a variable number of bits" $ do
188+
execBitstream 0 (emitVBR 3 1) `shouldBe` (words "10000000")
189+
execBitstream 0 (emitVBR 3 2) `shouldBe` (words "01000000")
190+
execBitstream 0 (emitVBR 3 3) `shouldBe` (words "11000000")
191+
execBitstream 0 (emitVBR 3 4) `shouldBe` (words "00110000")
192+
execBitstream 0 (emitVBR 3 5) `shouldBe` (words "10110000")
193+
execBitstream 0 (emitVBR 3 9) `shouldBe` (words "10101000")
194+
195+
execBitstream 0 (emitVBR 4 0) `shouldBe` (words "00000000")
196+
execBitstream 0 (emitVBR 4 1) `shouldBe` (words "10000000")
197+
execBitstream 0 (emitVBR 4 2) `shouldBe` (words "01000000")
198+
execBitstream 0 (emitVBR 4 4) `shouldBe` (words "00100000")
199+
execBitstream 0 (emitVBR 4 8) `shouldBe` (words "00011000")
200+
execBitstream 0 (emitVBR 4 16) `shouldBe` (words "00010100")
201+
execBitstream 0 (emitVBR 4 32) `shouldBe` (words "00010010")
202+
execBitstream 0 (emitVBR 4 64) `shouldBe` (words "00010001 10000000")
203+
204+
it "should be able to emit char6 encoded data" $ do
205+
execBitstream 0 (mapM emitChar6 ("abcd" :: String))
206+
`shouldBe` (words "000000 100000 010000 110000")
207+
208+
it "handle withOffset" $ do
209+
True `shouldBe` True
210+
let action :: Bitstream ()
211+
action = mdo
212+
emitWord32 n
213+
n <- withOffset 0 $ do
214+
emitWord32 0
215+
emitWord32 0xffffffff
216+
emitWord32 0
217+
emitWord32 0xff00ff00
218+
locWords -- should be two now.
219+
emitWord8 3
220+
pure ()
221+
execBitstream 0 action `shouldBe`
222+
[ 0x04, 0x00, 0x00, 0x00 -- four words
223+
, 0x00, 0x00, 0x00, 0x00 -- word 1
224+
, 0xff, 0xff, 0xff, 0xff -- word 2
225+
, 0x00, 0x00, 0x00, 0x00 -- word 3
226+
, 0x00, 0xff, 0x00, 0xff -- word 4
227+
, 0x03 ]
228+
229+
it "should align to word32" $ do
230+
execBitstream 0 (emitFixed 6 1 >> alignWord32)
231+
`shouldBe` (words "1000 0000 0000 0000 0000 0000 0000 0000")
232+
execBitstream 0 (emitFixed 6 1 >> emitFixed 6 1 >> alignWord32)
233+
`shouldBe` (words "1000 0010 0000 0000 0000 0000 0000 0000")
234+
execBitstream 0 (emitWord32 0 >> alignWord32)
235+
`shouldBe` [0x00,0x00,0x00,0x00]
236+
237+
spec_bitcode :: Spec
238+
spec_bitcode = do
239+
describe "bitcode serializer" $ do
240+
it "should emit a simple empy block" $ do
241+
-- emit a block with id 1, and no body.
242+
let action = emitTopLevel [Block 1 3 []]
243+
result = execBitstream 0 (action)
244+
result `shouldBe`
245+
(words $ "10 10000000 1100" -- enter block (1), block id (1), len (3)
246+
++ "00 0000 0000 0000 0000" -- align 32 bits
247+
++ "1000 0000 0000 0000 0000 0000 0000 0000" -- body contains 1 word
248+
++ "000" -- end block (0)
249+
++ "0 0000 0000 0000 0000 0000 0000 0000" -- align 32 bits
250+
)
251+
252+
-- it "should serialize an ident block" $ do
253+
-- let bc = map denormalize $ toBitCode (Ident "LLVM" Current)
254+
-- result = execBitstream 0 (emitTopLevel bc)
255+
-- result `shouldBe`
256+
-- (words $ "10 10110000 0100" -- block: 13, len: 2 2+8+4 = 14
257+
-- ++ "00 0000 0000 0000 0000" -- 2+4*4 = 18
258+
-- ++ "1100 0000 0000 0000 0000 0000 0000 0000" -- body contains 3 words
259+
-- ++ "11 100000 001000" -- unabbrev record: 1, 4 ops 2+6+6 = 14
260+
-- ++ "001101010000" -- vbr:6 ('L') = 12
261+
-- ++ "001101010000" -- vbr:6 ('L') = 12
262+
-- ++ "011011010000" -- vbr:6 ('V') = 12
263+
-- ++ "101101010000" -- vbr:6 ('M') = 12
264+
-- ++ "11 010000 100000" -- unabbrev record: 2, 1 op 2+6+6 = 14
265+
-- ++ "000000" -- vbr:6 (0) = 6
266+
-- ++ "00" -- end block (0) = 2
267+
-- -- sum = 84 = 2 * 32 + 20
268+
-- ++ "0000 0000 0000" -- align to word boundary.
269+
-- )
270+
-- it "should serialize an empty module" $ do
271+
-- let bc = toBitCode (Just (Ident "LLVM" Current), Module { mVersion = 1
272+
-- , mTriple = Nothing
273+
-- , mDatalayout = Nothing
274+
-- , mValues = []
275+
-- , mDecls = []
276+
-- , mDefns = []
277+
-- , mFns = []
278+
-- , mConsts = []
279+
-- , mTypes = []})
280+
-- bc' = map denormalize bc
281+
-- result = execBitstream 0 (emitTopLevel bc')
282+
-- result `shouldBe`
283+
-- [ 0x35, 0x08, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00
284+
-- , 0x07, 0x04, 0x2b, 0xb0, 0x82, 0x2d, 0xb4, 0xc2
285+
-- , 0x42, 0x00, 0x00, 0x00, 0x21, 0x14, 0x00, 0x00
286+
-- , 0x08, 0x00, 0x00, 0x00, 0x23, 0x08, 0x82, 0x10
287+
-- , 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00
288+
-- , 0x07, 0x01, 0x00, 0x00, 0xc1, 0x41, 0x00, 0x00
289+
-- , 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
290+
-- , 0x00, 0x00, 0x00, 0x00
291+
-- ]
292+

test/Tasty.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{-# OPTIONS_GHC -F -pgmF tasty-discover -optF --tree-display #-}
2+

0 commit comments

Comments
 (0)