Skip to content

Commit a4c5dc0

Browse files
committed
initial checkin
0 parents  commit a4c5dc0

File tree

18 files changed

+1009
-0
lines changed

18 files changed

+1009
-0
lines changed

.gitignore

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
dist
2+
dist-*
3+
cabal-dev
4+
*.o
5+
*.hi
6+
*.chi
7+
*.chs.h
8+
*.dyn_o
9+
*.dyn_hi
10+
.hpc
11+
.hsenv
12+
.cabal-sandbox/
13+
cabal.sandbox.config
14+
*.prof
15+
*.aux
16+
*.hp
17+
*.eventlog
18+
.stack-work/
19+
cabal.project.local

LICENSE

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
Copyright Moritz Angermann (c) 2016
2+
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are met:
7+
8+
* Redistributions of source code must retain the above copyright
9+
notice, this list of conditions and the following disclaimer.
10+
11+
* Redistributions in binary form must reproduce the above
12+
copyright notice, this list of conditions and the following
13+
disclaimer in the documentation and/or other materials provided
14+
with the distribution.
15+
16+
* Neither the name of Moritz Angermann nor the names of other
17+
contributors may be used to endorse or promote products derived
18+
from this software without specific prior written permission.
19+
20+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Setup.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import Distribution.Simple
2+
main = defaultMain

data-bitcode.cabal

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
name: data-bitcode
2+
version: 0.1.0.0
3+
synopsis: bitcode reader and writer.
4+
description: Please see README.md
5+
homepage: https://github.com/lichtzwerge/data-bitcode#readme
6+
license: BSD3
7+
license-file: LICENSE
8+
author: Moritz Angermann
9+
maintainer: moritz@lichtzwerge.de
10+
copyright: lichtzwerge GmbH
11+
category: Code Generation
12+
build-type: Simple
13+
-- extra-source-files:
14+
cabal-version: >=1.10
15+
16+
library
17+
hs-source-dirs: src
18+
exposed-modules: Data.BitCode
19+
, Data.BitCode.AbbrevOpEncoding
20+
, Data.BitCode.Codes.BlockInfo
21+
, Data.BitCode.IDs.FixedAbbrev
22+
, Data.BitCode.IDs.StandardBlock
23+
, Data.BitCode.Reader
24+
, Data.BitCode.Reader.Monad
25+
, Data.BitCode.Reader.Combinators
26+
, Data.BitCode.Reader.FromBits
27+
, Data.BitCode.Writer
28+
, Data.BitCode.Writer.Monad
29+
, Data.BitCode.Writer.Combinators
30+
, Data.BitCode.Writer.ToBits
31+
build-depends: base >= 4.7 && < 5
32+
, bytestring >= 0.10
33+
, base16-bytestring
34+
default-language: Haskell2010
35+
36+
source-repository head
37+
type: git
38+
location: https://github.com/lichtzwerge/data-bitcode

src/Data/BitCode.hs

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
{-# LANGUAGE UndecidableInstances #-}
2+
{-# LANGUAGE FlexibleInstances #-}
3+
{-# LANGUAGE RankNTypes #-}
4+
module Data.BitCode where
5+
6+
import Data.Word (Word32, Word64)
7+
import Data.Maybe (catMaybes)
8+
import Data.Bits (FiniteBits, finiteBitSize, countLeadingZeros)
9+
10+
11+
--- Bit Codes ------------------------------------------------------------------
12+
-- see BitCodes.h (e.g. http://llvm.org/docs/doxygen/html/BitCodes_8h_source.html)
13+
-- * Bits
14+
type Bit = Bool
15+
type Bits = [Bool]
16+
17+
-- * BitCode
18+
type BlockId = Int
19+
type Code = Int
20+
21+
-- * Source location
22+
type Loc = (Int, Int) -- Words, Bits
23+
24+
-- | Bit Code data values can be 64bit wide.
25+
type Val = Word64
26+
data EncVal = Fixed !Val -- code 1 fixed value
27+
| VBR !Val -- code 2 vbr value
28+
| Arr -- code 3 Array -- the documentation sais, an Array needs to be followed by an op.
29+
-- when reading an array, the first is a vbr6 field indicating the length.
30+
| Char6 -- code 4 6-bit char
31+
| Blob -- code 5 note: the value for this is: [vbr6:val,pad32bit,8bit array,pad32bit]
32+
deriving Show
33+
34+
-- | Operators for abbreviated records, are encoded as either literal (1) or encoded value (0).
35+
data Op = Lit !Val -- [1,vbr8:val]
36+
| Enc !EncVal -- [0,f3:enc(,vbr5:val)?], vbr5 value only if given.
37+
deriving Show
38+
39+
-- | The Fields contained in an abbreviated record can be one of the following.
40+
data Field = Vbr !Int !Val
41+
| Fix !Int !Val
42+
| Len !Val
43+
| Chr !Char
44+
| W64 !Val -- Literal values. These are not bein emitted.
45+
-- WARN: this is somewhat a hack, to make parsing and writing identical to id,
46+
-- without having to track abbreviations in the writer and ensure the
47+
-- abbreviated record matches the def abbrev. This could be considered
48+
-- a TODO, as it would be an improvement to enforce the that AbbrevRecord
49+
-- matches the actuall DefAbbrev.
50+
deriving Show
51+
52+
-- | Bit Code Data consists of a series of blocks. Their interpretation is dependent
53+
-- on the container they are in. The top level blocks are emitted with an abbreviation
54+
-- width of 2. This allows the following four block types, which allow to define any
55+
-- other set of blocks.
56+
data BitCode
57+
-- | Combine ENTER_SUBBLOCK(1) with END_BLOCK(0)
58+
-- Layout: [1,vbr8:id,vbr4:newabbrevlen,<align32bits>,32bit:blocklen,<blocklen * words>,0,<align32bits>]
59+
-- 1 and 0 are vbr(current abbrev len); starting with 2 at the top level.
60+
= Block { blockId :: !BlockId -- ^ id
61+
, blockAbbrevLen :: !Int -- ^ abbrev len
62+
, blockBody :: ![BitCode] -- ^ body
63+
}
64+
-- | A abbreviation definition record. Layout: [2,vbr5:#ops,op0,op1,...]
65+
| DefAbbrevRecord { defRecordOps :: ![Op]
66+
}
67+
-- | An unabbreviated record. Layout: [3,vbr6:code,vbr6:#ops,vbr6:op0,...]
68+
| UnabbrevRecord { uRecordCode :: !Val -- ^ code encoded vbr6
69+
, uRecordOps :: ![Val] -- ^ generic ops, encoded vbr6
70+
}
71+
-- | An abbreviated record. Layout [<abbrevcode>, fields, ...]
72+
| AbbrevRecord { aRecordCode :: !Code
73+
, aRecordFields :: ![Field]
74+
}
75+
| Located { srcLoc :: (Loc, Loc), unLoc :: !BitCode }
76+
deriving Show
77+
78+
-- | BitCode contains some additional control information,
79+
-- like abbreviation records, or the BLOCKINFO block, which
80+
-- assist in decoding, but provide no information after
81+
-- parsing the bitcode. Normalized bitcode is a simpler
82+
-- structure consisting of only Blocks and Records.
83+
--
84+
-- Note: Normalized BitCode will erase location information.
85+
data NBitCode
86+
= NBlock !BlockId ![NBitCode]
87+
| NRec !Code ![Val]
88+
deriving Show
89+
90+
idOrCode :: NBitCode -> Int
91+
idOrCode (NBlock i _) = i
92+
idOrCode (NRec i _) = i
93+
94+
normalize :: BitCode -> Maybe NBitCode
95+
normalize (Block 0 _ _) = Nothing
96+
normalize (Block id _ b) = Just (NBlock id (catMaybes . map normalize $ b))
97+
normalize (DefAbbrevRecord{}) = Nothing
98+
normalize (Located _ bs) = normalize bs
99+
normalize (UnabbrevRecord c vs) = Just (NRec (fromIntegral c) vs)
100+
normalize (AbbrevRecord _ flds) = let (code:ops) = map toVal . filter (not . isControl) $ flds
101+
in Just (NRec (fromIntegral code) ops)
102+
where
103+
-- As Abbreviated records can contain arrays, and
104+
-- arrays have thier length encoded in the field,
105+
-- Ops is anything but array length.
106+
--
107+
-- NOTE: This way we don't have to go back to the
108+
-- abbrev definition to figure out which
109+
-- ops are control ops and which are not.
110+
isControl :: Field -> Bool
111+
isControl (Len _) = True
112+
isControl _ = False
113+
114+
toVal :: Field -> Val
115+
toVal (Vbr _ n) = n
116+
toVal (Fix _ n) = n
117+
toVal (Len _) = error "Len is a control op"
118+
toVal (Chr c) = fromIntegral . fromEnum $ c
119+
toVal (W64 v) = v
120+
121+
bitWidth :: (FiniteBits a) => a -> Int
122+
bitWidth x = finiteBitSize x - countLeadingZeros x
123+
124+
-- | Extract the id or the code for a BitCode element
125+
denormalize :: NBitCode -> BitCode
126+
denormalize (NBlock id bs) = let bs' = map denormalize bs
127+
ids = map idOrCode bs
128+
abbrevWidth = if ids == []
129+
then 0
130+
else max 2 (bitWidth (maximum ids))
131+
in Block id abbrevWidth (map denormalize bs)
132+
denormalize (NRec c vs) = UnabbrevRecord (fromIntegral c) vs
133+
134+
records :: (Enum a) => [NBitCode] -> [(a, [Val])]
135+
records bs = [(toEnum c, vs) | NRec c vs <- bs]
136+
blocks :: (Enum a) => [NBitCode] -> [(a,[NBitCode])]
137+
blocks bs = [(toEnum c, bs') | NBlock c bs' <- bs]
138+
139+
lookupBlock :: (Enum a) => a -> [NBitCode] -> Maybe [NBitCode]
140+
lookupBlock e bs = lookup (fromEnum e) [(c,b) | NBlock c b <- bs]
141+
142+
lookupRecord :: (Enum a) => a -> [NBitCode] -> Maybe [Val]
143+
lookupRecord e bs = lookup (fromEnum e) [(c,v) | NRec c v <- bs]
144+
145+
--------------------------------------------------------------------------------
146+
-- Turn things into Val's for use in records
147+
class ToVal a where
148+
toVal :: a -> [Val]
149+
150+
instance {-# OVERLAPPABLE #-} (Enum a) => ToVal a where
151+
toVal = pure . fromIntegral . fromEnum
152+
153+
instance {-# OVERLAPPING #-} (ToVal a) => ToVal [a] where
154+
toVal = concatMap toVal
155+
156+
--------------------------------------------------------------------------------
157+
-- NBitCode construction
158+
mkBlock :: (Enum a) => a -> [NBitCode] -> NBitCode
159+
mkBlock e = NBlock (fromEnum e)
160+
161+
mkRec :: (Enum a, ToVal b) => a -> b -> NBitCode
162+
mkRec e = NRec (fromEnum e) . toVal
163+
164+
mkEmptyRec :: (Enum a) => a -> NBitCode
165+
mkEmptyRec e = NRec (fromEnum e) []
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
module Data.BitCode.AbbrevOpEncoding where
2+
3+
-- | BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
4+
-- This is actually a union of two different things:
5+
-- 1. It could be a literal integer value ("the operand is always 17").
6+
-- 2. It could be an encoding specification ("this operand encoded like so").
7+
--
8+
-- These are encoded as
9+
data AbbrevOpEncoding
10+
-- | Placehodler for 0. Do not use.
11+
= Unused
12+
-- | A fixed width field, Val specifies number of bits.
13+
| Fixed
14+
-- | A VBR field where Val specifies the width of each chunk.
15+
| VBR
16+
-- | A sequence of fields, next field species elt encoding.
17+
| Array
18+
-- | A 6-bit fixed field which maps to [a-zA-Z0-9._].
19+
| Char6
20+
-- | 32-bit aligned array of 8-bit characters.
21+
| Blob
22+
deriving (Show, Enum)

src/Data/BitCode/Abbreviation.hs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
module Data.BitCode.Abbreviation
2+
( addAbbrev, lookupAbbrev
3+
, addGlobalAbbrev, lookupGlobalAbbrev
4+
, AbbrevMap
5+
, GlobalAbbrevMap
6+
)
7+
where
8+
9+
import Data.BitCode
10+
import Data.Maybe (fromMaybe)
11+
12+
newtype AbbrevMap = AbbrevMap [(Code, BitCode)] deriving Show
13+
newtype GlobalAbbrevMap = GlobalAbbrevMap [(BlockId, AbbrevMap)] deriving Show
14+
15+
instance Monoid AbbrevMap where
16+
mempty = AbbrevMap []
17+
(AbbrevMap m) `mappend` (AbbrevMap n) = AbbrevMap (m ++ n)
18+
19+
instance Monoid GlobalAbbrevMap where
20+
mempty = GlobalAbbrevMap []
21+
(GlobalAbbrevMap m) `mappend` (GlobalAbbrevMap n) = GlobalAbbrevMap (m ++ n)
22+
23+
lookupGlobalAbbrev :: GlobalAbbrevMap -> BlockId -> AbbrevMap
24+
lookupGlobalAbbrev (GlobalAbbrevMap g) blockId = fromMaybe mempty (lookup blockId g)
25+
26+
addGlobalAbbrev :: GlobalAbbrevMap -> BlockId -> BitCode -> GlobalAbbrevMap
27+
addGlobalAbbrev (GlobalAbbrevMap g) blockId block = GlobalAbbrevMap g'
28+
where g' = go g blockId block
29+
go :: [(BlockId, AbbrevMap)] -> BlockId -> BitCode -> [(BlockId, AbbrevMap)]
30+
go [] id b = [(blockId, addAbbrev mempty block)]
31+
go (gb@(id', bs):g') id block | id == id' = (id, addAbbrev bs block):go g' id block
32+
| otherwise = gb:go g' id block
33+
34+
lookupAbbrev :: AbbrevMap -> Code -> Maybe BitCode
35+
lookupAbbrev (AbbrevMap m) = flip lookup m
36+
37+
addAbbrev :: AbbrevMap -> BitCode -> AbbrevMap
38+
addAbbrev (AbbrevMap m) r@(DefAbbrevRecord ops) = AbbrevMap $ (nextId,r):m
39+
where nextId = 1 + foldr max 3 (map fst m)
40+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
module Data.BitCode.Codes.BlockInfo where
2+
3+
-- | BlockInfoCodes - The blockinfo block contains metadata about user-defined
4+
-- blocks.
5+
--
6+
-- DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd
7+
-- block, instead of the BlockInfo block.
8+
data BlockInfo
9+
-- | Placeholder for 0. Do not use.
10+
= BLOCKINFO_CODE_UNDEFINED
11+
-- | SETBID: [blockid#]
12+
| BLOCKINFO_CODE_SETBID
13+
-- | BLOCKNAME: [name]
14+
| BLOCKINFO_CODE_BLOCKNAME
15+
-- | BLOCKINFO_CODE_SETRECORDNAME: [id, name]
16+
| BLOCKINFO_CODE_SETRECORDNAME
17+
deriving (Show, Enum)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
module Data.BitCode.IDs.FixedAbbrev where
2+
3+
-- | The standard abbrev namespace always has a way to exit a block, enter a
4+
-- nested block, define abbrevs, and define an unabbreviated record.
5+
data FixedAbbrev
6+
-- | Must be zero to guarantee termination for broken bitcode.
7+
= END_BLOCK
8+
| ENTER_SUBBLOCK
9+
-- | DEFINE_ABBREV - Defines an abbrev for the current block. It consists
10+
-- of a vbr5 for # operand infos. Each operand info is emitted with a
11+
-- single bit to indicate if it is a literal encoding. If so, the value is
12+
-- emitted with a vbr8. If not, the encoding is emitted as 3 bits followed
13+
-- by the info value as a vbr5 if needed.
14+
| DEFINE_ABBREV
15+
-- | UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by
16+
-- a vbr6 for the # operands, followed by vbr6's for each operand.
17+
| UNABBREV_RECORD
18+
-- | This is not a code, this is a marker for the first abbrev assignment.
19+
| FIRST_APPLICATION_ABBREV
20+
deriving (Show, Enum)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
module Data.BitCode.IDs.StandardBlock where
2+
-- | StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
3+
-- block, which contains metadata about other blocks in the file.
4+
data StandardBlock
5+
-- | BLOCKINFO_BLOCK (0) is used to define metadata about blocks, for example,
6+
-- standard abbrevs that should be available to all blocks of a specified
7+
-- ID.
8+
= BLOCKINFO
9+
-- | Block IDs 1-7 are reserved for future expansion.
10+
| RESERVED_1 | RESERVED_2 | RESERVED_3 | RESERVED_4 | RESERVED_5 | RESERVED_6 | RESERVED_7
11+
-- | This is the marker for the first application block id (8).
12+
| FIRST_APPLICATION_BLOCKID
13+
deriving (Show, Enum)

0 commit comments

Comments
 (0)