|
| 1 | +{-# LANGUAGE UndecidableInstances #-} |
| 2 | +{-# LANGUAGE FlexibleInstances #-} |
| 3 | +{-# LANGUAGE RankNTypes #-} |
| 4 | +module Data.BitCode where |
| 5 | + |
| 6 | +import Data.Word (Word32, Word64) |
| 7 | +import Data.Maybe (catMaybes) |
| 8 | +import Data.Bits (FiniteBits, finiteBitSize, countLeadingZeros) |
| 9 | + |
| 10 | + |
| 11 | +--- Bit Codes ------------------------------------------------------------------ |
| 12 | +-- see BitCodes.h (e.g. http://llvm.org/docs/doxygen/html/BitCodes_8h_source.html) |
| 13 | +-- * Bits |
| 14 | +type Bit = Bool |
| 15 | +type Bits = [Bool] |
| 16 | + |
| 17 | +-- * BitCode |
| 18 | +type BlockId = Int |
| 19 | +type Code = Int |
| 20 | + |
| 21 | +-- * Source location |
| 22 | +type Loc = (Int, Int) -- Words, Bits |
| 23 | + |
| 24 | +-- | Bit Code data values can be 64bit wide. |
| 25 | +type Val = Word64 |
| 26 | +data EncVal = Fixed !Val -- code 1 fixed value |
| 27 | + | VBR !Val -- code 2 vbr value |
| 28 | + | Arr -- code 3 Array -- the documentation sais, an Array needs to be followed by an op. |
| 29 | + -- when reading an array, the first is a vbr6 field indicating the length. |
| 30 | + | Char6 -- code 4 6-bit char |
| 31 | + | Blob -- code 5 note: the value for this is: [vbr6:val,pad32bit,8bit array,pad32bit] |
| 32 | + deriving Show |
| 33 | + |
| 34 | +-- | Operators for abbreviated records, are encoded as either literal (1) or encoded value (0). |
| 35 | +data Op = Lit !Val -- [1,vbr8:val] |
| 36 | + | Enc !EncVal -- [0,f3:enc(,vbr5:val)?], vbr5 value only if given. |
| 37 | + deriving Show |
| 38 | + |
| 39 | +-- | The Fields contained in an abbreviated record can be one of the following. |
| 40 | +data Field = Vbr !Int !Val |
| 41 | + | Fix !Int !Val |
| 42 | + | Len !Val |
| 43 | + | Chr !Char |
| 44 | + | W64 !Val -- Literal values. These are not bein emitted. |
| 45 | + -- WARN: this is somewhat a hack, to make parsing and writing identical to id, |
| 46 | + -- without having to track abbreviations in the writer and ensure the |
| 47 | + -- abbreviated record matches the def abbrev. This could be considered |
| 48 | + -- a TODO, as it would be an improvement to enforce the that AbbrevRecord |
| 49 | + -- matches the actuall DefAbbrev. |
| 50 | + deriving Show |
| 51 | + |
| 52 | +-- | Bit Code Data consists of a series of blocks. Their interpretation is dependent |
| 53 | +-- on the container they are in. The top level blocks are emitted with an abbreviation |
| 54 | +-- width of 2. This allows the following four block types, which allow to define any |
| 55 | +-- other set of blocks. |
| 56 | +data BitCode |
| 57 | + -- | Combine ENTER_SUBBLOCK(1) with END_BLOCK(0) |
| 58 | + -- Layout: [1,vbr8:id,vbr4:newabbrevlen,<align32bits>,32bit:blocklen,<blocklen * words>,0,<align32bits>] |
| 59 | + -- 1 and 0 are vbr(current abbrev len); starting with 2 at the top level. |
| 60 | + = Block { blockId :: !BlockId -- ^ id |
| 61 | + , blockAbbrevLen :: !Int -- ^ abbrev len |
| 62 | + , blockBody :: ![BitCode] -- ^ body |
| 63 | + } |
| 64 | + -- | A abbreviation definition record. Layout: [2,vbr5:#ops,op0,op1,...] |
| 65 | + | DefAbbrevRecord { defRecordOps :: ![Op] |
| 66 | + } |
| 67 | + -- | An unabbreviated record. Layout: [3,vbr6:code,vbr6:#ops,vbr6:op0,...] |
| 68 | + | UnabbrevRecord { uRecordCode :: !Val -- ^ code encoded vbr6 |
| 69 | + , uRecordOps :: ![Val] -- ^ generic ops, encoded vbr6 |
| 70 | + } |
| 71 | + -- | An abbreviated record. Layout [<abbrevcode>, fields, ...] |
| 72 | + | AbbrevRecord { aRecordCode :: !Code |
| 73 | + , aRecordFields :: ![Field] |
| 74 | + } |
| 75 | + | Located { srcLoc :: (Loc, Loc), unLoc :: !BitCode } |
| 76 | + deriving Show |
| 77 | + |
| 78 | +-- | BitCode contains some additional control information, |
| 79 | +-- like abbreviation records, or the BLOCKINFO block, which |
| 80 | +-- assist in decoding, but provide no information after |
| 81 | +-- parsing the bitcode. Normalized bitcode is a simpler |
| 82 | +-- structure consisting of only Blocks and Records. |
| 83 | +-- |
| 84 | +-- Note: Normalized BitCode will erase location information. |
| 85 | +data NBitCode |
| 86 | + = NBlock !BlockId ![NBitCode] |
| 87 | + | NRec !Code ![Val] |
| 88 | + deriving Show |
| 89 | + |
| 90 | +idOrCode :: NBitCode -> Int |
| 91 | +idOrCode (NBlock i _) = i |
| 92 | +idOrCode (NRec i _) = i |
| 93 | + |
| 94 | +normalize :: BitCode -> Maybe NBitCode |
| 95 | +normalize (Block 0 _ _) = Nothing |
| 96 | +normalize (Block id _ b) = Just (NBlock id (catMaybes . map normalize $ b)) |
| 97 | +normalize (DefAbbrevRecord{}) = Nothing |
| 98 | +normalize (Located _ bs) = normalize bs |
| 99 | +normalize (UnabbrevRecord c vs) = Just (NRec (fromIntegral c) vs) |
| 100 | +normalize (AbbrevRecord _ flds) = let (code:ops) = map toVal . filter (not . isControl) $ flds |
| 101 | + in Just (NRec (fromIntegral code) ops) |
| 102 | + where |
| 103 | + -- As Abbreviated records can contain arrays, and |
| 104 | + -- arrays have thier length encoded in the field, |
| 105 | + -- Ops is anything but array length. |
| 106 | + -- |
| 107 | + -- NOTE: This way we don't have to go back to the |
| 108 | + -- abbrev definition to figure out which |
| 109 | + -- ops are control ops and which are not. |
| 110 | + isControl :: Field -> Bool |
| 111 | + isControl (Len _) = True |
| 112 | + isControl _ = False |
| 113 | + |
| 114 | + toVal :: Field -> Val |
| 115 | + toVal (Vbr _ n) = n |
| 116 | + toVal (Fix _ n) = n |
| 117 | + toVal (Len _) = error "Len is a control op" |
| 118 | + toVal (Chr c) = fromIntegral . fromEnum $ c |
| 119 | + toVal (W64 v) = v |
| 120 | + |
| 121 | +bitWidth :: (FiniteBits a) => a -> Int |
| 122 | +bitWidth x = finiteBitSize x - countLeadingZeros x |
| 123 | + |
| 124 | +-- | Extract the id or the code for a BitCode element |
| 125 | +denormalize :: NBitCode -> BitCode |
| 126 | +denormalize (NBlock id bs) = let bs' = map denormalize bs |
| 127 | + ids = map idOrCode bs |
| 128 | + abbrevWidth = if ids == [] |
| 129 | + then 0 |
| 130 | + else max 2 (bitWidth (maximum ids)) |
| 131 | + in Block id abbrevWidth (map denormalize bs) |
| 132 | +denormalize (NRec c vs) = UnabbrevRecord (fromIntegral c) vs |
| 133 | + |
| 134 | +records :: (Enum a) => [NBitCode] -> [(a, [Val])] |
| 135 | +records bs = [(toEnum c, vs) | NRec c vs <- bs] |
| 136 | +blocks :: (Enum a) => [NBitCode] -> [(a,[NBitCode])] |
| 137 | +blocks bs = [(toEnum c, bs') | NBlock c bs' <- bs] |
| 138 | + |
| 139 | +lookupBlock :: (Enum a) => a -> [NBitCode] -> Maybe [NBitCode] |
| 140 | +lookupBlock e bs = lookup (fromEnum e) [(c,b) | NBlock c b <- bs] |
| 141 | + |
| 142 | +lookupRecord :: (Enum a) => a -> [NBitCode] -> Maybe [Val] |
| 143 | +lookupRecord e bs = lookup (fromEnum e) [(c,v) | NRec c v <- bs] |
| 144 | + |
| 145 | +-------------------------------------------------------------------------------- |
| 146 | +-- Turn things into Val's for use in records |
| 147 | +class ToVal a where |
| 148 | + toVal :: a -> [Val] |
| 149 | + |
| 150 | +instance {-# OVERLAPPABLE #-} (Enum a) => ToVal a where |
| 151 | + toVal = pure . fromIntegral . fromEnum |
| 152 | + |
| 153 | +instance {-# OVERLAPPING #-} (ToVal a) => ToVal [a] where |
| 154 | + toVal = concatMap toVal |
| 155 | + |
| 156 | +-------------------------------------------------------------------------------- |
| 157 | +-- NBitCode construction |
| 158 | +mkBlock :: (Enum a) => a -> [NBitCode] -> NBitCode |
| 159 | +mkBlock e = NBlock (fromEnum e) |
| 160 | + |
| 161 | +mkRec :: (Enum a, ToVal b) => a -> b -> NBitCode |
| 162 | +mkRec e = NRec (fromEnum e) . toVal |
| 163 | + |
| 164 | +mkEmptyRec :: (Enum a) => a -> NBitCode |
| 165 | +mkEmptyRec e = NRec (fromEnum e) [] |
0 commit comments