angerman
diff --git a/‎.gitignore‎
Lines changed: 19 additions & 0 deletions b/‎.gitignore‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 30 additions & 0 deletions b/‎LICENSE‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎Setup.hs‎
Lines changed: 2 additions & 0 deletions b/‎Setup.hs‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎data-bitcode.cabal‎
Lines changed: 38 additions & 0 deletions b/‎data-bitcode.cabal‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎src/Data/BitCode.hs‎
Lines changed: 165 additions & 0 deletions b/‎src/Data/BitCode.hs‎
Lines changed: 165 additions & 0 deletions
diff --git a/‎src/Data/BitCode/AbbrevOpEncoding.hs‎
Lines changed: 22 additions & 0 deletions b/‎src/Data/BitCode/AbbrevOpEncoding.hs‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎src/Data/BitCode/Abbreviation.hs‎
Lines changed: 40 additions & 0 deletions b/‎src/Data/BitCode/Abbreviation.hs‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎src/Data/BitCode/Codes/BlockInfo.hs‎
Lines changed: 17 additions & 0 deletions b/‎src/Data/BitCode/Codes/BlockInfo.hs‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎src/Data/BitCode/IDs/FixedAbbrev.hs‎
Lines changed: 20 additions & 0 deletions b/‎src/Data/BitCode/IDs/FixedAbbrev.hs‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎src/Data/BitCode/IDs/StandardBlock.hs‎
Lines changed: 13 additions & 0 deletions b/‎src/Data/BitCode/IDs/StandardBlock.hs‎
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,19 @@
+dist
+dist-*
+cabal-dev
+*.o
+*.hi
+*.chi
+*.chs.h
+*.dyn_o
+*.dyn_hi
+.hpc
+.hsenv
+.cabal-sandbox/
+cabal.sandbox.config
+*.prof
+*.aux
+*.hp
+*.eventlog
+.stack-work/
+cabal.project.local
@@ -0,0 +1,30 @@
+Copyright Moritz Angermann (c) 2016
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of Moritz Angermann nor the names of other
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,2 @@
+import Distribution.Simple
+-- | Entry point of the Cabal build script: delegate to 'defaultMain'.
+main :: IO ()
+main = defaultMain
@@ -0,0 +1,38 @@
+name:                data-bitcode
+version:             0.1.0.0
+synopsis:            bitcode reader and writer.
+description:         Please see README.md
+homepage:            https://github.com/lichtzwerge/data-bitcode#readme
+license:             BSD3
+license-file:        LICENSE
+author:              Moritz Angermann
+maintainer:          moritz@lichtzwerge.de
+copyright:           lichtzwerge GmbH
+category:            Code Generation
+build-type:          Simple
+-- extra-source-files:
+cabal-version:       >=1.10
+
+library
+  hs-source-dirs:      src
+  -- Every module under src/ has to be listed (here or in other-modules),
+  -- otherwise it is left out of the built library and of the sdist.
+  exposed-modules:     Data.BitCode
+                     , Data.BitCode.AbbrevOpEncoding
+                     , Data.BitCode.Abbreviation
+                     , Data.BitCode.Codes.BlockInfo
+                     , Data.BitCode.IDs.FixedAbbrev
+                     , Data.BitCode.IDs.StandardBlock
+                     , Data.BitCode.Reader
+                     , Data.BitCode.Reader.Monad
+                     , Data.BitCode.Reader.Combinators
+                     , Data.BitCode.Reader.FromBits
+                     , Data.BitCode.Writer
+                     , Data.BitCode.Writer.Monad
+                     , Data.BitCode.Writer.Combinators
+                     , Data.BitCode.Writer.ToBits
+  -- base 4.8 is the first release whose Data.Bits exports countLeadingZeros,
+  -- which Data.BitCode.bitWidth relies on.
+  build-depends:       base >= 4.8 && < 5
+                     , bytestring >= 0.10
+                     , base16-bytestring
+  default-language:    Haskell2010
+
+source-repository head
+  type:     git
+  location: https://github.com/lichtzwerge/data-bitcode
@@ -0,0 +1,165 @@
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE RankNTypes #-}
+module Data.BitCode where
+
+import Data.Word  (Word32, Word64)
+import Data.Maybe (catMaybes)
+import Data.Bits (FiniteBits, finiteBitSize, countLeadingZeros)
+
+
+--- Bit Codes ------------------------------------------------------------------
+-- see BitCodes.h (e.g. http://llvm.org/docs/doxygen/html/BitCodes_8h_source.html)
+-- * Bits
+
+-- | A single bit.
+type Bit = Bool
+-- | A sequence of bits.
+type Bits = [Bool]
+
+-- * BitCode
+
+-- | Numeric identifier of a block (see 'blockId').
+type BlockId = Int
+-- | Numeric code: the abbreviation code of an 'AbbrevRecord' and the record
+-- code of an 'NRec'.
+type Code    = Int
+
+-- * Source location
+
+-- | A position expressed as a pair of (words, bits).
+type Loc = (Int, Int) -- Words, Bits
+
+-- | Bit Code data values can be 64bit wide.
+type Val = Word64
+-- | The encodings an abbreviation operand can use. The code numbers in the
+-- constructor comments are the values written in the 3-bit encoding field
+-- (see 'Op').
+data EncVal = Fixed !Val   -- code 1 fixed value
+            | VBR   !Val   -- code 2 vbr value
+            | Arr          -- code 3 Array -- the documentation says an Array needs to be followed by an op.
+                                           -- when reading an array, the first is a vbr6 field indicating the length.
+            | Char6        -- code 4 6-bit char
+            | Blob         -- code 5 note: the value for this is: [vbr6:val,pad32bit,8bit array,pad32bit]
+            deriving Show
+
+-- | Operands of an abbreviation definition (see 'defRecordOps'). Each operand
+-- is encoded as either a literal (1) or an encoded value (0).
+data Op = Lit !Val          -- [1,vbr8:val]
+        | Enc !EncVal       -- [0,f3:enc(,vbr5:val)?], vbr5 value only if given.
+        deriving Show
+
+-- | The fields contained in an abbreviated record can be one of the following.
+data Field = Vbr !Int !Val    -- presumably (chunk width, value) -- TODO confirm against the reader
+           | Fix !Int !Val    -- presumably (bit width, value) -- TODO confirm against the reader
+           | Len !Val         -- Array length; treated as a control field and dropped by 'normalize'.
+           | Chr !Char
+           | W64 !Val         -- Literal values. These are not being emitted.
+                              -- WARN: this is somewhat of a hack to make parsing followed by writing
+                              --       behave like id, without having to track abbreviations in the
+                              --       writer and ensure the abbreviated record matches the def abbrev.
+                              --       This could be considered a TODO, as it would be an improvement
+                              --       to enforce that the AbbrevRecord matches the actual
+                              --       DefAbbrev.
+           deriving Show
+
+-- | Bit Code Data consists of a series of blocks. Their interpretation is dependent
+-- on the container they are in.  The top level blocks are emitted with an abbreviation
+-- width of 2. This allows the following four block types, which are sufficient to
+-- define any other set of blocks.
+data BitCode
+  -- | Combine ENTER_SUBBLOCK(1) with END_BLOCK(0)
+  -- Layout: [1,vbr8:id,vbr4:newabbrevlen,<align32bits>,32bit:blocklen,<blocklen * words>,0,<align32bits>]
+  -- 1 and 0 are vbr(current abbrev len); starting with 2 at the top level.
+  = Block { blockId        :: !BlockId   -- ^ id
+          , blockAbbrevLen :: !Int       -- ^ abbrev len
+          , blockBody      :: ![BitCode] -- ^ body
+          }
+  -- | An abbreviation definition record. Layout: [2,vbr5:#ops,op0,op1,...]
+  | DefAbbrevRecord { defRecordOps :: ![Op]
+                    }
+  -- | An unabbreviated record. Layout: [3,vbr6:code,vbr6:#ops,vbr6:op0,...]
+  | UnabbrevRecord { uRecordCode :: !Val  -- ^ code         encoded vbr6
+                   , uRecordOps :: ![Val] -- ^ generic ops, encoded vbr6
+                   }
+  -- | An abbreviated record. Layout [<abbrevcode>, fields, ...]
+  | AbbrevRecord { aRecordCode   :: !Code
+                 , aRecordFields :: ![Field]
+                 }
+  -- | Any element annotated with a pair of source locations (presumably the
+  -- start and end position -- confirm against the reader). 'normalize' drops
+  -- this wrapper.
+  | Located { srcLoc :: (Loc, Loc), unLoc :: !BitCode }
+  deriving Show
+
+-- | BitCode contains some additional control information,
+-- like abbreviation records, or the BLOCKINFO block, which
+-- assist in decoding, but provide no information after
+-- parsing the bitcode. Normalized bitcode is a simpler
+-- structure consisting of only Blocks and Records.
+--
+-- Note: Normalized BitCode will erase location information.
+data NBitCode
+  -- | A block: its id and the normalized elements it contains.
+  = NBlock !BlockId ![NBitCode]
+  -- | A record: its code and its operand values.
+  | NRec   !Code    ![Val]
+  deriving Show
+
+-- | Extract the block id of an 'NBlock' or the record code of an 'NRec'.
+idOrCode :: NBitCode -> Int
+idOrCode element = case element of
+  NBlock blockIdent _ -> blockIdent
+  NRec recordCode _   -> recordCode
+
+-- | Strip decoding-only control information from a 'BitCode' element.
+--
+-- Returns 'Nothing' for elements that carry no information after parsing:
+-- blocks with id 0 and abbreviation definitions. 'Located' wrappers are
+-- dropped and the wrapped element is normalized instead. Records of both
+-- kinds become an 'NRec'.
+--
+-- An abbreviated record whose fields contain no record code (no field other
+-- than array lengths) is malformed; forcing the resulting record raises an
+-- error that names the problem.
+normalize :: BitCode -> Maybe NBitCode
+normalize (Block 0 _ _) = Nothing
+normalize (Block id _ b) = Just (NBlock id (catMaybes . map normalize $ b))
+normalize (DefAbbrevRecord{}) = Nothing
+normalize (Located _ bs) = normalize bs
+normalize (UnabbrevRecord c vs) = Just (NRec (fromIntegral c) vs)
+-- The case sits inside 'Just' so that the result is produced lazily; a
+-- malformed record only fails once the 'NRec' itself is demanded.
+normalize (AbbrevRecord _ flds) = Just $ case map toVal (filter (not . isControl) flds) of
+    (code:ops) -> NRec (fromIntegral code) ops
+    []         -> error "normalize: abbreviated record has no record code field"
+  where
+    -- As abbreviated records can contain arrays, and
+    -- arrays have their length encoded in the field,
+    -- ops is anything but array length.
+    --
+    -- NOTE: This way we don't have to go back to the
+    --       abbrev definition to figure out which
+    --       ops are control ops and which are not.
+    isControl :: Field -> Bool
+    isControl (Len _) = True
+    isControl _       = False
+
+    -- Numeric value carried by a non-control field.
+    toVal :: Field -> Val
+    toVal (Vbr _ n) = n
+    toVal (Fix _ n) = n
+    toVal (Len _)   = error "Len is a control op"
+    toVal (Chr c)   = fromIntegral . fromEnum $ c
+    toVal (W64 v)   = v
+
+-- | Number of bits needed to represent the value: the size of the type in
+-- bits minus the number of leading zero bits. Zero therefore has width 0.
+bitWidth :: (FiniteBits a) => a -> Int
+bitWidth x = totalBits - unusedBits
+  where
+    totalBits  = finiteBitSize x
+    unusedBits = countLeadingZeros x
+
+-- | Turn normalized bitcode back into plain 'BitCode'.
+--
+-- Records become unabbreviated records. Blocks are rebuilt recursively; their
+-- abbreviation width is 0 when the block has no children, and otherwise the
+-- bit width of the largest child id or code, but at least 2.
+--
+-- NOTE(review): a width of 0 for an empty block looks suspicious, since the
+-- block terminator is itself written with the block's abbreviation width --
+-- confirm against the writer.
+denormalize :: NBitCode -> BitCode
+denormalize (NBlock id bs) = Block id abbrevWidth (map denormalize bs)
+  where
+    childIds = map idOrCode bs
+    abbrevWidth
+      | null childIds = 0
+      | otherwise     = max 2 (bitWidth (maximum childIds))
+denormalize (NRec c vs) = UnabbrevRecord (fromIntegral c) vs
+
+-- | All records among the given elements as (code, operands) pairs, with the
+-- numeric code converted through 'toEnum'. Blocks are skipped.
+records :: (Enum a) => [NBitCode] -> [(a, [Val])]
+records = concatMap pick
+  where
+    pick (NRec c vs) = [(toEnum c, vs)]
+    pick _           = []
+-- | All blocks among the given elements as (id, body) pairs, with the numeric
+-- id converted through 'toEnum'. Records are skipped.
+blocks  :: (Enum a) => [NBitCode] -> [(a,[NBitCode])]
+blocks = concatMap pick
+  where
+    pick (NBlock c body) = [(toEnum c, body)]
+    pick _               = []
+
+-- | Body of the first block whose id equals @fromEnum e@, if there is one.
+lookupBlock :: (Enum a) => a -> [NBitCode] -> Maybe [NBitCode]
+lookupBlock e bs = lookup wanted candidates
+  where
+    wanted     = fromEnum e
+    candidates = [(c, body) | NBlock c body <- bs]
+
+-- | Operands of the first record whose code equals @fromEnum e@, if there is one.
+lookupRecord :: (Enum a) => a -> [NBitCode] -> Maybe [Val]
+lookupRecord e bs = lookup wanted candidates
+  where
+    wanted     = fromEnum e
+    candidates = [(c, vals) | NRec c vals <- bs]
+
+--------------------------------------------------------------------------------
+-- Turn things into Val's for use in records
+-- | Conversion of a value into the list of 'Val's used as record operands.
+class ToVal a where
+  toVal :: a -> [Val]
+
+-- | Fallback instance: any 'Enum' becomes a single 'Val' via 'fromEnum'.
+-- It is marked OVERLAPPABLE so that a more specific instance (such as the
+-- list instance in this module) is preferred when both match.
+instance {-# OVERLAPPABLE #-} (Enum a) => ToVal a where
+  toVal = pure . fromIntegral . fromEnum
+
+-- | Lists are converted element by element and the results concatenated.
+instance {-# OVERLAPPING #-} (ToVal a) => ToVal [a] where
+  toVal = concatMap toVal
+
+--------------------------------------------------------------------------------
+-- NBitCode construction
+-- | Build a normalized block whose id is @fromEnum e@.
+mkBlock :: (Enum a) => a -> [NBitCode] -> NBitCode
+mkBlock e body = NBlock (fromEnum e) body
+
+-- | Build a normalized record with code @fromEnum e@ whose operands are the
+-- 'toVal' conversion of the payload.
+mkRec :: (Enum a, ToVal b) => a -> b -> NBitCode
+mkRec e payload = NRec (fromEnum e) (toVal payload)
+
+-- | Build a normalized record with code @fromEnum e@ and no operands.
+mkEmptyRec :: (Enum a) => a -> NBitCode
+mkEmptyRec e = NRec recordCode []
+  where
+    recordCode = fromEnum e
@@ -0,0 +1,22 @@
+module Data.BitCode.AbbrevOpEncoding where
+
+-- | BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
+-- This is actually a union of two different things:
+--   1. It could be a literal integer value ("the operand is always 17").
+--   2. It could be an encoding specification ("this operand encoded like so").
+--
+-- The encodings are numbered by the derived 'Enum' instance, in the order
+-- of the constructors below, starting at 0.
+data AbbrevOpEncoding
+  -- | Placeholder for 0. Do not use.
+  = Unused
+  -- | A fixed width field, Val specifies number of bits.
+  | Fixed
+  -- | A VBR field where Val specifies the width of each chunk.
+  | VBR
+  -- | A sequence of fields, next field specifies elt encoding.
+  | Array
+  -- | A 6-bit fixed field which maps to [a-zA-Z0-9._].
+  | Char6
+  -- | 32-bit aligned array of 8-bit characters.
+  | Blob
+  deriving (Show, Enum)
@@ -0,0 +1,40 @@
+module Data.BitCode.Abbreviation
+  ( addAbbrev, lookupAbbrev
+  , addGlobalAbbrev, lookupGlobalAbbrev
+  , AbbrevMap
+  , GlobalAbbrevMap
+  )
+where
+
+import Data.BitCode
+import Data.Maybe (fromMaybe)
+
+-- | The abbreviations known in one scope: an association list from
+-- abbreviation code to the element that defines it.
+newtype AbbrevMap = AbbrevMap [(Code, BitCode)] deriving Show
+-- | Abbreviation maps keyed by the id of the block they apply to.
+newtype GlobalAbbrevMap = GlobalAbbrevMap [(BlockId, AbbrevMap)] deriving Show
+
+-- | Abbreviation maps combine by concatenating their association lists; the
+-- empty list is the identity.
+--
+-- NOTE(review): newer versions of base require a 'Semigroup' instance for
+-- every 'Monoid' -- confirm which GHC versions this package targets.
+instance Monoid AbbrevMap where
+  mempty = AbbrevMap []
+  mappend (AbbrevMap lhs) (AbbrevMap rhs) = AbbrevMap (lhs ++ rhs)
+
+-- | Global abbreviation maps combine by concatenating their association
+-- lists; the empty list is the identity.
+--
+-- NOTE(review): newer versions of base require a 'Semigroup' instance for
+-- every 'Monoid' -- confirm which GHC versions this package targets.
+instance Monoid GlobalAbbrevMap where
+  mempty = GlobalAbbrevMap []
+  mappend (GlobalAbbrevMap lhs) (GlobalAbbrevMap rhs) = GlobalAbbrevMap (lhs ++ rhs)
+
+-- | Abbreviations registered for the given block id, or the empty map when
+-- the block id has no entry.
+lookupGlobalAbbrev :: GlobalAbbrevMap -> BlockId -> AbbrevMap
+lookupGlobalAbbrev (GlobalAbbrevMap entries) bid =
+  case lookup bid entries of
+    Just abbrevs -> abbrevs
+    Nothing      -> mempty
+
+-- | Register an abbreviation definition for the given block id.
+--
+-- The definition is added (via 'addAbbrev') to the first entry for that
+-- block id. When the block id has no entry yet, a new one is appended at
+-- the end of the list.
+addGlobalAbbrev :: GlobalAbbrevMap -> BlockId -> BitCode -> GlobalAbbrevMap
+addGlobalAbbrev (GlobalAbbrevMap g) blockId block = GlobalAbbrevMap (go g)
+  where
+    go :: [(BlockId, AbbrevMap)] -> [(BlockId, AbbrevMap)]
+    -- No entry for this block id: start a fresh abbreviation map.
+    go [] = [(blockId, addAbbrev mempty block)]
+    go (entry@(bid, abbrevs) : rest)
+      -- Found the entry: extend it and keep the remainder untouched.
+      -- Recursing into the remainder would eventually reach the empty-list
+      -- case and append a duplicate entry on every call.
+      | bid == blockId = (bid, addAbbrev abbrevs block) : rest
+      | otherwise      = entry : go rest
+
+-- | Definition registered under the given abbreviation code, if any.
+lookupAbbrev :: AbbrevMap -> Code -> Maybe BitCode
+lookupAbbrev (AbbrevMap entries) code = lookup code entries
+
+-- | Register an abbreviation definition under the next free abbreviation code.
+--
+-- The new code is one more than the largest code in the map, and never less
+-- than 4, so the codes 0-3 are never handed out.
+--
+-- Only defined for 'DefAbbrevRecord'; any other element is a pattern match
+-- failure.
+addAbbrev :: AbbrevMap -> BitCode -> AbbrevMap
+addAbbrev (AbbrevMap m) r@DefAbbrevRecord{} = AbbrevMap ((nextId, r) : m)
+  where
+    highestUsed = maximum (3 : map fst m)
+    nextId      = highestUsed + 1
+
@@ -0,0 +1,17 @@
+module Data.BitCode.Codes.BlockInfo where
+
+-- | BlockInfoCodes - The blockinfo block contains metadata about user-defined
+-- blocks.
+--
+-- DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd
+-- block, instead of the BlockInfo block.
+--
+-- The numeric codes come from the derived 'Enum' instance, so the order of
+-- the constructors is significant (UNDEFINED = 0 ... SETRECORDNAME = 3).
+data BlockInfo
+  -- | Placeholder for 0. Do not use.
+  = BLOCKINFO_CODE_UNDEFINED
+  -- | SETBID: [blockid#]
+  | BLOCKINFO_CODE_SETBID
+  -- | BLOCKNAME: [name]
+  | BLOCKINFO_CODE_BLOCKNAME
+  -- | SETRECORDNAME: [id, name]
+  | BLOCKINFO_CODE_SETRECORDNAME
+  deriving (Show, Enum)
@@ -0,0 +1,20 @@
+module Data.BitCode.IDs.FixedAbbrev where
+
+-- | The standard abbrev namespace always has a way to exit a block, enter a
+-- nested block, define abbrevs, and define an unabbreviated record.
+--
+-- The numeric ids come from the derived 'Enum' instance, so the order of the
+-- constructors is significant (END_BLOCK = 0 ... FIRST_APPLICATION_ABBREV = 4).
+data FixedAbbrev
+  -- | Must be zero to guarantee termination for broken bitcode.
+  = END_BLOCK
+  -- | Enters a nested block.
+  | ENTER_SUBBLOCK
+  -- | DEFINE_ABBREV - Defines an abbrev for the current block.  It consists
+  -- of a vbr5 for # operand infos.  Each operand info is emitted with a
+  -- single bit to indicate if it is a literal encoding.  If so, the value is
+  -- emitted with a vbr8.  If not, the encoding is emitted as 3 bits followed
+  -- by the info value as a vbr5 if needed.
+  | DEFINE_ABBREV
+  -- | UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by
+  -- a vbr6 for the # operands, followed by vbr6's for each operand.
+  | UNABBREV_RECORD
+  -- | This is not a code, this is a marker for the first abbrev assignment.
+  | FIRST_APPLICATION_ABBREV
+  deriving (Show, Enum)
@@ -0,0 +1,13 @@
+module Data.BitCode.IDs.StandardBlock where
+-- | StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
+-- block, which contains metadata about other blocks in the file.
+--
+-- The numeric block ids come from the derived 'Enum' instance, so the order
+-- of the constructors is significant.
+data StandardBlock
+  -- | BLOCKINFO_BLOCK (0) is used to define metadata about blocks, for example,
+  -- standard abbrevs that should be available to all blocks of a specified
+  -- ID.
+  = BLOCKINFO
+  -- | Block IDs 1-7 are reserved for future expansion.
+  | RESERVED_1 | RESERVED_2 | RESERVED_3 | RESERVED_4 | RESERVED_5 | RESERVED_6 | RESERVED_7
+  -- | This is the marker for the first application block id (8).
+  | FIRST_APPLICATION_BLOCKID
+  deriving (Show, Enum)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+import Distribution.Simple`
	`2`	`+main = defaultMain`