diff --git a/TODO b/TODO index e676d1d..a02c6c6 100644 --- a/TODO +++ b/TODO @@ -3,3 +3,16 @@ - Put symbol table in the reader monad and it to the stack or use the state monad for everything. - Add errors handling to the monad stack. + +# ELF generation +- Define SHF_ constants. +- Don't ignore relocations where the symbol is not defined in the symbol table. + Add it as an external symbol to the symbol table. +- Since every function adds a section header use a state monad + in the generator and put the headers into the state to reduce the number of + returned values in the tuples. +- Relocation section header relates to another section (e.g. .rel.text). The + index of that section should be passed together with collected relocations. +- symstrtab creates 3 section headers and does some math to calculate the + offsets and names. Introducing the state monad can help to get rid of magic + numbers. diff --git a/lib/Language/Elna/Architecture/RiscV.hs b/lib/Language/Elna/Architecture/RiscV.hs index f4c3887..9964911 100644 --- a/lib/Language/Elna/Architecture/RiscV.hs +++ b/lib/Language/Elna/Architecture/RiscV.hs @@ -1,5 +1,6 @@ module Language.Elna.Architecture.RiscV ( BaseOpcode(..) + , RelocationType(..) , Funct3(..) , Funct7(..) , Funct12(..) @@ -15,6 +16,7 @@ module Language.Elna.Architecture.RiscV import qualified Data.ByteString.Builder as ByteString.Builder import Data.Bits (Bits(..)) +import Data.Text (Text) import Data.Word (Word8, Word32) data XRegister @@ -137,8 +139,18 @@ data Type | U XRegister Word32 | J XRegister Word32 | Type XRegister Funct3 XRegister Funct12 -- Privileged. + deriving Eq -data Instruction = Instruction BaseOpcode Type +data RelocationType + = Lower12I XRegister Funct3 XRegister Text + | Lower12S Text Funct3 XRegister XRegister + | Higher20 XRegister Text -- Type U. + deriving Eq + +data Instruction + = Instruction BaseOpcode Type + | RelocatableInstruction BaseOpcode RelocationType + deriving Eq xRegister :: XRegister -> Word8 xRegister Zero = 0 @@ -285,8 +297,17 @@ type' (Type rd funct3' rs1 funct12') .|. (fromIntegral (xRegister rs1) `shiftL` 15) .|. (fromIntegral (funct12 funct12') `shiftL` 20); +relocationType :: RelocationType -> Word32 +relocationType (Lower12I rd funct3' rs1 _) = type' $ I rd funct3' rs1 0 +relocationType (Lower12S _ funct3' rs1 rs2) = type' $ S 0 funct3' rs1 rs2 +relocationType (Higher20 rd _) = type' $ U rd 0 + instruction :: Instruction -> ByteString.Builder.Builder -instruction (Instruction base instructionType) - = ByteString.Builder.word32LE - $ fromIntegral (baseOpcode base) - .|. type' instructionType +instruction = \case + (Instruction base instructionType) -> go base $ type' instructionType + (RelocatableInstruction base instructionType) -> go base $ relocationType instructionType + where + go base instructionType + = ByteString.Builder.word32LE + $ fromIntegral (baseOpcode base) + .|. instructionType diff --git a/lib/Language/Elna/PrinterWriter.hs b/lib/Language/Elna/PrinterWriter.hs index 38c3549..6fc10e4 100644 --- a/lib/Language/Elna/PrinterWriter.hs +++ b/lib/Language/Elna/PrinterWriter.hs @@ -4,6 +4,7 @@ module Language.Elna.PrinterWriter , riscv32Header ) where +import Data.Word (Word8) import Data.ByteString (ByteString) import qualified Data.ByteString as ByteString import qualified Data.ByteString.Builder as ByteString.Builder @@ -12,6 +13,7 @@ import Data.Vector (Vector) import qualified Data.Vector as Vector import Language.Elna.Object.Elf ( ByteOrder(..) + , Elf32_Addr , Elf32_Ehdr(..) , Elf32_Half , Elf32_Sym(..) @@ -25,15 +27,20 @@ import Language.Elna.Object.Elf , ElfSectionType(..) , ElfSymbolBinding(..) , ElfSymbolType(..) + , Elf32_Rel (..) , elf32Sym , elfHeaderSize , elfSectionsSize , stInfo + , rInfo + , elf32Rel ) import System.IO (Handle) import qualified Language.Elna.Architecture.RiscV as RiscV +import qualified Data.Text.Encoding as Text.Encoding data ElfHeaderResult a = ElfHeaderResult ByteString (Vector a) +data UnresolvedRelocation = UnresolvedRelocation ByteString Elf32_Addr Word8 riscv32Elf :: Vector RiscV.Instruction -> Handle -> IO (Vector Elf32_Shdr) riscv32Elf code objectHandle = @@ -50,6 +57,7 @@ riscv32Elf code objectHandle = , sh_addr = 0 } in text (ElfHeaderResult "\0" $ Vector.singleton zeroHeader) + >>= symstrtab >>= shstrtab >>= finalize where @@ -71,12 +79,59 @@ riscv32Elf code objectHandle = ByteString.hPut objectHandle stringTable pure $ ElfHeaderResult stringTable $ Vector.snoc headers nextHeader - strtab stringTable (ElfHeaderResult names headers) = do - let newHeader = Elf32_Shdr + takeStringZ stringTable Elf32_Sym{ st_name } + = ByteString.takeWhile (/= 0) + $ ByteString.drop (fromIntegral st_name) stringTable + resolveRelocation (ElfHeaderResult stringTable entries) unresolvedRelocation + | UnresolvedRelocation symbolName offset type' <- unresolvedRelocation + , Just entry <- Vector.findIndex ((== symbolName) . takeStringZ stringTable) entries = + Right $ Elf32_Rel + { r_offset = offset + , r_info = rInfo (fromIntegral entry) type' + } + | otherwise = Left unresolvedRelocation + symstrtab (symbols@(ElfHeaderResult stringTable entries), ElfHeaderResult names headers, relocations) = do + let encodedSymbols = LazyByteString.toStrict + $ ByteString.Builder.toLazyByteString + $ foldMap (elf32Sym LSB) entries + namesLength = fromIntegral $ ByteString.length names + symHeader = Elf32_Shdr + { sh_type = SHT_SYMTAB + , sh_size = fromIntegral $ ByteString.length encodedSymbols + , sh_offset = elfSectionsSize headers + , sh_name = namesLength + , sh_link = fromIntegral $ Vector.length headers + 2 + , sh_info = 1 + , sh_flags = 0 + , sh_entsize = 16 + , sh_addralign = 0 + , sh_addr = 0 + } + ByteString.hPut objectHandle encodedSymbols + let headers1 = Vector.snoc headers symHeader + let y = resolveRelocation symbols <$> relocations + encodedRelocations = LazyByteString.toStrict + $ ByteString.Builder.toLazyByteString + $ Vector.foldMap (either (const mempty) (elf32Rel LSB)) y + relHeader = Elf32_Shdr + { sh_type = SHT_REL + , sh_size = fromIntegral $ ByteString.length encodedRelocations + , sh_offset = elfSectionsSize headers1 + , sh_name = namesLength + 8 + , sh_link = fromIntegral $ Vector.length headers + , sh_info = 1 + , sh_flags = 0x40 -- SHF_INFO_LINK + , sh_entsize = 8 + , sh_addralign = 0 + , sh_addr = 0 + } + ByteString.hPut objectHandle encodedRelocations + let headers2 = Vector.snoc headers1 relHeader + let strHeader = Elf32_Shdr { sh_type = SHT_STRTAB , sh_size = fromIntegral $ ByteString.length stringTable - , sh_offset = elfSectionsSize headers - , sh_name = fromIntegral $ ByteString.length names + , sh_offset = elfSectionsSize headers2 + , sh_name = namesLength + 18 , sh_link = 0 , sh_info = 0 , sh_flags = 0 @@ -85,44 +140,25 @@ riscv32Elf code objectHandle = , sh_addr = 0 } ByteString.hPut objectHandle stringTable - pure $ ElfHeaderResult (names <> ".strtab\0") - $ Vector.snoc headers newHeader - symtab strtabIndex entries (ElfHeaderResult names headers) = do - let encoded = LazyByteString.toStrict - $ ByteString.Builder.toLazyByteString - $ foldMap (elf32Sym LSB) entries - newHeader = Elf32_Shdr - { sh_type = SHT_SYMTAB - , sh_size = fromIntegral $ ByteString.length encoded - , sh_offset = elfSectionsSize headers - , sh_name = fromIntegral $ ByteString.length names - , sh_link = strtabIndex - , sh_info = 1 - , sh_flags = 0 - , sh_entsize = 16 - , sh_addralign = 0 - , sh_addr = 0 - } - ByteString.hPut objectHandle encoded - pure $ ElfHeaderResult (names <> ".symtab\0") - $ Vector.snoc headers newHeader + pure $ ElfHeaderResult (names <> ".symtab\0.rel.text\0.strtab\0") + $ Vector.snoc headers2 strHeader text (ElfHeaderResult names headers) = do let textTabIndex = fromIntegral $ Vector.length headers - strtabIndex = fromIntegral $ textTabIndex + 2 - ElfHeaderResult stringTable entries <- symbolEntry textTabIndex code - $ ElfHeaderResult "\0" - $ Vector.singleton - $ Elf32_Sym - { st_value = 0 - , st_size = 0 - , st_shndx = 0 - , st_other = 0 - , st_name = 0 - , st_info = 0 - } + initialHeaders = ElfHeaderResult "\0" + $ Vector.singleton + $ Elf32_Sym + { st_value = 0 + , st_size = 0 + , st_shndx = 0 + , st_other = 0 + , st_name = 0 + , st_info = 0 + } + (symbolResult, size, relocations) <- symbolEntry textTabIndex code + (initialHeaders, 0, mempty) let newHeader = Elf32_Shdr { sh_type = SHT_PROGBITS - , sh_size = fromIntegral $ foldr ((+) . st_size) 0 entries + , sh_size = size , sh_offset = elfSectionsSize headers , sh_name = fromIntegral $ ByteString.length names , sh_link = 0 @@ -134,24 +170,47 @@ riscv32Elf code objectHandle = } newResult = ElfHeaderResult (names <> ".text\0") $ Vector.snoc headers newHeader - symtab strtabIndex entries newResult - >>= strtab stringTable - symbolEntry :: Elf32_Half -> Vector RiscV.Instruction -> ElfHeaderResult Elf32_Sym -> IO (ElfHeaderResult Elf32_Sym) - symbolEntry shndx instructions (ElfHeaderResult names entries) = do - let encoded = LazyByteString.toStrict - $ ByteString.Builder.toLazyByteString - $ foldMap RiscV.instruction instructions + pure (symbolResult, newResult, relocations) + symbolEntry + :: Elf32_Half + -> Vector RiscV.Instruction + -> (ElfHeaderResult Elf32_Sym, Elf32_Addr, Vector UnresolvedRelocation) + -> IO (ElfHeaderResult Elf32_Sym, Elf32_Addr, Vector UnresolvedRelocation) + symbolEntry shndx instructions (ElfHeaderResult names entries, offset, relocations) = do + let (encoded, size, updatedRelocations) = + Vector.foldl' encodeInstruction (mempty, offset, relocations) instructions newEntry = Elf32_Sym - { st_value = 0 - , st_size = fromIntegral $ ByteString.length encoded + { st_value = offset + , st_size = fromIntegral size , st_shndx = shndx , st_other = 0 , st_name = fromIntegral $ ByteString.length names , st_info = stInfo STB_GLOBAL STT_FUNC } - ByteString.hPut objectHandle encoded - pure $ ElfHeaderResult (names <> "_start\0") - $ Vector.snoc entries newEntry + ByteString.hPut objectHandle $ LazyByteString.toStrict encoded + let newResult = ElfHeaderResult (names <> "_start\0") + $ Vector.snoc entries newEntry + pure (newResult, size, updatedRelocations) + encodeInstruction (instructions, offset, relocations) instruction = + let unresolvedRelocation = case instruction of + RiscV.RelocatableInstruction _ instructionType + | RiscV.Higher20 _ symbolName <- instructionType + -> Just + $ UnresolvedRelocation (Text.Encoding.encodeUtf8 symbolName) offset 26 + | RiscV.Lower12I _ _ _ symbolName <- instructionType + -> Just + $ UnresolvedRelocation (Text.Encoding.encodeUtf8 symbolName) offset 27 + | RiscV.Lower12S symbolName _ _ _ <- instructionType + -> Just + $ UnresolvedRelocation (Text.Encoding.encodeUtf8 symbolName) offset 28 + RiscV.Instruction _ _ -> Nothing + encoded = ByteString.Builder.toLazyByteString + $ RiscV.instruction instruction + in + ( instructions <> encoded + , offset + fromIntegral (LazyByteString.length encoded) + , maybe relocations (Vector.snoc relocations) unresolvedRelocation + ) riscv32Header :: Elf32_Ehdr riscv32Header = Elf32_Ehdr