{-# LANGUAGE NoMonomorphismRestriction, FlexibleInstances #-} {- Pretty printer for a subset of the Term AST into C, using Language.C Note that C syntax is quite complex and to be honest, I don't understand all the details of how it's encoded in Language.C My approach has been to use the c2ast function to generate the Language.C.Syntax.AST structure frome a simple C example, and use this to figure out in what order to assemble the different AST constructors. The way the code below is factored is mostly ad-hoc using "pragmatic refactoring" (== I see a pattern but I have no idea what name to stick to it. ) -} module PrettyC ( termFunction, termTopdef, code ) where import qualified Data.ByteString.Char8 -- cabal install pretty-show import qualified Text.Show.Pretty -- cabal install language-c import Language.C import Language.C.Syntax.AST import Language.C.Data.Position import Language.C.Pretty -- pretty-show import Language.C.Data.Ident import Term import Type import qualified Control.Monad.Identity import Control.Monad.Writer import Data.List import qualified Data.Map import Control.Monad.State {- 1. HIGH LEVEL Term -> AST compilation -} {- Compilation needs 3 levels: * Toplevel function definitions. These use Lambda but are compiled differently than internal function definitions: C functions vs. C labels. * Nested expressions to C statement lists (CCompoundBlockItem a) which is a list of variable declarations terminated by a function call or return statement. * Expressions as they occur in variable initializers and function calls / built in ops. -} -- Dictionary for toplevel C forms. type Dict = Data.Map.Map TypeName (CExternalDeclaration ()) type MDict = State Dict instance Ord Type where compare t1 t2 = compare (typeHash t1) (typeHash t2) -- Save only keeps last written. save k v = do dict <- get put $ Data.Map.insert k v dict -- Save struct definitions of AStruct tags that appear in function -- arguments, so canonical struct names can be used in the function -- bodies. typeDef var@(Var (Type (ATree sub) _) _) = save baseTyp decl where baseTyp = Type (ATree sub) 0 decl = CDeclExt (CDecl [(cType soNamedSub baseTyp)] [] ()) typeDef var = return () termTopdef form = CTranslUnit decls () where decls = types ++ funs types = Data.Map.elems typesMap funs = Data.Map.elems funsMap (typesMap, funsMap) = Data.Map.partitionWithKey isStruct unsorted isStruct (Type (ATree _) _) _ = True isStruct _ _ = False ((), unsorted) = runState (tl form) Data.Map.empty dictSize = do dict <- get return $ Data.Map.size dict tl td@(Topdef funVar (Lambda var body)) = do vars <- return $ varList var sequence (map typeDef vars) -- Dict size can provide a unique identifier for each toplevel -- function definition. n <- dictSize stmt <- termStatement body save (Type (AType n) 0) -- needs unique type tag (CFDefExt (cFun funVar (map (cVarDecl soNamedNoSub Nothing) vars) (cBlock stmt))) tl (Begin t1 t2) = do {tl t1; tl t2} tl Void = return () -- Why is this necessary? tl form = error $ "termTopdef: " ++ show form -- Convenient shortcut for Lambda Term. termFunction var term = termTopdef (Topdef var term) termStatement = stm where funArgName funName n = funName ++ "_" ++ show n funArgVar funName n = Var undefined (funArgName funName n) -- Compile a function expression to 2 lists of statements: -- variable definitions and function body. funCompile (Var _ funName, (Lambda privVar funBody)) = let privVars = varList privVar -- Variables need 2 names: call sites don't know internal so -- those will use position encoding based on function name. -- This indirection also solves the "swapped asignment" problem. pubVars = map pubVar $ zip privVars [0..] pubVar ((Var (Type t o) varName),n) = Var (Type t o) (funArgName funName n) pubDefs = map cVar pubVars privDefs = zipWith privDef privVars pubVars privDef priv pub = cVarInitOne priv (ex $ Ref $ pub) in do body <- stm funBody funDef <- return $ [cNamedBlock funName $ concat [privDefs, body]] return $ (pubDefs, funDef) -- Tuple initialization. Convert Term data structure to initializer -- expression. initTree t = tree t where list ts = cInitList (map tree ts) tree (Atom t) = cInitList [CInitExpr (ex t) ()] tree (Cons t1 t2) = list [t1, t2] tree Nil = list [] ex = termExpression -- Non-monad (non-recursive) statements st (Ret e) = [cRet $ ex e] st e@(Set _ _ _) = [cExprStat (ex e)] -- Function application. These are tail calls only, and are -- implemented through argument variable assignment followed by -- goto. -- Application: assign args + goto. st (App (Ref (Var _ funName)) aTerm) = concat [assignArgs, [goto]] where args = termList aTerm goto = cGoto funName assignArgs = f 0 args where f _ [] = [] f n (a:as) = (assignArg n a) : (f (n + 1) as) assignArg n src = cAssign (ex $ Ref $ funArgVar funName n) (ex src) -- Non-handled cases go here. st e = error $ "statement: " ++ show e -- Monadic statement generation. This is for parts that need to -- propagate the toplevel dictionary, i.e. for type declarations. stm (If ec ty tn) = do yes <- stm ty no <- stm tn return $ [cIf (ex ec) yes no] -- Binding forms: composite and atomic. The 'init' argument is a -- Cons/Atom data structure. stm (Let var (Pack tree) body) = do typeDef var rest <- stm body return $ cVarInit var (initTree tree) : rest stm (Let var init body) = do rest <- stm body return $ cVarInitOne var (ex init) : rest stm (Begin s1 s2) = do sts1 <- stm s1 sts2 <- stm s2 return $ sts1 ++ sts2 -- Structure unpack. Note that vartree and termtree need to be -- structurally matched. This is guaranteed by the Unpack compiler -- in Code.hs stm (Unpack vartree termtree body) = let -- Flatten vars = varList vartree terms = termList termtree -- Create variable bindings bindings = zipWith bind vars terms bind var term = cVarInitOne var (ex term) in do rest <- stm body return $ bindings ++ rest -- Nested recursive function definition. This creates a block with: -- * function argument variables -- * nested body's block -- * labeled block for each function body. stm (LetRec defs nestBody) = do -- Compile args and bodies for each function separately. abs <- sequence $ map funCompile defs funArgs <- return $ concat $ map fst abs funBodies <- return $ concat $ map snd abs body <- stm nestBody return $ [cBlockStmt $ concat [funArgs, [cBlockStmt $ body], funBodies]] where stm s = return $ st s termExpression = ex where -- Array and tuple indexing. index a i = CIndex (ex a) (ex i) () tuple v i = cMem (ex v) i -- Generic binary operations binop (Op _ _ [a,b]) op = CBinary op (ex a) (ex b) () -- Expressions ex (Lit t v) = CConst $ cnst t v -- Deconstruction - struct dereference ex (Ref (Var _ varName)) = cRef varName ex (Car t) = tuple t 0 ex (Cdr t) = tuple t 1 ex (Unatom t) = tuple t 0 ex e@(Op _ "add" _) = binop e CAddOp ex e@(Op _ "lt" _) = binop e CLeOp ex (Op _ opName args) = CCall (cRef opName) (map ex args) () ex (Get a i) = index a i ex (Set a i e) = cExAssign (index a i) (ex e) ex e = error $ "expression: " ++ show e {- 2. LOW LEVEL AST TOOLS -} -- These can be used to fill in dummy Pos and NodeInfo nodes. dummyPos = initPos "main.c" dummyNodeInfo = OnlyPos dummyPos $ (dummyPos, 0) -- The AST is quite verbose (C really isn't that simple!) so provide -- some simplified constructors that fill in unused features. ident name = Ident name 0 dummyNodeInfo -- First param of cType' and cTupleTupe' is a configuration option -- that determines in what detail structured types should be rendered: data StructOpts = StructOpts { soNamed :: Bool, -- is this struct named soSub :: Bool -- included substructure or just name } -- Name struct and define substructure. soNamedSub = StructOpts True True soNamedNoSub = StructOpts True False cType _ (Type AVoid 0) = CTypeSpec (CVoidType ()) cType _ (Type AInt 0) = CTypeSpec (CIntType ()) cType _ (Type ABool 0) = CTypeSpec (CBoolType ()) cType _ (Type AFloat 0) = CTypeSpec (CFloatType ()) -- Atoms and Cons cells are implemented in terms of generic tuples. -- We need singleton structs to make overall structuring consistent, -- distinguishing atomic types from singleton lists. cType opts (Type (ATree (ANil)) 0) = cTuple opts [] cType opts (Type (ATree (AAtom t)) 0) = cTuple opts [t] cType opts (Type (ATree (ACons t0 t1)) 0) = cTuple opts [t0, t1] -- Generic struct cTuple opts@(StructOpts named sub) ts = typ where -- When recursing, always turn off naming for substructure. optsDown = StructOpts False sub typ = cTupleType opts vs vsub where vs = zipWith varMember ts [0..] vsub = case soSub opts of False -> Nothing True -> mems vs mems vars = Just $ map (\var -> cVarDecl optsDown Nothing var) vars cVarDecl opts init (Var (Type typeName order) varName) = CDecl (concat [qual init, [cType opts (Type typeName 0)]]) -- base type only! [(Just (CDeclr (Just $ ident varName) (map (\_ -> CPtrDeclr [] ()) [1..order]) -- type order goes here Nothing [] ()), init, Nothing)] () where -- Non-initialized variables are const. qual Nothing = [] qual _ = [CTypeQual (CConstQual ())] cStructType name init = CTypeSpec (CSUType (CStruct CStructTag name -- (Just $ ident name) init [] -- CAttribute a ()) ()) cStructMembers opts vars = Just $ map (\var -> cVarDecl opts Nothing var) vars -- A tuple declaration is a struct with a canonical name. This can -- lift the burden of having to explicitly define structure types when -- the Haskell side just uses tuples. cTupleType opts vars mems = cStructType canonicalName mems where -- Naming needs to be suppressed when defining recursive structures. canonicalName = case soNamed opts of True -> Just $ ident $ "tuple_" ++ (concat $ map shortTag vars) False -> Nothing -- Provide a unique struct name based on legal C characters. shortTag (Var typ _) = st typ where -- ot order = concat $ map (\_ -> "x") order st (Type b o) = bt b ++ ot o ot order = show $ order bt AFloat = "f" bt AInt = "i" bt ABool = "b" bt (ATree t) = tt t tt ANil = "" tt (AAtom t1) = st t1 tt (ACons t1 t2) = "_L_" ++ (st t1) ++ "_" ++ (st t2) ++ "_R_" cTupleDecl opts vars = cTupleType opts vars (cStructMembers opts vars) -- Constants cnst (Type AInt _) i = CIntConst ci () where (Right ci) = readCInteger DecRepr i cnst (Type AFloat _) f = CFloatConst cf () where cf = readCFloat f -- CCompoundBlockItem variable declaration without init cVar var = CBlockDecl $ (cVarDecl soNamedNoSub Nothing var) -- .. with AST init expressions cVarInit var init = CBlockDecl $ (cVarDecl soNamedNoSub (Just $ init) var) -- .. with 1 init expression. cVarInitOne var expr = cVarInit var $ CInitExpr expr () -- Represent list of CInitExpr | CInitList as CInitList cInitList is = CInitList (map (\i -> ([], i)) is) () cFun (Var funRetType funName) args body = CFunDef [cType soNamedNoSub funRetType] (CDeclr (Just $ ident funName) [CFunDeclr (Right (args, False)) [] ()] Nothing [] ()) [] body () cBlock ss = CCompound [] ss () cBlockStmt sts = CBlockStmt $ cBlock sts cRet t = CBlockStmt $ CReturn (Just t) () cGoto labelName = CBlockStmt $ CGoto (ident labelName) () cNamedBlock name sts = CBlockStmt $ cLabel name (cBlock sts) cExprStat ex = CBlockStmt $ CExpr (Just ex) () cExAssign dst src = CAssign CAssignOp dst src () cAssign dst src = cExprStat $ cExAssign dst src cLabel name statement = CLabel (ident name) statement [] () cIf ec sy sn = CBlockStmt $ CIf ec (cBlock sy) (Just $ cBlock sn) () -- CUnary CUnaryOp (CExpression a) a cDeref ex = CUnary CIndOp ex () -- Variable reference expression. cRef refName = CVar (ident refName) () -- Numeric offset structure indexing. cMem cRef n = CMember cRef (cMemIdent n) False () memName n = "m" ++ show n varMember t n = Var t $ memName n cMemIdent n = ident $ memName n {- 3. INTERACTIVE TOOLS -} -- Pretty-printing of AST rep. This strips out the NodeInfo because -- it's not supported by pretty-show (not a derived Show instance) and -- would be too verbose anyway. ppAst = Text.Show.Pretty.ppShow . (fmap $ \_ -> ()) ast = putStrLn . ppAst -- Printing C code we need a NodeInfo instance, so we make one up. code = pretty . (fmap $ \x -> dummyNodeInfo) -- To investigate the Language.C AST structure, it's simpler to just -- parse some C code and print out the AST than to read the docs. parse c = ast where (Right ast) = parseC (Data.ByteString.Char8.pack c) $ dummyPos c2ast = ast . parse -- a1 = parse "int fun(void *state, void **ins, void **outs, int n) { }" -- ast a1 -- code a1 -- c2ast "int fun(void *state, void **ins, void **outs, int n) { }" -- c2ast "fun() { return 0; }" -- code $ decl 3 "foo" -- a2 = parse "struct foo {int a; float b;};" {- tv vn = Var (Type AFloat 0) vn tvs ns = map (\n -> tv ("mem" ++ show n)) ns t8 = cStructType "foo" (cStructMembers $ tvs [1..10]) t9 = cTupleDecl vars where vars = tvs [1..10] t10 = cTupleDecl [ Var (Type ABool 0) "mem0", Var (Type AInt 1) "mem1", Var (Type AFloat 2) "mem2" ] -}