BROKEN {-# LANGUAGE NoMonomorphismRestriction, FlexibleInstances, FunctionalDependencies, MultiParamTypeClasses, TypeSynonymInstances #-} module CAnalyze(CAtop(..), caTop) where import qualified Data.ByteString.Char8 import qualified Text.Show.Pretty -- cabal install pretty-show import Language.C -- cabal install language-c import Language.C.Syntax.AST import Language.C.Data.Position import Language.C.Pretty -- pretty-show import Language.C.Data.Ident import Control.Monad.Writer.Lazy parse filename text = ast where (Right ast) = parseC (Data.ByteString.Char8.pack text) $ initPos filename -- Clear the node info slot. strip = fmap $ \_ -> () ppAst = Text.Show.Pretty.ppShow . strip ast = putStrLn . show. ppAst process filename = do s <- readFile filename return $ parse filename s -- Fish out particular parts from a translation unit = whole C file. -- To abstract the Language.C AST a bit, we perform a generic -- recursion and terminate it by abstract analysis represented in a -- class. -- This factorization is largely arbitrary. It's kept as simple as -- possible while still supporting the different kinds of analysis we -- want to do. -- First level of unpacking gathers all top forms, by default ignoring -- them. This makes most sense with a Writer monad instance where the -- result list can be further processed by specific form analyzers -- (see below). -- m : report / result gathering monad (ex. a Writer) -- r : final report type class Monad m => CAtop m r | m -> r, r -> m where caAsm :: CStringLiteral ni -> m () caFunDef :: CFunctionDef ni -> m () caFunDecl :: CDeclaration ni -> m () caDataDecl :: CDeclaration ni -> m () caIgnore :: m () caRun :: m () -> r -- Defaults do nothing. caIgnore = return () caAsm _ = caIgnore caFunDef _ = caIgnore caFunDecl _ = caIgnore caDataDecl _ = caIgnore -- Generic recursion caTop (CTranslUnit ds _) = result where oops msg ast = error $ "\n" ++ msg ++ "\n" ++ (show $ strip ast) result = sequence_ $ map decl ds decl (CDeclExt d) = cdecl d decl (CFDefExt f) = cfundef f decl (CAsmExt sl _) = caAsm sl cfundef ast@(CFunDef cdeclspecs cdeclr cdelcs cstat _) = caFunDef ast cdecl ast@(CDecl [CTypeSpec (CVoidType _)] [(Just (CDeclr id [CFunDeclr _ _ _] _ _ _),_,_)] _) = caFunDecl ast cdecl (ast@(CDecl _ _ _)) = caDataDecl ast -- cdecl ast = oops "cdecl:" ast -- The second level of unpacking is that of recursive parts of the -- AST: data structures and code. This needs to be monadic beacuse C -- type names are global entities. -- C structures (products) and unions (sums) are the same except for -- struct|union annotation, so we treat them as such {- FIXME: first build a concrete data type, then later abstract it in a class. class Monad m => CAdata m t | t -> m, m -> t where caList :: CStructTag -> String -> [t] -> m t caPrim :: String -> m t -} -- This part of the AST is quite complex to handle completely. The -- result here is written using an error-driven approach to support -- the subset that's actually used. typeDef = td where oops msg ast = error $ "\n" ++ msg ++ "\n" ++ (show $ strip ast) -- typedef td (CDecl (CStorageSpec (CTypedef _) : t) [(Just (CDeclr id _ _ _ _), _, _)] _) = DefUser (ident id) (TypeRef (typeBase t) 0) {- td (CDecl [CTypeSpec (CEnumType (CEnum (Just "cyg_ISR_results") (Just [("CYG_ISR_HANDLED", Just (CConst (CIntConst 1 ()))), ("CYG_ISR_CALL_DSR", Just (CConst (CIntConst 2 ())))]) [] ()) ())] [] ()) -} -- enum td (CDecl [CTypeSpec (CEnumType (CEnum id (Just labels) [] _) _)] [] _) = DefEnum (ident id) [] -- FIXME -- (CDecl [CTypeSpec (CSUType (CStruct CStructTag (Just "cyg_interrupt") (Just [CDecl [CTypeSpec (CTypeDef "cyg_vector_t" ())] [(Just (CDeclr (Just "vector") [] Nothing [] ()),Nothing,Nothing)] (),CDecl [CTypeSpec (CTypeDef "cyg_priority_t" ())] [(Just (CDeclr (Just "priority") [] Nothing [] ()),Nothing,Nothing)] (),CDecl [CTypeSpec (CTypeDef "cyg_ISR_t" ())] [(Just (CDeclr (Just "isr") [CPtrDeclr [] ()] Nothing [] ()),Nothing,Nothing)] (),CDecl [CTypeSpec (CTypeDef "cyg_DSR_t" ())] [(Just (CDeclr (Just "dsr") [CPtrDeclr [] ()] Nothing [] ()),Nothing,Nothing)] (),CDecl [CTypeSpec (CTypeDef "CYG_ADDRWORD" ())] [(Just (CDeclr (Just "data") [] Nothing [] ()),Nothing,Nothing)] (),CDecl [CTypeSpec (CTypeDef "cyg_ucount32" ())] [(Just (CDeclr (Just "dsr_count") [] Nothing [] ()),Nothing,Nothing)] (),CDecl [CTypeSpec (CTypeDef "cyg_interrupt" ())] [(Just (CDeclr (Just "next_dsr") [CPtrDeclr [] ()] Nothing [] ()),Nothing,Nothing)] ()]) [] ()) ())] [] ()) td (CDecl [CTypeSpec (CSUType (CStruct CStructTag id (Just memDecls) [] _) _)] [] _) = DefStruct (ident id) [] -- not a td: Function declaration td (CDecl retType [(Just (CDeclr funName ((CFunDeclr (Right (argDecls, varArgs)) _ _) : ptrDecls) Nothing _ _),Nothing,Nothing)] _) = NoDef -- FIXME: move somewhere else, not a type -- not a td: Variable declaration td (CDecl typ [(Just (CDeclr id [] Nothing [] _),Nothing,Nothing)] _) = NoDef -- FIXME: move somewhere else, not a type -- not a td: forward struct declaration td (CDecl [CTypeSpec (CSUType (CStruct CStructTag id Nothing [] _) _)] [] _) = NoDef -- (CDecl [CTypeSpec (CEnumType (CEnum (Just "cyg_ISR_results") (Just [("CYG_ISR_HANDLED",Just (CConst (CIntConst 1 ()))),("CYG_ISR_CALL_DSR",Just (CConst (CIntConst 2 ())))]) [] ()) ())] [] ()) -- (CDecl [CStorageSpec (CExtern ()),CTypeQual (CConstQual ()),CTypeSpec (CTypeDef "Cyg_libm_ieee_double_shape_type" ())] [(Just (CDeclr (Just "__infinity") [CArrDeclr [] (CNoArrSize False) ()] Nothing [] ()),Nothing,Nothing)] ()) td (CDecl [CStorageSpec (CExtern _),CTypeQual (CConstQual _),CTypeSpec (CTypeDef id _)] [(Just (CDeclr (Just id1) [CArrDeclr [] (CNoArrSize False) _] Nothing [] _),Nothing,Nothing)] _) = NoDef -- TODO -- (CDecl [CTypeSpec (CTypeDef "CYG_ADDRWORD" ())] [(Just (CDeclr (Just "cyg_thread_get_data_ptr") [CFunDeclr (Right ([CDecl [CTypeSpec (CTypeDef "cyg_ucount32" ())] [(Just (CDeclr (Just "index") [] Nothing [] ()),Nothing,Nothing)] ()],False)) [] (),CPtrDeclr [] ()] Nothing [] ()),Nothing,Nothing)] ()) {- td (CDecl [CTypeSpec (CTypeDef "CYG_ADDRWORD" ())] [(Just (CDeclr (Just "cyg_thread_get_data_ptr") [CFunDeclr (Right ([CDecl [CTypeSpec (CTypeDef "cyg_ucount32" ())] [(Just (CDeclr (Just "index") [] Nothing [] ()),Nothing,Nothing)] ()],False)) [] (), CPtrDeclr [] ()] Nothing [] ()), Nothing,Nothing)] ()) -} {- td (CDecl [CStorageSpec (CExtern _), CTypeSpec (CVoidType _)] [(Just (CDeclr id [CFunDeclr (Right ([CDecl [CTypeSpec (CVoidType _)] [] _], False)) [] _] Nothing [] _),Nothing,Nothing)] _) = DefFun (ident id) -- FIXME td (CDecl [CStorageSpec (CExtern _), CTypeSpec (CVoidType _)] [(Just (CDeclr id [CFunDeclr (Right ([CDecl [CTypeSpec (CCharType _)] [(Just (CDeclr id' [] Nothing [] _),Nothing,Nothing)] _], False)) [] _] Nothing [] _),Nothing,Nothing)] _) = DefFun (ident id) -- FIXME -} td ast = oops "td:" ast comp CStructTag = Struct comp CUnionTag = Union ident (Just (Ident name _ _)) = name ident _ = "" -- Struct / Union declarations. -- struct ast = oops "struct:" ast -- A declaration can have one type and multiple variables/members decl (CDecl spec mems _) = map (\(Just (CDeclr name _ _ _ _),_,_) -> (typeBase spec, ident name)) mems -- decl ast = oops "decl:" ast -- Wow this sucks.. prefix s t = Prim (s : ss) where Prim ss = typeBase t typeBase ((CTypeSpec (CSignedType _)) : t) = prefix "signed" t typeBase ((CTypeSpec (CUnsigType _)) : t) = prefix "unsigned" t typeBase ((CTypeSpec (CShortType _)) : t) = prefix "short" t typeBase ((CTypeSpec (CLongType _)) : t) = prefix "long" t typeBase [] = Prim [] -- terminates recursion -- Skip unknown qualifiers, go only for the last one in the list. typeBase [(CTypeSpec t)] = typeSpec t typeBase (_ : ts) = typeBase ts -- just drop what we don't know -- typeBase ast = oops "typeBase:" ast typeSpec (CTypeDef id _) = User (ident $ Just id) typeSpec (CCharType _) = Prim ["char"] typeSpec (CVoidType _) = Prim ["void"] typeSpec (CIntType _) = Prim ["int"] typeSpec (CFloatType _) = Prim ["float"] typeSpec (CDoubleType _) = Prim ["double"] typeSpec (CLongType _) = Prim ["long"] typeSpec (CUnsigType _) = Prim ["unsigned"] typeSpec (CSUType (CStruct tag id _ _ _) _) = (comp tag) (ident id) typeSpec (CEnumType (CEnum id _ _ _) _) = Enum (ident id) typeSpec ast = oops "typeSpec:" ast -- The 4 namespaces: Prim/User Enum Struct Union data TypeBase = Prim [String] | User String | Enum String | Struct String | Union String deriving (Eq, Show) data TypeRef = TypeRef TypeBase Int deriving (Eq, Show) data TypeDef = DefUser String TypeRef | DefStruct String [TypeRef] | DefUnion String [TypeRef] | DefEnum String [(String, Int)] | NoDef deriving (Eq, Show) instance CAtop (Writer [TypeDef]) [TypeDef] where caRun = execWriter -- caStruct (CStruct CStructTag (Just (Ident name _ _)) _ _ _) = tell [Struct name] caDataDecl ast = tell [typeDef ast] -- TEST file = "/tmp/test.c" -- This assumes the analysis monad is a Writer. info = do stx <- process file return $ caRun $ caTop stx pp = putStr . Text.Show.Pretty.ppShow struct = info :: IO [TypeDef] ppm m = do { v <- m ; pp v }