{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE DeriveFunctor, DeriveTraversable #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecursiveDo #-} module PCF where import Bound import Data.Deriving import Data.Functor.Classes import Data.Map (Map) import qualified Data.Map as M import Data.Void import Control.Applicative import Control.Monad import Control.Monad.State import Control.Monad.Except import Data.String import Data.Tuple import Data.Bifunctor import Data.Foldable import Data.Char import Data.List import Text.Earley import Text.Earley.Mixfix import System.Environment import System.IO -- We're using Int everywhere... For this to be actually correct, it should -- probably be Integer, but that probably won't be a problem? type Id = String infixr 5 :-> data Ty = Nat | Ty :-> Ty deriving (Show, Eq) infixl 8 :@ data Exp fr = Var fr | Z | S (Exp fr) | Ifz (Exp fr) (Exp fr) (Scope () Exp fr) | Lam (Scope () Exp fr) | Exp fr :@ Exp fr | Fix (Scope () Exp fr) deriving (Functor, Foldable, Traversable) type CExp = Exp Void -- Closed Exp type CScope = Scope () Exp Void -- Closed except for one bound variable. embedNat :: Int -> Exp Id embedNat n | n < 0 = error "not a nat" embedNat 0 = Z embedNat n = S (embedNat (n - 1)) exp0 :: Exp (Var () a) exp0 = Var (B ()) lam :: Id -> Exp Id -> Exp Id lam v = Lam . abstract1 v ifz :: Exp Id -> Exp Id -> Id -> Exp Id -> Exp Id ifz c th v el = Ifz c th (abstract1 v el) fix' :: Id -> Exp Id -> Exp Id fix' v = Fix . abstract1 v makeBound ''Exp deriveShow1 ''Exp deriveEq1 ''Exp instance Show fr => Show (Exp fr) where showsPrec = showsPrec1 instance Eq fr => Eq (Exp fr) where (==) = eq1 instance IsString (Exp Id) where fromString = Var . fromString -- Statics -- A unification metavariable. type UVar = Int -- A type which may contain metavariables. We're only really gonna use it with -- v = UVar, but writing it like this lets us use it with things like -- 'substitute' from Bound, since we can give it a Monad instance. infixr 5 :->? data UTyF v = UVar v | UNat | UTyF v :->? UTyF v deriving (Show, Eq, Functor, Foldable, Traversable) type UTy = UTyF UVar -- A type equation. type TyEq = (UTy, UTy) instance Applicative UTyF where pure = return; (<*>) = ap instance Monad UTyF where return = UVar UVar v >>= k = k v UNat >>= _ = UNat dom :->? cod >>= k = (dom >>= k) :->? (cod >>= k) -- Monad for setting up type equations that lets us generate fresh variables. type FreshM = State (Int, UVar) freshId :: FreshM Id freshId = state (\(i, u) -> ("var" ++ show i, (i + 1, u))) freshU :: FreshM UTy freshU = state (\(i, u) -> (UVar u, (i, u + 1))) openFresh :: (Monad f, Foldable f) => Scope b f Id -> FreshM (Id, f Id) openFresh scope = (\new -> (new, instantiate1 (pure new) scope)) <$> freshId -- Property: For all Γ, e, τ, and all type substitutions θ such that Γθ and τθ -- are closed, we have Γθ |- e : τθ iff θ solves the system 'genEqs Γ e τ'. -- TODO: Carefully check that this property actually holds! -- Important corollary: If Γ and τ are already closed, then Γ |- e : τ iff -- there is *any* substitution solving 'genEqs Γ e τ'. genEqs :: Map Id UTy -> Exp Id -> UTy -> FreshM [TyEq] genEqs g e' t = case e' of Var v -> return $ case M.lookup v g of -- this is gross and arbitrary---find a better solution! Nothing -> [(UNat, UNat :->? UNat)] Just t' -> [(t, t')] Z -> return [(t, UNat)] S e -> ((t, UNat) :) <$> genEqs g e UNat Ifz c th elb -> do (new, el) <- openFresh elb let g' = M.insert new UNat g concat <$> sequence [genEqs g c UNat, genEqs g th t, genEqs g' el t] Lam b -> do dom <- freshU cod <- freshU (new, e) <- openFresh b let g' = M.insert new dom g ((t, dom :->? cod) :) <$> genEqs g' e cod ef :@ ex -> do xt <- freshU (++) <$> genEqs g ef (xt :->? t) <*> genEqs g ex xt Fix b -> do (new, e) <- openFresh b let g' = M.insert new t g genEqs g' e t -- Basic "most general unification" algo. -- Based on https://www.cmi.ac.in/~madhavan/ -- courses/pl2009/lecturenotes/lecture-notes/node113.html unify :: Map UVar UTy -> [TyEq] -> Maybe (Map UVar UTy) unify sub [] = Just sub unify sub (eq:eqs) = case eq of (UNat, UVar _) -> unify sub (swap eq:eqs) (_ :->? _, UVar _) -> unify sub (swap eq:eqs) (UNat, _ :->? _) -> Nothing (_ :->? _, UNat) -> Nothing (UNat, UNat) -> unify sub eqs (dom :->? cod, dom' :->? cod') -> unify sub ((dom, dom'):(cod, cod'):eqs) (UVar i, t) | UVar j <- t, i == j -> unify sub eqs | i `elem` t -> Nothing | otherwise -> let sub' = substitute i t <$> sub eqs' = bimap (substitute i t) (substitute i t) <$> eqs in unify (M.insert i t sub') eqs' -- Property: For all e, if 'inferTy e' = Just τ, then for all type -- substitutions θ such that τθ is closed, we have |- e : τθ. If -- 'inferTy e' = Nothing, then there is no substitution θ such that |- e : τθ. -- This property follows from the fact that 'unify' finds a unification -- whenever one exists. In the event that it cannot find one, then the system -- from 'genEqs' is unsolvable, and so no substitution works. If it *can* find -- one, μ, then τ = (UVar 0)μ, so for any θ making τ closed, we have -- τθ = (UVar 0)μθ = (UVar 0)(μθ). μθ will be a solution to the -- 'genEqs M.empty e (UVar 0)' system since μ is, and it will make (UVar 0) -- closed, so by the property of 'genEqs', we have |- e : (UVar 0)(μθ) and -- hence |- e : τθ. -- Note that since all τs can be made closed by *some* substitution, this means -- that e is well-typed iff 'inferTy e' is not Nothing. -- The fact that 'unify' finds a *most general* unification actually means that -- all types that can be assigned to e will be substitution instances of τ, but -- that's less important here. inferTy :: CExp -> Maybe UTy inferTy e = unify M.empty eqs >>= M.lookup 0 where eqs = evalState (genEqs M.empty (vacuous e) (UVar 0)) (0, 1) -- Dynamics -- We will use a strict dynamics for the successor and for function -- application. I suppose this is a bit at odds with the presence of fix, but -- it doesn't break the language. val :: CExp -> Bool val e' = case e' of Z -> True S e -> val e Lam _ -> True _ -> False -- Evaluation dynamics eval :: CExp -> Maybe CExp eval e' = case e' of Z -> Just Z S e -> S <$> eval e Ifz c th elb -> let c' = eval c in (do Z <- c'; eval th) <|> (do S c'' <- c'; eval (instantiate1 c'' elb)) Lam b -> Just (Lam b) ef :@ ex -> do Lam b <- eval ef ex' <- eval ex eval (instantiate1 ex' b) Fix b -> eval (instantiate1 (Fix b) b) _ -> Nothing -- Stack dynamics data StackFrame = SFrame | IfzFrame CExp CScope | ApLFrame CExp | ApRFrame CScope deriving (Show, Eq) type Stack = [StackFrame] infix 4 :|>, :<| data StackEvalState = Stack :|> CExp | Stack :<| CExp deriving (Show, Eq) final :: StackEvalState -> Bool final ([] :<| e) = val e final _ = False step :: StackEvalState -> Maybe StackEvalState step es = case es of k :|> Z -> Just (k :<| Z) k :|> S e -> Just (SFrame:k :|> e) SFrame:k :<| e -> Just (k :<| S e) k :|> Ifz c th elb -> Just (IfzFrame th elb:k :|> c) IfzFrame th _:k :<| Z -> Just (k :|> th) IfzFrame _ elb:k :<| S e -> Just (k :|> instantiate1 e elb) k :|> Lam b -> Just (k :<| Lam b) k :|> ef :@ ex -> Just (ApLFrame ex:k :|> ef) ApLFrame ex:k :<| Lam b -> Just (ApRFrame b:k :|> ex) ApRFrame ef:k :<| ex -> Just (k :|> instantiate1 ex ef) k :|> Fix b -> Just (k :|> instantiate1 (Fix b) b) _ -> Nothing evalStackTimed :: Int -> CExp -> (Int, Either StackEvalState CExp) evalStackTimed n' e' = go 0 n' ([] :|> e') where finish count ([] :<| e) = (count, Right e) finish count es = (count, Left es) go count 0 es = finish count es go count n es = case step es of Just es' -> go (count + 1) (if n > 0 then n - 1 else n) es' Nothing -> finish count es evalStack :: Int -> CExp -> Either StackEvalState CExp evalStack n' e' = snd (evalStackTimed n' e') -- Concrete syntax -- This function is only correct if it is called with a depth such that -- depth + 1 is larger than any free variables. pprintPrec' :: Int -> Int -> Exp Id -> String pprintPrec' depth prec e' = case e' of Var v -> v e | Just n <- intOf e -> show n Z -> error "case not ruled out?!" S e -> p 10 $ "s " ++ r 11 e Ifz c th elb -> let new = "var" ++ show depth el = instantiate1 (pure new) elb in p 5 $ "ifz " ++ r 0 c ++ " then " ++ r 0 th ++ " else " ++ new ++ "." ++ r' 5 el Lam b -> let new = "var" ++ show depth e = instantiate1 (pure new) b in p 0 $ "\\" ++ new ++ "." ++ r' 0 e ef :@ ex -> p 10 $ r 10 ef ++ " " ++ r 11 ex Fix b -> let new = "var" ++ show depth e = instantiate1 (pure new) b in p 0 $ "fix " ++ new ++ "." ++ r' 0 e where p prec' m | prec' >= prec = m | otherwise = "(" ++ m ++ ")" r = pprintPrec' depth r' = pprintPrec' (depth + 1) intOf :: Exp Id -> Maybe Int intOf Z = Just 0 intOf (S e) = succ <$> intOf e intOf _ = Nothing pprintPrec :: Int -> Exp Id -> String pprintPrec prec e = pprintPrec' startDepth prec e where Just startDepth = find (\n -> "var" ++ show n `notElem` e) [0..] pprint :: Exp Id -> String pprint = pprintPrec 0 pprintPrecUTy :: Int -> UTy -> String pprintPrecUTy prec t' = case t' of UVar i -> "?" ++ show i UNat -> "nat" dom :->? cod -> p 0 $ pprintPrecUTy 1 dom ++ " -> " ++ pprintPrecUTy 0 cod where p prec' m | prec' >= prec = m | otherwise = "(" ++ m ++ ")" pprintUTy :: UTy -> String pprintUTy = pprintPrecUTy 0 -- We'll add a syntactic sugar "let" construct which compiles down to an -- application of a lambda to a fix. data Token = LParenT | RParenT | DotT | VarT String | ST | IfzT | ThenT | ElseT | LamT | FixT | NatT Int | LetT | EqT | InT deriving (Show, Eq) tokenName :: Token -> String tokenName t = case t of LParenT -> q "("; RParenT -> q ")"; DotT -> q "." VarT v -> "variable " ++ q v; ST -> q "s" IfzT -> q "ifz"; ThenT -> q "then"; ElseT -> q "else" LamT -> "lambda"; FixT -> q "fix" NatT n -> "number " ++ show n LetT -> q "let"; EqT -> q "="; InT -> q "in" where q = show (<:>) :: Functor f => a -> f [a] -> f [a] a <:> fas = fmap (a:) fas -- A Left return gives the remainder of the string. tokenize :: String -> Either String [Token] tokenize [] = Right [] tokenize ('(':cs) = LParenT <:> tokenize cs tokenize (')':cs) = RParenT <:> tokenize cs tokenize ('.':cs) = DotT <:> tokenize cs tokenize ('\\':cs) = LamT <:> tokenize cs tokenize ('=':cs) = EqT <:> tokenize cs tokenize (c:cs) | isSpace c = tokenize cs tokenize (c:cs) | isAlpha c || c == '_' = tok <:> tokenize rest where validInVar c' = isAlphaNum c' || c' `elem` ("'_" :: String) (name, rest) = span validInVar (c:cs) tok = case name of "s" -> ST "ifz" -> IfzT; "then" -> ThenT; "else" -> ElseT "λ" -> LamT; "fix" -> FixT "let" -> LetT; "in" -> InT v -> VarT v tokenize (c:cs) | c `elem` digits = tok <:> tokenize rest where digits = ['0'..'9'] (lit, rest) = span (`elem` digits) (c:cs) tok = NatT (read lit) tokenize cs = Left cs -- TODO improve error reports expG :: Grammar r (Prod r String Token (Exp Id)) expG = mdo let toId (VarT v) = Just v toId _ = Nothing toNat (NatT n) = Just n toNat _ = Nothing atom <- rule $ Var <$> (terminal toId "variable") <|> embedNat <$> (terminal toNat "number") <|> (token LParenT "\"(\"") *> exp <* (token RParenT "\")\"") let isVar (VarT _) = True isVar _ = False tok t = Just (token t tokenName t) var = Just (satisfy isVar "variable") table = [ [([tok LetT, var, tok EqT, Nothing, tok InT, Nothing], NonAssoc, \[_, Just (VarT v), _, _, _, _] [e, b] -> lam v b :@ fix' v e), ([tok LamT, var, tok DotT, Nothing], NonAssoc, \[_, Just (VarT v), _, _] [b] -> lam v b), ([tok FixT, var, tok DotT, Nothing], NonAssoc, \[_, Just (VarT v), _, _] [b] -> fix' v b), ([tok IfzT, Nothing, tok ThenT, Nothing, tok ElseT, var, tok DotT, Nothing], NonAssoc, \[_, _, _, _, _, Just (VarT v), _, _] [c, th, el] -> ifz c th v el)], [([tok ST, Nothing], LeftAssoc, \_ [e] -> S e), -- The 'Just (pure DotT)' is an ugly hack to get Early to do left -- associativity properly---it will only do the right thing if -- there's a non-Nothing between the two Nothings, so we put one in -- that has no actual effect. The DotT is an arbitrary unused value. ([Nothing, Just (pure DotT), Nothing], LeftAssoc, \_ [ef, ex] -> ef :@ ex)] ] exp <- mixfixExpressionSeparate table atom return exp expParser :: Parser String [Token] (Exp Id) expParser = parser expG -- A Left return contains a human-readable error message. parseExp :: [Token] -> Either String (Exp Id) parseExp tokens = case fullParses expParser tokens of ([e], _) -> Right e ([], rep) -> Left $ let ex = case expected rep of [] -> "EOF" [t] -> t ts -> "one of " ++ intercalate ", " ts found = case unconsumed rep of [] -> "EOF" tok:_ -> tokenName tok in "Parse error at token " ++ show (position rep) ++ ": expected " ++ ex ++ "; found " ++ found -- Next clause should hopefully never happen... (ps, _) -> Left $ "Ambiguous parse: could be any of " ++ intercalate ", " (map (pprintPrec 11) ps) data Dyn = EvalDyn | StackDyn process :: Bool -> Dyn -> String -> ExceptT String IO () process check dyn code = void $ do let getMaybe err = maybe (throwError err) return getEither f = liftEither . first f let tokmsg remainder = "Could not tokenize, starting at " ++ show remainder tokens <- getEither tokmsg (tokenize code) e <- liftEither (parseExp tokens) let freemsg frees = "Term not closed: contains free variable" ++ case frees of [v] -> " " ++ show v _ -> "s " ++ intercalate ", " (map show frees) ce <- getMaybe (freemsg (toList e)) (closed e) when check $ do ty <- getMaybe "Not well-typed!" (inferTy ce) lift $ putStrLn ("Type: " ++ pprintUTy ty) let tsf = if check then "Type safety failed?! " else "" e' <- case dyn of EvalDyn -> getMaybe (tsf ++ "No evaluation") (eval ce) StackDyn -> let stuckmsg es = tsf ++ "Stuck at: " ++ show es in getEither stuckmsg (evalStack (-1) ce) lift $ putStrLn (pprint (vacuous e')) main :: IO () main = do args <- getArgs let dyn = if "-s" `elem` args then StackDyn else EvalDyn check = "-n" `notElem` args forever $ do putStr ">> " hFlush stdout code <- getLine res <- runExceptT (process check dyn code) case res of Left err -> putStrLn err _ -> return ()