Skip to content

Commit

Permalink
refactor format parser
Browse files Browse the repository at this point in the history
  • Loading branch information
glguy committed Dec 2, 2024
1 parent af1a213 commit 24a8591
Show file tree
Hide file tree
Showing 7 changed files with 257 additions and 221 deletions.
3 changes: 3 additions & 0 deletions common/advent.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,12 @@ library
Advent.Tokenize

other-modules:
Advent.Format.Enum
Advent.Format.Lexer
Advent.Format.Parser
Advent.Format.Show
Advent.Format.Types
Advent.Format.Utils

build-tool-depends: alex:alex, happy:happy

Expand Down
40 changes: 17 additions & 23 deletions common/src/Advent/Format.hs
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,16 @@ Structures:
-}
module Advent.Format (format) where

import Advent.Prelude (countBy)
import Advent.Input (getRawInput)
import Advent.Format.Enum (enumCases)
import Advent.Format.Lexer (alexScanTokens, AlexPosn(..))
import Advent.Format.Parser (parseFormat, ParseError(..))
import Advent.Format.Types
import Advent.Format.Show (showFormat, showToken)
import Advent.Format.Types (Format(..))
import Advent.Format.Utils
import Advent.Input (getRawInput)
import Advent.Prelude (countBy)
import Control.Applicative ((<|>), some)
import Control.Monad ((<=<), void)
import Control.Monad ((<=<), when, void)
import Data.Char (isDigit, isSpace, isUpper, isAsciiLower, isAsciiUpper, isHexDigit)
import Data.Maybe (listToMaybe)
import Language.Haskell.TH
Expand All @@ -73,7 +76,7 @@ import Text.ParserCombinators.ReadP
parse :: String -> Q Format
parse txt =
case parseFormat (alexScanTokens txt) of
Right fmt -> pure fmt
Right fmt -> pure (simplify fmt)
Left (Unclosed p) -> failAt p "Unclosed parenthesis"
Left (UnexpectedToken p t) -> failAt p ("Unexpected token " ++ showToken t)
Left UnexpectedEOF -> fail "Format parse error, unexpected end-of-input"
Expand Down Expand Up @@ -176,7 +179,7 @@ toReadP s =
[| void (some $(toReadP x)) |]

SepBy x y ->
do whenM (andM (acceptsEmpty x) (acceptsEmpty y)) (fail ("Both arguments to & accept ε: " ++ showFormat 0 s ""))
do whenM (allM acceptsEmpty [x, y]) (fail ("Both arguments to & accept ε: " ++ showFormat 0 s ""))
if interesting x then
[| sepBy $(toReadP x) $(toReadP y) |]
else
Expand All @@ -195,8 +198,8 @@ toReadP s =

Group x -> toReadP x

_ ->
case [(interesting x, toReadP x) | x <- follows s []] of
Follow xs ->
case [(interesting x, toReadP x) | x <- xs] of
[] -> [| pure () |]
xxs@((ix,x):xs)
| n == 0 -> foldl apply0 x xs
Expand Down Expand Up @@ -249,7 +252,7 @@ toType fmt =
[t| () |]

SepBy x y ->
do whenM (andM (acceptsEmpty x) (acceptsEmpty y)) (fail ("Both arguments to & accept ε: " ++ showFormat 0 fmt ""))
do whenM (allM acceptsEmpty [x, y]) (fail ("Both arguments to & accept ε: " ++ showFormat 0 fmt ""))
if interesting x then
[t| [$(toType x)] |]
else
Expand All @@ -268,26 +271,17 @@ toType fmt =

Group x -> toType x

_ ->
case [toType x | x <- follows fmt [], interesting x] of
Follow xs ->
case [toType x | x <- xs, interesting x] of
[] -> [t| () |]
[t] -> t
ts -> foldl appT (tupleT (length ts)) ts

-- | Prefix a list of format strings with a format string.
-- If the given list has all the topmost 'Follow' constructors
-- removed, the output list will as well. Any consecutive literals found
-- while flattening will be combined.
follows :: Format -> [Format] -> [Format]
follows (Follow x y) zs = follows x (follows y zs)
follows Empty zs = zs
follows (Literal x) (Literal y : zs) = follows (Literal (x ++ y)) zs
follows x zs = x : zs

enumParser :: String -> ExpQ
enumParser nameStr =
do entries <- enumCases nameStr

let parsers = [[| $(conE name) <$ string str |] | (name, str) <- entries]

[| choice $(listE parsers) |]

whenM :: Monad m => m Bool -> m () -> m ()
whenM pm m = pm >>= \p -> when p m
72 changes: 72 additions & 0 deletions common/src/Advent/Format/Enum.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{-# Language BlockArguments #-}
module Advent.Format.Enum where

import Language.Haskell.TH
import Data.List
import Data.Traversable

enumCases :: String -> Q [(Name,String)]
enumCases nameStr =
do tyName <- maybe (fail ("Failed to find type named " ++ show nameStr)) pure
=<< lookupTypeName nameStr

info <- reify tyName
cons <-
case info of
TyConI (DataD _ _ _ _ cons _) -> pure cons
_ -> fail ("Failed to find data declaration for " ++ show nameStr)

for cons \con ->
case con of
NormalC name []
| Just str <- stripPrefix nameStr (nameBase name) ->
case str of
'_' : symbolName ->
do symbol <- processSymbolName symbolName
pure (name, symbol)
_ -> pure (name, str)
_ -> fail ("Unsupported constructor: " ++ show con)

processSymbolName :: String -> Q String
processSymbolName str =
case break ('_' ==) str of
(name, rest) ->
case lookup name symbolNames of
Nothing -> fail ("Unknown symbol name: " ++ name)
Just symbol ->
case rest of
[] -> pure [symbol]
_:str' -> (symbol:) <$> processSymbolName str'

symbolNames :: [(String, Char)]
symbolNames =
[ ("LT", '<')
, ("GT", '>')
, ("EQ", '=')
, ("BANG", '!')
, ("AT" , '@')
, ("HASH", '#')
, ("DOLLAR", '$')
, ("PERCENT", '%')
, ("CARET", '^')
, ("AMPERSAND", '&')
, ("STAR", '*')
, ("PIPE", '|')
, ("LPAREN", '(')
, ("RPAREN", ')')
, ("LBRACE", '{')
, ("RBRACE", '}')
, ("LBRACK", '[')
, ("RBRACK", ']')
, ("COLON", ':')
, ("SEMI", ';')
, ("QUESTION", '?')
, ("SLASH", '/')
, ("BACKSLASH", '\\')
, ("UNDERSCORE", '_')
, ("DASH", '-')
, ("DOT", '.')
, ("COMMA", ',')
, ("PLUS", '+')
, ("TILDE", '~')
]
10 changes: 5 additions & 5 deletions common/src/Advent/Format/Parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ NAME { (_, TAt $$) }

%%

format
format :: { Format }
: atoms { $1 }
| format '|' atoms { Alt $1 $3 }

atoms
: { Empty }
| atoms atom { follow $1 $2 }
atoms :: { Format }
: { Follow [] }
| atoms atom { Follow [$1, $2] }

atom
atom :: { Format }
: '(' format ')' { Group $2 }
| '(' format error {% Left (Unclosed $1) }
| '%u' { UnsignedInt }
Expand Down
64 changes: 64 additions & 0 deletions common/src/Advent/Format/Show.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{-|
Module : Advent.Format.Show
Description : Rendering for the parser DSL
Copyright : (c) Eric Mertens, 2018-2021
License : ISC
Maintainer : [email protected]
-}

module Advent.Format.Show where

import Advent.Format.Types (Format(..), Token(..))

-- | Render a parsed format string back to the input syntax.
showFormat :: Int {- ^ surrounding precedence -} -> Format -> ShowS
showFormat p fmt =
case fmt of
Many x -> showFormat 3 x . showChar '*'
Some x -> showFormat 3 x . showChar '+'
Gather x -> showFormat 3 x . showChar '!'
SepBy x y -> showFormat 3 x . showChar '&' . showFormat 3 y
Alt x y -> showParen (p > 1) $ showFormat 1 x . showChar '|' . showFormat 2 y
Follow xs -> showParen (p > 2) $ \z -> foldr (showFormat 3) z xs
Group x -> showFormat 3 x
UnsignedInteger -> showString "%lu"
SignedInteger -> showString "%ld"
HexInteger -> showString "%lx"
UnsignedInt -> showString "%u"
SignedInt -> showString "%d"
HexInt -> showString "%x"
Word -> showString "%s"
Char -> showString "%c"
Letter -> showString "%a"
Named n -> showChar '@' . showString n
Literal x -> flip (foldr showLiteral) x

-- | Render a literal character match back to input syntax.
showLiteral :: Char -> ShowS
showLiteral x
| x == '\n' = showString "%n"
| x `elem` "()&!*+%@" = showChar '%' . showChar x
| otherwise = showChar x

showToken :: Token -> String
showToken t =
case t of
TOpenGroup -> "("
TCloseGroup -> ")"
TAnyChar -> "%c"
TAnyLetter -> "%a"
TAnyWord -> "%s"
TUnsignedInteger -> "%lu"
TSignedInteger -> "%ld"
THexInteger -> "%lx"
TUnsignedInt -> "%u"
TSignedInt -> "%d"
THexInt -> "%x"
TMany -> "*"
TSome -> "+"
TSepBy -> "&"
TAlt -> "|"
TAt x -> "@" ++ x
TBang -> "!"
TLiteral c -> showLiteral c ""
Loading

0 comments on commit 24a8591

Please sign in to comment.