Browse Source

fixed multiple small issues and added some more latex control statements

Sebastian Kreisel 2 years ago
parent
commit
686a3788b9
6 changed files with 94 additions and 97 deletions
  1. 7 1
      README.md
  2. 6 3
      parseck.cabal
  3. 0 48
      src/Main.hs
  4. 50 25
      src/Parseck/Html.hs
  5. 21 9
      src/Parseck/Latex.hs
  6. 10 11
      src/Parseck/Parseck.hs

+ 7 - 1
README.md

@@ -1 +1,7 @@
-Simple parsing using parser combinators.
+Simple Latex parsing using parser combinators.
+
+# TODO
+
+* Differentiate between \section and \section*
+* Nested inline parsing
+* Nested block/item-env parsing

+ 6 - 3
parseck.cabal

@@ -15,9 +15,13 @@ cabal-version:       >=1.10
 
 library
   hs-source-dirs:      src
-  build-depends:       base >= 4.7 && < 5
   default-language:    Haskell2010
-
+  ghc-options:         -Wall -Wwarn
+  exposed-modules:     Parseck.Parseck
+                     , Parseck.Latex
+                     , Parseck.Html
+  build-depends:       base
+                     , blaze-html >= 0.7
 executable parseck-exe
   hs-source-dirs:      src
   main-is:             Main.hs
@@ -27,5 +31,4 @@ executable parseck-exe
                      , Parseck.Latex
                      , Parseck.Html
   build-depends:       base
-                     , blaze-markup
                      , blaze-html >= 0.7

+ 0 - 48
src/Main.hs

@@ -11,23 +11,9 @@ import Parseck.Html
 main :: IO ()
 main = do
   putStrLn $ ">>> 3, 2, 1. Let's jam! <<<\n"
-  --testInline
-  --testBlocks
   testRunLatex
   putStrLn $ "\n >>> See you Space Cowboy <<<"
 
-testInline :: IO ()
-testInline = do
-  test <- readFile "./test/test3.txt"
-  let lns = fst $ head (parse inlines test)
-  printList lns
-
-testBlocks :: IO ()
-testBlocks = do
-  test <- readFile "./test/test2.txt"
-  let bs = parseItems $ parseBlocks test
-  printList bs
-
 testRunLatex :: IO ()
 testRunLatex  = do
   test <- readFile "./test/test2.txt"
@@ -35,39 +21,5 @@ testRunLatex  = do
 
 -- ---------------------------------------------------------------------------
 
-parseBlocks :: String -> [Block String]
-parseBlocks s = fst $ head (parse blocks s)
-
-parseItems :: [Block String] -> [Block String]
-parseItems bs = parseItems' bs []
-
-parseInlines :: [Block String] -> [Block [Inline]]
-parseInlines bs = parseInlines' bs []
-
-parseInlines' :: [Block String] -> [Block [Inline]] -> [Block [Inline]]
-parseInlines' [] r = reverse r
-parseInlines' (b : bs) r = parseInlines' bs (procInlns b : r)
-
-procInlns :: Block String -> Block [Inline]
-procInlns (Section i s) = Section i (fst $ head (parse inlines s))
-procInlns (Math s) = Math (fst $ head (parse inlines s))
-procInlns (Center s) = Center (fst $ head (parse inlines s))
-procInlns (Paragraph s) = Paragraph (fst $ head (parse inlines s))
-procInlns (Itemize ss) = Itemize (map (\s -> fst $ head (parse inlines s)) ss)
-procInlns (Enumerate ss) = Enumerate (map (\s -> fst $ head (parse inlines s))
-                                      ss)
-procInlns (ErrorBlock s) = ErrorBlock $ [ErrorInline s]
-
-parseItems' :: [Block String] -> [Block String] -> [Block String]
-parseItems' [] r = reverse r
-parseItems' (b : bs) r = parseItems' bs (procListing b : r)
-  where procListing (Itemize xs)
-          | null xs = Itemize xs
-          | otherwise = Itemize (fst $ head (parse items (head xs)))
-        procListing (Enumerate xs)
-          | null xs = Enumerate xs
-          | otherwise = Enumerate (fst $ head (parse items (head xs)))
-        procListing bl = bl
-
 printList :: Show a => [a] -> IO ()
 printList xs = sequence_ (map (\x -> putStrLn $ show x) xs)

+ 50 - 25
src/Parseck/Html.hs

@@ -1,32 +1,57 @@
 module Parseck.Html where
 
-import Text.Blaze.Html5
+import Prelude hiding (div)
+import Text.Blaze.Html5 hiding (map)
+import Text.Blaze.Html5.Attributes
+import Text.Blaze.Html.Renderer.String
 
-import Parseck.Latex
+import Parseck.Latex (Block(..), Inline(..))
 
 
 data Settings = DivOnly | Mixed
 
-toHtml :: Settings -> [Block [Inline]] -> Html
-toHtml settings bs = undefined
-
-toHtmlString :: Settings -> [Block [Inline]] -> String
-toHtmlString settings bs = undefined
-
-blockToHtml :: Settings -> Block -> Html
-blockToHtml (Section (i, s)) = undefined
-blockToHtml (Math s) = undefined
-blockToHtml (Center s) = undefined
-blockToHtml (Paragraph s) = undefined
-blockToHtml (Itemize ss) = undefined
-blockToHtml (Enumerate ss) = undefined
-
-inlineToHtml :: Settings -> Inline -> Html
-inlineToHtml (Plain s) = undefined
-inlineToHtml (Emph s) = undefined
-inlineToHtml (Bold s) = undefined
-inlineToHtml (InlineMath s) = undefined
-inlineToHtml (Url (ref, des)) = undefined
-inlineToHtml (Image (ref, _)) = undefined
-inlineToHtml (VSpace px) = undefined
-inlineToHtml (ErrorInline px) = undefined
+blocksToHtml :: Settings -> [Block [Inline]] -> Html
+blocksToHtml settings bs = toHtml $ map (blockToHtml settings) bs
+
+blocksToHtmlString :: Settings -> [Block [Inline]] -> String
+blocksToHtmlString settings bs = renderHtml $ blocksToHtml settings bs
+
+blockToHtml :: Settings -> Block [Inline] -> Html
+blockToHtml DivOnly (Section i bs) = div $ toHtml (map inlineToHtml bs)
+blockToHtml DivOnly (Math bs) = div $ extrMath bs
+blockToHtml DivOnly (Center bs) = div $ toHtml (map inlineToHtml bs)
+blockToHtml DivOnly (Paragraph bs) = div $ toHtml (map inlineToHtml bs)
+blockToHtml DivOnly (Itemize bss) = div $ ul $ toHtml (map itemsToHtml bss)
+blockToHtml DivOnly (Enumerate bss) = div $ ol $ toHtml (map itemsToHtml bss)
+-- --
+blockToHtml Mixed (Section i bs) = (intToH i) $ toHtml (map inlineToHtml bs)
+blockToHtml Mixed (Math bs) = p $ extrMath bs
+blockToHtml Mixed (Center bs) = p $ toHtml (map inlineToHtml bs)
+blockToHtml Mixed (Paragraph bs) = p $ toHtml (map inlineToHtml bs)
+blockToHtml Mixed (Itemize bss) = ul $ toHtml (map itemsToHtml bss)
+blockToHtml Mixed (Enumerate bss) = ol $ toHtml (map itemsToHtml bss)
+
+extrMath :: [Inline] -> Html
+extrMath [] = toHtml ""
+extrMath (x : _) = toHtml [toHtml "$$", inlineToHtml x, toHtml "$$"]
+
+intToH :: Int -> (Html -> Html)
+intToH 0 = h1
+intToH 1 = h2
+intToH 2 = h3
+intToH _ = h4
+
+itemsToHtml :: [Inline] -> Html
+itemsToHtml xs = li $ toHtml (map inlineToHtml xs)
+
+inlineToHtml :: Inline -> Html
+inlineToHtml (Plain s) = toHtml s
+inlineToHtml (Emph s) = i $ toHtml s
+inlineToHtml (Bold s) = b $ toHtml s
+inlineToHtml (Italic s) = i $ toHtml s
+inlineToHtml (InlineMath s) = toHtml ("\\(" ++ s ++ "\\)")
+inlineToHtml (Url (ref, des)) = a ! href (stringValue ref) $ toHtml des
+inlineToHtml (Image (ref, _)) = img ! src (stringValue ref)
+inlineToHtml (VSpace px) = p $ toHtml ""
+inlineToHtml Linebreak = br
+inlineToHtml (ErrorInline s) = p $ toHtml s

+ 21 - 9
src/Parseck/Latex.hs

@@ -63,9 +63,9 @@ block = section <|> math <|> center <|> itemize <|> enumerate <|> paragraph
 
 section :: Parser (Block String)
 section = token $ do
-  string_ "\\"; sc <- subs; string_ "section{"
-  s <- untilStop "}"
-  string_ "}"
+  string_ "\\"; sc <- subs; string_ "section"
+  many $ string_ "*"
+  s <- getEnclosedBalanced ("{", "}")
   if sc > 3 then failure else return (Section sc s)
   where subs = many (string "sub") >>= return . length
 
@@ -100,6 +100,7 @@ paragraph = token $ do
 
 items :: Parser [String]
 items = do
+  _ <- spaces
   string_ "\\item"
   itms <- many (do i <- untilStop "\\item"; string_ "\\item"; return i)
   lastItm <- everything
@@ -111,6 +112,7 @@ items = do
 
 data Inline = Plain String | Emph String | Bold String | InlineMath String
             | Url (String, String) | Image (String, String) | VSpace Int
+            | Italic String | Linebreak
             | ErrorInline String
             deriving Show
 
@@ -121,16 +123,20 @@ inlines = do
   return $ lns ++ [lastLn]
 
 inline :: Parser Inline
-inline = emph <|> bold <|> inlinemath <|> url <|> image <|> vspace <|> plain
+inline = emph <|> bold <|> italic <|> inlinemath <|> url <|> image <|>
+         vspace <|> br <|> plain
 
 emph :: Parser Inline
-emph = token $ getEnclosed ("\\emph{", "}") >>= return . Emph
+emph = getEnclosed ("\\emph{", "}") >>= return . Emph
+
+italic :: Parser Inline
+italic = getEnclosed ("\\textit{", "}") >>= return . Italic
 
 bold :: Parser Inline
-bold = token $ getEnclosed ("\\textbf{", "}") >>= return . Bold
+bold = getEnclosed ("\\textbf{", "}") >>= return . Bold
 
 inlinemath :: Parser Inline
-inlinemath = token $ getEnclosed ("$", "$") >>= return . InlineMath
+inlinemath = getEnclosed ("$", "$") >>= return . InlineMath
 
 url :: Parser Inline
 url = do
@@ -151,10 +157,16 @@ vspace = do
     Just n -> return $ VSpace n
     _ -> failure
 
+br :: Parser Inline
+br = do
+  token $ string_ "\\\\"
+  return Linebreak
+
 plain :: Parser Inline
 plain = token $ do
   s <- anyString delims
   pl <- untilStops delims
   return $ Plain (s ++ pl)
-    where delims = ["\\emph", "\\textbf", "\\href", "\\includegraphics",
-                    "\\vspace", "$"]
+    where delims = ["\\emph", "\\textbf", "\\textit",
+                    "\\href", "\\includegraphics",
+                    "\\vspace", "$", "\\\\"]

+ 10 - 11
src/Parseck/Parseck.hs

@@ -1,6 +1,6 @@
 module Parseck.Parseck where
 
-import Control.Monad
+--import Control.Monad
 import Control.Applicative
 
 -- --------------------------------------------------------------------------
@@ -32,7 +32,7 @@ unit :: a -> Parser a
 unit a = Parser $ \s -> [(a, s)]
 
 failure :: Parser a
-failure = Parser $ \a -> []
+failure = Parser $ \_ -> []
 
 option :: Parser a -> Parser a -> Parser a
 option p1 p2 = Parser $ \s -> case parse p1 s of
@@ -70,7 +70,7 @@ char c = satisfy (== c)
 
 string :: String -> Parser String
 string [] = return []
-string (c : cs) = do { char c; string cs; return (c : cs) }
+string (c : cs) = do { _ <- char c; string_ cs; return (c : cs) }
 
 string_ :: String -> Parser ()
 string_ s = string s >> return ()
@@ -88,13 +88,13 @@ everything :: Parser String
 everything = Parser $ \s -> [(s, "")]
 
 token :: Parser a -> Parser a
-token p = do { a <- p; spaces; pure a }
+token p = do { a <- p; _ <- spaces; pure a }
 
 enclosed :: (String, String) -> Parser a -> Parser a
 enclosed (pre, post) p = do
-  string pre
+  string_ pre
   a <- p
-  string post
+  string_ post
   pure a
 
 getEnclosed :: (String, String) -> Parser String
@@ -102,7 +102,7 @@ getEnclosed (start, end) = enclosed (start, end) (untilStop end)
 
 getEnclosedBalanced :: (String, String) -> Parser String
 getEnclosedBalanced (start, end) = do
-  string start
+  string_ start
   getEnclB (start, end) (1, 0)
 
 getEnclB :: (String, String) -> (Int, Int) -> Parser String
@@ -111,15 +111,14 @@ getEnclB (start, end) (sc, ec) = do
   e <- hasPrefix end >>= return . boolToInt
   case sc + s == ec + e of
     True -> do
-      string end
+      string_ end
       return ""
     False -> do
       i <- itemS
       rest <- getEnclB (start, end) (sc + s, ec + e)
       return (i ++ rest)
-
-boolToInt True = 1
-boolToInt False = 0
+  where boolToInt True = 1
+        boolToInt False = 0
 
 hasPrefix :: String -> Parser Bool
 hasPrefix s = (notstring s >> return False) <|> return True