tags | Ext_native_spans -- ^ Use Span inlines for contents of @@ -162,6 +163,7 @@ pandocExtensions = extensionsFromList , Ext_fenced_code_attributes , Ext_backtick_code_blocks , Ext_inline_code_attributes + , Ext_raw_attribute , Ext_markdown_in_html_blocks , Ext_native_divs , Ext_native_spans @@ -275,6 +277,8 @@ multimarkdownExtensions = extensionsFromList , Ext_subscript , Ext_backtick_code_blocks , Ext_spaced_reference_links + -- So far only in dev version of mmd: + , Ext_raw_attribute ] -- | Language extensions to be used with strict markdown. diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 793ee0996..b91efcd8c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -681,19 +681,36 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) +rawAttribute :: PandocMonad m => MarkdownParser m String +rawAttribute = do + char '{' + skipMany spaceChar + char '=' + format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_") + skipMany spaceChar + char '}' + return format + codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing skipMany spaceChar - attr <- option ([],[],[]) $ - try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar) + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_fenced_code_attributes >> attributes) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar))) blankline - contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) + contents <- intercalate "\n" <$> + manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines - return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + return $ return $ + case rawattr of + Left syn -> B.rawBlock syn contents + Right attr -> B.codeBlockWith attr contents -- correctly handle github language identifiers toLanguageId :: String -> String @@ -1516,13 +1533,20 @@ code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces - result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + result <- (trim . concat) <$> + many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " ")) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) - attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes - >> attributes) - return $ return $ B.codeWith attr $ trim $ concat result + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_inline_code_attributes >> attributes))) + return $ return $ + case rawattr of + Left syn -> B.rawInline syn result + Right attr -> B.codeWith attr result math :: PandocMonad m => MarkdownParser m (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) diff --git a/test/command/3537.md b/test/command/3537.md new file mode 100644 index 000000000..df4eeba7d --- /dev/null +++ b/test/command/3537.md @@ -0,0 +1,28 @@ +Generalized raw attributes. + +```` +% pandoc -t native +```{=ms} +.MACRO +foo bar +``` +^D +[RawBlock (Format "ms") ".MACRO\nfoo bar"] +```` + +```` +% pandoc -t native +Hi `there`{=ms}. +^D +[Para [Str "Hi",Space,RawInline (Format "ms") "there",Str "."]] +```` + +```` +% pandoc -t native +~~~ {=ms} +.MACRO +foo bar +~~~ +^D +[RawBlock (Format "ms") ".MACRO\nfoo bar"] +```` -- cgit v1.2.3 From 5812ac03902169e5ce8593c26fb2d8cffafbd828 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 23 Jun 2017 22:31:08 +0200 Subject: Markdown reader: interpret YAML metadata as Inlines when possible. If the metadata field is all on one line, we try to interpret it as Inlines, and only try parsing as Blocks if that fails. If it extends over one line (including possibly the `|` or `>` character signaling an indented block), then we parse as Blocks. This was motivated by some German users finding that date: '22. Juin 2017' got parsed as an ordered list. Closes #3755. --- src/Text/Pandoc/Readers/Markdown.hs | 25 +++++++++++++------------ test/command/3755.md | 23 +++++++++++++++++++++++ 2 files changed, 36 insertions(+), 12 deletions(-) create mode 100644 test/command/3755.md (limited to 'test') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b91efcd8c..b3b275674 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -292,18 +292,19 @@ ignorable t = (T.pack "_") `T.isSuffixOf` t toMetaValue :: PandocMonad m => Text -> MarkdownParser m (F MetaValue) -toMetaValue x = toMeta <$> parseFromString' parseBlocks (T.unpack x) - where - toMeta p = do - p' <- p - return $ - case B.toList p' of - [Plain xs] -> MetaInlines xs - [Para xs] - | endsWithNewline x -> MetaBlocks [Para xs] - | otherwise -> MetaInlines xs - bs -> MetaBlocks bs - endsWithNewline t = T.pack "\n" `T.isSuffixOf` t +toMetaValue x = + parseFromString' parser' (T.unpack x) + where parser' = (asInlines <$> ((trimInlinesF . mconcat) + <$> (guard (not endsWithNewline) + *> manyTill inline eof))) + <|> (asBlocks <$> parseBlocks) + asBlocks p = do + p' <- p + return $ MetaBlocks (B.toList p') + asInlines p = do + p' <- p + return $ MetaInlines (B.toList p') + endsWithNewline = T.pack "\n" `T.isSuffixOf` x yamlToMeta :: PandocMonad m => Yaml.Value -> MarkdownParser m (F MetaValue) diff --git a/test/command/3755.md b/test/command/3755.md new file mode 100644 index 000000000..5e1ffc921 --- /dev/null +++ b/test/command/3755.md @@ -0,0 +1,23 @@ +``` +% pandoc -t native -s +--- +title: 'Titel' +date: '22. Juni 2017' +--- +^D +Pandoc (Meta {unMeta = fromList [("date",MetaInlines [Str "22.",Space,Str "Juni",Space,Str "2017"]),("title",MetaInlines [Str "Titel"])]}) +[] +``` + +``` +% pandoc -t native -s +--- +title: '

foo

' +date: | + 22. Juni 2017 +--- +^D +Pandoc (Meta {unMeta = fromList [("date",MetaBlocks [OrderedList (22,Decimal,Period) [[Plain [Str "Juni",Space,Str "2017"]]]]),("title",MetaBlocks [Div ("",[],[]) [Plain [Str "foo"]]])]}) +[] +``` + -- cgit v1.2.3 From f8877516e03ec678aeb735cdafe56e20ca52b235 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Sun, 25 Jun 2017 11:01:43 +0300 Subject: Muse reader: Require space before and after '=' for code (#3758) --- src/Text/Pandoc/Readers/Muse.hs | 13 ++++++++++--- test/Tests/Readers/Muse.hs | 8 ++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 7eee064a7..fe8a55f5c 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -442,8 +442,7 @@ tableParseCaption = try $ do -- inline :: PandocMonad m => MuseParser m (F Inlines) -inline = choice [ whitespace - , br +inline = choice [ br , footnote , strong , strongTag @@ -455,6 +454,7 @@ inline = choice [ whitespace , link , code , codeTag + , whitespace , str , symbol ] "inline" @@ -535,7 +535,14 @@ strikeoutTag :: PandocMonad m => MuseParser m (F Inlines) strikeoutTag = inlineTag B.strikeout "del" code :: PandocMonad m => MuseParser m (F Inlines) -code = return . B.code <$> verbatimBetween '=' +code = try $ do + pos <- getPosition + sp <- if sourceColumn pos == 1 + then pure mempty + else skipMany1 spaceChar >> pure B.space + cd <- verbatimBetween '=' + notFollowedBy nonspaceChar + return $ return (sp B.<> B.code cd) codeTag :: PandocMonad m => MuseParser m (F Inlines) codeTag = do diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index cac69dffa..bae389584 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -58,6 +58,8 @@ tests = , "Code" =: "=foo(bar)=" =?> para (code "foo(bar)") + , "Not code" =: "a=b= =c=d" =?> para (text "a=b= =c=d") + , "Code tag" =: "foo(bar)" =?> para (code "foo(bar)") , testGroup "Links" @@ -79,6 +81,12 @@ tests = , "Image link with description" =: "[[URL:image.jpg][Image]]" =?> para (link "image.jpg" "" (text "Image")) + -- Implicit links are supported in Emacs Muse, but not in Amusewiki: + -- https://github.com/melmothx/text-amuse/issues/18 + -- + -- This test also makes sure '=' without whitespace is not treated as code markup + , "No implicit links" =: "http://example.org/index.php?action=view&id=1" + =?> para "http://example.org/index.php?action=view&id=1" ] ] -- cgit v1.2.3 From 492b3b129190be9742981493812894f888bb5f2d Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Mon, 26 Jun 2017 09:41:17 +0300 Subject: Muse reader: fix horizontal rule parsing (#3762) Do not parse 3 dashes as horizontal rule and allow whitespace after rule --- src/Text/Pandoc/Readers/Muse.hs | 6 ++++-- test/Tests/Readers/Muse.hs | 13 ++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 06d385222..eb0769e0b 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -200,8 +200,10 @@ comment = try $ do separator :: PandocMonad m => MuseParser m (F Blocks) separator = try $ do - string "---" - newline + string "----" + many $ char '-' + many spaceChar + void newline <|> eof return $ return B.horizontalRule header :: PandocMonad m => MuseParser m (F Blocks) diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index bae389584..3d7baf8f0 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -91,7 +91,18 @@ tests = ] , testGroup "Blocks" - [ "Quote" =: "Hello, world" =?> blockQuote (para $ text "Hello, world") + [ "Block elements end paragraphs" =: + T.unlines [ "First paragraph" + , "----" + , "Second paragraph" + ] =?> para (text "First paragraph") <> horizontalRule <> para (text "Second paragraph") + , testGroup "Horizontal rule" + [ "Less than 4 dashes is not a horizontal rule" =: "---" =?> para (text "---") + , "4 dashes is a horizontal rule" =: "----" =?> horizontalRule + , "5 dashes is a horizontal rule" =: "-----" =?> horizontalRule + , "4 dashes with spaces is a horizontal rule" =: "---- " =?> horizontalRule + ] + , "Quote" =: "Hello, world" =?> blockQuote (para $ text "Hello, world") , "Center" =: "Hello, world" =?> para (text "Hello, world") , "Right" =: "Hello, world" =?> para (text "Hello, world") , testGroup "Comments" -- cgit v1.2.3 From fa515e46f36fa3e73b26b89b721a2de1738cf4e3 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Mon, 26 Jun 2017 16:07:45 +0300 Subject: Muse writer: fix hlint errors (#3764) --- src/Text/Pandoc/Writers/Muse.hs | 30 +++++++++++++----------------- test/Tests/Writers/Muse.hs | 4 ++-- 2 files changed, 15 insertions(+), 19 deletions(-) (limited to 'test') diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index 3d9e232ae..b386a85b9 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -97,8 +97,7 @@ pandocToMuse (Pandoc meta blocks) = do body <- blockListToMuse blocks notes <- liftM (reverse . stNotes) get >>= notesToMuse let main = render colwidth $ body $+$ notes - let context = defField "body" main - $ metadata + let context = defField "body" main metadata case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context @@ -129,14 +128,14 @@ blockToMuse (Para inlines) = do blockToMuse (LineBlock lns) = do let splitStanza [] = [] splitStanza xs = case break (== mempty) xs of - (l, []) -> l : [] + (l, []) -> [l] (l, _:r) -> l : splitStanza r let joinWithLinefeeds = nowrap . mconcat . intersperse cr let joinWithBlankLines = mconcat . intersperse blankline let prettyfyStanza ls = joinWithLinefeeds <$> mapM inlineListToMuse ls contents <- joinWithBlankLines <$> mapM prettyfyStanza (splitStanza lns) return $ blankline $$ "" $$ contents $$ "" <> blankline -blockToMuse (CodeBlock (_,_,_) str) = do +blockToMuse (CodeBlock (_,_,_) str) = return $ "" $$ text str $$ "" $$ blankline blockToMuse (RawBlock (Format format) str) = return $ blankline $$ " text format <> "\">" $$ @@ -154,11 +153,10 @@ blockToMuse (OrderedList (start, style, _) items) = do let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToMuse item num) $ - zip markers' items + contents <- zipWithM orderedListItemToMuse markers' items -- ensure that sublists have preceding blank line topLevel <- gets stTopLevel - return $ cr $$ ((if topLevel then nest 1 else id) $ vcat contents) $$ blankline + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline where orderedListItemToMuse :: PandocMonad m => String -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) @@ -170,7 +168,7 @@ blockToMuse (BulletList items) = do contents <- mapM bulletListItemToMuse items -- ensure that sublists have preceding blank line topLevel <- gets stTopLevel - return $ cr $$ ((if topLevel then nest 1 else id) $ vcat contents) $$ blankline + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline where bulletListItemToMuse :: PandocMonad m => [Block] -> StateT WriterState m Doc @@ -179,7 +177,7 @@ blockToMuse (BulletList items) = do return $ hang 2 "- " contents blockToMuse (DefinitionList items) = do contents <- mapM definitionListItemToMuse items - return $ cr $$ (nest 1 $ vcat $ contents) $$ blankline + return $ cr $$ nest 1 (vcat contents) $$ blankline where definitionListItemToMuse :: PandocMonad m => ([Inline], [[Block]]) -> StateT WriterState m Doc @@ -218,8 +216,8 @@ blockToMuse (Table caption _ _ headers rows) = do -- FIXME: Muse doesn't allow blocks with height more than 1. let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks where h = maximum (1 : map height blocks) - sep' = lblock (length sep) $ vcat (map text $ replicate h sep) - let makeRow sep = (" " <>) . (hpipeBlocks sep . zipWith lblock widthsInChars) + sep' = lblock (length sep) $ vcat (replicate h (text sep)) + let makeRow sep = (" " <>) . hpipeBlocks sep . zipWith lblock widthsInChars let head' = makeRow " || " headers' let rowSeparator = if noHeaders then " | " else " | " rows'' <- mapM (\row -> do cols <- mapM blockListToMuse row @@ -236,9 +234,7 @@ blockToMuse Null = return empty notesToMuse :: PandocMonad m => Notes -> StateT WriterState m Doc -notesToMuse notes = - mapM (\(num, note) -> noteToMuse num note) (zip [1..] notes) >>= - return . vsep +notesToMuse notes = liftM vsep (zipWithM noteToMuse [1 ..] notes) -- | Return Muse representation of a note. noteToMuse :: PandocMonad m @@ -268,7 +264,7 @@ conditionalEscapeString s inlineListToMuse :: PandocMonad m => [Inline] -> StateT WriterState m Doc -inlineListToMuse lst = mapM inlineToMuse lst >>= return . hcat +inlineListToMuse lst = liftM hcat (mapM inlineToMuse lst) -- | Convert Pandoc inline element to Muse. inlineToMuse :: PandocMonad m @@ -316,7 +312,7 @@ inlineToMuse Space = return space inlineToMuse SoftBreak = do wrapText <- gets $ writerWrapText . stOptions return $ if wrapText == WrapPreserve then cr else space -inlineToMuse (Link _ txt (src, _)) = do +inlineToMuse (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> return $ "[[" <> text (escapeLink x) <> "]]" @@ -340,7 +336,7 @@ inlineToMuse (Note contents) = do -- add to notes in state notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } - let ref = show $ (length notes) + 1 + let ref = show $ length notes + 1 return $ "[" <> text ref <> "]" inlineToMuse (Span (_,name:_,_) inlines) = do contents <- inlineListToMuse inlines diff --git a/test/Tests/Writers/Muse.hs b/test/Tests/Writers/Muse.hs index 63fdd293c..d83cc5c9b 100644 --- a/test/Tests/Writers/Muse.hs +++ b/test/Tests/Writers/Muse.hs @@ -31,14 +31,14 @@ tests = [ testGroup "block elements" , "Second paragraph." ] ] - , "line block" =: lineBlock ([text "Foo", text "bar", text "baz"]) + , "line block" =: lineBlock [text "Foo", text "bar", text "baz"] =?> unlines [ "" , "Foo" , "bar" , "baz" , "" ] - , "code block" =: codeBlock ("int main(void) {\n\treturn 0;\n}") + , "code block" =: codeBlock "int main(void) {\n\treturn 0;\n}" =?> unlines [ "" , "int main(void) {" , "\treturn 0;" -- cgit v1.2.3 From 460b6c470bae26f31d7c0d72b85aef8eb254b7f7 Mon Sep 17 00:00:00 2001 From: bucklereed Date: Tue, 27 Jun 2017 09:19:37 +0100 Subject: HTML reader: Use the lang value of to set the lang meta value. (#3765) * HTML reader: Use the lang value of to set the lang meta value. * Fix for pre-AMP environments. --- src/Text/Pandoc/Readers/HTML.hs | 9 +++++++++ test/Tests/Readers/HTML.hs | 6 ++++++ 2 files changed, 15 insertions(+) (limited to 'test') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 301afa207..b07b65019 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -53,6 +53,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import qualified Data.Map as M +import Data.Foldable ( for_ ) import Data.Maybe ( fromMaybe, isJust) import Data.List ( intercalate, isPrefixOf ) import Data.Char ( isDigit, isLetter, isAlphaNum ) @@ -134,6 +135,13 @@ type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m) type TagParser m = HTMLParser m [Tag Text] +pHtml :: PandocMonad m => TagParser m Blocks +pHtml = try $ do + (TagOpen "html" attr) <- lookAhead $ pAnyTag + for_ (lookup "lang" attr) $ + updateState . B.setMeta "lang" . B.text . T.unpack + pInTags "html" block + pBody :: PandocMonad m => TagParser m Blocks pBody = pInTags "body" block @@ -175,6 +183,7 @@ block = do , pList , pHrule , pTable + , pHtml , pHead , pBody , pDiv diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index 8647540b6..da6298e76 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -30,4 +30,10 @@ tests = [ testGroup "base tag" [ test html "anchor without href" $ "" =?> plain (spanWith ("anchor",[],[]) mempty) ] + , testGroup "lang" + [ test html "lang on " $ "hola" =?> + setMeta "lang" (text "es") (doc (plain (text "hola"))) + , test html "xml:lang on " $ "hola" =?> + setMeta "lang" (text "es") (doc (plain (text "hola"))) + ] ] -- cgit v1.2.3 From 563c9c8687a62acc7361fb49126a1d2030f3a11e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 Jun 2017 14:35:03 +0200 Subject: RST reader: Handle chained link definitions. For example, .. _hello: .. _goodbye: example.com Here both `hello` and `goodbye` should link to `example.com`. Fixes the first part of #262. --- src/Text/Pandoc/Readers/RST.hs | 27 ++++++++++++++++++++------- test/command/262.md | 9 +++++++++ 2 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 test/command/262.md (limited to 'test') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index d13f697b7..c790d5188 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,7 +31,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} module Text.Pandoc.Readers.RST ( readRST ) where -import Control.Monad (guard, liftM, mzero, when) +import Control.Monad (guard, liftM, mzero, when, forM_) import Control.Monad.Identity (Identity(..)) import Control.Monad.Except (throwError) import Data.Char (isHexDigit, isSpace, toLower, toUpper) @@ -1054,16 +1054,29 @@ stripTicks = reverse . stripTick . reverse . stripTick where stripTick ('`':xs) = xs stripTick xs = xs +referenceNames :: PandocMonad m => RSTParser m [String] +referenceNames = do + let rn = try $ do + string ".. _" + (_, ref) <- withRaw referenceName + char ':' + return ref + first <- rn + rest <- many (try (blanklines *> rn)) + return (first:rest) + regularKey :: PandocMonad m => RSTParser m () regularKey = try $ do - string ".. _" - (_,ref) <- withRaw referenceName - char ':' + -- we allow several references to the same URL, e.g. + -- .. _hello: + -- .. _goodbye: url.com + refs <- referenceNames src <- targetURI - let key = toKey $ stripTicks ref --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ - stateKeys s } + let keys = map (toKey . stripTicks) refs + forM_ keys $ \key -> + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } headerBlock :: PandocMonad m => RSTParser m [Char] headerBlock = do diff --git a/test/command/262.md b/test/command/262.md new file mode 100644 index 000000000..e23e2d866 --- /dev/null +++ b/test/command/262.md @@ -0,0 +1,9 @@ +``` +% pandoc -f rst +`hello`_ and `goodbye`_ + +.. _hello: +.. _goodbye: example.com +^D +

hello and goodbye

+``` -- cgit v1.2.3 From 33a29fbf8720c0d7eec40b7014e3f819b05474ef Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 Jun 2017 15:03:16 +0200 Subject: RST reader: support anchors. E.g. `hello` .. _hello: paragraph This is supported by putting "paragraph" in a Div with id `hello`. Closes #262. --- src/Text/Pandoc/Readers/RST.hs | 24 +++++++++++++++++++++++- test/command/262.md | 17 +++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index c790d5188..2daf60a89 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -170,7 +170,8 @@ parseRST = do -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... docMinusKeys <- concat <$> - manyTill (referenceKey <|> noteBlock <|> citationBlock <|> + manyTill (referenceKey <|> anchorDef <|> + noteBlock <|> citationBlock <|> headerBlock <|> lineClump) eof setInput docMinusKeys setPosition startPos @@ -217,6 +218,7 @@ block = choice [ codeBlock , fieldList , include , directive + , anchor , comment , header , hrule @@ -1072,12 +1074,32 @@ regularKey = try $ do -- .. _goodbye: url.com refs <- referenceNames src <- targetURI + guard $ not (null src) --TODO: parse width, height, class and name attributes let keys = map (toKey . stripTicks) refs forM_ keys $ \key -> updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } +anchorDef :: PandocMonad m => RSTParser m [Char] +anchorDef = try $ do + (refs, raw) <- withRaw (try (referenceNames <* blanklines)) + let keys = map stripTicks refs + forM_ keys $ \rawkey -> + updateState $ \s -> s { stateKeys = + M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s } + -- keep this for 2nd round of parsing, where we'll add the divs (anchor) + return raw + +anchor :: PandocMonad m => RSTParser m Blocks +anchor = try $ do + refs <- referenceNames + blanklines + b <- block + -- put identifier on next block: + let addDiv ref = B.divWith (ref, [], []) + return $ foldr addDiv b refs + headerBlock :: PandocMonad m => RSTParser m [Char] headerBlock = do ((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader') diff --git a/test/command/262.md b/test/command/262.md index e23e2d866..bda2acb35 100644 --- a/test/command/262.md +++ b/test/command/262.md @@ -7,3 +7,20 @@ ^D

hello and goodbye

``` + +``` +% pandoc -f rst +`hello`_ `goodbye`_ + +.. _hello: +.. _goodbye: + +paragraph +^D +

hello goodbye

paragraph

+``` -- cgit v1.2.3 From 79cc56726c7e876314c7c21f5bb5f65084e7d8b7 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Wed, 28 Jun 2017 15:32:53 +0300 Subject: Muse reader: parse indented blockquotes (#3769) --- src/Text/Pandoc/Readers/Muse.hs | 23 ++++++++++++++++++++++- test/Tests/Readers/Muse.hs | 7 ++++++- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index eb0769e0b..a51306347 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -187,6 +187,7 @@ blockElements = choice [ comment , orderedList , table , commentTag + , indentedBlock , noteBlock ] @@ -209,7 +210,8 @@ separator = try $ do header :: PandocMonad m => MuseParser m (F Blocks) header = try $ do st <- stateParserContext <$> getState - getPosition >>= \pos -> guard (st == NullState && sourceColumn pos == 1) + q <- stateQuoteContext <$> getState + getPosition >>= \pos -> guard (st == NullState && q == NoQuote && sourceColumn pos == 1) level <- liftM length $ many1 $ char '*' guard $ level <= 5 skipSpaces @@ -248,6 +250,25 @@ quoteTag = blockTag B.blockQuote "quote" commentTag :: PandocMonad m => MuseParser m (F Blocks) commentTag = parseHtmlContent "comment" block >> return mempty +-- Indented block is either center, right or quote +indentedLine :: PandocMonad m => MuseParser m (Int, String) +indentedLine = try $ do + indent <- length <$> many1 spaceChar + line <- anyLine + return (indent, line) + +rawIndentedBlock :: PandocMonad m => MuseParser m (Int, String) +rawIndentedBlock = try $ do + lns <- many1 indentedLine + let indent = minimum $ map fst lns + return (indent, unlines $ map snd lns) + +indentedBlock :: PandocMonad m => MuseParser m (F Blocks) +indentedBlock = try $ do + (indent, raw) <- rawIndentedBlock + contents <- withQuoteContext InDoubleQuote $ parseFromString parseBlocks raw + return $ (if indent >= 2 && indent < 6 then B.blockQuote else id) <$> contents + para :: PandocMonad m => MuseParser m (F Blocks) para = liftM B.para . trimInlinesF . mconcat <$> many1Till inline endOfParaElement where diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index 3d7baf8f0..fe0a59992 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -102,7 +102,12 @@ tests = , "5 dashes is a horizontal rule" =: "-----" =?> horizontalRule , "4 dashes with spaces is a horizontal rule" =: "---- " =?> horizontalRule ] - , "Quote" =: "Hello, world" =?> blockQuote (para $ text "Hello, world") + , "Quote tag" =: "Hello, world" =?> blockQuote (para $ text "Hello, world") + , "Quote" =: " This is a quotation\n" =?> blockQuote (para $ text "This is a quotation") + , "Multiline quote" =: T.unlines [ " This is a quotation" + , " with a continuation" + ] + =?> blockQuote (para $ text "This is a quotation with a continuation") , "Center" =: "Hello, world" =?> para (text "Hello, world") , "Right" =: "Hello, world" =?> para (text "Hello, world") , testGroup "Comments" -- cgit v1.2.3 From 780a65f8a87b40d1a9ee269cd7a51699c42d497e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 29 Jun 2017 15:47:27 +0200 Subject: Lua filters: Remove special treatment of Quoted, Math. No more SingleQuoted, DoubleQuoted, InlineMath, DisplayMath. This makes everything uniform and predictable, though it does open up a difference btw lua filters and custom writers. --- src/Text/Pandoc/Lua.hs | 32 ++++++++------------------------ test/lua/single-to-double-quoted.lua | 6 ++++-- 2 files changed, 12 insertions(+), 26 deletions(-) (limited to 'test') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 3bb11b705..fd7bba0ac 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -101,6 +101,12 @@ data LuaFilter = LuaFilter LuaState FunctionMap newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int } +tryFilter :: StackValue a => LuaState -> FunctionMap -> String -> a -> IO a +tryFilter lua fnMap filterFnName x = + case Map.lookup filterFnName fnMap of + Nothing -> return x + Just fn -> runFilterFunction lua fn x + execDocLuaFilter :: LuaState -> FunctionMap -> Pandoc -> IO Pandoc @@ -116,34 +122,12 @@ execMetaLuaFilter lua fnMap (Pandoc meta blks) = do execBlockLuaFilter :: LuaState -> FunctionMap -> Block -> IO Block -execBlockLuaFilter lua fnMap x = do - tryFilter lua fnMap (show (toConstr x)) x - -tryFilter :: StackValue a => LuaState -> FunctionMap -> String -> a -> IO a -tryFilter lua fnMap filterFnName x = - case Map.lookup filterFnName fnMap of - Nothing -> return x - Just fn -> runFilterFunction lua fn x - -tryFilterAlternatives :: StackValue a - => LuaState -> FunctionMap -> [String] -> a -> IO a -tryFilterAlternatives _ _ [] x = return x -tryFilterAlternatives lua fnMap (fnName : alternatives) x = - case Map.lookup fnName fnMap of - Nothing -> tryFilterAlternatives lua fnMap alternatives x - Just fn -> runFilterFunction lua fn x +execBlockLuaFilter lua fnMap x = tryFilter lua fnMap (show (toConstr x)) x execInlineLuaFilter :: LuaState -> FunctionMap -> Inline -> IO Inline -execInlineLuaFilter lua fnMap x = do - let tryAlt = tryFilterAlternatives lua fnMap - case x of - Math DisplayMath _ -> tryAlt ["DisplayMath", "Math"] x - Math InlineMath _ -> tryAlt ["InlineMath", "Math"] x - Quoted DoubleQuote _ -> tryAlt ["DoubleQuoted", "Quoted"] x - Quoted SingleQuote _ -> tryAlt ["SingleQuoted", "Quoted"] x - _ -> tryFilter lua fnMap (show (toConstr x)) x +execInlineLuaFilter lua fnMap x = tryFilter lua fnMap (show (toConstr x)) x instance StackValue LuaFilter where valuetype _ = Lua.TTABLE diff --git a/test/lua/single-to-double-quoted.lua b/test/lua/single-to-double-quoted.lua index 45c184c95..b985b215c 100644 --- a/test/lua/single-to-double-quoted.lua +++ b/test/lua/single-to-double-quoted.lua @@ -1,7 +1,9 @@ return { { - SingleQuoted = function (elem) - elem.quotetype = "DoubleQuote" + Quoted = function (elem) + if elem.quotetype == "SingleQuote" then + elem.quotetype = "DoubleQuote" + end return elem end, } -- cgit v1.2.3 From 0f658eb46ce3630886b733e8c8ed1086af9b3510 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 29 Jun 2017 17:08:59 +0200 Subject: data/pandoc.lua: regularize constructors. We now use Pandoc instead of Doc (though Doc remains a deprecated Synonym), and we deprecate DoubleQuoted, SingleQuoted, InlineMath, and DisplayMath. --- data/pandoc.lua | 21 ++++++++++----------- test/lua/hello-world-doc.lua | 4 ++-- test/lua/implicit-doc-filter.lua | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/data/pandoc.lua b/data/pandoc.lua index 7dcd179e8..138d8b59b 100644 --- a/data/pandoc.lua +++ b/data/pandoc.lua @@ -23,7 +23,7 @@ THIS SOFTWARE. -- @copyright © 2017 Albert Krewinkel -- @license MIT local M = { - _VERSION = "0.2.0" + _VERSION = "0.3.0" } ------------------------------------------------------------------------ @@ -137,7 +137,7 @@ end -- @function Doc -- @tparam {Block,...} blocks document content -- @tparam[opt] Meta meta document meta data -function M.Doc(blocks, meta) +function M.Pandoc(blocks, meta) meta = meta or {} return { ["blocks"] = blocks, @@ -146,6 +146,8 @@ function M.Doc(blocks, meta) } end +-- DEPRECATED synonym: +M.Doc = M.Pandoc ------------------------------------------------------------------------ -- MetaValue @@ -449,8 +451,7 @@ M.Link = M.Inline:create_constructor( {"attributes", "content", {"target", "title"}} ) ---- Creates a Math element, either inline or displayed. It is usually simpler to --- use one of the specialized functions @{InlineMath} or @{DisplayMath} instead. +--- Creates a Math element, either inline or displayed. -- @function Math -- @tparam "InlineMath"|"DisplayMath" mathtype rendering specifier -- @tparam string text Math content @@ -462,7 +463,7 @@ M.Math = M.Inline:create_constructor( end, {"mathtype", "text"} ) ---- Creates a DisplayMath element. +--- Creates a DisplayMath element (DEPRECATED). -- @function DisplayMath -- @tparam string text Math content -- @treturn Inline Math element @@ -471,7 +472,7 @@ M.DisplayMath = M.Inline:create_constructor( function(text) return M.Math("DisplayMath", text) end, {"mathtype", "text"} ) ---- Creates an InlineMath inline element. +--- Creates an InlineMath inline element (DEPRECATED). -- @function InlineMath -- @tparam string text Math content -- @treturn Inline Math element @@ -490,9 +491,7 @@ M.Note = M.Inline:create_constructor( "content" ) ---- Creates a Quoted inline element given the quote type and quoted content. It --- is usually simpler to use one of the specialized functions @{SingleQuoted} or --- @{DoubleQuoted} instead. +--- Creates a Quoted inline element given the quote type and quoted content. -- @function Quoted -- @tparam "DoubleQuote"|"SingleQuote" quotetype type of quotes to be used -- @tparam {Inline,..} content inline content @@ -502,7 +501,7 @@ M.Quoted = M.Inline:create_constructor( function(quotetype, content) return {c = {quotetype, content}} end, {"quotetype", "content"} ) ---- Creates a single-quoted inline element. +--- Creates a single-quoted inline element (DEPRECATED). -- @function SingleQuoted -- @tparam {Inline,..} content inline content -- @treturn Inline quoted element @@ -512,7 +511,7 @@ M.SingleQuoted = M.Inline:create_constructor( function(content) return M.Quoted(M.SingleQuote, content) end, {"quotetype", "content"} ) ---- Creates a single-quoted inline element. +--- Creates a single-quoted inline element (DEPRECATED). -- @function DoubleQuoted -- @tparam {Inline,..} content inline content -- @treturn Inline quoted element diff --git a/test/lua/hello-world-doc.lua b/test/lua/hello-world-doc.lua index 221321a60..62236584e 100644 --- a/test/lua/hello-world-doc.lua +++ b/test/lua/hello-world-doc.lua @@ -1,10 +1,10 @@ return { { - Doc = function(doc) + Pandoc = function(doc) local meta = {} local hello = { pandoc.Str "Hello,", pandoc.Space(), pandoc.Str "World!" } local blocks = { pandoc.Para(hello) } - return pandoc.Doc(blocks, meta) + return pandoc.Pandoc(blocks, meta) end } } diff --git a/test/lua/implicit-doc-filter.lua b/test/lua/implicit-doc-filter.lua index 320d22105..253462d1c 100644 --- a/test/lua/implicit-doc-filter.lua +++ b/test/lua/implicit-doc-filter.lua @@ -2,5 +2,5 @@ function Doc (doc) local meta = {} local hello = { pandoc.Str "Hello,", pandoc.Space(), pandoc.Str "World!" } local blocks = { pandoc.Para(hello) } - return pandoc.Doc(blocks, meta) + return pandoc.Pandoc(blocks, meta) end -- cgit v1.2.3 From 5e00cf8086e0960e81c31f7cd981ace646623f09 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 29 Jun 2017 17:13:19 +0200 Subject: Added parameter for user data directory to runLuaFilter. in Text.Pandoc.Lua. Also to pushPandocModule. This change allows users to override pandoc.lua with a file in their local data directory, adding custom functions, etc. @tarleb, if you think this is a bad idea, you can revert this. But in general our data files are all overridable. --- src/Text/Pandoc/App.hs | 2 +- src/Text/Pandoc/Lua.hs | 6 +++--- src/Text/Pandoc/Lua/PandocModule.hs | 10 +++++----- test/Tests/Lua.hs | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index ee74d39c0..c119fa255 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -795,7 +795,7 @@ applyLuaFilters :: MonadIO m applyLuaFilters mbDatadir filters args d = do expandedFilters <- mapM (expandFilterPath mbDatadir) filters let go f d' = liftIO $ do - res <- E.try (runLuaFilter f args d') + res <- E.try (runLuaFilter mbDatadir f args d') case res of Right x -> return x Left (LuaException s) -> E.throw (PandocFilterError f s) diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 87fb8fd6b..22b68d5e0 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -56,12 +56,12 @@ newtype LuaException = LuaException String instance Exception LuaException runLuaFilter :: (MonadIO m) - => FilePath -> [String] -> Pandoc -> m Pandoc -runLuaFilter filterPath args pd = liftIO $ do + => Maybe FilePath -> FilePath -> [String] -> Pandoc -> m Pandoc +runLuaFilter datadir filterPath args pd = liftIO $ do lua <- Lua.newstate Lua.openlibs lua -- store module in global "pandoc" - pushPandocModule lua + pushPandocModule datadir lua Lua.setglobal lua "pandoc" top <- Lua.gettop lua status <- Lua.loadfile lua filterPath diff --git a/src/Text/Pandoc/Lua/PandocModule.hs b/src/Text/Pandoc/Lua/PandocModule.hs index fccfbebf3..2d0baf4f8 100644 --- a/src/Text/Pandoc/Lua/PandocModule.hs +++ b/src/Text/Pandoc/Lua/PandocModule.hs @@ -41,9 +41,9 @@ import Text.Pandoc.Readers (Reader (..), getReader) import Text.Pandoc.Shared (readDataFile) -- | Push the "pandoc" on the lua stack. -pushPandocModule :: LuaState -> IO () -pushPandocModule lua = do - script <- pandocModuleScript +pushPandocModule :: Maybe FilePath -> LuaState -> IO () +pushPandocModule datadir lua = do + script <- pandocModuleScript datadir status <- loadstring lua script "pandoc.lua" unless (status /= 0) $ call lua 0 1 push lua "__read" @@ -51,8 +51,8 @@ pushPandocModule lua = do rawset lua (-3) -- | Get the string representation of the pandoc module -pandocModuleScript :: IO String -pandocModuleScript = unpack <$> readDataFile Nothing "pandoc.lua" +pandocModuleScript :: Maybe FilePath -> IO String +pandocModuleScript datadir = unpack <$> readDataFile datadir "pandoc.lua" read_doc :: String -> String -> IO (Either String Pandoc) read_doc formatSpec content = do diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs index cd8604ab9..ebd39366b 100644 --- a/test/Tests/Lua.hs +++ b/test/Tests/Lua.hs @@ -68,7 +68,7 @@ tests = map (localOption (QuickCheckTests 20)) assertFilterConversion :: String -> FilePath -> Pandoc -> Pandoc -> Assertion assertFilterConversion msg filterPath docIn docExpected = do - docRes <- runLuaFilter ("lua" filterPath) [] docIn + docRes <- runLuaFilter Nothing ("lua" filterPath) [] docIn assertEqual msg docExpected docRes roundtripEqual :: (Eq a, Lua.StackValue a) => a -> IO Bool @@ -78,7 +78,7 @@ roundtripEqual x = (x ==) <$> roundtripped roundtripped = do lua <- Lua.newstate Lua.openlibs lua - pushPandocModule lua + pushPandocModule Nothing lua Lua.setglobal lua "pandoc" oldSize <- Lua.gettop lua Lua.push lua x -- cgit v1.2.3 From e574d50b1cec1a8aea58db70a2c88ad10f1c4cb2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 30 Jun 2017 17:41:25 +0200 Subject: Markdown writer: Ensure that `+` and `-` are escaped properly... so they don't cause spurious lists. Previously they were only if succeeded by a space, not if they were at end of line. Closes #3773. --- src/Text/Pandoc/Writers/Markdown.hs | 3 +++ test/command/3773.md | 14 ++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 test/command/3773.md (limited to 'test') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index b951288bc..1e0d8bde2 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -416,6 +416,9 @@ blockToMarkdown' opts (Plain inlines) = do '+':s:_ | not isPlain && isSpace s -> "\\" <> contents '*':s:_ | not isPlain && isSpace s -> "\\" <> contents '-':s:_ | not isPlain && isSpace s -> "\\" <> contents + '+':[] | not isPlain -> "\\" <> contents + '*':[] | not isPlain -> "\\" <> contents + '-':[] | not isPlain -> "\\" <> contents '|':_ | (isEnabled Ext_line_blocks opts || isEnabled Ext_pipe_tables opts) && isEnabled Ext_all_symbols_escapable opts diff --git a/test/command/3773.md b/test/command/3773.md new file mode 100644 index 000000000..7ee8a3941 --- /dev/null +++ b/test/command/3773.md @@ -0,0 +1,14 @@ +``` +% pandoc -t markdown +A. \# +B. \+ +C. \* +D. o +E. o or \* +^D +A. \# +B. \+ +C. \* +D. o +E. o or \* +``` -- cgit v1.2.3 From fa43c0feaa7c01e718626e7121f9366d09cab589 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 30 Jun 2017 20:50:37 +0200 Subject: Updated jats tests for new texmath version. --- test/writer.jats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/writer.jats b/test/writer.jats index 3cb5050c2..6f808e109 100644 --- a/test/writer.jats +++ b/test/writer.jats @@ -1085,7 +1085,7 @@ These should not be escaped: \$ \\ \> \[ \{

Here’s some display math: - ddxf(x)=limh→0f(x+h)−f(x)h + ddxf(x)=limh→0f(x+h)−f(x)h

-- cgit v1.2.3 From 0feb7504b1c68cef76b30ea9987e2eae3101714c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 1 Jul 2017 19:31:43 +0200 Subject: Rewrote LaTeX reader with proper tokenization. This rewrite is primarily motivated by the need to get macros working properly. A side benefit is that the reader is significantly faster (27s -> 19s in one benchmark, and there is a lot of room for further optimization). We now tokenize the input text, then parse the token stream. Macros modify the token stream, so they should now be effective in any context, including math. Thus, we no longer need the clunky macro processing capacities of texmath. A custom state LaTeXState is used instead of ParserState. This, plus the tokenization, will require some rewriting of the exported functions rawLaTeXInline, inlineCommand, rawLaTeXBlock. * Added Text.Pandoc.Readers.LaTeX.Types (new exported module). Exports Macro, Tok, TokType, Line, Column. [API change] * Text.Pandoc.Parsing: adjusted type of `insertIncludedFile` so it can be used with token parser. * Removed old texmath macro stuff from Parsing. Use Macro from Text.Pandoc.Readers.LaTeX.Types instead. * Removed texmath macro material from Markdown reader. * Changed types for Text.Pandoc.Readers.LaTeX's rawLaTeXInline and rawLaTeXBlock. (Both now return a String, and they are polymorphic in state.) * Added orgMacros field to OrgState. [API change] * Removed readerApplyMacros from ReaderOptions. Now we just check the `latex_macros` reader extension. * Allow `\newcommand\foo{blah}` without braces. Fixes #1390. Fixes #2118. Fixes #3236. Fixes #3779. Fixes #934. Fixes #982. --- pandoc.cabal | 1 + src/Text/Pandoc/App.hs | 1 - src/Text/Pandoc/Error.hs | 3 + src/Text/Pandoc/Extensions.hs | 1 + src/Text/Pandoc/Options.hs | 2 - src/Text/Pandoc/Parsing.hs | 67 +- src/Text/Pandoc/Readers/LaTeX.hs | 2777 +++++++++++++++++----------- src/Text/Pandoc/Readers/LaTeX/Types.hs | 48 + src/Text/Pandoc/Readers/Markdown.hs | 19 +- src/Text/Pandoc/Readers/Muse.hs | 2 +- src/Text/Pandoc/Readers/Org/Inlines.hs | 5 +- src/Text/Pandoc/Readers/Org/ParserState.hs | 10 + src/Text/Pandoc/Readers/TWiki.hs | 2 +- src/Text/Pandoc/Readers/Textile.hs | 2 +- src/Text/Pandoc/Readers/Txt2Tags.hs | 2 +- test/Tests/Readers/LaTeX.hs | 3 +- test/command/1390.md | 20 + test/command/2118.md | 11 + test/command/3113.md | 2 +- test/command/3236.md | 9 + test/command/3558.md | 8 +- test/command/3779.md | 28 + test/command/934.md | 12 + test/command/982.md | 11 + test/latex-reader.latex | 1 - test/latex-reader.native | 2 +- test/markdown-reader-more.native | 4 +- 27 files changed, 1876 insertions(+), 1177 deletions(-) create mode 100644 src/Text/Pandoc/Readers/LaTeX/Types.hs create mode 100644 test/command/1390.md create mode 100644 test/command/2118.md create mode 100644 test/command/3236.md create mode 100644 test/command/3779.md create mode 100644 test/command/934.md create mode 100644 test/command/982.md (limited to 'test') diff --git a/pandoc.cabal b/pandoc.cabal index 70475568e..dd92690ce 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -366,6 +366,7 @@ Library Text.Pandoc.Readers, Text.Pandoc.Readers.HTML, Text.Pandoc.Readers.LaTeX, + Text.Pandoc.Readers.LaTeX.Types, Text.Pandoc.Readers.Markdown, Text.Pandoc.Readers.CommonMark, Text.Pandoc.Readers.MediaWiki, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 6fdd2a44c..689c0a784 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -308,7 +308,6 @@ convertWithOpts opts = do , readerColumns = optColumns opts , readerTabStop = optTabStop opts , readerIndentedCodeClasses = optIndentedCodeClasses opts - , readerApplyMacros = not laTeXOutput , readerDefaultImageExtension = optDefaultImageExtension opts , readerTrackChanges = optTrackChanges opts diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 60bc699ab..24186720c 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -64,6 +64,7 @@ data PandocError = PandocIOError String IOError | PandocTemplateError String | PandocAppError String | PandocEpubSubdirectoryError String + | PandocMacroLoop String deriving (Show, Typeable, Generic) instance Exception PandocError @@ -107,6 +108,8 @@ handleError (Left e) = PandocAppError s -> err 1 s PandocEpubSubdirectoryError s -> err 31 $ "EPUB subdirectory name '" ++ s ++ "' contains illegal characters" + PandocMacroLoop s -> err 91 $ + "Loop encountered in expanding macro " ++ s err :: Int -> String -> IO a err exitCode msg = do diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index bd164635c..28459d4e6 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -318,6 +318,7 @@ getDefaultExtensions "epub2" = getDefaultExtensions "epub" getDefaultExtensions "epub3" = getDefaultExtensions "epub" getDefaultExtensions "latex" = extensionsFromList [Ext_smart, + Ext_latex_macros, Ext_auto_identifiers] getDefaultExtensions "context" = extensionsFromList [Ext_smart, diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 6519f807c..d7e77010e 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -61,7 +61,6 @@ data ReaderOptions = ReaderOptions{ , readerStandalone :: Bool -- ^ Standalone document with header , readerColumns :: Int -- ^ Number of columns in terminal , readerTabStop :: Int -- ^ Tab stop - , readerApplyMacros :: Bool -- ^ Apply macros to TeX math , readerIndentedCodeClasses :: [String] -- ^ Default classes for -- indented code blocks , readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations @@ -75,7 +74,6 @@ instance Default ReaderOptions , readerStandalone = False , readerColumns = 80 , readerTabStop = 4 - , readerApplyMacros = True , readerIndentedCodeClasses = [] , readerAbbreviations = defaultAbbrevs , readerDefaultImageExtension = "" diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index eb5b37f40..f6263c782 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -109,8 +109,6 @@ module Text.Pandoc.Parsing ( anyLine, dash, nested, citeKey, - macro, - applyMacros', Parser, ParserT, F, @@ -130,6 +128,7 @@ module Text.Pandoc.Parsing ( anyLine, runParser, runParserT, parse, + tokenPrim, anyToken, getInput, setInput, @@ -178,13 +177,16 @@ module Text.Pandoc.Parsing ( anyLine, sourceLine, setSourceColumn, setSourceLine, - newPos + newPos, + Line, + Column ) where +import Data.Text (Text) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..), trimInlines) +import Text.Pandoc.Builder (Blocks, Inlines, HasMeta(..), trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.XML (fromEntities) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) @@ -195,7 +197,7 @@ import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, import Data.List ( intercalate, transpose, isSuffixOf ) import Text.Pandoc.Shared import qualified Data.Map as M -import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro, pMacroDefinition) +import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.HTML.TagSoup.Entity ( lookupEntity ) import Text.Pandoc.Asciify (toAsciiChar) import Data.Monoid ((<>)) @@ -994,7 +996,7 @@ data ParserState = ParserState stateIdentifiers :: Set.Set String, -- ^ Header identifiers used stateNextExample :: Int, -- ^ Number of next example stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers - stateMacros :: [Macro], -- ^ List of macros defined so far + stateMacros :: M.Map Text Macro, -- ^ Table of macros defined so far stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles -- Triple represents: 1) Base role, 2) Optional format (only for :raw: @@ -1057,8 +1059,8 @@ instance HasIdentifierList ParserState where updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st } class HasMacros st where - extractMacros :: st -> [Macro] - updateMacros :: ([Macro] -> [Macro]) -> st -> st + extractMacros :: st -> M.Map Text Macro + updateMacros :: (M.Map Text Macro -> M.Map Text Macro) -> st -> st instance HasMacros ParserState where extractMacros = stateMacros @@ -1112,7 +1114,7 @@ defaultParserState = stateIdentifiers = Set.empty, stateNextExample = 1, stateExamples = M.empty, - stateMacros = [], + stateMacros = M.empty, stateRstDefaultRole = "title-reference", stateRstCustomRoles = M.empty, stateCaption = Nothing, @@ -1341,33 +1343,6 @@ token :: (Stream s m t) -> ParsecT s st m a token pp pos match = tokenPrim pp (\_ t _ -> pos t) match --- --- Macros --- - --- | Parse a \newcommand or \newenviroment macro definition. -macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st) - => ParserT [Char] st m Blocks -macro = do - apply <- getOption readerApplyMacros - (m, def') <- withRaw pMacroDefinition - if apply - then do - updateState $ \st -> updateMacros (m:) st - return mempty - else return $ rawBlock "latex" def' - --- | Apply current macros to string. -applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char) - => String - -> ParserT [Char] st m String -applyMacros' target = do - apply <- getOption readerApplyMacros - if apply - then do macros <- extractMacros <$> getState - return $ applyMacros macros target - else return target - infixr 5 <+?> (<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) @@ -1385,10 +1360,11 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs') insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st, Functor mf, Applicative mf, Monad mf) - => ParserT String st m (mf Blocks) + => ParserT [a] st m (mf Blocks) + -> (String -> [a]) -> [FilePath] -> FilePath - -> ParserT String st m (mf Blocks) -insertIncludedFile' blocks dirs f = do + -> ParserT [a] st m (mf Blocks) +insertIncludedFile' blocks totoks dirs f = do oldPos <- getPosition oldInput <- getInput containers <- getIncludeFiles <$> getState @@ -1402,7 +1378,7 @@ insertIncludedFile' blocks dirs f = do report $ CouldNotLoadIncludeFile f oldPos return "" setPosition $ newPos f 1 1 - setInput contents + setInput $ totoks contents bs <- blocks setInput oldInput setPosition oldPos @@ -1412,11 +1388,12 @@ insertIncludedFile' blocks dirs f = do -- | Parse content of include file as blocks. Circular includes result in an -- @PandocParseError@. insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) - => ParserT String st m Blocks + => ParserT [a] st m Blocks + -> (String -> [a]) -> [FilePath] -> FilePath - -> ParserT String st m Blocks -insertIncludedFile blocks dirs f = - runIdentity <$> insertIncludedFile' (Identity <$> blocks) dirs f + -> ParserT [a] st m Blocks +insertIncludedFile blocks totoks dirs f = + runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f -- | Parse content of include file as future blocks. Circular includes result in -- an @PandocParseError@. @@ -1424,4 +1401,4 @@ insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st) => ParserT String st m (Future st Blocks) -> [FilePath] -> FilePath -> ParserT String st m (Future st Blocks) -insertIncludedFileF = insertIncludedFile' +insertIncludedFileF p = insertIncludedFile' p id diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 090dc5fdb..d82e6a5dc 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2006-2017 John MacFarlane @@ -28,20 +31,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Portability : portable Conversion of LaTeX to 'Pandoc' document. + -} module Text.Pandoc.Readers.LaTeX ( readLaTeX, + applyMacros, rawLaTeXInline, rawLaTeXBlock, - inlineCommand, + macro, + inlineCommand ) where import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) -import Data.Char (chr, isAlphaNum, isLetter, ord) -import Data.Text (Text, unpack) +import Data.Char (chr, isAlphaNum, isLetter, ord, isDigit) +import Data.Default +import Data.Text (Text) +import qualified Data.Text as T import Data.List (intercalate, isPrefixOf) import qualified Data.Map as M +import qualified Data.Set as Set import Data.Maybe (fromMaybe, maybeToList) import Safe (minimumDef) import System.FilePath (addExtension, replaceExtension, takeExtension) @@ -52,10 +61,19 @@ import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (many, mathDisplay, mathInline, optional, - space, (<|>)) +import Text.Pandoc.Parsing hiding (many, optional, withRaw, + mathInline, mathDisplay, + space, (<|>), spaces, blankline) import Text.Pandoc.Shared +import Text.Pandoc.Readers.LaTeX.Types (Macro(..), Tok(..), + TokType(..), Line, Column) import Text.Pandoc.Walk +import Text.Pandoc.Error (PandocError(PandocParsecError, PandocMacroLoop)) + +-- for debugging: +-- import Text.Pandoc.Extensions (getDefaultExtensions) +-- import Text.Pandoc.Class (runIOorExplode, PandocIO) +-- import Debug.Trace (traceShowId) -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: PandocMonad m @@ -63,18 +81,18 @@ readLaTeX :: PandocMonad m -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readLaTeX opts ltx = do - parsed <- readWithM parseLaTeX def{ stateOptions = opts } - (unpack (crFilter ltx)) + parsed <- runParserT parseLaTeX def{ sOptions = opts } "source" + (tokenize (crFilter ltx)) case parsed of Right result -> return result - Left e -> throwError e + Left e -> throwError $ PandocParsecError (T.unpack ltx) e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do bs <- blocks eof st <- getState - let meta = stateMeta st + let meta = sMeta st let doc' = doc bs let headerLevel (Header n _ _) = [n] headerLevel _ = [] @@ -88,177 +106,476 @@ parseLaTeX = do else id) doc' return $ Pandoc meta bs' -type LP m = ParserT String ParserState m - -anyControlSeq :: PandocMonad m => LP m String -anyControlSeq = do - char '\\' - next <- option '\n' anyChar - case next of - '\n' -> return "" - c | isLetter c -> (c:) <$> (many letter <* optional sp) - | otherwise -> return [c] - -controlSeq :: PandocMonad m => String -> LP m String -controlSeq name = try $ do - char '\\' - case name of - "" -> mzero - [c] | not (isLetter c) -> string [c] - cs -> string cs <* notFollowedBy letter <* optional sp - return name - -dimenarg :: PandocMonad m => LP m String -dimenarg = try $ do - ch <- option "" $ string "=" - num <- many1 digit - dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"] - return $ ch ++ num ++ dim +-- testParser :: LP PandocIO a -> Text -> IO a +-- testParser p t = do +-- res <- runIOorExplode (runParserT p defaultLaTeXState{ +-- sOptions = def{ readerExtensions = +-- enableExtension Ext_raw_tex $ +-- getDefaultExtensions "latex" }} "source" (tokenize t)) +-- case res of +-- Left e -> error (show e) +-- Right r -> return r + +data LaTeXState = LaTeXState{ sOptions :: ReaderOptions + , sMeta :: Meta + , sQuoteContext :: QuoteContext + , sMacros :: M.Map Text Macro + , sContainers :: [String] + , sHeaders :: M.Map Inlines String + , sLogMessages :: [LogMessage] + , sIdentifiers :: Set.Set String + , sVerbatimMode :: Bool + , sCaption :: Maybe Inlines + , sInListItem :: Bool + , sInTableCell :: Bool + } + deriving Show + +defaultLaTeXState :: LaTeXState +defaultLaTeXState = LaTeXState{ sOptions = def + , sMeta = nullMeta + , sQuoteContext = NoQuote + , sMacros = M.empty + , sContainers = [] + , sHeaders = M.empty + , sLogMessages = [] + , sIdentifiers = Set.empty + , sVerbatimMode = False + , sCaption = Nothing + , sInListItem = False + , sInTableCell = False + } + +instance PandocMonad m => HasQuoteContext LaTeXState m where + getQuoteContext = sQuoteContext <$> getState + withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = sQuoteContext oldState + setState oldState { sQuoteContext = context } + result <- parser + newState <- getState + setState newState { sQuoteContext = oldQuoteContext } + return result + +instance HasLogMessages LaTeXState where + addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st } + getLogMessages st = reverse $ sLogMessages st + +instance HasIdentifierList LaTeXState where + extractIdentifierList = sIdentifiers + updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st } + +instance HasIncludeFiles LaTeXState where + getIncludeFiles = sContainers + addIncludeFile f s = s{ sContainers = f : sContainers s } + dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s } + +instance HasHeaderMap LaTeXState where + extractHeaderMap = sHeaders + updateHeaderMap f st = st{ sHeaders = f $ sHeaders st } + +instance HasMacros LaTeXState where + extractMacros st = sMacros st + updateMacros f st = st{ sMacros = f (sMacros st) } + +instance HasReaderOptions LaTeXState where + extractReaderOptions = sOptions + +instance HasMeta LaTeXState where + setMeta field val st = + st{ sMeta = setMeta field val $ sMeta st } + deleteMeta field st = + st{ sMeta = deleteMeta field $ sMeta st } + +instance Default LaTeXState where + def = defaultLaTeXState + +type LP m = ParserT [Tok] LaTeXState m + +withVerbatimMode :: PandocMonad m => LP m a -> LP m a +withVerbatimMode parser = do + updateState $ \st -> st{ sVerbatimMode = True } + result <- parser + updateState $ \st -> st{ sVerbatimMode = False } + return result + +rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m String +rawLaTeXBlock = do + lookAhead (try (char '\\' >> letter)) + inp <- getInput + let toks = tokenize $ T.pack inp + let rawblock = do + (_, raw) <- try $ + withRaw (environment <|> macroDef <|> blockCommand) + return raw + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate } + res <- runParserT rawblock lstate "source" toks + case res of + Left _ -> mzero + Right raw -> count (T.length (untokenize raw)) anyChar + +macro :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m Blocks +macro = do + guardEnabled Ext_latex_macros + lookAhead (char '\\' *> oneOfStrings ["new", "renew", "provide"] *> + oneOfStrings ["command", "environment"]) + inp <- getInput + let toks = tokenize $ T.pack inp + let rawblock = do + (_, raw) <- withRaw $ try macroDef + st <- getState + return (raw, st) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT rawblock lstate "source" toks + case res of + Left _ -> mzero + Right (raw, st) -> do + updateState (updateMacros (const $ sMacros st)) + mempty <$ count (T.length (untokenize raw)) anyChar + +applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => String -> ParserT String s m String +applyMacros s = do + (guardEnabled Ext_latex_macros >> + do let retokenize = doMacros 0 *> (toksToString <$> getInput) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT retokenize lstate "math" (tokenize (T.pack s)) + case res of + Left e -> fail (show e) + Right s' -> return s') <|> return s + +rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m String +rawLaTeXInline = do + lookAhead (try (char '\\' >> letter) <|> char '$') + inp <- getInput + let toks = tokenize $ T.pack inp + let rawinline = do + (_, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand') + st <- getState + return (raw, st) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT rawinline lstate "source" toks + case res of + Left _ -> mzero + Right (raw, s) -> do + updateState $ updateMacros (const $ sMacros s) + count (T.length (untokenize raw)) anyChar + +inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines +inlineCommand = do + lookAhead (try (char '\\' >> letter) <|> char '$') + inp <- getInput + let toks = tokenize $ T.pack inp + let rawinline = do + (il, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand') + st <- getState + return (il, raw, st) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT rawinline lstate "source" toks + case res of + Left _ -> mzero + Right (il, raw, s) -> do + updateState $ updateMacros (const $ sMacros s) + count (T.length (untokenize raw)) anyChar + return il + +tokenize :: Text -> [Tok] +tokenize = totoks (1, 1) + +totoks :: (Line, Column) -> Text -> [Tok] +totoks (lin,col) t = + case T.uncons t of + Nothing -> [] + Just (c, rest) + | c == '\n' -> + Tok (lin, col) Newline "\n" + : totoks (lin + 1,1) rest + | isSpaceOrTab c -> + let (sps, rest') = T.span isSpaceOrTab t + in Tok (lin, col) Spaces sps + : totoks (lin, col + T.length sps) rest' + | isAlphaNum c -> + let (ws, rest') = T.span isAlphaNum t + in Tok (lin, col) Word ws + : totoks (lin, col + T.length ws) rest' + | c == '%' -> + let (cs, rest') = T.break (== '\n') rest + in Tok (lin, col) Comment ("%" <> cs) + : totoks (lin, col + 1 + T.length cs) rest' + | c == '\\' -> + case T.uncons rest of + Nothing -> [Tok (lin, col) Symbol (T.singleton c)] + Just (d, rest') + | isLetter d -> + let (ws, rest'') = T.span isLetter rest + (ss, rest''') = T.span isSpaceOrTab rest'' + in Tok (lin, col) (CtrlSeq ws) ("\\" <> ws <> ss) + : totoks (lin, + col + 1 + T.length ws + T.length ss) rest''' + | d == '\t' || d == '\n' -> + Tok (lin, col) Symbol ("\\") + : totoks (lin, col + 1) rest + | otherwise -> + Tok (lin, col) (CtrlSeq (T.singleton d)) (T.pack [c,d]) + : totoks (lin, col + 2) rest' + | c == '#' -> + let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest + in case safeRead (T.unpack t1) of + Just i -> + Tok (lin, col) (Arg i) ("#" <> t1) + : totoks (lin, col + 1 + T.length t1) t2 + Nothing -> + Tok (lin, col) Symbol ("#") + : totoks (lin, col + 1) t2 + | c == '^' -> + case T.uncons rest of + Just ('^', rest') -> + case T.uncons rest' of + Just (d, rest'') + | isLowerHex d -> + case T.uncons rest'' of + Just (e, rest''') | isLowerHex e -> + Tok (lin, col) Esc2 (T.pack ['^','^',d,e]) + : totoks (lin, col + 4) rest''' + _ -> + Tok (lin, col) Esc1 (T.pack ['^','^',d]) + : totoks (lin, col + 3) rest'' + | d < '\128' -> + Tok (lin, col) Esc1 (T.pack ['^','^',d]) + : totoks (lin, col + 3) rest'' + _ -> [Tok (lin, col) Symbol ("^"), + Tok (lin, col + 1) Symbol ("^")] + _ -> Tok (lin, col) Symbol ("^") + : totoks (lin, col + 1) rest + | otherwise -> + Tok (lin, col) Symbol (T.singleton c) : totoks (lin, col + 1) rest + + where isSpaceOrTab ' ' = True + isSpaceOrTab '\t' = True + isSpaceOrTab _ = False + +isLowerHex :: Char -> Bool +isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' + +untokenize :: [Tok] -> Text +untokenize = mconcat . map untoken + +untoken :: Tok -> Text +untoken (Tok _ _ t) = t + +satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok +satisfyTok f = + try $ do + res <- tokenPrim (T.unpack . untoken) updatePos matcher + doMacros 0 -- apply macros on remaining input stream + return res + where matcher t | f t = Just t + | otherwise = Nothing + updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos + updatePos spos _ (Tok (lin,col) _ _ : _) = + setSourceColumn (setSourceLine spos lin) col + updatePos spos _ [] = spos + +doMacros :: PandocMonad m => Int -> LP m () +doMacros n = do + verbatimMode <- sVerbatimMode <$> getState + when (not verbatimMode) $ do + inp <- getInput + case inp of + Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos name ts + Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos ("end" <> name) ts + Tok spos (CtrlSeq name) _ : ts + -> handleMacros spos name ts + _ -> return () + where handleMacros spos name ts = do + macros <- sMacros <$> getState + case M.lookup name macros of + Nothing -> return () + Just (Macro numargs optarg newtoks) -> do + setInput ts + let getarg = spaces >> braced + args <- case optarg of + Nothing -> count numargs getarg + Just o -> + (:) <$> option o bracketedToks + <*> count (numargs - 1) getarg + let addTok (Tok _ (Arg i) _) acc | i > 0 + , i <= numargs = + map (setpos spos) (args !! (i - 1)) ++ acc + addTok t acc = setpos spos t : acc + ts' <- getInput + setInput $ foldr addTok ts' newtoks + if n > 20 -- detect macro expansion loops + then throwError $ PandocMacroLoop (T.unpack name) + else doMacros (n + 1) + +setpos :: (Line, Column) -> Tok -> Tok +setpos spos (Tok _ tt txt) = Tok spos tt txt + +anyControlSeq :: PandocMonad m => LP m Tok +anyControlSeq = satisfyTok isCtrlSeq + where isCtrlSeq (Tok _ (CtrlSeq _) _) = True + isCtrlSeq _ = False + +anySymbol :: PandocMonad m => LP m Tok +anySymbol = satisfyTok isSym + where isSym (Tok _ Symbol _) = True + isSym _ = False + +spaces :: PandocMonad m => LP m () +spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) + +spaces1 :: PandocMonad m => LP m () +spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) + +tokTypeIn :: [TokType] -> Tok -> Bool +tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes + +controlSeq :: PandocMonad m => Text -> LP m Tok +controlSeq name = satisfyTok isNamed + where isNamed (Tok _ (CtrlSeq n) _) = n == name + isNamed _ = False + +symbol :: PandocMonad m => Char -> LP m Tok +symbol c = satisfyTok isc + where isc (Tok _ Symbol d) = case T.uncons d of + Just (c',_) -> c == c' + _ -> False + isc _ = False + +symbolIn :: PandocMonad m => [Char] -> LP m Tok +symbolIn cs = satisfyTok isInCs + where isInCs (Tok _ Symbol d) = case T.uncons d of + Just (c,_) -> c `elem` cs + _ -> False + isInCs _ = False sp :: PandocMonad m => LP m () sp = whitespace <|> endline whitespace :: PandocMonad m => LP m () -whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') +whitespace = () <$ satisfyTok isSpaceTok + where isSpaceTok (Tok _ Spaces _) = True + isSpaceTok _ = False -endline :: PandocMonad m => LP m () -endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline) - -isLowerHex :: Char -> Bool -isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' +newlineTok :: PandocMonad m => LP m () +newlineTok = () <$ satisfyTok isNewlineTok -tildeEscape :: PandocMonad m => LP m Char -tildeEscape = try $ do - string "^^" - c <- satisfy (\x -> x >= '\0' && x <= '\128') - d <- if isLowerHex c - then option "" $ count 1 (satisfy isLowerHex) - else return "" - if null d - then case ord c of - x | x >= 64 && x <= 127 -> return $ chr (x - 64) - | otherwise -> return $ chr (x + 64) - else return $ chr $ read ('0':'x':c:d) +isNewlineTok :: Tok -> Bool +isNewlineTok (Tok _ Newline _) = True +isNewlineTok _ = False comment :: PandocMonad m => LP m () -comment = do - char '%' - skipMany (satisfy (/='\n')) - optional newline - return () +comment = () <$ satisfyTok isCommentTok + where isCommentTok (Tok _ Comment _) = True + isCommentTok _ = False + +anyTok :: PandocMonad m => LP m Tok +anyTok = satisfyTok (const True) -bgroup :: PandocMonad m => LP m () +endline :: PandocMonad m => LP m () +endline = try $ do + newlineTok + lookAhead anyTok + notFollowedBy blankline + +blankline :: PandocMonad m => LP m () +blankline = try $ skipMany whitespace *> newlineTok + +primEscape :: PandocMonad m => LP m Char +primEscape = do + Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2]) + case toktype of + Esc1 -> case T.uncons (T.drop 2 t) of + Just (c, _) + | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) + | otherwise -> return (chr (ord c + 64)) + Nothing -> fail "Empty content of Esc1" + Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of + Just x -> return (chr x) + Nothing -> fail $ "Could not read: " ++ T.unpack t + _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen + +bgroup :: PandocMonad m => LP m Tok bgroup = try $ do - skipMany (spaceChar <|> try (newline <* notFollowedBy blankline)) - () <$ char '{' - <|> () <$ controlSeq "bgroup" - <|> () <$ controlSeq "begingroup" + skipMany sp + symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" -egroup :: PandocMonad m => LP m () -egroup = () <$ char '}' - <|> () <$ controlSeq "egroup" - <|> () <$ controlSeq "endgroup" +egroup :: PandocMonad m => LP m Tok +egroup = (symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup") -grouped :: PandocMonad m => Monoid a => LP m a -> LP m a +grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a grouped parser = try $ do bgroup -- first we check for an inner 'grouped', because -- {{a,b}} should be parsed the same as {a,b} - try (grouped parser <* egroup) - <|> (mconcat <$> manyTill parser egroup) - -braced :: PandocMonad m => LP m String -braced = grouped chunk - where chunk = - many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{')) - <|> try (string "\\}") - <|> try (string "\\{") - <|> try (string "\\\\") - <|> ((\x -> "{" ++ x ++ "}") <$> braced) - <|> count 1 anyChar + try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup) + +braced :: PandocMonad m => LP m [Tok] +braced = bgroup *> braced' 1 + where braced' (n :: Int) = + handleEgroup n <|> handleBgroup n <|> handleOther n + handleEgroup n = do + t <- egroup + if n == 1 + then return [] + else (t:) <$> braced' (n - 1) + handleBgroup n = do + t <- bgroup + (t:) <$> braced' (n + 1) + handleOther n = do + t <- anyTok + (t:) <$> braced' n bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a -bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']')) - -mathDisplay :: PandocMonad m => LP m String -> LP m Inlines -mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim) - -mathInline :: PandocMonad m => LP m String -> LP m Inlines -mathInline p = math <$> (try p >>= applyMacros') - -mathChars :: PandocMonad m => LP m String -mathChars = - concat <$> many (escapedChar - <|> (snd <$> withRaw braced) - <|> many1 (satisfy isOrdChar)) - where escapedChar = try $ do char '\\' - c <- anyChar - return ['\\',c] - isOrdChar '$' = False - isOrdChar '{' = False - isOrdChar '}' = False - isOrdChar '\\' = False - isOrdChar _ = True - -quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines -quoted' f starter ender = do - startchs <- starter - smart <- extensionEnabled Ext_smart <$> getOption readerExtensions - if smart - then do - ils <- many (notFollowedBy ender >> inline) - (ender >> return (f (mconcat ils))) <|> - (<> mconcat ils) <$> - lit (case startchs of - "``" -> "“" - "`" -> "‘" - _ -> startchs) - else lit startchs +bracketed parser = try $ do + symbol '[' + mconcat <$> manyTill parser (symbol ']') -doubleQuote :: PandocMonad m => LP m Inlines -doubleQuote = do - quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") - <|> quoted' doubleQuoted (string "“") (void $ char '”') - -- the following is used by babel for localized quotes: - <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") - <|> quoted' doubleQuoted (string "\"") (void $ char '"') +dimenarg :: PandocMonad m => LP m Text +dimenarg = try $ do + ch <- option False $ True <$ symbol '=' + Tok _ _ s <- satisfyTok isWordTok + guard $ (T.take 2 (T.reverse s)) `elem` + ["pt","pc","in","bp","cm","mm","dd","cc","sp"] + let num = T.take (T.length s - 2) s + guard $ T.length num > 0 + guard $ T.all isDigit num + return $ T.pack ['=' | ch] <> s -singleQuote :: PandocMonad m => LP m Inlines -singleQuote = do - smart <- extensionEnabled Ext_smart <$> getOption readerExtensions - if smart - then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) - <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) - else str <$> many1 (oneOf "`\'‘’") +-- inline elements: -inline :: PandocMonad m => LP m Inlines -inline = (mempty <$ comment) - <|> (space <$ whitespace) - <|> (softbreak <$ endline) - <|> inlineText - <|> inlineCommand - <|> inlineEnvironment - <|> inlineGroup - <|> (char '-' *> option (str "-") - (char '-' *> option (str "–") (str "—" <$ char '-'))) - <|> doubleQuote - <|> singleQuote - <|> (str "”" <$ try (string "''")) - <|> (str "”" <$ char '”') - <|> (str "’" <$ char '\'') - <|> (str "’" <$ char '’') - <|> (str "\160" <$ char '~') - <|> mathDisplay (string "$$" *> mathChars <* string "$$") - <|> mathInline (char '$' *> mathChars <* char '$') - <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) - <|> (str . (:[]) <$> tildeEscape) - <|> (do res <- oneOf "#&~^'`\"[]" - pos <- getPosition - report $ ParsingUnescaped [res] pos - return $ str [res]) +word :: PandocMonad m => LP m Inlines +word = (str . T.unpack . untoken) <$> satisfyTok isWordTok -inlines :: PandocMonad m => LP m Inlines -inlines = mconcat <$> many (notFollowedBy (char '}') *> inline) +regularSymbol :: PandocMonad m => LP m Inlines +regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol + where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t + isRegularSymbol _ = False + isSpecial c = c `Set.member` specialChars + +specialChars :: Set.Set Char +specialChars = Set.fromList "#$%&~_^\\{}" + +isWordTok :: Tok -> Bool +isWordTok (Tok _ Word _) = True +isWordTok _ = False inlineGroup :: PandocMonad m => LP m Inlines inlineGroup = do @@ -269,233 +586,564 @@ inlineGroup = do -- we need the span so we can detitlecase bibtex entries; -- we need to know when something is {C}apitalized -block :: PandocMonad m => LP m Blocks -block = (mempty <$ comment) - <|> (mempty <$ ((spaceChar <|> newline) *> spaces)) - <|> environment - <|> include - <|> macro - <|> blockCommand - <|> paragraph - <|> grouped block - -blocks :: PandocMonad m => LP m Blocks -blocks = mconcat <$> many block +doLHSverb :: PandocMonad m => LP m Inlines +doLHSverb = + (codeWith ("",["haskell"],[]) . T.unpack . untokenize) + <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|') -getRawCommand :: PandocMonad m => String -> LP m String -getRawCommand name' = do - rawargs <- withRaw (many (try (optional sp *> opt)) *> - option "" (try (optional sp *> dimenarg)) *> - many braced) - return $ '\\' : name' ++ snd rawargs +mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines +mkImage options src = do + let replaceTextwidth (k,v) = + case numUnit v of + Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + _ -> (k, v) + let kvs = map replaceTextwidth + $ filter (\(k,_) -> k `elem` ["width", "height"]) options + let attr = ("",[], kvs) + let alt = str "image" + case takeExtension src of + "" -> do + defaultExt <- getOption readerDefaultImageExtension + return $ imageWith attr (addExtension src defaultExt) "" alt + _ -> return $ imageWith attr src "" alt -lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v -lookupListDefault d = (fromMaybe d .) . lookupList - where - lookupList l m = msum $ map (`M.lookup` m) l +-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" +dosiunitx :: PandocMonad m => LP m Inlines +dosiunitx = do + skipopts + value <- tok + valueprefix <- option "" $ bracketed tok + unit <- tok + let emptyOr160 "" = "" + emptyOr160 _ = "\160" + return . mconcat $ [valueprefix, + emptyOr160 valueprefix, + value, + emptyOr160 unit, + unit] -blockCommand :: PandocMonad m => LP m Blocks -blockCommand = try $ do - name <- anyControlSeq - guard $ name /= "begin" && name /= "end" - star <- option "" (string "*" <* optional sp) - let name' = name ++ star - let raw = do - rawcommand <- getRawCommand name' - transformed <- applyMacros' rawcommand - guard $ transformed /= rawcommand - notFollowedBy $ parseFromString' inlines transformed - parseFromString' blocks transformed - lookupListDefault raw [name',name] blockCommands +lit :: String -> LP m Inlines +lit = pure . str -inBrackets :: Inlines -> Inlines -inBrackets x = str "[" <> x <> str "]" +removeDoubleQuotes :: Text -> Text +removeDoubleQuotes t = + maybe t id $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" --- eat an optional argument and one or more arguments in braces -ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines) -ignoreInlines name = (name, p) - where - p = do oa <- optargs - let rawCommand = '\\':name ++ oa - let doraw = guardRaw >> return (rawInline "latex" rawCommand) - doraw <|> ignore rawCommand +doubleQuote :: PandocMonad m => LP m Inlines +doubleQuote = do + quoted' doubleQuoted (try $ count 2 $ symbol '`') + (void $ try $ count 2 $ symbol '\'') + <|> quoted' doubleQuoted ((:[]) <$> symbol '“') (void $ symbol '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ sequence [symbol '"', symbol '`']) + (void $ try $ sequence [symbol '"', symbol '\'']) + <|> quoted' doubleQuoted ((:[]) <$> symbol '"') + (void $ symbol '"') -guardRaw :: PandocMonad m => LP m () -guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex +singleQuote :: PandocMonad m => LP m Inlines +singleQuote = do + quoted' singleQuoted ((:[]) <$> symbol '`') + (try $ symbol '\'' >> + notFollowedBy (satisfyTok startsWithLetter)) + <|> quoted' singleQuoted ((:[]) <$> symbol '‘') + (try $ symbol '’' >> + notFollowedBy (satisfyTok startsWithLetter)) + where startsWithLetter (Tok _ Word t) = + case T.uncons t of + Just (c, _) | isLetter c -> True + _ -> False + startsWithLetter _ = False + +quoted' :: PandocMonad m + => (Inlines -> Inlines) + -> LP m [Tok] + -> LP m () + -> LP m Inlines +quoted' f starter ender = do + startchs <- (T.unpack . untokenize) <$> starter + smart <- extensionEnabled Ext_smart <$> getOption readerExtensions + if smart + then do + ils <- many (notFollowedBy ender >> inline) + (ender >> return (f (mconcat ils))) <|> + (<> mconcat ils) <$> + lit (case startchs of + "``" -> "“" + "`" -> "‘" + cs -> cs) + else lit startchs -optargs :: PandocMonad m => LP m String -optargs = snd <$> withRaw (skipopts *> skipMany (try $ optional sp *> braced)) +enquote :: PandocMonad m => LP m Inlines +enquote = do + skipopts + quoteContext <- sQuoteContext <$> getState + if quoteContext == InDoubleQuote + then singleQuoted <$> withQuoteContext InSingleQuote tok + else doubleQuoted <$> withQuoteContext InDoubleQuote tok -ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a -ignore raw = do - pos <- getPosition - report $ SkippedContent raw pos - return mempty +doverb :: PandocMonad m => LP m Inlines +doverb = do + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + withVerbatimMode $ + (code . T.unpack . untokenize) <$> + manyTill (verbTok marker) (symbol marker) + +verbTok :: PandocMonad m => Char -> LP m Tok +verbTok stopchar = do + t@(Tok (lin, col) toktype txt) <- satisfyTok (not . isNewlineTok) + case T.findIndex (== stopchar) txt of + Nothing -> return t + Just i -> do + let (t1, t2) = T.splitAt i txt + inp <- getInput + setInput $ Tok (lin, col + i) Symbol (T.singleton stopchar) + : (totoks (lin, col + i + 1) (T.drop 1 t2)) ++ inp + return $ Tok (lin, col) toktype t1 -ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks) -ignoreBlocks name = (name, p) - where - p = do oa <- optargs - let rawCommand = '\\':name ++ oa - let doraw = guardRaw >> return (rawBlock "latex" rawCommand) - doraw <|> ignore rawCommand +dolstinline :: PandocMonad m => LP m Inlines +dolstinline = do + options <- option [] keyvals + let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + let stopchar = if marker == '{' then '}' else marker + withVerbatimMode $ + (codeWith ("",classes,[]) . T.unpack . untokenize) <$> + manyTill (verbTok stopchar) (symbol stopchar) -blockCommands :: PandocMonad m => M.Map String (LP m Blocks) -blockCommands = M.fromList $ - [ ("par", mempty <$ skipopts) - , ("parbox", braced >> grouped blocks) - , ("title", mempty <$ (skipopts *> - (grouped inline >>= addMeta "title") - <|> (grouped block >>= addMeta "title"))) - , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) - , ("author", mempty <$ (skipopts *> authors)) - -- -- in letter class, temp. store address & sig as title, author - , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) - , ("signature", mempty <$ (skipopts *> authors)) - , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) - -- Koma-script metadata commands - , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication")) - -- sectioning - , ("part", section nullAttr (-1)) - , ("part*", section nullAttr (-1)) - , ("chapter", section nullAttr 0) - , ("chapter*", section ("",["unnumbered"],[]) 0) - , ("section", section nullAttr 1) - , ("section*", section ("",["unnumbered"],[]) 1) - , ("subsection", section nullAttr 2) - , ("subsection*", section ("",["unnumbered"],[]) 2) - , ("subsubsection", section nullAttr 3) - , ("subsubsection*", section ("",["unnumbered"],[]) 3) - , ("paragraph", section nullAttr 4) - , ("paragraph*", section ("",["unnumbered"],[]) 4) - , ("subparagraph", section nullAttr 5) - , ("subparagraph*", section ("",["unnumbered"],[]) 5) - -- beamer slides - , ("frametitle", section nullAttr 3) - , ("framesubtitle", section nullAttr 4) - -- letters - , ("opening", (para . trimInlines) <$> (skipopts *> tok)) - , ("closing", skipopts *> closing) - -- - , ("hrule", pure horizontalRule) - , ("strut", pure mempty) - , ("rule", skipopts *> tok *> tok *> pure horizontalRule) - , ("item", skipopts *> looseItem) - , ("documentclass", skipopts *> braced *> preamble) - , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) - , ("caption", skipopts *> setCaption) - , ("bibliography", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs)) - , ("addbibresource", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs)) - -- includes - , ("lstinputlisting", inputListing) - , ("graphicspath", graphicsPath) - -- hyperlink - , ("hypertarget", braced >> grouped block) - ] ++ map ignoreBlocks - -- these commands will be ignored unless --parse-raw is specified, - -- in which case they will appear as raw latex blocks - [ "newcommand", "renewcommand", "newenvironment", "renewenvironment" - -- newcommand, etc. should be parsed by macro, but we need this - -- here so these aren't parsed as inline commands to ignore - , "special", "pdfannot", "pdfstringdef" - , "bibliographystyle" - , "maketitle", "makeindex", "makeglossary" - , "addcontentsline", "addtocontents", "addtocounter" - -- \ignore{} is used conventionally in literate haskell for definitions - -- that are to be processed by the compiler but not printed. - , "ignore" - , "hyperdef" - , "markboth", "markright", "markleft" - , "hspace", "vspace" - , "newpage" - , "clearpage" - , "pagebreak" - ] +keyval :: PandocMonad m => LP m (String, String) +keyval = try $ do + Tok _ Word key <- satisfyTok isWordTok + let isSpecSym (Tok _ Symbol t) = t `elem` [".",":","-","|","\\"] + isSpecSym _ = False + val <- option [] $ do + symbol '=' + braced <|> (many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym + <|> anyControlSeq)) + optional sp + optional (symbol ',') + optional sp + return (T.unpack key, T.unpack . untokenize $ val) -graphicsPath :: PandocMonad m => LP m Blocks -graphicsPath = do - ps <- bgroup *> (manyTill braced egroup) - getResourcePath >>= setResourcePath . (++ ps) - return mempty +keyvals :: PandocMonad m => LP m [(String, String)] +keyvals = try $ symbol '[' >> manyTill keyval (symbol ']') -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () -addMeta field val = updateState $ \st -> - st{ stateMeta = addMetaField field val $ stateMeta st } +accent :: (Char -> String) -> Inlines -> LP m Inlines +accent f ils = + case toList ils of + (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) + [] -> mzero + _ -> return ils -splitBibs :: String -> [Inlines] -splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') +grave :: Char -> String +grave 'A' = "À" +grave 'E' = "È" +grave 'I' = "Ì" +grave 'O' = "Ò" +grave 'U' = "Ù" +grave 'a' = "à" +grave 'e' = "è" +grave 'i' = "ì" +grave 'o' = "ò" +grave 'u' = "ù" +grave c = [c] -setCaption :: PandocMonad m => LP m Blocks -setCaption = do - ils <- tok - mblabel <- option Nothing $ - try $ spaces' >> controlSeq "label" >> (Just <$> tok) - let ils' = case mblabel of - Just lab -> ils <> spanWith - ("",[],[("data-label", stringify lab)]) mempty - Nothing -> ils - updateState $ \st -> st{ stateCaption = Just ils' } - return mempty +acute :: Char -> String +acute 'A' = "Á" +acute 'E' = "É" +acute 'I' = "Í" +acute 'O' = "Ó" +acute 'U' = "Ú" +acute 'Y' = "Ý" +acute 'a' = "á" +acute 'e' = "é" +acute 'i' = "í" +acute 'o' = "ó" +acute 'u' = "ú" +acute 'y' = "ý" +acute 'C' = "Ć" +acute 'c' = "ć" +acute 'L' = "Ĺ" +acute 'l' = "ĺ" +acute 'N' = "Ń" +acute 'n' = "ń" +acute 'R' = "Ŕ" +acute 'r' = "ŕ" +acute 'S' = "Ś" +acute 's' = "ś" +acute 'Z' = "Ź" +acute 'z' = "ź" +acute c = [c] -resetCaption :: PandocMonad m => LP m () -resetCaption = updateState $ \st -> st{ stateCaption = Nothing } +circ :: Char -> String +circ 'A' = "Â" +circ 'E' = "Ê" +circ 'I' = "Î" +circ 'O' = "Ô" +circ 'U' = "Û" +circ 'a' = "â" +circ 'e' = "ê" +circ 'i' = "î" +circ 'o' = "ô" +circ 'u' = "û" +circ 'C' = "Ĉ" +circ 'c' = "ĉ" +circ 'G' = "Ĝ" +circ 'g' = "ĝ" +circ 'H' = "Ĥ" +circ 'h' = "ĥ" +circ 'J' = "Ĵ" +circ 'j' = "ĵ" +circ 'S' = "Ŝ" +circ 's' = "ŝ" +circ 'W' = "Ŵ" +circ 'w' = "ŵ" +circ 'Y' = "Ŷ" +circ 'y' = "ŷ" +circ c = [c] -authors :: PandocMonad m => LP m () -authors = try $ do - bgroup - let oneAuthor = mconcat <$> - many1 (notFollowedBy' (controlSeq "and") >> - (inline <|> mempty <$ blockCommand)) - -- skip e.g. \vspace{10pt} - auths <- sepBy oneAuthor (controlSeq "and") - egroup - addMeta "author" (map trimInlines auths) +tilde :: Char -> String +tilde 'A' = "Ã" +tilde 'a' = "ã" +tilde 'O' = "Õ" +tilde 'o' = "õ" +tilde 'I' = "Ĩ" +tilde 'i' = "ĩ" +tilde 'U' = "Ũ" +tilde 'u' = "ũ" +tilde 'N' = "Ñ" +tilde 'n' = "ñ" +tilde c = [c] -section :: PandocMonad m => Attr -> Int -> LP m Blocks -section (ident, classes, kvs) lvl = do - skipopts - contents <- grouped inline - lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced) - attr' <- registerHeader (lab, classes, kvs) contents - return $ headerWith attr' lvl contents +umlaut :: Char -> String +umlaut 'A' = "Ä" +umlaut 'E' = "Ë" +umlaut 'I' = "Ï" +umlaut 'O' = "Ö" +umlaut 'U' = "Ü" +umlaut 'a' = "ä" +umlaut 'e' = "ë" +umlaut 'i' = "ï" +umlaut 'o' = "ö" +umlaut 'u' = "ü" +umlaut c = [c] -inlineCommand :: PandocMonad m => LP m Inlines -inlineCommand = try $ do - (name, raw') <- withRaw anyControlSeq - guard $ name /= "begin" && name /= "end" - star <- option "" (string "*") - let name' = name ++ star +hungarumlaut :: Char -> String +hungarumlaut 'A' = "A̋" +hungarumlaut 'E' = "E̋" +hungarumlaut 'I' = "I̋" +hungarumlaut 'O' = "Ő" +hungarumlaut 'U' = "Ű" +hungarumlaut 'Y' = "ӳ" +hungarumlaut 'a' = "a̋" +hungarumlaut 'e' = "e̋" +hungarumlaut 'i' = "i̋" +hungarumlaut 'o' = "ő" +hungarumlaut 'u' = "ű" +hungarumlaut 'y' = "ӳ" +hungarumlaut c = [c] + +dot :: Char -> String +dot 'C' = "Ċ" +dot 'c' = "ċ" +dot 'E' = "Ė" +dot 'e' = "ė" +dot 'G' = "Ġ" +dot 'g' = "ġ" +dot 'I' = "İ" +dot 'Z' = "Ż" +dot 'z' = "ż" +dot c = [c] + +macron :: Char -> String +macron 'A' = "Ā" +macron 'E' = "Ē" +macron 'I' = "Ī" +macron 'O' = "Ō" +macron 'U' = "Ū" +macron 'a' = "ā" +macron 'e' = "ē" +macron 'i' = "ī" +macron 'o' = "ō" +macron 'u' = "ū" +macron c = [c] + +cedilla :: Char -> String +cedilla 'c' = "ç" +cedilla 'C' = "Ç" +cedilla 's' = "ş" +cedilla 'S' = "Ş" +cedilla 't' = "ţ" +cedilla 'T' = "Ţ" +cedilla 'e' = "ȩ" +cedilla 'E' = "Ȩ" +cedilla 'h' = "ḩ" +cedilla 'H' = "Ḩ" +cedilla 'o' = "o̧" +cedilla 'O' = "O̧" +cedilla c = [c] + +hacek :: Char -> String +hacek 'A' = "Ǎ" +hacek 'a' = "ǎ" +hacek 'C' = "Č" +hacek 'c' = "č" +hacek 'D' = "Ď" +hacek 'd' = "ď" +hacek 'E' = "Ě" +hacek 'e' = "ě" +hacek 'G' = "Ǧ" +hacek 'g' = "ǧ" +hacek 'H' = "Ȟ" +hacek 'h' = "ȟ" +hacek 'I' = "Ǐ" +hacek 'i' = "ǐ" +hacek 'j' = "ǰ" +hacek 'K' = "Ǩ" +hacek 'k' = "ǩ" +hacek 'L' = "Ľ" +hacek 'l' = "ľ" +hacek 'N' = "Ň" +hacek 'n' = "ň" +hacek 'O' = "Ǒ" +hacek 'o' = "ǒ" +hacek 'R' = "Ř" +hacek 'r' = "ř" +hacek 'S' = "Š" +hacek 's' = "š" +hacek 'T' = "Ť" +hacek 't' = "ť" +hacek 'U' = "Ǔ" +hacek 'u' = "ǔ" +hacek 'Z' = "Ž" +hacek 'z' = "ž" +hacek c = [c] + +breve :: Char -> String +breve 'A' = "Ă" +breve 'a' = "ă" +breve 'E' = "Ĕ" +breve 'e' = "ĕ" +breve 'G' = "Ğ" +breve 'g' = "ğ" +breve 'I' = "Ĭ" +breve 'i' = "ĭ" +breve 'O' = "Ŏ" +breve 'o' = "ŏ" +breve 'U' = "Ŭ" +breve 'u' = "ŭ" +breve c = [c] + +toksToString :: [Tok] -> String +toksToString = T.unpack . untokenize + +mathDisplay :: String -> Inlines +mathDisplay = displayMath . trim + +mathInline :: String -> Inlines +mathInline = math . trim + +dollarsMath :: PandocMonad m => LP m Inlines +dollarsMath = do + symbol '$' + display <- option False (True <$ symbol '$') + contents <- trim . toksToString <$> + many (notFollowedBy (symbol '$') >> anyTok) + if display + then do + mathDisplay contents <$ try (symbol '$' >> symbol '$') + <|> (guard (null contents) >> return (mathInline "")) + else mathInline contents <$ (symbol '$') + +-- citations + +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _ = [] + +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = + let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] +addSuffix _ _ = [] + +simpleCiteArgs :: PandocMonad m => LP m [Citation] +simpleCiteArgs = try $ do + first <- optionMaybe $ toList <$> opt + second <- optionMaybe $ toList <$> opt + keys <- try $ bgroup *> (manyTill citationLabel egroup) + let (pre, suf) = case (first , second ) of + (Just s , Nothing) -> (mempty, s ) + (Just s , Just t ) -> (s , t ) + _ -> (mempty, mempty) + conv k = Citation { citationId = k + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationHash = 0 + , citationNoteNum = 0 + } + return $ addPrefix pre $ addSuffix suf $ map conv keys + +citationLabel :: PandocMonad m => LP m String +citationLabel = do + optional sp + toksToString <$> + (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) + <* optional sp + <* optional (symbol ',') + <* optional sp) + where bibtexKeyChar = ".:;?!`'()/*@_+=-[]" :: [Char] + +cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] +cites mode multi = try $ do + cits <- if multi + then many1 simpleCiteArgs + else count 1 simpleCiteArgs + let cs = concat cits + return $ case mode of + AuthorInText -> case cs of + (c:rest) -> c {citationMode = mode} : rest + [] -> [] + _ -> map (\a -> a {citationMode = mode}) cs + +citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines +citation name mode multi = do + (c,raw) <- withRaw $ cites mode multi + return $ cite c (rawInline "latex" $ "\\" ++ name ++ (toksToString raw)) + +handleCitationPart :: Inlines -> [Citation] +handleCitationPart ils = + let isCite Cite{} = True + isCite _ = False + (pref, rest) = break isCite (toList ils) + in case rest of + (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs + _ -> [] + +complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines +complexNatbibCitation mode = try $ do + (cs, raw) <- + withRaw $ concat <$> do + bgroup + items <- mconcat <$> + many1 (notFollowedBy (symbol ';') >> inline) + `sepBy1` (symbol ';') + egroup + return $ map handleCitationPart items + case cs of + [] -> mzero + (c:cits) -> return $ cite (c{ citationMode = mode }:cits) + (rawInline "latex" $ "\\citetext" ++ toksToString raw) + +inNote :: Inlines -> Inlines +inNote ils = + note $ para $ ils <> str "." + +inlineCommand' :: PandocMonad m => LP m Inlines +inlineCommand' = try $ do + Tok _ (CtrlSeq name) cmd <- anyControlSeq + guard $ name /= "begin" && name /= "end" + (star, rawstar) <- withRaw $ option "" ("*" <$ symbol '*' <* optional sp) + let name' = name <> star + let names = ordNub [name', name] -- check non-starred as fallback let raw = do - guard $ not (isBlockCommand name) - rawargs <- withRaw - (skipangles *> skipopts *> option "" dimenarg *> many braced) - let rawcommand = raw' ++ star ++ snd rawargs - transformed <- applyMacros' rawcommand - exts <- getOption readerExtensions - if transformed /= rawcommand - then parseFromString' inlines transformed - else if extensionEnabled Ext_raw_tex exts - then return $ rawInline "latex" rawcommand - else ignore rawcommand - (lookupListDefault raw [name',name] inlineCommands <* - optional (try (string "{}"))) - -rawInlineOr :: PandocMonad m => String -> LP m Inlines -> LP m Inlines -rawInlineOr name' fallback = do - parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions - if parseRaw - then rawInline "latex" <$> getRawCommand name' - else fallback + guard $ isInlineCommand name || not (isBlockCommand name) + (_, rawargs) <- withRaw + (skipangles *> skipopts *> option "" dimenarg *> many braced) + let rawcommand = T.unpack $ cmd <> untokenize (rawstar ++ rawargs) + (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand)) + <|> ignore rawcommand + lookupListDefault raw names inlineCommands + +tok :: PandocMonad m => LP m Inlines +tok = grouped inline <|> inlineCommand' <|> singleChar + where singleChar = try $ do + Tok (lin,col) toktype t <- satisfyTok (tokTypeIn [Word, Symbol]) + guard $ not $ toktype == Symbol && + T.any (`Set.member` specialChars) t + if T.length t > 1 + then do + let (t1, t2) = (T.take 1 t, T.drop 1 t) + inp <- getInput + setInput $ (Tok (lin, col + 1) toktype t2) : inp + return $ str (T.unpack t1) + else return $ str (T.unpack t) + +opt :: PandocMonad m => LP m Inlines +opt = bracketed inline + +rawopt :: PandocMonad m => LP m Text +rawopt = do + symbol '[' + inner <- untokenize <$> manyTill anyTok (symbol ']') + optional sp + return $ "[" <> inner <> "]" + +skipopts :: PandocMonad m => LP m () +skipopts = skipMany rawopt + +-- opts in angle brackets are used in beamer +rawangle :: PandocMonad m => LP m () +rawangle = try $ do + symbol '<' + () <$ manyTill anyTok (symbol '>') + +skipangles :: PandocMonad m => LP m () +skipangles = skipMany rawangle + +ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a +ignore raw = do + pos <- getPosition + report $ SkippedContent raw pos + return mempty + +withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok]) +withRaw parser = do + inp <- getInput + result <- parser + nxt <- option (Tok (0,0) Word "") (lookAhead anyTok) + let raw = takeWhile (/= nxt) inp + return (result, raw) + +inBrackets :: Inlines -> Inlines +inBrackets x = str "[" <> x <> str "]" + +unescapeURL :: String -> String +unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs + where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) +unescapeURL (x:xs) = x:unescapeURL xs +unescapeURL [] = "" + +mathEnvWith :: PandocMonad m + => (Inlines -> a) -> Maybe Text -> Text -> LP m a +mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name + where inner x = case innerEnv of + Nothing -> x + Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++ + "\\end{" ++ T.unpack y ++ "}" -isBlockCommand :: String -> Bool -isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks)) +mathEnv :: PandocMonad m => Text -> LP m String +mathEnv name = do + skipopts + optional blankline + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ T.unpack $ untokenize res +inlineEnvironment :: PandocMonad m => LP m Inlines +inlineEnvironment = try $ do + controlSeq "begin" + name <- untokenize <$> braced + M.findWithDefault mzero name inlineEnvironments -inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines) -inlineEnvironments = M.fromList - [ ("displaymath", mathEnvWith id Nothing "displaymath") +inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines) +inlineEnvironments = M.fromList [ + ("displaymath", mathEnvWith id Nothing "displaymath") , ("math", math <$> mathEnv "math") , ("equation", mathEnvWith id Nothing "equation") , ("equation*", mathEnvWith id Nothing "equation*") @@ -511,7 +1159,7 @@ inlineEnvironments = M.fromList , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") ] -inlineCommands :: PandocMonad m => M.Map String (LP m Inlines) +inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) inlineCommands = M.fromList $ [ ("emph", extractSpaces emph <$> tok) , ("textit", extractSpaces emph <$> tok) @@ -540,9 +1188,9 @@ inlineCommands = M.fromList $ , ("textgreek", tok) , ("sep", lit ",") , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty - , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) - , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) - , ("ensuremath", mathInline braced) + , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")")) + , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]")) + , ("ensuremath", mathInline . toksToString <$> braced) , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) , ("P", lit "¶") , ("S", lit "§") @@ -592,7 +1240,10 @@ inlineCommands = M.fromList $ , ("v", option (str "v") $ try $ tok >>= accent hacek) , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") - , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) + , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState + guard $ not inTableCell + optional (bracketed inline) + spaces)) , (",", lit "\8198") , ("@", pure mempty) , (" ", lit "\160") @@ -607,13 +1258,14 @@ inlineCommands = M.fromList $ , ("verb", doverb) , ("lstinline", dolstinline) , ("Verb", doverb) - , ("url", (unescapeURL <$> braced) >>= \url -> - pure (link url "" (str url))) - , ("href", (unescapeURL <$> braced <* optional sp) >>= \url -> - tok >>= \lab -> - pure (link url "" lab)) + , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url -> + pure (link url "" (str url))) + , ("href", (unescapeURL . toksToString <$> + braced <* optional sp) >>= \url -> + tok >>= \lab -> pure (link url "" lab)) , ("includegraphics", do options <- option [] keyvals - src <- unescapeURL . removeDoubleQuotes <$> braced + src <- unescapeURL . T.unpack . + removeDoubleQuotes . untokenize <$> braced mkImage options src) , ("enquote", enquote) , ("cite", citation "cite" NormalCitation False) @@ -686,362 +1338,456 @@ inlineCommands = M.fromList $ -- fontawesome , ("faCheck", lit "\10003") , ("faClose", lit "\10007") - ] ++ map ignoreInlines - -- these commands will be ignored unless --parse-raw is specified, - -- in which case they will appear as raw latex blocks: + ] + +ttfamily :: PandocMonad m => LP m Inlines +ttfamily = (code . stringify . toList) <$> tok + +rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines +rawInlineOr name' fallback = do + parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions + if parseRaw + then rawInline "latex" <$> getRawCommand name' + else fallback + +getRawCommand :: PandocMonad m => Text -> LP m String +getRawCommand txt = do + (_, rawargs) <- withRaw + (many (try (optional sp *> opt)) *> + option "" (try (optional sp *> dimenarg)) *> + many braced) + return $ T.unpack (txt <> untokenize rawargs) + +isBlockCommand :: Text -> Bool +isBlockCommand s = + s `M.member` (blockCommands :: M.Map Text (LP PandocPure Blocks)) + || s `Set.member` treatAsBlock + +treatAsBlock :: Set.Set Text +treatAsBlock = Set.fromList + [ "newcommand", "renewcommand" + , "newenvironment", "renewenvironment" + , "providecommand", "provideenvironment" + -- newcommand, etc. should be parsed by macroDef, but we need this + -- here so these aren't parsed as inline commands to ignore + , "special", "pdfannot", "pdfstringdef" + , "bibliographystyle" + , "maketitle", "makeindex", "makeglossary" + , "addcontentsline", "addtocontents", "addtocounter" + -- \ignore{} is used conventionally in literate haskell for definitions + -- that are to be processed by the compiler but not printed. + , "ignore" + , "hyperdef" + , "markboth", "markright", "markleft" + , "hspace", "vspace" + , "newpage" + , "clearpage" + , "pagebreak" + ] + +isInlineCommand :: Text -> Bool +isInlineCommand s = + s `M.member` (inlineCommands :: M.Map Text (LP PandocPure Inlines)) + || s `Set.member` treatAsInline + +treatAsInline :: Set.Set Text +treatAsInline = Set.fromList [ "index" , "hspace" , "vspace" + , "noindent" , "newpage" , "clearpage" , "pagebreak" ] -ttfamily :: PandocMonad m => LP m Inlines -ttfamily = (code . stringify . toList) <$> tok +lookupListDefault :: (Show k, Ord k) => v -> [k] -> M.Map k v -> v +lookupListDefault d = (fromMaybe d .) . lookupList + where lookupList l m = msum $ map (`M.lookup` m) l -mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines -mkImage options src = do - let replaceTextwidth (k,v) = case numUnit v of - Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") - _ -> (k, v) - let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options - let attr = ("",[], kvs) - let alt = str "image" - case takeExtension src of - "" -> do - defaultExt <- getOption readerDefaultImageExtension - return $ imageWith attr (addExtension src defaultExt) "" alt - _ -> return $ imageWith attr src "" alt +inline :: PandocMonad m => LP m Inlines +inline = (mempty <$ comment) + <|> (space <$ whitespace) + <|> (softbreak <$ endline) + <|> word + <|> inlineCommand' + <|> inlineEnvironment + <|> inlineGroup + <|> (symbol '-' *> + option (str "-") (symbol '-' *> + option (str "–") (str "—" <$ symbol '-'))) + <|> doubleQuote + <|> singleQuote + <|> (str "”" <$ try (symbol '\'' >> symbol '\'')) + <|> (str "”" <$ symbol '”') + <|> (str "’" <$ symbol '\'') + <|> (str "’" <$ symbol '’') + <|> (str "\160" <$ symbol '~') + <|> dollarsMath + <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) + <|> (str . (:[]) <$> primEscape) + <|> regularSymbol + <|> (do res <- symbolIn "#^'`\"[]" + pos <- getPosition + let s = T.unpack (untoken res) + report $ ParsingUnescaped s pos + return $ str s) -inNote :: Inlines -> Inlines -inNote ils = - note $ para $ ils <> str "." +inlines :: PandocMonad m => LP m Inlines +inlines = mconcat <$> many inline -unescapeURL :: String -> String -unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) -unescapeURL (x:xs) = x:unescapeURL xs -unescapeURL [] = "" - -enquote :: PandocMonad m => LP m Inlines -enquote = do - skipopts - context <- stateQuoteContext <$> getState - if context == InDoubleQuote - then singleQuoted <$> withQuoteContext InSingleQuote tok - else doubleQuoted <$> withQuoteContext InDoubleQuote tok - -doverb :: PandocMonad m => LP m Inlines -doverb = do - marker <- anyChar - code <$> manyTill (satisfy (/='\n')) (char marker) - -dolstinline :: PandocMonad m => LP m Inlines -dolstinline = do - options <- option [] keyvals - let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage - marker <- char '{' <|> anyChar - codeWith ("",classes,[]) <$> manyTill (satisfy (/='\n')) (char '}' <|> char marker) - -doLHSverb :: PandocMonad m => LP m Inlines -doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|') - --- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" -dosiunitx :: PandocMonad m => LP m Inlines -dosiunitx = do - skipopts - value <- tok - valueprefix <- option "" $ char '[' >> (mconcat <$> manyTill tok (char ']')) - unit <- tok - let emptyOr160 "" = "" - emptyOr160 _ = "\160" - return . mconcat $ [valueprefix, - emptyOr160 valueprefix, - value, - emptyOr160 unit, - unit] - -lit :: String -> LP m Inlines -lit = pure . str - -accent :: (Char -> String) -> Inlines -> LP m Inlines -accent f ils = - case toList ils of - (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) - [] -> mzero - _ -> return ils - -grave :: Char -> String -grave 'A' = "À" -grave 'E' = "È" -grave 'I' = "Ì" -grave 'O' = "Ò" -grave 'U' = "Ù" -grave 'a' = "à" -grave 'e' = "è" -grave 'i' = "ì" -grave 'o' = "ò" -grave 'u' = "ù" -grave c = [c] - -acute :: Char -> String -acute 'A' = "Á" -acute 'E' = "É" -acute 'I' = "Í" -acute 'O' = "Ó" -acute 'U' = "Ú" -acute 'Y' = "Ý" -acute 'a' = "á" -acute 'e' = "é" -acute 'i' = "í" -acute 'o' = "ó" -acute 'u' = "ú" -acute 'y' = "ý" -acute 'C' = "Ć" -acute 'c' = "ć" -acute 'L' = "Ĺ" -acute 'l' = "ĺ" -acute 'N' = "Ń" -acute 'n' = "ń" -acute 'R' = "Ŕ" -acute 'r' = "ŕ" -acute 'S' = "Ś" -acute 's' = "ś" -acute 'Z' = "Ź" -acute 'z' = "ź" -acute c = [c] - -circ :: Char -> String -circ 'A' = "Â" -circ 'E' = "Ê" -circ 'I' = "Î" -circ 'O' = "Ô" -circ 'U' = "Û" -circ 'a' = "â" -circ 'e' = "ê" -circ 'i' = "î" -circ 'o' = "ô" -circ 'u' = "û" -circ 'C' = "Ĉ" -circ 'c' = "ĉ" -circ 'G' = "Ĝ" -circ 'g' = "ĝ" -circ 'H' = "Ĥ" -circ 'h' = "ĥ" -circ 'J' = "Ĵ" -circ 'j' = "ĵ" -circ 'S' = "Ŝ" -circ 's' = "ŝ" -circ 'W' = "Ŵ" -circ 'w' = "ŵ" -circ 'Y' = "Ŷ" -circ 'y' = "ŷ" -circ c = [c] +-- block elements: -tilde :: Char -> String -tilde 'A' = "Ã" -tilde 'a' = "ã" -tilde 'O' = "Õ" -tilde 'o' = "õ" -tilde 'I' = "Ĩ" -tilde 'i' = "ĩ" -tilde 'U' = "Ũ" -tilde 'u' = "ũ" -tilde 'N' = "Ñ" -tilde 'n' = "ñ" -tilde c = [c] +begin_ :: PandocMonad m => Text -> LP m () +begin_ t = (try $ do + controlSeq "begin" + spaces + symbol '{' + spaces + Tok _ Word txt <- satisfyTok isWordTok + spaces + symbol '}' + guard (t == txt)) ("\\begin{" ++ T.unpack t ++ "}") -umlaut :: Char -> String -umlaut 'A' = "Ä" -umlaut 'E' = "Ë" -umlaut 'I' = "Ï" -umlaut 'O' = "Ö" -umlaut 'U' = "Ü" -umlaut 'a' = "ä" -umlaut 'e' = "ë" -umlaut 'i' = "ï" -umlaut 'o' = "ö" -umlaut 'u' = "ü" -umlaut c = [c] +end_ :: PandocMonad m => Text -> LP m () +end_ t = (try $ do + controlSeq "end" + spaces + symbol '{' + spaces + Tok _ Word txt <- satisfyTok isWordTok + spaces + symbol '}' + guard $ t == txt) ("\\end{" ++ T.unpack t ++ "}") -hungarumlaut :: Char -> String -hungarumlaut 'A' = "A̋" -hungarumlaut 'E' = "E̋" -hungarumlaut 'I' = "I̋" -hungarumlaut 'O' = "Ő" -hungarumlaut 'U' = "Ű" -hungarumlaut 'Y' = "ӳ" -hungarumlaut 'a' = "a̋" -hungarumlaut 'e' = "e̋" -hungarumlaut 'i' = "i̋" -hungarumlaut 'o' = "ő" -hungarumlaut 'u' = "ű" -hungarumlaut 'y' = "ӳ" -hungarumlaut c = [c] +preamble :: PandocMonad m => LP m Blocks +preamble = mempty <$ many preambleBlock + where preambleBlock = spaces1 + <|> void include + <|> void macroDef + <|> void blockCommand + <|> void braced + <|> (notFollowedBy (begin_ "document") >> void anyTok) -dot :: Char -> String -dot 'C' = "Ċ" -dot 'c' = "ċ" -dot 'E' = "Ė" -dot 'e' = "ė" -dot 'G' = "Ġ" -dot 'g' = "ġ" -dot 'I' = "İ" -dot 'Z' = "Ż" -dot 'z' = "ż" -dot c = [c] +paragraph :: PandocMonad m => LP m Blocks +paragraph = do + x <- trimInlines . mconcat <$> many1 inline + if x == mempty + then return mempty + else return $ para x -macron :: Char -> String -macron 'A' = "Ā" -macron 'E' = "Ē" -macron 'I' = "Ī" -macron 'O' = "Ō" -macron 'U' = "Ū" -macron 'a' = "ā" -macron 'e' = "ē" -macron 'i' = "ī" -macron 'o' = "ō" -macron 'u' = "ū" -macron c = [c] +include :: PandocMonad m => LP m Blocks +include = do + (Tok _ (CtrlSeq name) _) <- + controlSeq "include" <|> controlSeq "input" <|> + controlSeq "subfile" <|> controlSeq "usepackage" + skipMany $ bracketed inline -- skip options + fs <- (map trim . splitBy (==',') . T.unpack . untokenize) <$> braced + let fs' = if name == "usepackage" + then map (maybeAddExtension ".sty") fs + else map (maybeAddExtension ".tex") fs + dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mconcat <$> mapM (insertIncludedFile blocks (tokenize . T.pack) dirs) fs' -cedilla :: Char -> String -cedilla 'c' = "ç" -cedilla 'C' = "Ç" -cedilla 's' = "ş" -cedilla 'S' = "Ş" -cedilla 't' = "ţ" -cedilla 'T' = "Ţ" -cedilla 'e' = "ȩ" -cedilla 'E' = "Ȩ" -cedilla 'h' = "ḩ" -cedilla 'H' = "Ḩ" -cedilla 'o' = "o̧" -cedilla 'O' = "O̧" -cedilla c = [c] +maybeAddExtension :: String -> FilePath -> FilePath +maybeAddExtension ext fp = + if null (takeExtension fp) + then addExtension fp ext + else fp -hacek :: Char -> String -hacek 'A' = "Ǎ" -hacek 'a' = "ǎ" -hacek 'C' = "Č" -hacek 'c' = "č" -hacek 'D' = "Ď" -hacek 'd' = "ď" -hacek 'E' = "Ě" -hacek 'e' = "ě" -hacek 'G' = "Ǧ" -hacek 'g' = "ǧ" -hacek 'H' = "Ȟ" -hacek 'h' = "ȟ" -hacek 'I' = "Ǐ" -hacek 'i' = "ǐ" -hacek 'j' = "ǰ" -hacek 'K' = "Ǩ" -hacek 'k' = "ǩ" -hacek 'L' = "Ľ" -hacek 'l' = "ľ" -hacek 'N' = "Ň" -hacek 'n' = "ň" -hacek 'O' = "Ǒ" -hacek 'o' = "ǒ" -hacek 'R' = "Ř" -hacek 'r' = "ř" -hacek 'S' = "Š" -hacek 's' = "š" -hacek 'T' = "Ť" -hacek 't' = "ť" -hacek 'U' = "Ǔ" -hacek 'u' = "ǔ" -hacek 'Z' = "Ž" -hacek 'z' = "ž" -hacek c = [c] +addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () +addMeta field val = updateState $ \st -> + st{ sMeta = addMetaField field val $ sMeta st } -breve :: Char -> String -breve 'A' = "Ă" -breve 'a' = "ă" -breve 'E' = "Ĕ" -breve 'e' = "ĕ" -breve 'G' = "Ğ" -breve 'g' = "ğ" -breve 'I' = "Ĭ" -breve 'i' = "ĭ" -breve 'O' = "Ŏ" -breve 'o' = "ŏ" -breve 'U' = "Ŭ" -breve 'u' = "ŭ" -breve c = [c] +authors :: PandocMonad m => LP m () +authors = try $ do + bgroup + let oneAuthor = mconcat <$> + many1 (notFollowedBy' (controlSeq "and") >> + (inline <|> mempty <$ blockCommand)) + -- skip e.g. \vspace{10pt} + auths <- sepBy oneAuthor (controlSeq "and") + egroup + addMeta "author" (map trimInlines auths) -tok :: PandocMonad m => LP m Inlines -tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar +macroDef :: PandocMonad m => LP m Blocks +macroDef = do + guardEnabled Ext_latex_macros + mempty <$ ((commandDef <|> environmentDef) <* doMacros 0) + where commandDef = do + (name, macro') <- newcommand + updateState $ \s -> s{ sMacros = M.insert name macro' (sMacros s) } + environmentDef = do + (name, macro1, macro2) <- newenvironment + updateState $ \s -> s{ sMacros = + M.insert name macro1 (sMacros s) } + updateState $ \s -> s{ sMacros = + M.insert ("end" <> name) macro2 (sMacros s) } + -- @\newenvironment{envname}[n-args][default]{begin}{end}@ + -- is equivalent to + -- @\newcommand{\envname}[n-args][default]{begin}@ + -- @\newcommand{\endenvname}@ + +newcommand :: PandocMonad m => LP m (Text, Macro) +newcommand = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> + controlSeq "renewcommand" <|> + controlSeq "providecommand" + optional $ symbol '*' + Tok _ (CtrlSeq name) txt <- withVerbatimMode $ anyControlSeq <|> + (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + contents <- braced + when (mtype == "newcommand") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos + Nothing -> return () + return (name, Macro numargs optarg contents) + +newenvironment :: PandocMonad m => LP m (Text, Macro, Macro) +newenvironment = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> + controlSeq "renewenvironment" <|> + controlSeq "provideenvironment" + optional $ symbol '*' + symbol '{' + spaces + Tok _ Word name <- satisfyTok isWordTok + spaces + symbol '}' + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + startcontents <- braced + spaces + endcontents <- braced + when (mtype == "newenvironment") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos + Nothing -> return () + return (name, Macro numargs optarg startcontents, + Macro 0 Nothing endcontents) + +bracketedToks :: PandocMonad m => LP m [Tok] +bracketedToks = do + symbol '[' + manyTill anyTok (symbol ']') + +bracketedNum :: PandocMonad m => LP m Int +bracketedNum = do + ds <- untokenize <$> bracketedToks + case safeRead (T.unpack ds) of + Just i -> return i + _ -> return 0 -opt :: PandocMonad m => LP m Inlines -opt = bracketed inline +setCaption :: PandocMonad m => LP m Blocks +setCaption = do + ils <- tok + mblabel <- option Nothing $ + try $ spaces >> controlSeq "label" >> (Just <$> tok) + let ils' = case mblabel of + Just lab -> ils <> spanWith + ("",[],[("data-label", stringify lab)]) mempty + Nothing -> ils + updateState $ \st -> st{ sCaption = Just ils' } + return mempty -rawopt :: PandocMonad m => LP m String -rawopt = do - contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|> - try (string "\\[") <|> rawopt) - optional sp - return $ "[" ++ contents ++ "]" +looseItem :: PandocMonad m => LP m Blocks +looseItem = do + inListItem <- sInListItem <$> getState + guard $ not inListItem + skipopts + return mempty -skipopts :: PandocMonad m => LP m () -skipopts = skipMany rawopt +resetCaption :: PandocMonad m => LP m () +resetCaption = updateState $ \st -> st{ sCaption = Nothing } --- opts in angle brackets are used in beamer -rawangle :: PandocMonad m => LP m () -rawangle = try $ do - char '<' - skipMany (noneOf ">") - char '>' - return () +section :: PandocMonad m => Attr -> Int -> LP m Blocks +section (ident, classes, kvs) lvl = do + skipopts + contents <- grouped inline + lab <- option ident $ + try (spaces >> controlSeq "label" + >> spaces >> toksToString <$> braced) + attr' <- registerHeader (lab, classes, kvs) contents + return $ headerWith attr' lvl contents -skipangles :: PandocMonad m => LP m () -skipangles = skipMany rawangle +blockCommand :: PandocMonad m => LP m Blocks +blockCommand = try $ do + Tok _ (CtrlSeq name) txt <- anyControlSeq + guard $ name /= "begin" && name /= "end" + star <- option "" ("*" <$ symbol '*' <* optional sp) + let name' = name <> star + let names = ordNub [name', name] + let raw = do + guard $ isBlockCommand name || not (isInlineCommand name) + rawBlock "latex" <$> getRawCommand txt + lookupListDefault raw names blockCommands -inlineText :: PandocMonad m => LP m Inlines -inlineText = str <$> many1 inlineChar +closing :: PandocMonad m => LP m Blocks +closing = do + contents <- tok + st <- getState + let extractInlines (MetaBlocks [Plain ys]) = ys + extractInlines (MetaBlocks [Para ys ]) = ys + extractInlines _ = [] + let sigs = case lookupMeta "author" (sMeta st) of + Just (MetaList xs) -> + para $ trimInlines $ fromList $ + intercalate [LineBreak] $ map extractInlines xs + _ -> mempty + return $ para (trimInlines contents) <> sigs -inlineChar :: PandocMonad m => LP m Char -inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n" +blockCommands :: PandocMonad m => M.Map Text (LP m Blocks) +blockCommands = M.fromList $ + [ ("par", mempty <$ skipopts) + , ("parbox", braced >> grouped blocks) + , ("title", mempty <$ (skipopts *> + (grouped inline >>= addMeta "title") + <|> (grouped block >>= addMeta "title"))) + , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) + , ("author", mempty <$ (skipopts *> authors)) + -- -- in letter class, temp. store address & sig as title, author + , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) + , ("signature", mempty <$ (skipopts *> authors)) + , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) + -- Koma-script metadata commands + , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication")) + -- sectioning + , ("part", section nullAttr (-1)) + , ("part*", section nullAttr (-1)) + , ("chapter", section nullAttr 0) + , ("chapter*", section ("",["unnumbered"],[]) 0) + , ("section", section nullAttr 1) + , ("section*", section ("",["unnumbered"],[]) 1) + , ("subsection", section nullAttr 2) + , ("subsection*", section ("",["unnumbered"],[]) 2) + , ("subsubsection", section nullAttr 3) + , ("subsubsection*", section ("",["unnumbered"],[]) 3) + , ("paragraph", section nullAttr 4) + , ("paragraph*", section ("",["unnumbered"],[]) 4) + , ("subparagraph", section nullAttr 5) + , ("subparagraph*", section ("",["unnumbered"],[]) 5) + -- beamer slides + , ("frametitle", section nullAttr 3) + , ("framesubtitle", section nullAttr 4) + -- letters + , ("opening", (para . trimInlines) <$> (skipopts *> tok)) + , ("closing", skipopts *> closing) + -- + , ("hrule", pure horizontalRule) + , ("strut", pure mempty) + , ("rule", skipopts *> tok *> tok *> pure horizontalRule) + , ("item", looseItem) + , ("documentclass", skipopts *> braced *> preamble) + , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) + , ("caption", skipopts *> setCaption) + , ("bibliography", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs . toksToString)) + , ("addbibresource", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs . toksToString)) + -- includes + , ("lstinputlisting", inputListing) + , ("graphicspath", graphicsPath) + -- hyperlink + , ("hypertarget", try $ braced >> grouped block) + ] + + +environments :: PandocMonad m => M.Map Text (LP m Blocks) +environments = M.fromList + [ ("document", env "document" blocks) + , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) + , ("letter", env "letter" letterContents) + , ("minipage", env "minipage" $ + skipopts *> spaces *> optional braced *> spaces *> blocks) + , ("figure", env "figure" $ skipopts *> figure) + , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) + , ("center", env "center" blocks) + , ("longtable", env "longtable" $ + resetCaption *> simpTable "longtable" False >>= addTableCaption) + , ("table", env "table" $ + resetCaption *> skipopts *> blocks >>= addTableCaption) + , ("tabular*", env "tabular" $ simpTable "tabular*" True) + , ("tabularx", env "tabularx" $ simpTable "tabularx" True) + , ("tabular", env "tabular" $ simpTable "tabular" False) + , ("quote", blockQuote <$> env "quote" blocks) + , ("quotation", blockQuote <$> env "quotation" blocks) + , ("verse", blockQuote <$> env "verse" blocks) + , ("itemize", bulletList <$> listenv "itemize" (many item)) + , ("description", definitionList <$> listenv "description" (many descItem)) + , ("enumerate", orderedList') + , ("alltt", alltt <$> env "alltt" blocks) + , ("code", guardEnabled Ext_literate_haskell *> + (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + verbEnv "code")) + , ("comment", mempty <$ verbEnv "comment") + , ("verbatim", codeBlock <$> verbEnv "verbatim") + , ("Verbatim", fancyverbEnv "Verbatim") + , ("BVerbatim", fancyverbEnv "BVerbatim") + , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals + codeBlockWith attr <$> verbEnv "lstlisting") + , ("minted", minted) + , ("obeylines", obeylines) + , ("displaymath", mathEnvWith para Nothing "displaymath") + , ("equation", mathEnvWith para Nothing "equation") + , ("equation*", mathEnvWith para Nothing "equation*") + , ("gather", mathEnvWith para (Just "gathered") "gather") + , ("gather*", mathEnvWith para (Just "gathered") "gather*") + , ("multline", mathEnvWith para (Just "gathered") "multline") + , ("multline*", mathEnvWith para (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") + , ("align", mathEnvWith para (Just "aligned") "align") + , ("align*", mathEnvWith para (Just "aligned") "align*") + , ("alignat", mathEnvWith para (Just "aligned") "alignat") + , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") + , ("tikzpicture", rawVerbEnv "tikzpicture") + ] environment :: PandocMonad m => LP m Blocks environment = do controlSeq "begin" - name <- braced + name <- untokenize <$> braced M.findWithDefault mzero name environments <|> rawEnv name -inlineEnvironment :: PandocMonad m => LP m Inlines -inlineEnvironment = try $ do - controlSeq "begin" - name <- braced - M.findWithDefault mzero name inlineEnvironments +env :: PandocMonad m => Text -> LP m a -> LP m a +env name p = p <* end_ name -rawEnv :: PandocMonad m => String -> LP m Blocks +rawEnv :: PandocMonad m => Text -> LP m Blocks rawEnv name = do exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts rawOptions <- mconcat <$> many rawopt - let beginCommand = "\\begin{" ++ name ++ "}" ++ rawOptions + let beginCommand = "\\begin{" <> name <> "}" <> rawOptions pos1 <- getPosition (bs, raw) <- withRaw $ env name blocks - raw' <- applyMacros' $ beginCommand ++ raw - if raw' /= beginCommand ++ raw - then parseFromString' blocks raw' - else if parseRaw - then return $ rawBlock "latex" $ beginCommand ++ raw' - else do - unless parseRaw $ do - report $ SkippedContent beginCommand pos1 - pos2 <- getPosition - report $ SkippedContent ("\\end{" ++ name ++ "}") pos2 - return bs - -rawVerbEnv :: PandocMonad m => String -> LP m Blocks + if parseRaw + then return $ rawBlock "latex" + $ T.unpack $ beginCommand <> untokenize raw + else do + unless parseRaw $ do + report $ SkippedContent (T.unpack beginCommand) pos1 + pos2 <- getPosition + report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2 + return bs + +rawVerbEnv :: PandocMonad m => Text -> LP m Blocks rawVerbEnv name = do pos <- getPosition (_, raw) <- withRaw $ verbEnv name - let raw' = "\\begin{tikzpicture}" ++ raw + let raw' = "\\begin{tikzpicture}" ++ toksToString raw exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts if parseRaw @@ -1050,36 +1796,106 @@ rawVerbEnv name = do report $ SkippedContent raw' pos return mempty ----- +verbEnv :: PandocMonad m => Text -> LP m String +verbEnv name = withVerbatimMode $ do + skipopts + optional blankline + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ toksToString res + +fancyverbEnv :: PandocMonad m => Text -> LP m Blocks +fancyverbEnv name = do + options <- option [] keyvals + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + let classes = [ "numberLines" | + lookup "numbers" options == Just "left" ] + let attr = ("",classes,kvs) + codeBlockWith attr <$> verbEnv name + +obeylines :: PandocMonad m => LP m Blocks +obeylines = do + para . fromList . removeLeadingTrailingBreaks . + walk softBreakToHard . toList <$> env "obeylines" inlines + where softBreakToHard SoftBreak = LineBreak + softBreakToHard x = x + removeLeadingTrailingBreaks = reverse . dropWhile isLineBreak . + reverse . dropWhile isLineBreak + isLineBreak LineBreak = True + isLineBreak _ = False + +minted :: PandocMonad m => LP m Blocks +minted = do + options <- option [] keyvals + lang <- toksToString <$> braced + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + let classes = [ lang | not (null lang) ] ++ + [ "numberLines" | + lookup "linenos" options == Just "true" ] + let attr = ("",classes,kvs) + codeBlockWith attr <$> verbEnv "minted" + +letterContents :: PandocMonad m => LP m Blocks +letterContents = do + bs <- blocks + st <- getState + -- add signature (author) and address (title) + let addr = case lookupMeta "address" (sMeta st) of + Just (MetaBlocks [Plain xs]) -> + para $ trimInlines $ fromList xs + _ -> mempty + return $ addr <> bs -- sig added by \closing + +figure :: PandocMonad m => LP m Blocks +figure = try $ do + resetCaption + blocks >>= addImageCaption + +addImageCaption :: PandocMonad m => Blocks -> LP m Blocks +addImageCaption = walkM go + where go (Image attr alt (src,tit)) + | not ("fig:" `isPrefixOf` tit) = do + mbcapt <- sCaption <$> getState + return $ case mbcapt of + Just ils -> Image attr (toList ils) (src, "fig:" ++ tit) + Nothing -> Image attr alt (src,tit) + go x = return x -maybeAddExtension :: String -> FilePath -> FilePath -maybeAddExtension ext fp = - if null (takeExtension fp) - then addExtension fp ext - else fp +graphicsPath :: PandocMonad m => LP m Blocks +graphicsPath = do + ps <- map toksToString <$> (bgroup *> manyTill braced egroup) + getResourcePath >>= setResourcePath . (++ ps) + return mempty -include :: PandocMonad m => LP m Blocks -include = do - fs' <- try $ do - char '\\' - name <- try (string "include") - <|> try (string "input") - <|> try (string "subfile") - <|> string "usepackage" - -- skip options - skipMany $ try $ char '[' *> manyTill anyChar (char ']') - fs <- (map trim . splitBy (==',')) <$> braced - return $ if name == "usepackage" - then map (maybeAddExtension ".sty") fs - else map (maybeAddExtension ".tex") fs - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mconcat <$> mapM (insertIncludedFile blocks dirs) fs' +splitBibs :: String -> [Inlines] +splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') + +alltt :: Blocks -> Blocks +alltt = walk strToCode + where strToCode (Str s) = Code nullAttr s + strToCode Space = RawInline (Format "latex") "\\ " + strToCode SoftBreak = LineBreak + strToCode x = x + +parseListingsOptions :: [(String, String)] -> Attr +parseListingsOptions options = + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + classes = [ "numberLines" | + lookup "numbers" options == Just "left" ] + ++ maybeToList (lookup "language" options + >>= fromListingsLanguage) + in (fromMaybe "" (lookup "label" options), classes, kvs) inputListing :: PandocMonad m => LP m Blocks inputListing = do pos <- getPosition options <- option [] keyvals - f <- filter (/='"') <$> braced + f <- filter (/='"') . toksToString <$> braced dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" mbCode <- readFileFromDirs dirs f codeLines <- case mbCode of @@ -1098,169 +1914,10 @@ inputListing = do drop (firstline - 1) codeLines return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents -parseListingsOptions :: [(String, String)] -> Attr -parseListingsOptions options = - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - classes = [ "numberLines" | - lookup "numbers" options == Just "left" ] - ++ maybeToList (lookup "language" options - >>= fromListingsLanguage) - in (fromMaybe "" (lookup "label" options), classes, kvs) - ----- - -keyval :: PandocMonad m => LP m (String, String) -keyval = try $ do - key <- many1 alphaNum - val <- option "" $ char '=' >> braced <|> (many1 (alphaNum <|> oneOf ".:-|\\")) - skipMany spaceChar - optional (char ',') - skipMany spaceChar - return (key, val) - - -keyvals :: PandocMonad m => LP m [(String, String)] -keyvals = try $ char '[' *> manyTill keyval (char ']') - -alltt :: PandocMonad m => String -> LP m Blocks -alltt t = walk strToCode <$> parseFromString' blocks - (substitute " " "\\ " $ substitute "%" "\\%" $ - intercalate "\\\\\n" $ lines t) - where strToCode (Str s) = Code nullAttr s - strToCode x = x - -rawLaTeXBlock :: PandocMonad m => LP m String -rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) - -rawLaTeXInline :: PandocMonad m => LP m Inline -rawLaTeXInline = do - raw <- (snd <$> withRaw inlineCommand) - <|> (snd <$> withRaw inlineEnvironment) - <|> (snd <$> withRaw blockCommand) - RawInline "latex" <$> applyMacros' raw - -addImageCaption :: PandocMonad m => Blocks -> LP m Blocks -addImageCaption = walkM go - where go (Image attr alt (src,tit)) - | not ("fig:" `isPrefixOf` tit) = do - mbcapt <- stateCaption <$> getState - return $ case mbcapt of - Just ils -> Image attr (toList ils) (src, "fig:" ++ tit) - Nothing -> Image attr alt (src,tit) - go x = return x - -addTableCaption :: PandocMonad m => Blocks -> LP m Blocks -addTableCaption = walkM go - where go (Table c als ws hs rs) = do - mbcapt <- stateCaption <$> getState - return $ case mbcapt of - Just ils -> Table (toList ils) als ws hs rs - Nothing -> Table c als ws hs rs - go x = return x - -environments :: PandocMonad m => M.Map String (LP m Blocks) -environments = M.fromList - [ ("document", env "document" blocks <* skipMany anyChar) - , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) - , ("letter", env "letter" letterContents) - , ("minipage", env "minipage" $ - skipopts *> spaces' *> optional braced *> spaces' *> blocks) - , ("figure", env "figure" $ skipopts *> figure) - , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) - , ("center", env "center" blocks) - , ("longtable", env "longtable" $ - resetCaption *> simpTable "longtable" False >>= addTableCaption) - , ("table", env "table" $ - resetCaption *> skipopts *> blocks >>= addTableCaption) - , ("tabular*", env "tabular" $ simpTable "tabular*" True) - , ("tabularx", env "tabularx" $ simpTable "tabularx" True) - , ("tabular", env "tabular" $ simpTable "tabular" False) - , ("quote", blockQuote <$> env "quote" blocks) - , ("quotation", blockQuote <$> env "quotation" blocks) - , ("verse", blockQuote <$> env "verse" blocks) - , ("itemize", bulletList <$> listenv "itemize" (many item)) - , ("description", definitionList <$> listenv "description" (many descItem)) - , ("enumerate", orderedList') - , ("alltt", alltt =<< verbEnv "alltt") - , ("code", guardEnabled Ext_literate_haskell *> - (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> - verbEnv "code")) - , ("comment", mempty <$ verbEnv "comment") - , ("verbatim", codeBlock <$> verbEnv "verbatim") - , ("Verbatim", fancyverbEnv "Verbatim") - , ("BVerbatim", fancyverbEnv "BVerbatim") - , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals - codeBlockWith attr <$> verbEnv "lstlisting") - , ("minted", do options <- option [] keyvals - lang <- grouped (many1 $ satisfy (/='}')) - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - let classes = [ lang | not (null lang) ] ++ - [ "numberLines" | - lookup "linenos" options == Just "true" ] - let attr = ("",classes,kvs) - codeBlockWith attr <$> verbEnv "minted") - , ("obeylines", parseFromString - (para . trimInlines . mconcat <$> many inline) =<< - intercalate "\\\\\n" . lines <$> verbEnv "obeylines") - , ("displaymath", mathEnvWith para Nothing "displaymath") - , ("equation", mathEnvWith para Nothing "equation") - , ("equation*", mathEnvWith para Nothing "equation*") - , ("gather", mathEnvWith para (Just "gathered") "gather") - , ("gather*", mathEnvWith para (Just "gathered") "gather*") - , ("multline", mathEnvWith para (Just "gathered") "multline") - , ("multline*", mathEnvWith para (Just "gathered") "multline*") - , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") - , ("align", mathEnvWith para (Just "aligned") "align") - , ("align*", mathEnvWith para (Just "aligned") "align*") - , ("alignat", mathEnvWith para (Just "aligned") "alignat") - , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") - , ("tikzpicture", rawVerbEnv "tikzpicture") - ] - -figure :: PandocMonad m => LP m Blocks -figure = try $ do - resetCaption - blocks >>= addImageCaption - -letterContents :: PandocMonad m => LP m Blocks -letterContents = do - bs <- blocks - st <- getState - -- add signature (author) and address (title) - let addr = case lookupMeta "address" (stateMeta st) of - Just (MetaBlocks [Plain xs]) -> - para $ trimInlines $ fromList xs - _ -> mempty - return $ addr <> bs -- sig added by \closing - -closing :: PandocMonad m => LP m Blocks -closing = do - contents <- tok - st <- getState - let extractInlines (MetaBlocks [Plain ys]) = ys - extractInlines (MetaBlocks [Para ys ]) = ys - extractInlines _ = [] - let sigs = case lookupMeta "author" (stateMeta st) of - Just (MetaList xs) -> - para $ trimInlines $ fromList $ - intercalate [LineBreak] $ map extractInlines xs - _ -> mempty - return $ para (trimInlines contents) <> sigs +-- lists item :: PandocMonad m => LP m Blocks -item = blocks *> controlSeq "item" *> skipopts *> blocks - -looseItem :: PandocMonad m => LP m Blocks -looseItem = do - ctx <- stateParserContext `fmap` getState - if ctx == ListItemState - then mzero - else return mempty +item = void blocks *> controlSeq "item" *> skipopts *> blocks descItem :: PandocMonad m => LP m (Inlines, [Blocks]) descItem = do @@ -1271,302 +1928,210 @@ descItem = do bs <- blocks return (ils, [bs]) -env :: PandocMonad m => String -> LP m a -> LP m a -env name p = p <* - (try (controlSeq "end" *> braced >>= guard . (== name)) - ("\\end{" ++ name ++ "}")) - -listenv :: PandocMonad m => String -> LP m a -> LP m a +listenv :: PandocMonad m => Text -> LP m a -> LP m a listenv name p = try $ do - oldCtx <- stateParserContext `fmap` getState - updateState $ \st -> st{ stateParserContext = ListItemState } + oldInListItem <- sInListItem `fmap` getState + updateState $ \st -> st{ sInListItem = True } res <- env name p - updateState $ \st -> st{ stateParserContext = oldCtx } + updateState $ \st -> st{ sInListItem = oldInListItem } return res -mathEnvWith :: PandocMonad m - => (Inlines -> a) -> Maybe String -> String -> LP m a -mathEnvWith f innerEnv name = f <$> mathDisplay (inner <$> mathEnv name) - where inner x = case innerEnv of - Nothing -> x - Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ - "\\end{" ++ y ++ "}" - -mathEnv :: PandocMonad m => String -> LP m String -mathEnv name = do - skipopts - optional blankline - let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) - charMuncher = skipMany comment *> - (many1 (noneOf "\\%") <|> try (string "\\%") - <|> try (string "\\\\") <|> count 1 anyChar) - res <- concat <$> manyTill charMuncher endEnv - return $ stripTrailingNewlines res - -verbEnv :: PandocMonad m => String -> LP m String -verbEnv name = do - skipopts - optional blankline - let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) - charMuncher = anyChar - res <- manyTill charMuncher endEnv - return $ stripTrailingNewlines res - -fancyverbEnv :: PandocMonad m => String -> LP m Blocks -fancyverbEnv name = do - options <- option [] keyvals - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - let classes = [ "numberLines" | - lookup "numbers" options == Just "left" ] - let attr = ("",classes,kvs) - codeBlockWith attr <$> verbEnv name - orderedList' :: PandocMonad m => LP m Blocks orderedList' = try $ do - optional sp - (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ - try $ char '[' *> anyOrderedListMarker <* char ']' spaces - optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced + let markerSpec = do + symbol '[' + ts <- toksToString <$> manyTill anyTok (symbol ']') + case runParser anyOrderedListMarker def "option" ts of + Right r -> return r + Left _ -> do + pos <- getPosition + report $ SkippedContent ("[" ++ ts ++ "]") pos + return (1, DefaultStyle, DefaultDelim) + (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec spaces - start <- option 1 $ try $ do controlSeq "setcounter" - grouped (string "enum" *> many1 (oneOf "iv")) + optional $ try $ controlSeq "setlength" + *> grouped (count 1 $ controlSeq "itemindent") + *> braced + spaces + start <- option 1 $ try $ do pos <- getPosition + controlSeq "setcounter" + ctr <- toksToString <$> braced + guard $ "enum" `isPrefixOf` ctr + guard $ all (`elem` ['i','v']) (drop 4 ctr) optional sp - num <- grouped (many1 digit) - spaces - return (read num + 1 :: Int) + num <- toksToString <$> braced + case safeRead num of + Just i -> return (i + 1 :: Int) + Nothing -> do + report $ SkippedContent + ("\\setcounter{" ++ ctr ++ + "}{" ++ num ++ "}") pos + return 1 bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs -paragraph :: PandocMonad m => LP m Blocks -paragraph = do - x <- trimInlines . mconcat <$> many1 inline - if x == mempty - then return mempty - else return $ para x - -preamble :: PandocMonad m => LP m Blocks -preamble = mempty <$> manyTill preambleBlock beginDoc - where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" - preambleBlock = void comment - <|> void sp - <|> void blanklines - <|> void include - <|> void macro - <|> void blockCommand - <|> void anyControlSeq - <|> void braced - <|> void anyChar - -------- - --- citations - -addPrefix :: [Inline] -> [Citation] -> [Citation] -addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks -addPrefix _ _ = [] - -addSuffix :: [Inline] -> [Citation] -> [Citation] -addSuffix s ks@(_:_) = - let k = last ks - in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] -addSuffix _ _ = [] - -simpleCiteArgs :: PandocMonad m => LP m [Citation] -simpleCiteArgs = try $ do - first <- optionMaybe $ toList <$> opt - second <- optionMaybe $ toList <$> opt - keys <- try $ bgroup *> (manyTill citationLabel egroup) - let (pre, suf) = case (first , second ) of - (Just s , Nothing) -> (mempty, s ) - (Just s , Just t ) -> (s , t ) - _ -> (mempty, mempty) - conv k = Citation { citationId = k - , citationPrefix = [] - , citationSuffix = [] - , citationMode = NormalCitation - , citationHash = 0 - , citationNoteNum = 0 - } - return $ addPrefix pre $ addSuffix suf $ map conv keys - -citationLabel :: PandocMonad m => LP m String -citationLabel = optional sp *> - (many1 (satisfy isBibtexKeyChar) - <* optional sp - <* optional (char ',') - <* optional sp) - where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String) +-- tables -cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] -cites mode multi = try $ do - cits <- if multi - then many1 simpleCiteArgs - else count 1 simpleCiteArgs - let cs = concat cits - return $ case mode of - AuthorInText -> case cs of - (c:rest) -> c {citationMode = mode} : rest - [] -> [] - _ -> map (\a -> a {citationMode = mode}) cs +hline :: PandocMonad m => LP m () +hline = try $ do + spaces + controlSeq "hline" <|> + -- booktabs rules: + controlSeq "toprule" <|> + controlSeq "bottomrule" <|> + controlSeq "midrule" <|> + controlSeq "endhead" <|> + controlSeq "endfirsthead" + spaces + optional $ bracketed inline + return () -citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines -citation name mode multi = do - (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw) +lbreak :: PandocMonad m => LP m Tok +lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") <* spaces -complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines -complexNatbibCitation mode = try $ do - let ils = (toList . trimInlines . mconcat) <$> - many (notFollowedBy (oneOf "\\};") >> inline) - let parseOne = try $ do - skipSpaces - pref <- ils - cit' <- inline -- expect a citation - let citlist = toList cit' - cits' <- case citlist of - [Cite cs _] -> return cs - _ -> mzero - suff <- ils - skipSpaces - optional $ char ';' - return $ addPrefix pref $ addSuffix suff cits' - (c:cits, raw) <- withRaw $ grouped parseOne - return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" ++ raw) +amp :: PandocMonad m => LP m Tok +amp = symbol '&' --- tables +-- Split a Word into individual Symbols (for parseAligns) +splitWordTok :: PandocMonad m => LP m () +splitWordTok = do + inp <- getInput + case inp of + (Tok spos Word t : rest) -> do + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest + _ -> return () -parseAligns :: PandocMonad m => LP m [(Alignment, Double, (String, String))] +parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))] parseAligns = try $ do - bgroup - let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) - maybeBar - let cAlign = AlignCenter <$ char 'c' - let lAlign = AlignLeft <$ char 'l' - let rAlign = AlignRight <$ char 'r' - let parAlign = AlignLeft <$ char 'p' - -- algins from tabularx - let xAlign = AlignLeft <$ char 'X' - let mAlign = AlignLeft <$ char 'm' - let bAlign = AlignLeft <$ char 'b' - let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign - <|> xAlign <|> mAlign <|> bAlign - let alignPrefix = char '>' >> braced - let alignSuffix = char '<' >> braced + let maybeBar = skipMany $ + sp <|> () <$ symbol '|' <|> () <$ (symbol '@' >> braced) + let cAlign = AlignCenter <$ symbol 'c' + let lAlign = AlignLeft <$ symbol 'l' + let rAlign = AlignRight <$ symbol 'r' + let parAlign = AlignLeft <$ symbol 'p' + -- aligns from tabularx + let xAlign = AlignLeft <$ symbol 'X' + let mAlign = AlignLeft <$ symbol 'm' + let bAlign = AlignLeft <$ symbol 'b' + let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign + <|> xAlign <|> mAlign <|> bAlign ) + let alignPrefix = symbol '>' >> braced + let alignSuffix = symbol '<' >> braced let colWidth = try $ do - char '{' - ds <- many1 (oneOf "0123456789.") + symbol '{' + ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth") spaces - string "\\linewidth" - char '}' + symbol '}' case safeRead ds of Just w -> return w Nothing -> return 0.0 - let alignSpec = do + let alignSpec = try $ do spaces - pref <- option "" alignPrefix + pref <- option [] alignPrefix spaces al <- alignChar - width <- colWidth <|> option 0.0 (do s <- braced + width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced pos <- getPosition report $ SkippedContent s pos return 0.0) spaces - suff <- option "" alignSuffix + suff <- option [] alignSuffix return (al, width, (pref, suff)) - aligns' <- sepEndBy alignSpec maybeBar + bgroup + spaces + maybeBar + aligns' <- many (alignSpec <* maybeBar) spaces egroup spaces - return $ aligns' - -hline :: PandocMonad m => LP m () -hline = try $ do - spaces' - controlSeq "hline" <|> - -- booktabs rules: - controlSeq "toprule" <|> - controlSeq "bottomrule" <|> - controlSeq "midrule" <|> - controlSeq "endhead" <|> - controlSeq "endfirsthead" - spaces' - optional $ bracketed (many1 (satisfy (/=']'))) - return () - -lbreak :: PandocMonad m => LP m () -lbreak = () <$ try (spaces' *> - (controlSeq "\\" <|> controlSeq "tabularnewline") <* - spaces') - -amp :: PandocMonad m => LP m () -amp = () <$ try (spaces' *> char '&' <* spaces') + return aligns' parseTableRow :: PandocMonad m - => String -- ^ table environment name - -> [(String, String)] -- ^ pref/suffixes + => Text -- ^ table environment name + -> [([Tok], [Tok])] -- ^ pref/suffixes -> LP m [Blocks] -parseTableRow envname prefsufs = try $ do +parseTableRow envname prefsufs = do + notFollowedBy (spaces *> end_ envname) let cols = length prefsufs - let tableCellRaw = concat <$> many - (do notFollowedBy amp - notFollowedBy lbreak - notFollowedBy $ () <$ try (string ("\\end{" ++ envname ++ "}")) - many1 (noneOf "&%\n\r\\") - <|> try (string "\\&") - <|> count 1 anyChar) - let plainify bs = case toList bs of - [Para ils] -> plain (fromList ils) - _ -> bs - rawcells <- sepBy1 tableCellRaw amp - guard $ length rawcells == cols - let rawcells' = zipWith (\c (p, s) -> p ++ trim c ++ s) rawcells prefsufs - let tableCell = plainify <$> blocks - cells' <- mapM (parseFromString' tableCell) rawcells' - let numcells = length cells' + -- add prefixes and suffixes in token stream: + let celltoks (pref, suff) = do + prefpos <- getPosition + contents <- many (notFollowedBy + (() <$ amp <|> () <$ lbreak <|> end_ envname) + >> anyTok) + suffpos <- getPosition + option [] (count 1 amp) + return $ map (setpos (sourceLine prefpos, sourceColumn prefpos)) pref + ++ contents ++ + map (setpos (sourceLine suffpos, sourceColumn suffpos)) suff + rawcells <- sequence (map celltoks prefsufs) + oldInput <- getInput + cells <- sequence $ map (\ts -> setInput ts >> parseTableCell) rawcells + setInput oldInput + spaces + let numcells = length cells guard $ numcells <= cols && numcells >= 1 - guard $ cells' /= [mempty] + guard $ cells /= [mempty] -- note: a & b in a three-column table leaves an empty 3rd cell: - let cells'' = cells' ++ replicate (cols - numcells) mempty - spaces' - return cells'' + return $ cells ++ replicate (cols - numcells) mempty -spaces' :: PandocMonad m => LP m () -spaces' = spaces *> skipMany (comment *> spaces) +parseTableCell :: PandocMonad m => LP m Blocks +parseTableCell = do + let plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs + updateState $ \st -> st{ sInTableCell = True } + cells <- plainify <$> blocks + updateState $ \st -> st{ sInTableCell = False } + return cells -simpTable :: PandocMonad m => String -> Bool -> LP m Blocks +simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks simpTable envname hasWidthParameter = try $ do - when hasWidthParameter $ () <$ (spaces' >> tok) + when hasWidthParameter $ () <$ (spaces >> tok) skipopts colspecs <- parseAligns let (aligns, widths, prefsufs) = unzip3 colspecs let cols = length colspecs optional $ controlSeq "caption" *> skipopts *> setCaption optional lbreak - spaces' + spaces skipMany hline - spaces' + spaces header' <- option [] $ try (parseTableRow envname prefsufs <* lbreak <* many1 hline) - spaces' + spaces rows <- sepEndBy (parseTableRow envname prefsufs) (lbreak <* optional (skipMany hline)) - spaces' + spaces optional $ controlSeq "caption" *> skipopts *> setCaption optional lbreak - spaces' + spaces let header'' = if null header' then replicate cols mempty else header' lookAhead $ controlSeq "end" -- make sure we're at end return $ table mempty (zip aligns widths) header'' rows -removeDoubleQuotes :: String -> String -removeDoubleQuotes ('"':xs) = - case reverse xs of - '"':ys -> reverse ys - _ -> '"':xs -removeDoubleQuotes xs = xs +addTableCaption :: PandocMonad m => Blocks -> LP m Blocks +addTableCaption = walkM go + where go (Table c als ws hs rs) = do + mbcapt <- sCaption <$> getState + return $ case mbcapt of + Just ils -> Table (toList ils) als ws hs rs + Nothing -> Table c als ws hs rs + go x = return x + + +block :: PandocMonad m => LP m Blocks +block = (mempty <$ spaces1) + <|> environment + <|> include + <|> macroDef + <|> paragraph + <|> blockCommand + <|> grouped block + +blocks :: PandocMonad m => LP m Blocks +blocks = mconcat <$> many block + diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs new file mode 100644 index 000000000..6f84ae1f1 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -0,0 +1,48 @@ +{- +Copyright (C) 2017 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.LaTeX.Types + Copyright : Copyright (C) 2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Types for LaTeX tokens and macros. +-} +module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) + , TokType(..) + , Macro(..) + , Line + , Column ) +where +import Data.Text (Text) +import Text.Parsec.Pos (Line, Column) + +data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | + Esc1 | Esc2 | Arg Int + deriving (Eq, Ord, Show) + +data Tok = Tok (Line, Column) TokType Text + deriving (Eq, Ord, Show) + +data Macro = Macro Int (Maybe [Tok]) [Tok] + deriving Show + diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index c2342b9f3..ab6a32b78 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -61,7 +61,8 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, isCommentTag, isInlineTag, isTextTag) -import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) +import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline, applyMacros, + macro) import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) @@ -1105,10 +1106,11 @@ latexMacro = try $ do rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "latex" . concat <$> - rawLaTeXBlock `sepEndBy1` blankline) - <|> (B.rawBlock "context" . concat <$> + result <- (B.rawBlock "context" . concat <$> rawConTeXtEnvironment `sepEndBy1` blankline) + <|> (B.rawBlock "latex" . concat <$> + rawLaTeXBlock `sepEndBy1` blankline) + spaces return $ return result @@ -1553,8 +1555,8 @@ code = try $ do Right attr -> B.codeWith attr result math :: PandocMonad m => MarkdownParser m (F Inlines) -math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> +math = (return . B.displayMath <$> (mathDisplay >>= applyMacros)) + <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?> (guardEnabled Ext_smart *> (return <$> apostrophe) <* notFollowedBy (space <|> satisfy isPunctuation)) @@ -1878,9 +1880,8 @@ rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex lookAhead (char '\\') notFollowedBy' rawConTeXtEnvironment - RawInline _ s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s - -- "tex" because it might be context or latex + s <- rawLaTeXInline + return $ return $ B.rawInline "tex" s -- "tex" because it might be context rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String rawConTeXtEnvironment = try $ do diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index a51306347..1ae73c148 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -58,7 +58,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter) -import Text.Pandoc.Parsing hiding (macro, nested) +import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag) import Text.Pandoc.XML (fromEntities) import System.FilePath (takeExtension) diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 66273e05d..42fdfd4dd 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -826,9 +826,10 @@ maybeRight = either (const Nothing) Just inlineLaTeXCommand :: PandocMonad m => OrgParser m String inlineLaTeXCommand = try $ do rest <- getInput - parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest + st <- getState + parsed <- (lift . lift) $ runParserT rawLaTeXInline st "source" rest case parsed of - Right (RawInline _ cs) -> do + Right cs -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. let cmdNoSpc = dropWhileEnd isSpace cs diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 92f868516..fc98213fb 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState , OrgNoteRecord , HasReaderOptions (..) , HasQuoteContext (..) + , HasMacros (..) , TodoMarker (..) , TodoSequence , TodoState (..) @@ -57,14 +58,17 @@ import Control.Monad.Reader (ReaderT, asks, local) import Data.Default (Default (..)) import qualified Data.Map as M import qualified Data.Set as Set +import Data.Text (Text) import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Definition (Meta (..), nullMeta) import Text.Pandoc.Logging import Text.Pandoc.Options (ReaderOptions (..)) +import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..), HasIncludeFiles (..), HasLastStrPosition (..), HasLogMessages (..), HasQuoteContext (..), + HasMacros (..), HasReaderOptions (..), ParserContext (..), QuoteContext (..), SourcePos, askF, asksF, returnF, runF, trimInlinesF) @@ -118,6 +122,7 @@ data OrgParserState = OrgParserState , orgStateParserContext :: ParserContext , orgStateTodoSequences :: [TodoSequence] , orgLogMessages :: [LogMessage] + , orgMacros :: M.Map Text Macro } data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } @@ -148,6 +153,10 @@ instance HasLogMessages OrgParserState where addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st } getLogMessages st = reverse $ orgLogMessages st +instance HasMacros OrgParserState where + extractMacros st = orgMacros st + updateMacros f st = st{ orgMacros = f (orgMacros st) } + instance HasIncludeFiles OrgParserState where getIncludeFiles = orgStateIncludeFiles addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st } @@ -178,6 +187,7 @@ defaultOrgParserState = OrgParserState , orgStateParserContext = NullState , orgStateTodoSequences = [] , orgLogMessages = [] + , orgMacros = M.empty } optionsToParserState :: ReaderOptions -> OrgParserState diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 210d3e5aa..d41152de5 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -45,7 +45,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (enclosed, macro, nested) +import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Shared (crFilter) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index a80d75340..853d2768f 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -573,7 +573,7 @@ rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex - B.singleton <$> rawLaTeXInline + B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 5708358f6..f000646c2 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -40,7 +40,7 @@ import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (macro, space, spaces, uri) +import Text.Pandoc.Parsing hiding (space, spaces, uri) import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI, crFilter) import Control.Monad (guard, void, when) import Control.Monad.Reader (Reader, asks, runReader) diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index afac9e8cb..f2be6de5f 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -58,7 +58,8 @@ tests = [ testGroup "basic" , "blank lines + space + comments" =: "% my comment\n\n \n % another\n\nhi" =?> para "hi" , "comment in paragraph" =: - "hi % this is a comment\nthere\n" =?> para "hi there" + "hi % this is a comment\nthere\n" =?> + para ("hi" <> softbreak <> "there") ] , testGroup "code blocks" diff --git a/test/command/1390.md b/test/command/1390.md new file mode 100644 index 000000000..ffd2cef8d --- /dev/null +++ b/test/command/1390.md @@ -0,0 +1,20 @@ +``` +% pandoc -f latex -t native +\newcommand\foo{+} +Testing: $\mu\foo\eta$. +^D +[Para [Str "Testing:",Space,Math InlineMath "\\mu+\\eta",Str "."]] +``` + + + diff --git a/test/command/2118.md b/test/command/2118.md new file mode 100644 index 000000000..d640e2e2b --- /dev/null +++ b/test/command/2118.md @@ -0,0 +1,11 @@ +``` +% pandoc -f latex -t native +\newcommand{\inclgraph}{\includegraphics[width=0.8\textwidth]} +\begin{figure}[ht] + \inclgraph{setminus.png} + \caption{Set subtraction} + \label{fig:setminus} +\end{figure} +^D +[Para [Image ("",[],[("width","80%")]) [Str "Set",Space,Str "subtraction",Span ("",[],[("data-label","fig:setminus")]) []] ("setminus.png","fig:")]] +``` diff --git a/test/command/3113.md b/test/command/3113.md index f44e25709..5ca171d97 100644 --- a/test/command/3113.md +++ b/test/command/3113.md @@ -8,6 +8,6 @@ C&=&D,\\ E&=&F \end{eqnarray} ^D -[Para [Math DisplayMath "\\begin{aligned}\nA&=&B,\\\\\nC&=&D,\\\\\nE&=&F\\end{aligned}"]] +[Para [Math DisplayMath "\\begin{aligned}\nA&=&B,\\\\\nC&=&D,\\\\\n%\\end{eqnarray}\n%\\begin{eqnarray}\nE&=&F\\end{aligned}"]] ``` diff --git a/test/command/3236.md b/test/command/3236.md new file mode 100644 index 000000000..1d1a9b2c3 --- /dev/null +++ b/test/command/3236.md @@ -0,0 +1,9 @@ +``` +pandoc -f latex -t native +\newcommand{\mycolor}{red} + +\includegraphics[width=17cm]{\mycolor /header} +Magnificent \mycolor{} header. +^D +[Para [Image ("",[],[("width","17cm")]) [Str "image"] ("red/header",""),SoftBreak,Str "Magnificent",Space,Str "red",Space,Str "header."]] +``` diff --git a/test/command/3558.md b/test/command/3558.md index 3f4079064..795858b78 100644 --- a/test/command/3558.md +++ b/test/command/3558.md @@ -1,8 +1,12 @@ ``` % pandoc -t native -\startmulti +\multi + hello + \endmulti ^D -[Para [RawInline (Format "tex") "\\startmulti\n",Str "hello",SoftBreak,RawInline (Format "tex") "\\endmulti"]] +[RawBlock (Format "latex") "\\multi" +,Para [Str "hello"] +,RawBlock (Format "latex") "\\endmulti"] ``` diff --git a/test/command/3779.md b/test/command/3779.md new file mode 100644 index 000000000..1097123f0 --- /dev/null +++ b/test/command/3779.md @@ -0,0 +1,28 @@ +``` +% pandoc -f latex -t native +\newcommand{\fakeitemize}[1]{ + \begin{itemize} + #1 + \end{itemize} +} +\newcommand{\testcmd}[1]{ + #1 +} +\fakeitemize{ + \item Pandoc is 100\% awesome. +} + +\begin{itemize} + \item Pandoc is 200\% awesome. +\end{itemize} + +\testcmd{ + Pandoc is 300\% awesome. +} +^D +[BulletList + [[Para [Str "Pandoc",Space,Str "is",Space,Str "100%",Space,Str "awesome."]]] +,BulletList + [[Para [Str "Pandoc",Space,Str "is",Space,Str "200%",Space,Str "awesome."]]] +,Para [Str "Pandoc",Space,Str "is",Space,Str "300%",Space,Str "awesome."]] +``` diff --git a/test/command/934.md b/test/command/934.md new file mode 100644 index 000000000..ef99abdf9 --- /dev/null +++ b/test/command/934.md @@ -0,0 +1,12 @@ +``` +% pandoc -f latex -t native +\newcommand{\ddb}[2]{ + \textit{``#1''} + + \textbf{#2} +} +\ddb{This should be italic and in quotes}{And this is the attribution} +^D +[Para [Emph [Quoted DoubleQuote [Str "This",Space,Str "should",Space,Str "be",Space,Str "italic",Space,Str "and",Space,Str "in",Space,Str "quotes"]]] +,Para [Strong [Str "And",Space,Str "this",Space,Str "is",Space,Str "the",Space,Str "attribution"]]] +``` diff --git a/test/command/982.md b/test/command/982.md new file mode 100644 index 000000000..5f54f7713 --- /dev/null +++ b/test/command/982.md @@ -0,0 +1,11 @@ +``` +% pandoc -f latex -t native +\newcommand{\BEQ}{\begin{equation}} +\newcommand{\EEQ}{\end{equation}} + +\BEQ +y=x^2 +\EEQ +^D +[Para [Math DisplayMath "y=x^2"]] +``` diff --git a/test/latex-reader.latex b/test/latex-reader.latex index 2ebdfed99..7cbcc9672 100644 --- a/test/latex-reader.latex +++ b/test/latex-reader.latex @@ -4,7 +4,6 @@ \setlength{\parindent}{0pt} \setlength{\parskip}{6pt plus 2pt minus 1pt} -\newcommand{\textsubscript}[1]{\ensuremath{_{\scriptsize\textrm{#1}}}} \usepackage[breaklinks=true,unicode=true]{hyperref} \usepackage[normalem]{ulem} % avoid problems with \sout in headers with hyperref: diff --git a/test/latex-reader.native b/test/latex-reader.native index d481a714d..04be2538e 100644 --- a/test/latex-reader.native +++ b/test/latex-reader.native @@ -261,7 +261,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [RawInline (Format "latex") "\\cite[22-23]{smith.1899}"]]] - ,[Para [RawInline (Format "latex") "\\doublespacing\n"]] + ,[Para [RawInline (Format "latex") "\\doublespacing"]] ,[Para [Math InlineMath "2+2=4"]] ,[Para [Math InlineMath "x \\in y"]] ,[Para [Math InlineMath "\\alpha \\wedge \\omega"]] diff --git a/test/markdown-reader-more.native b/test/markdown-reader-more.native index 1007dbac7..5d63a21de 100644 --- a/test/markdown-reader-more.native +++ b/test/markdown-reader-more.native @@ -3,7 +3,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S ,Header 2 ("blank-line-before-url-in-link-reference",[],[]) [Str "Blank",Space,Str "line",Space,Str "before",Space,Str "URL",Space,Str "in",Space,Str "link",Space,Str "reference"] ,Para [Link ("",[],[]) [Str "foo"] ("/url",""),Space,Str "and",Space,Link ("",[],[]) [Str "bar"] ("/url","title")] ,Header 2 ("raw-context-environments",[],[]) [Str "Raw",Space,Str "ConTeXt",Space,Str "environments"] -,Plain [RawInline (Format "tex") "\\placeformula "] +,RawBlock (Format "latex") "\\placeformula " ,RawBlock (Format "context") "\\startformula\n L_{1} = L_{2}\n \\stopformula" ,RawBlock (Format "context") "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]" ,Header 2 ("raw-latex-environments",[],[]) [Str "Raw",Space,Str "LaTeX",Space,Str "environments"] @@ -56,7 +56,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S ,OrderedList (3,Example,TwoParens) [[Plain [Str "Third",Space,Str "example."]]] ,Header 2 ("macros",[],[]) [Str "Macros"] -,Para [Math InlineMath "{\\langle x,y \\rangle}"] +,Para [Math InlineMath "\\langle x,y \\rangle"] ,Header 2 ("case-insensitive-references",[],[]) [Str "Case-insensitive",Space,Str "references"] ,Para [Link ("",[],[]) [Str "Fum"] ("/fum","")] ,Para [Link ("",[],[]) [Str "FUM"] ("/fum","")] -- cgit v1.2.3 From 41209ea6765e9898d7e15c4c945c06275b6c0420 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 11 Jul 2017 15:52:38 +0200 Subject: HTML reader: Ensure that paragraphs are closed properly... when the parent block element closes, even without `

`. Closes #3794. --- src/Text/Pandoc/Readers/HTML.hs | 2 ++ test/command/3794.md | 7 +++++++ 2 files changed, 9 insertions(+) create mode 100644 test/command/3794.md (limited to 'test') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index b07b65019..734973e33 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -805,6 +805,8 @@ pCloses tagtype = try $ do (TagClose "dl") | tagtype == "dd" -> return () (TagClose "table") | tagtype == "td" -> return () (TagClose "table") | tagtype == "tr" -> return () + (TagClose t') | tagtype == "p" && t' `Set.member` blockHtmlTags + -> return () -- see #3794 _ -> mzero pTagText :: PandocMonad m => TagParser m Inlines diff --git a/test/command/3794.md b/test/command/3794.md new file mode 100644 index 000000000..b56e7b504 --- /dev/null +++ b/test/command/3794.md @@ -0,0 +1,7 @@ +``` +% pandoc -f html -t native +

hello

+^D +[Div ("",[],[]) + [Para [Str "hello"]]] +``` -- cgit v1.2.3 From 013fd1c6b68f2c061202d931f541aa4877ae543f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 12 Jul 2017 13:58:47 +0200 Subject: Make sure \write18 is parsed as raw LaTeX. The change is in the LaTeX reader's treatment of raw commands, but it also affects the Markdown reader. --- src/Text/Pandoc/Readers/LaTeX.hs | 16 +++++++++------- test/command/3494.md | 3 ++- test/command/3577.md | 2 ++ test/command/write18.md | 14 ++++++++++++++ test/latex-reader.native | 2 +- 5 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 test/command/write18.md (limited to 'test') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index fde177f14..cd2c7c7f8 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1049,14 +1049,12 @@ inlineCommand' :: PandocMonad m => LP m Inlines inlineCommand' = try $ do Tok _ (CtrlSeq name) cmd <- anyControlSeq guard $ name /= "begin" && name /= "end" - (star, rawstar) <- withRaw $ option "" ("*" <$ symbol '*' <* optional sp) + star <- option "" ("*" <$ symbol '*' <* optional sp) let name' = name <> star let names = ordNub [name', name] -- check non-starred as fallback let raw = do guard $ isInlineCommand name || not (isBlockCommand name) - (_, rawargs) <- withRaw - (skipangles *> skipopts *> option "" dimenarg *> many braced) - let rawcommand = T.unpack $ cmd <> untokenize (rawstar ++ rawargs) + rawcommand <- getRawCommand (cmd <> star) (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand)) <|> ignore rawcommand lookupListDefault raw names inlineCommands @@ -1353,7 +1351,11 @@ rawInlineOr name' fallback = do getRawCommand :: PandocMonad m => Text -> LP m String getRawCommand txt = do (_, rawargs) <- withRaw - (many (try (optional sp *> opt)) *> + ((if txt == "\\write" + then () <$ satisfyTok isWordTok -- digits + else return ()) *> + skipangles *> + skipopts *> option "" (try (optional sp *> dimenarg)) *> many braced) return $ T.unpack (txt <> untokenize rawargs) @@ -1631,7 +1633,7 @@ blockCommand = try $ do let names = ordNub [name', name] let raw = do guard $ isBlockCommand name || not (isInlineCommand name) - rawBlock "latex" <$> getRawCommand txt + rawBlock "latex" <$> getRawCommand (txt <> star) lookupListDefault raw names blockCommands closing :: PandocMonad m => LP m Blocks @@ -2128,8 +2130,8 @@ block = (mempty <$ spaces1) <|> environment <|> include <|> macroDef - <|> paragraph <|> blockCommand + <|> paragraph <|> grouped block blocks :: PandocMonad m => LP m Blocks diff --git a/test/command/3494.md b/test/command/3494.md index 7c480fde6..534041246 100644 --- a/test/command/3494.md +++ b/test/command/3494.md @@ -25,7 +25,8 @@ thank you -blah + +

blah

blah blah diff --git a/test/command/3577.md b/test/command/3577.md index dc88937e9..ca9dba97c 100644 --- a/test/command/3577.md +++ b/test/command/3577.md @@ -15,9 +15,11 @@ \caption{Subfigure with Subfloat} \end{figure} ^D +

diff --git a/test/command/write18.md b/test/command/write18.md new file mode 100644 index 000000000..344dfc8cf --- /dev/null +++ b/test/command/write18.md @@ -0,0 +1,14 @@ +Handle \write18{..} as raw tex: +``` +% pandoc -t native +\write18{git --version} +^D +[RawBlock (Format "latex") "\\write18{git --version}"] +``` + +``` +% pandoc -f latex+raw_tex -t native +\write18{git --version} +^D +[RawBlock (Format "latex") "\\write18{git --version}"] +``` diff --git a/test/latex-reader.native b/test/latex-reader.native index 04be2538e..a62f2069e 100644 --- a/test/latex-reader.native +++ b/test/latex-reader.native @@ -261,7 +261,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [RawInline (Format "latex") "\\cite[22-23]{smith.1899}"]]] - ,[Para [RawInline (Format "latex") "\\doublespacing"]] + ,[RawBlock (Format "latex") "\\doublespacing"] ,[Para [Math InlineMath "2+2=4"]] ,[Para [Math InlineMath "x \\in y"]] ,[Para [Math InlineMath "\\alpha \\wedge \\omega"]] -- cgit v1.2.3 From de117fbd9e32e890663eb831b47fd91fcd6419a0 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Wed, 12 Jul 2017 18:16:02 +0300 Subject: Muse writer: indent lists inside with at least one space (#3795) --- src/Text/Pandoc/Writers/Muse.hs | 15 +++++++++++---- test/Tests/Writers/Muse.hs | 11 +++++++++++ test/writer.muse | 6 +++--- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index b386a85b9..0383d9d86 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -102,6 +102,13 @@ pandocToMuse (Pandoc meta blocks) = do Nothing -> return main Just tpl -> renderTemplate' tpl context +-- | Convert list of Pandoc block elements to Muse +-- | without setting stTopLevel. +flatBlockListToMuse :: PandocMonad m + => [Block] -- ^ List of block elements + -> StateT WriterState m Doc +flatBlockListToMuse blocks = cat <$> mapM blockToMuse blocks + -- | Convert list of Pandoc block elements to Muse. blockListToMuse :: PandocMonad m => [Block] -- ^ List of block elements @@ -111,11 +118,11 @@ blockListToMuse blocks = do modify $ \s -> s { stTopLevel = not $ stInsideBlock s , stInsideBlock = True } - contents <- mapM blockToMuse blocks + result <- flatBlockListToMuse blocks modify $ \s -> s { stTopLevel = stTopLevel oldState , stInsideBlock = stInsideBlock oldState } - return $ cat contents + return result -- | Convert Pandoc block element to Muse. blockToMuse :: PandocMonad m @@ -141,10 +148,10 @@ blockToMuse (RawBlock (Format format) str) = return $ blankline $$ " text format <> "\">" $$ text str $$ "" $$ blankline blockToMuse (BlockQuote blocks) = do - contents <- blockListToMuse blocks + contents <- flatBlockListToMuse blocks return $ blankline <> "" - $$ flush contents -- flush to drop blanklines + $$ nest 0 contents -- nest 0 to remove trailing blank lines $$ "" <> blankline blockToMuse (OrderedList (start, style, _) items) = do diff --git a/test/Tests/Writers/Muse.hs b/test/Tests/Writers/Muse.hs index d83cc5c9b..ebe5d45cd 100644 --- a/test/Tests/Writers/Muse.hs +++ b/test/Tests/Writers/Muse.hs @@ -137,6 +137,17 @@ tests = [ testGroup "block elements" , " second inner definition :: second inner description" ] ] + -- Check that list is intended with one space even inside a quote + , "List inside block quote" =: blockQuote (orderedList [ plain $ text "first" + , plain $ text "second" + , plain $ text "third" + ]) + =?> unlines [ "" + , " 1. first" + , " 2. second" + , " 3. third" + , "" + ] ] , testGroup "headings" [ "normal heading" =: diff --git a/test/writer.muse b/test/writer.muse index 41d1c9a5b..fda025812 100644 --- a/test/writer.muse +++ b/test/writer.muse @@ -65,8 +65,8 @@ sub status { A list: -1. item one -2. item two + 1. item one + 2. item two Nested block quotes: @@ -281,7 +281,7 @@ Multiple blocks with italics:

-orange block quote + orange block quote Multiple definitions, tight: -- cgit v1.2.3 From 8b502dd50ff842bdbbf346a67a607d1a7905bda3 Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Wed, 12 Jul 2017 11:19:49 -0400 Subject: Fixed #3760. (#3784) Using the same solution as in the LaTeX reader: equation -> displaymath align -> displaymath \begin{aligned} ... \end{aligned} etc.. --- src/Text/Pandoc/Readers/Vimwiki.hs | 32 ++++++++++++++++++++++++++------ test/vimwiki-reader.native | 12 ++++++++---- test/vimwiki-reader.wiki | 10 ++++++++++ 3 files changed, 44 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 11faedb24..52bf37d35 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -75,7 +75,8 @@ import qualified Text.Pandoc.Builder as B (headerWith, str, space, strong, emph, strikeout, code, link, image, spanWith, para, horizontalRule, blockQuote, bulletList, plain, orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, - setMeta, definitionList, superscript, subscript) + setMeta, definitionList, superscript, subscript, displayMath, + math) import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition (Pandoc(..), Inline(Space), Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), @@ -265,13 +266,32 @@ displayMath :: PandocMonad m => VwParser m Blocks displayMath = try $ do many spaceChar >> string "{{$" mathTag <- option "" mathTagParser + many space contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$" >> many spaceChar >> newline)) let contentsWithTags - | mathTag == "" = "\\[" ++ contents ++ "\n\\]" - | otherwise = "\\begin{" ++ mathTag ++ "}" ++ contents + | mathTag == "" = contents + | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents ++ "\n\\end{" ++ mathTag ++ "}" - return $ B.plain $ B.str contentsWithTags + return $ B.para $ B.displayMath contentsWithTags + + +mathTagLaTeX :: String -> String +mathTagLaTeX s = case s of + "equation" -> "" + "equation*" -> "" + "gather" -> "gathered" + "gather*" -> "gathered" + "multline" -> "gathered" + "multline*" -> "gathered" + "eqnarray" -> "aligned" + "eqnarray*" -> "aligned" + "align" -> "aligned" + "align*" -> "aligned" + "alignat" -> "aligned" + "alignat*" -> "aligned" + _ -> s + mixedList :: PandocMonad m => VwParser m Blocks mixedList = try $ do @@ -598,7 +618,7 @@ inlineMath :: PandocMonad m => VwParser m Inlines inlineMath = try $ do char '$' contents <- many1Till (noneOf "\n") (char '$') - return $ B.str $ "\$" ++ contents ++ "\$" + return $ B.math contents tag :: PandocMonad m => VwParser m Inlines tag = try $ do @@ -650,4 +670,4 @@ mathTagParser = do s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space))) char '%' >> string s >> char '%' - return s + return $ mathTagLaTeX s diff --git a/test/vimwiki-reader.native b/test/vimwiki-reader.native index 26388b71a..8c9bff3f6 100644 --- a/test/vimwiki-reader.native +++ b/test/vimwiki-reader.native @@ -81,7 +81,7 @@ Pandoc (Meta {unMeta = fromList [("date",MetaInlines [Str "2017-05-01"]),("title ,Header 2 ("lists",[],[]) [Str "lists"] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "ordered",Space,Str "list",Space,Str "item",Space,Str "1,",Space,Str "and",Space,Str "here",Space,Str "is",Space,Str "some",Space,Str "math",Space,Str "belonging",Space,Str "to",Space,Str "list",Space,Str "item",Space,Str "1"] - ,Plain [Str "\\[\n a^2 + b^2 = c^2\n\\]"] + ,Para [Math DisplayMath "a^2 + b^2 = c^2"] ,Plain [Str "and",Space,Str "some",Space,Str "preformatted",Space,Str "and",Space,Str "tables",Space,Str "belonging",Space,Str "to",Space,Str "item",Space,Str "1",Space,Str "as",Space,Str "well"] ,CodeBlock ("",[],[]) "I'm part of item 1." ,Table [] [AlignDefault,AlignDefault] [0.0,0.0] @@ -184,9 +184,13 @@ Pandoc (Meta {unMeta = fromList [("date",MetaInlines [Str "2017-05-01"]),("title ,[Plain [Str "b"]]]]]]] ,[Plain [Span ("",["done4"],[]) [],Str "task",Space,Str "2"]]] ,Header 2 ("math",[],[]) [Str "math"] -,Para [Str "\$ \\sum_i a_i^2 = 1 \$"] -,Plain [Str "\\[\n\\sum_i a_i^2\n=\n1\n\\]"] -,Plain [Str "\\begin{align}\n\\sum_i a_i^2 &= 1 + 1 \\\\\n&= 2.\n\\end{align}"] +,Para [Math InlineMath " \\sum_i a_i^2 = 1 "] +,Para [Math DisplayMath "\\sum_i a_i^2\n=\n1"] +,Para [Math DisplayMath "\\begin{aligned}\n\\sum_i a_i^2 &= 1 + 1 \\\\\n&= 2.\n\\end{aligned}"] +,Para [Str "edge",Space,Str "case",Space,Str "(the",Space,Code ("",[],[]) "c^2 + ",Space,Str "after",Space,Str "the",Space,Str "multline",Space,Str "tag",Space,Str "is",Space,Str "in",Space,Str "the",Space,Str "equation):"] +,Para [Math DisplayMath "\\begin{gathered}\nc^2 + \na^2 + b^2\n\\end{gathered}"] +,Para [Str "edge",Space,Str "case",Space,Str "(the",Space,Str "tag",Space,Str "is",Space,Code ("",[],[]) "hello%bye",Str ")"] +,Para [Math DisplayMath "\\begin{hello%bye}\n\\int_a^b f(x) dx\n\\end{hello%bye}"] ,Para [Str "Just",Space,Str "two",Space,Str "dollar",Space,Str "signs:",Space,Str "$$"] ,Para [Str "[not",Space,Str "math]",Space,Str "You",Space,Str "have",Space,Str "$1",SoftBreak,Str "and",Space,Str "I",Space,Str "have",Space,Str "$1."] ,Header 2 ("tags",[],[]) [Str "tags"] diff --git a/test/vimwiki-reader.wiki b/test/vimwiki-reader.wiki index ad724e090..63d39b146 100644 --- a/test/vimwiki-reader.wiki +++ b/test/vimwiki-reader.wiki @@ -297,6 +297,16 @@ $ \sum_i a_i^2 = 1 $ &= 2. }}$ +edge case (the `c^2 + ` after the multline tag is in the equation): +{{$%multline%c^2 + +a^2 + b^2 +}}$ + +edge case (the tag is `hello%bye`) +{{$%hello%bye% +\int_a^b f(x) dx +}}$ + Just two dollar signs: $$ [not math] You have $1 -- cgit v1.2.3