diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 407 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/ParserState.hs | 207 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 21 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 13 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Org.hs | 2 |
8 files changed, 419 insertions, 237 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index b67a53f5b..0330c46e2 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -291,6 +291,8 @@ writers = [ writeHtmlString o{ writerSlideVariant = RevealJsSlides , writerHtml5 = True }) ,("docbook" , PureStringWriter writeDocbook) + ,("docbook5" , PureStringWriter $ \o -> + writeDocbook o{ writerDocbook5 = True }) ,("opml" , PureStringWriter writeOPML) ,("opendocument" , PureStringWriter writeOpenDocument) ,("latex" , PureStringWriter writeLaTeX) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 171210962..701cd8bd1 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -357,6 +357,7 @@ data WriterOptions = WriterOptions , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory , writerCiteMethod :: CiteMethod -- ^ How to print cites + , writerDocbook5 :: Bool -- ^ Produce DocBook5 , writerHtml5 :: Bool -- ^ Produce HTML5 , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show @@ -403,6 +404,7 @@ instance Default WriterOptions where , writerSourceURL = Nothing , writerUserDataDir = Nothing , writerCiteMethod = Citeproc + , writerDocbook5 = False , writerHtml5 = False , writerHtmlQTags = False , writerBeamer = False diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 950497992..d3cee08e2 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -225,7 +225,7 @@ table = do Nothing -> 1.0 caption <- option mempty tableCaption optional rowsep - hasheader <- option False $ True <$ (lookAhead (char '!')) + hasheader <- option False $ True <$ (lookAhead (skipSpaces *> char '!')) (cellspecs',hdr) <- unzip <$> tableRow let widths = map ((tableWidth *) . snd) cellspecs' let restwidth = tableWidth - sum widths diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5e98be31d..ceab1e120 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,6 +1,5 @@ +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-} {- Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -31,10 +30,10 @@ Conversion of org-mode formatted plain text to 'Pandoc' document. module Text.Pandoc.Readers.Org ( readOrg ) where import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), - trimInlines ) +import Text.Pandoc.Builder ( Inlines, Blocks ) import Text.Pandoc.Definition import Text.Pandoc.Compat.Monoid ((<>)) +import Text.Pandoc.Error import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF @@ -42,22 +41,20 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , parseFromString, blanklines ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) +import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Shared (compactify', compactify'DL) import Text.TeXMath (readTeX, writePandoc, DisplayType(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Control.Arrow (first) -import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) -import Control.Monad.Reader (Reader, runReader, ask, asks, local) -import Data.Char (isAlphaNum, toLower) -import Data.Default +import Control.Monad (foldM, guard, mplus, mzero, when) +import Control.Monad.Reader ( Reader, runReader ) +import Data.Char (isAlphaNum, isSpace, toLower) import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M -import qualified Data.Set as Set import Data.Maybe (fromMaybe, isJust) import Network.HTTP (urlEncode) -import Text.Pandoc.Error -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options @@ -65,132 +62,12 @@ readOrg :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") -data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } - +-- | The parser used to read org files. type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) -instance HasIdentifierList OrgParserState where - extractIdentifierList = orgStateIdentifiers - updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) } - -instance HasHeaderMap OrgParserState where - extractHeaderMap = orgStateHeaderMap - updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) } - -parseOrg :: OrgParser Pandoc -parseOrg = do - blocks' <- parseBlocks - st <- getState - let meta = runF (orgStateMeta' st) st - let removeUnwantedBlocks = dropCommentTrees . filter (/= Null) - return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st) - --- | Drop COMMENT headers and the document tree below those headers. -dropCommentTrees :: [Block] -> [Block] -dropCommentTrees [] = [] -dropCommentTrees (b:bs) = - maybe (b:dropCommentTrees bs) - (dropCommentTrees . flip dropUntilHeaderAboveLevel bs) - (commentHeaderLevel b) - --- | Return the level of a header starting a comment or :noexport: tree and --- Nothing otherwise. -commentHeaderLevel :: Block -> Maybe Int -commentHeaderLevel blk = - case blk of - (Header level _ ((Str "COMMENT"):_)) -> Just level - (Header level _ title) | hasNoExportTag title -> Just level - _ -> Nothing - where - hasNoExportTag :: [Inline] -> Bool - hasNoExportTag = any isNoExportTag - - isNoExportTag :: Inline -> Bool - isNoExportTag (Span ("", ["tag"], [("data-tag-name", "noexport")]) []) = True - isNoExportTag _ = False - --- | Drop blocks until a header on or above the given level is seen -dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block] -dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n) - -isHeaderLevelLowerEq :: Int -> Block -> Bool -isHeaderLevelLowerEq n blk = - case blk of - (Header level _ _) -> n >= level - _ -> False - -- --- Parser State for Org +-- Functions acting on the parser state -- - -type OrgNoteRecord = (String, F Blocks) -type OrgNoteTable = [OrgNoteRecord] - -type OrgBlockAttributes = M.Map String String - -type OrgLinkFormatters = M.Map String (String -> String) - --- | Org-mode parser state -data OrgParserState = OrgParserState - { orgStateOptions :: ReaderOptions - , orgStateAnchorIds :: [String] - , orgStateBlockAttributes :: OrgBlockAttributes - , orgStateEmphasisCharStack :: [Char] - , orgStateEmphasisNewlines :: Maybe Int - , orgStateLastForbiddenCharPos :: Maybe SourcePos - , orgStateLastPreCharPos :: Maybe SourcePos - , orgStateLastStrPos :: Maybe SourcePos - , orgStateLinkFormatters :: OrgLinkFormatters - , orgStateMeta :: Meta - , orgStateMeta' :: F Meta - , orgStateNotes' :: OrgNoteTable - , orgStateParserContext :: ParserContext - , orgStateIdentifiers :: Set.Set String - , orgStateHeaderMap :: M.Map Inlines String - } - -instance Default OrgParserLocal where - def = OrgParserLocal NoQuote - -instance HasReaderOptions OrgParserState where - extractReaderOptions = orgStateOptions - -instance HasMeta OrgParserState where - setMeta field val st = - st{ orgStateMeta = setMeta field val $ orgStateMeta st } - deleteMeta field st = - st{ orgStateMeta = deleteMeta field $ orgStateMeta st } - -instance HasLastStrPosition OrgParserState where - getLastStrPos = orgStateLastStrPos - setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } - -instance HasQuoteContext st (Reader OrgParserLocal) where - getQuoteContext = asks orgLocalQuoteContext - withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) - -instance Default OrgParserState where - def = defaultOrgParserState - -defaultOrgParserState :: OrgParserState -defaultOrgParserState = OrgParserState - { orgStateOptions = def - , orgStateAnchorIds = [] - , orgStateBlockAttributes = M.empty - , orgStateEmphasisCharStack = [] - , orgStateEmphasisNewlines = Nothing - , orgStateLastForbiddenCharPos = Nothing - , orgStateLastPreCharPos = Nothing - , orgStateLastStrPos = Nothing - , orgStateLinkFormatters = M.empty - , orgStateMeta = nullMeta - , orgStateMeta' = return nullMeta - , orgStateNotes' = [] - , orgStateParserContext = NullState - , orgStateIdentifiers = Set.empty - , orgStateHeaderMap = M.empty - } - recordAnchorId :: String -> OrgParser () recordAnchorId i = updateState $ \s -> s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } @@ -244,44 +121,117 @@ addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState updateState $ \s -> s{ orgStateNotes' = note:oldnotes } --- The version Text.Pandoc.Parsing cannot be used, as we need additional parts --- of the state saved and restored. -parseFromString :: OrgParser a -> String -> OrgParser a -parseFromString parser str' = do - oldLastPreCharPos <- orgStateLastPreCharPos <$> getState - updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } - result <- P.parseFromString parser str' - updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } - return result +-- +-- Export Settings +-- +exportSetting :: OrgParser () +exportSetting = choice + [ booleanSetting "^" setExportSubSuperscripts + , ignoredSetting "'" + , ignoredSetting "*" + , ignoredSetting "-" + , ignoredSetting ":" + , ignoredSetting "<" + , ignoredSetting "\\n" + , ignoredSetting "arch" + , ignoredSetting "author" + , ignoredSetting "c" + , ignoredSetting "creator" + , ignoredSetting "d" + , ignoredSetting "date" + , ignoredSetting "e" + , ignoredSetting "email" + , ignoredSetting "f" + , ignoredSetting "H" + , ignoredSetting "inline" + , ignoredSetting "num" + , ignoredSetting "p" + , ignoredSetting "pri" + , ignoredSetting "prop" + , ignoredSetting "stat" + , ignoredSetting "tags" + , ignoredSetting "tasks" + , ignoredSetting "tex" + , ignoredSetting "timestamp" + , ignoredSetting "title" + , ignoredSetting "toc" + , ignoredSetting "todo" + , ignoredSetting "|" + ] <?> "export setting" + +booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser () +booleanSetting settingIdentifier setter = try $ do + string settingIdentifier + char ':' + value <- many nonspaceChar + let boolValue = case value of + "nil" -> False + "{}" -> False + _ -> True + updateState $ modifyExportSettings setter boolValue +ignoredSetting :: String -> OrgParser () +ignoredSetting s = try (() <$ string s <* char ':' <* many nonspaceChar) -- --- Adaptions and specializations of parsing utilities +-- Parser -- +parseOrg :: OrgParser Pandoc +parseOrg = do + blocks' <- parseBlocks + st <- getState + let meta = runF (orgStateMeta' st) st + let removeUnwantedBlocks = dropCommentTrees . filter (/= Null) + return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st) -newtype F a = F { unF :: Reader OrgParserState a - } deriving (Monad, Applicative, Functor) +-- | Drop COMMENT headers and the document tree below those headers. +dropCommentTrees :: [Block] -> [Block] +dropCommentTrees [] = [] +dropCommentTrees (b:bs) = + maybe (b:dropCommentTrees bs) + (dropCommentTrees . flip dropUntilHeaderAboveLevel bs) + (commentHeaderLevel b) -runF :: F a -> OrgParserState -> a -runF = runReader . unF +-- | Return the level of a header starting a comment or :noexport: tree and +-- Nothing otherwise. +commentHeaderLevel :: Block -> Maybe Int +commentHeaderLevel blk = + case blk of + (Header level _ ((Str "COMMENT"):_)) -> Just level + (Header level _ title) | hasNoExportTag title -> Just level + _ -> Nothing + where + hasNoExportTag :: [Inline] -> Bool + hasNoExportTag = any isNoExportTag -askF :: F OrgParserState -askF = F ask + isNoExportTag :: Inline -> Bool + isNoExportTag (Span ("", ["tag"], [("data-tag-name", "noexport")]) []) = True + isNoExportTag _ = False -asksF :: (OrgParserState -> a) -> F a -asksF f = F $ asks f +-- | Drop blocks until a header on or above the given level is seen +dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block] +dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n) -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = fmap mconcat . sequence +isHeaderLevelLowerEq :: Int -> Block -> Bool +isHeaderLevelLowerEq n blk = + case blk of + (Header level _ _) -> n >= level + _ -> False -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines -returnF :: a -> OrgParser (F a) -returnF = return . return +-- +-- Adaptions and specializations of parsing utilities +-- +-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts +-- of the state saved and restored. +parseFromString :: OrgParser a -> String -> OrgParser a +parseFromString parser str' = do + oldLastPreCharPos <- orgStateLastPreCharPos <$> getState + updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } + result <- P.parseFromString parser str' + updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } + return result -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char @@ -692,8 +642,9 @@ optionLine :: OrgParser () optionLine = try $ do key <- metaKey case key of - "link" -> parseLinkFormat >>= uncurry addLinkFormat - _ -> mzero + "link" -> parseLinkFormat >>= uncurry addLinkFormat + "options" -> () <$ sepBy spaces exportSetting + _ -> mzero parseLinkFormat :: OrgParser ((String, String -> String)) parseLinkFormat = try $ do @@ -774,9 +725,13 @@ data OrgTableRow = OrgContentRow (F [Blocks]) | OrgAlignRow [Alignment] | OrgHlineRow +-- OrgTable is strongly related to the pandoc table ADT. Using the same +-- (i.e. pandoc-global) ADT would mean that the reader would break if the +-- global structure was to be changed, which would be bad. The final table +-- should be generated using a builder function. Column widths aren't +-- implemented yet, so they are not tracked here. data OrgTable = OrgTable - { orgTableColumns :: Int - , orgTableAlignments :: [Alignment] + { orgTableAlignments :: [Alignment] , orgTableHeader :: [Blocks] , orgTableRows :: [[Blocks]] } @@ -792,7 +747,7 @@ table = try $ do orgToPandocTable :: OrgTable -> Inlines -> Blocks -orgToPandocTable (OrgTable _ aligns heads lns) caption = +orgToPandocTable (OrgTable aligns heads lns) caption = B.table caption (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char @@ -803,18 +758,19 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) tableContentCell :: OrgParser (F Blocks) tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell - -endOfCell :: OrgParser Char -endOfCell = try $ char '|' <|> lookAhead newline + fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell tableAlignRow :: OrgParser OrgTableRow -tableAlignRow = try $ - OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline) +tableAlignRow = try $ do + tableStart + cells <- many1Till tableAlignCell newline + -- Empty rows are regular (i.e. content) rows, not alignment rows. + guard $ any (/= AlignDefault) cells + return $ OrgAlignRow cells tableAlignCell :: OrgParser Alignment tableAlignCell = @@ -829,65 +785,61 @@ tableAlignCell = where emptyCell = try $ skipSpaces *> endOfCell tableAlignFromChar :: OrgParser Alignment -tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft - , char 'c' *> return AlignCenter - , char 'r' *> return AlignRight - ] +tableAlignFromChar = try $ + choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] tableHline :: OrgParser OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) +endOfCell :: OrgParser Char +endOfCell = try $ char '|' <|> lookAhead newline + rowsToTable :: [OrgTableRow] -> F OrgTable -rowsToTable = foldM (flip rowToContent) zeroTable - where zeroTable = OrgTable 0 mempty mempty mempty - -normalizeTable :: OrgTable - -> OrgTable -normalizeTable (OrgTable cols aligns heads lns) = - let aligns' = fillColumns aligns AlignDefault - heads' = if heads == mempty - then mempty - else fillColumns heads (B.plain mempty) - lns' = map (`fillColumns` B.plain mempty) lns - fillColumns base padding = take cols $ base ++ repeat padding - in OrgTable cols aligns' heads' lns' +rowsToTable = foldM rowToContent emptyTable + where emptyTable = OrgTable mempty mempty mempty +normalizeTable :: OrgTable -> OrgTable +normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows + where + refRow = if heads /= mempty + then heads + else if rows == mempty then mempty else head rows + cols = length refRow + fillColumns base padding = take cols $ base ++ repeat padding + aligns' = fillColumns aligns AlignDefault -- One or more horizontal rules after the first content line mark the previous -- line as a header. All other horizontal lines are discarded. -rowToContent :: OrgTableRow - -> OrgTable - -> F OrgTable -rowToContent OrgHlineRow t = maybeBodyToHeader t -rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t -rowToContent (OrgContentRow rf) t = do - rs <- rf - setLongestRow rs =<< appendToBody rs t - -setLongestRow :: [a] - -> OrgTable - -> F OrgTable -setLongestRow rs t = - return t{ orgTableColumns = max (length rs) (orgTableColumns t) } - -maybeBodyToHeader :: OrgTable - -> F OrgTable -maybeBodyToHeader t = case t of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - return t{ orgTableHeader = b , orgTableRows = [] } - _ -> return t - -appendToBody :: [Blocks] - -> OrgTable +rowToContent :: OrgTable + -> OrgTableRow -> F OrgTable -appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] } +rowToContent orgTable row = + case row of + OrgHlineRow -> return singleRowPromotedToHeader + OrgAlignRow as -> return . setAligns $ as + OrgContentRow cs -> appendToBody cs + where + singleRowPromotedToHeader :: OrgTable + singleRowPromotedToHeader = case orgTable of + OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> + orgTable{ orgTableHeader = b , orgTableRows = [] } + _ -> orgTable + + setAligns :: [Alignment] -> OrgTable + setAligns aligns = orgTable{ orgTableAlignments = aligns } -setAligns :: [Alignment] - -> OrgTable - -> F OrgTable -setAligns aligns t = return $ t{ orgTableAlignments = aligns } + appendToBody :: F [Blocks] -> F OrgTable + appendToBody frow = do + newRow <- frow + let oldRows = orgTableRows orgTable + -- NOTE: This is an inefficient O(n) operation. This should be changed + -- if performance ever becomes a problem. + return orgTable{ orgTableRows = oldRows ++ [newRow] } -- @@ -1561,7 +1513,9 @@ subOrSuperExpr = try $ where enclosing (left, right) s = left : s ++ [right] simpleSubOrSuperString :: OrgParser String -simpleSubOrSuperString = try $ +simpleSubOrSuperString = try $ do + state <- getState + guard . exportSubSuperscripts . orgStateExportSettings $ state choice [ string "*" , mappend <$> option [] ((:[]) <$> oneOf "+-") <*> many1 alphaNum @@ -1581,14 +1535,14 @@ inlineLaTeX = try $ do parseAsMathMLSym :: String -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) - -- dropWhileEnd would be nice here, but it's not available before base 4.5 - where clean = reverse . dropWhile (`elem` ("{}" :: String)) . reverse . drop 1 + -- drop initial backslash and any trailing "{}" + where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 state :: ParserState state = def{ stateOptions = def{ readerParseRaw = True }} - texMathToPandoc inp = (maybeRight $ readTeX inp) >>= - writePandoc DisplayInline + texMathToPandoc :: String -> Maybe [Inline] + texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just @@ -1598,11 +1552,18 @@ inlineLaTeXCommand = try $ do rest <- getInput case runParser rawLaTeXInline def "source" rest of Right (RawInline _ cs) -> do - let len = length cs + -- drop any trailing whitespace, those are not be part of the command as + -- far as org mode is concerned. + let cmdNoSpc = dropWhileEnd isSpace cs + let len = length cmdNoSpc count len anyChar - return cs + return cmdNoSpc _ -> mzero +-- Taken from Data.OldList. +dropWhileEnd :: (a -> Bool) -> [a] -> [a] +dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] + smart :: OrgParser (F Inlines) smart = do getOption readerSmart >>= guard diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs new file mode 100644 index 000000000..49cfa2be2 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -0,0 +1,207 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{- +Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org.Options + Copyright : Copyright (C) 2014-2016 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +Define the Org-mode parser state. +-} +module Text.Pandoc.Readers.Org.ParserState + ( OrgParserState(..) + , OrgParserLocal(..) + , OrgNoteRecord + , F(..) + , askF + , asksF + , trimInlinesF + , runF + , returnF + , ExportSettingSetter + , exportSubSuperscripts + , setExportSubSuperscripts + , modifyExportSettings + ) where + +import Control.Monad (liftM, liftM2) +import Control.Monad.Reader (Reader, runReader, ask, asks, local) + +import Data.Default (Default(..)) +import qualified Data.Map as M +import qualified Data.Set as Set + +import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), + trimInlines ) +import Text.Pandoc.Definition ( Meta(..), nullMeta ) +import Text.Pandoc.Options ( ReaderOptions(..) ) +import Text.Pandoc.Parsing ( HasHeaderMap(..) + , HasIdentifierList(..) + , HasLastStrPosition(..) + , HasQuoteContext(..) + , HasReaderOptions(..) + , ParserContext(..) + , QuoteContext(..) + , SourcePos ) + +-- | An inline note / footnote containing the note key and its (inline) value. +type OrgNoteRecord = (String, F Blocks) +-- | Table of footnotes +type OrgNoteTable = [OrgNoteRecord] +-- | Map of org block attributes (e.g. LABEL, CAPTION, NAME, etc) +type OrgBlockAttributes = M.Map String String +-- | Map of functions for link transformations. The map key is refers to the +-- link-type, the corresponding function transforms the given link string. +type OrgLinkFormatters = M.Map String (String -> String) + +-- | Export settings <http://orgmode.org/manual/Export-settings.html> +-- These settings can be changed via OPTIONS statements. +data ExportSettings = ExportSettings + { exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts + } + +-- | Org-mode parser state +data OrgParserState = OrgParserState + { orgStateOptions :: ReaderOptions + , orgStateAnchorIds :: [String] + , orgStateBlockAttributes :: OrgBlockAttributes + , orgStateEmphasisCharStack :: [Char] + , orgStateEmphasisNewlines :: Maybe Int + , orgStateExportSettings :: ExportSettings + , orgStateLastForbiddenCharPos :: Maybe SourcePos + , orgStateLastPreCharPos :: Maybe SourcePos + , orgStateLastStrPos :: Maybe SourcePos + , orgStateLinkFormatters :: OrgLinkFormatters + , orgStateMeta :: Meta + , orgStateMeta' :: F Meta + , orgStateNotes' :: OrgNoteTable + , orgStateParserContext :: ParserContext + , orgStateIdentifiers :: Set.Set String + , orgStateHeaderMap :: M.Map Inlines String + } + +data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } + +instance Default OrgParserLocal where + def = OrgParserLocal NoQuote + +instance HasReaderOptions OrgParserState where + extractReaderOptions = orgStateOptions + +instance HasMeta OrgParserState where + setMeta field val st = + st{ orgStateMeta = setMeta field val $ orgStateMeta st } + deleteMeta field st = + st{ orgStateMeta = deleteMeta field $ orgStateMeta st } + +instance HasLastStrPosition OrgParserState where + getLastStrPos = orgStateLastStrPos + setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } + +instance HasQuoteContext st (Reader OrgParserLocal) where + getQuoteContext = asks orgLocalQuoteContext + withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) + +instance HasIdentifierList OrgParserState where + extractIdentifierList = orgStateIdentifiers + updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) } + +instance HasHeaderMap OrgParserState where + extractHeaderMap = orgStateHeaderMap + updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) } + +instance Default ExportSettings where + def = defaultExportSettings + +instance Default OrgParserState where + def = defaultOrgParserState + +defaultOrgParserState :: OrgParserState +defaultOrgParserState = OrgParserState + { orgStateOptions = def + , orgStateAnchorIds = [] + , orgStateBlockAttributes = M.empty + , orgStateEmphasisCharStack = [] + , orgStateEmphasisNewlines = Nothing + , orgStateExportSettings = def + , orgStateLastForbiddenCharPos = Nothing + , orgStateLastPreCharPos = Nothing + , orgStateLastStrPos = Nothing + , orgStateLinkFormatters = M.empty + , orgStateMeta = nullMeta + , orgStateMeta' = return nullMeta + , orgStateNotes' = [] + , orgStateParserContext = NullState + , orgStateIdentifiers = Set.empty + , orgStateHeaderMap = M.empty + } + +defaultExportSettings :: ExportSettings +defaultExportSettings = ExportSettings + { exportSubSuperscripts = True + } + + +-- +-- Setter for exporting options +-- +type ExportSettingSetter a = a -> ExportSettings -> ExportSettings + +setExportSubSuperscripts :: ExportSettingSetter Bool +setExportSubSuperscripts val es = es { exportSubSuperscripts = val } + +-- | Modify a parser state +modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParserState +modifyExportSettings setter val state = + state { orgStateExportSettings = setter val . orgStateExportSettings $ state } + +-- +-- Parser state reader +-- + +-- | Reader monad wrapping the parser state. This is used to delay evaluation +-- until all relevant information has been parsed and made available in the +-- parser state. See also the newtype of the same name in +-- Text.Pandoc.Parsing. +newtype F a = F { unF :: Reader OrgParserState a + } deriving (Functor, Applicative, Monad) + +instance Monoid a => Monoid (F a) where + mempty = return mempty + mappend = liftM2 mappend + mconcat = fmap mconcat . sequence + +runF :: F a -> OrgParserState -> a +runF = runReader . unF + +askF :: F OrgParserState +askF = F ask + +asksF :: (OrgParserState -> a) -> F a +asksF f = F $ asks f + +trimInlinesF :: F Inlines -> F Inlines +trimInlinesF = liftM trimInlines + +returnF :: Monad m => a -> m (F a) +returnF = return . return diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 2aaebf99f..9acfe289a 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -112,10 +112,15 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = else elements tag = case lvl of n | n == 0 -> "chapter" - | n >= 1 && n <= 5 -> "sect" ++ show n + | n >= 1 && n <= 5 -> if writerDocbook5 opts + then "section" + else "sect" ++ show n | otherwise -> "simplesect" - in inTags True tag [("id", writerIdentifierPrefix opts ++ id') | - not (null id')] $ + idAttr = [("id", writerIdentifierPrefix opts ++ id') | not (null id')] + nsAttr = if writerDocbook5 opts && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook")] + else [] + attribs = nsAttr ++ idAttr + in inTags True tag attribs $ inTagsSimple "title" (inlinesToDocbook opts title) $$ vcat (map (elementToDocbook opts (lvl + 1)) elements') @@ -227,9 +232,11 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = blockToDocbook opts (DefinitionList lst) = let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst -blockToDocbook _ (RawBlock f str) +blockToDocbook opts (RawBlock f str) | f == "docbook" = text str -- raw XML block - | f == "html" = text str -- allow html for backwards compatibility + | f == "html" = if writerDocbook5 opts + then empty -- No html in Docbook5 + else text str -- allow html for backwards compatibility | otherwise = empty blockToDocbook _ HorizontalRule = empty -- not semantic blockToDocbook opts (Table caption aligns widths headers rows) = @@ -344,7 +351,9 @@ inlineToDocbook opts (Link attr txt (src, _)) | otherwise = (if isPrefixOf "#" src then inTags False "link" $ ("linkend", drop 1 src) : idAndRole attr - else inTags False "ulink" $ ("url", src) : idAndRole attr ) $ + else if writerDocbook5 opts + then inTags False "link" $ ("xlink:href", src) : idAndRole attr + else inTags False "ulink" $ ("url", src) : idAndRole attr ) $ inlinesToDocbook opts txt inlineToDocbook opts (Image attr _ (src, tit)) = let titleDoc = if null tit diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 283c8bc44..9284d18ee 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -39,7 +39,8 @@ import Text.Pandoc.Templates import Text.Printf ( printf ) import Network.URI ( isURI, unEscapeString ) import Data.Aeson (object, (.=), FromJSON) -import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, nub, nubBy ) +import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, + nub, nubBy, foldl' ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord, isAlphaNum ) import Data.Maybe ( fromMaybe, isJust, catMaybes ) @@ -674,7 +675,8 @@ tableCellToLaTeX header (width, align, blocks) = do AlignDefault -> "\\raggedright" return $ ("\\begin{minipage}" <> valign <> braces (text (printf "%.2f\\columnwidth" width)) <> - (halign <> cr <> cellContents <> cr) <> "\\end{minipage}") $$ + (halign <> "\\strut" <> cr <> cellContents <> "\\strut" <> cr) <> + "\\end{minipage}") $$ notesToLaTeX notes notesToLaTeX :: [Doc] -> Doc @@ -725,7 +727,7 @@ sectionHeader :: Bool -- True for unnumbered -> State WriterState Doc sectionHeader unnumbered ident level lst = do txt <- inlineListToLaTeX lst - plain <- stringToLaTeX TextString $ foldl (++) "" $ map stringify lst + plain <- stringToLaTeX TextString $ concatMap stringify lst let noNote (Note _) = Str "" noNote x = x let lstNoNotes = walk noNote lst @@ -1037,7 +1039,7 @@ citationsToNatbib (c:cs) | citationMode c == AuthorInText = do citationsToNatbib cits = do cits' <- mapM convertOne cits - return $ text "\\citetext{" <> foldl combineTwo empty cits' <> text "}" + return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}" where combineTwo a b | isEmpty a = b | otherwise = a <> text "; " <> b @@ -1086,7 +1088,7 @@ citationsToBiblatex (one:[]) citationsToBiblatex (c:cs) = do args <- mapM convertOne (c:cs) - return $ text cmd <> foldl (<>) empty args + return $ text cmd <> foldl' (<>) empty args where cmd = case citationMode c of AuthorInText -> "\\textcites" @@ -1305,4 +1307,3 @@ pDocumentClass = else do P.skipMany (P.satisfy (/='{')) P.char '{' P.manyTill P.letter (P.char '}') - diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 20086ed19..e57a6fc11 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -170,7 +170,7 @@ blockToOrg (Table caption' _ _ headers rows) = do map ((+2) . numChars) $ transpose (headers' : rawRows) -- FIXME: Org doesn't allow blocks with height more than 1. let hpipeBlocks blocks = hcat [beg, middle, end] - where h = maximum (map height blocks) + where h = maximum (1 : map height blocks) sep' = lblock 3 $ vcat (map text $ replicate h " | ") beg = lblock 2 $ vcat (map text $ replicate h "| ") end = lblock 2 $ vcat (map text $ replicate h " |") |