diff options
author | John MacFarlane <jgm@berkeley.edu> | 2016-06-03 09:39:32 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2016-06-03 09:39:32 -0700 |
commit | 2b2769975fea76337a5afd9e985a4de43c7276a0 (patch) | |
tree | 1657d1b7920d2854fe3623500bc448323319996d /src/Text/Pandoc/Readers | |
parent | 9eb3dba243cf88e6e4d852161a308e815ef626d7 (diff) | |
parent | f56792927fbcebfd25593c488476fee69d078b48 (diff) | |
download | pandoc-2b2769975fea76337a5afd9e985a4de43c7276a0.tar.gz |
Merge pull request #2962 from tarleb/org-more-export-settings
Org reader: support more export settings
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 41 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 63 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/ParserState.hs | 78 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Parsing.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Shared.hs | 76 |
6 files changed, 174 insertions, 103 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 1042b5a21..d593f856d 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -29,7 +29,7 @@ module Text.Pandoc.Readers.Org ( readOrg ) where import Text.Pandoc.Readers.Org.Blocks ( blockList, meta ) import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM ) -import Text.Pandoc.Readers.Org.ParserState ( OrgParserState (..) ) +import Text.Pandoc.Readers.Org.ParserState ( optionsToParserState ) import Text.Pandoc.Definition import Text.Pandoc.Error @@ -42,7 +42,8 @@ import Control.Monad.Reader ( runReader ) readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Either PandocError Pandoc -readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") +readOrg opts s = flip runReader def $ + readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n") -- -- Parser diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 36645a356..75e564f2f 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -35,6 +35,9 @@ import Text.Pandoc.Readers.Org.BlockStarts import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Readers.Org.Shared + ( isImageFilename, rundocBlockClass, toRundocAttrib + , translateLang ) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines, Blocks ) @@ -43,7 +46,6 @@ import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Options import Text.Pandoc.Shared ( compactify', compactify'DL ) -import Control.Arrow ( first ) import Control.Monad ( foldM, guard, mzero ) import Data.Char ( isSpace, toLower, toUpper) import Data.List ( foldl', intersperse, isPrefixOf ) @@ -67,7 +69,7 @@ blockList = do meta :: OrgParser Meta meta = do st <- getState - return $ runF (orgStateMeta' st) st + return $ runF (orgStateMeta st) st blocks :: OrgParser (F Blocks) blocks = mconcat <$> manyTill block eof @@ -314,7 +316,6 @@ codeHeaderArgs = try $ do else ([ pandocLang ], parameters) where hasRundocParameters = not . null - toRundocAttrib = first ("rundoc-" ++) switch :: OrgParser (Char, Maybe String) switch = try $ simpleSwitch <|> lineNumbersSwitch @@ -323,25 +324,6 @@ switch = try $ simpleSwitch <|> lineNumbersSwitch lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> (string "-l \"" *> many1Till nonspaceChar (char '"')) -translateLang :: String -> String -translateLang "C" = "c" -translateLang "C++" = "cpp" -translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported -translateLang "js" = "javascript" -translateLang "lisp" = "commonlisp" -translateLang "R" = "r" -translateLang "sh" = "bash" -translateLang "sqlite" = "sql" -translateLang cs = cs - --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - blockOption :: OrgParser (String, String) blockOption = try $ do argKey <- orgArgKey @@ -480,12 +462,11 @@ commentLine = commentLineStart *> anyLine *> pure mempty declarationLine :: OrgParser () declarationLine = try $ do - key <- metaKey - inlinesF <- metaInlines + key <- metaKey + value <- metaInlines updateState $ \st -> - let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta - in st { orgStateMeta' = orgStateMeta' st <> meta' } - return () + let meta' = B.setMeta key <$> value <*> pure nullMeta + in st { orgStateMeta = orgStateMeta st <> meta' } metaInlines :: OrgParser (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline @@ -519,9 +500,9 @@ addLinkFormat key formatter = updateState $ \s -> exportSetting :: OrgParser () exportSetting = choice [ booleanSetting "^" setExportSubSuperscripts - , ignoredSetting "'" - , ignoredSetting "*" - , ignoredSetting "-" + , booleanSetting "'" setExportSmartQuotes + , booleanSetting "*" setExportEmphasizedText + , booleanSetting "-" setExportSpecialStrings , ignoredSetting ":" , ignoredSetting "<" , ignoredSetting "\\n" diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 6971ca3c6..001aeb569 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -30,13 +30,15 @@ module Text.Pandoc.Readers.Org.Inlines ( inline , inlines , addToNotesTable - , isImageFilename , linkTarget ) where import Text.Pandoc.Readers.Org.BlockStarts import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Readers.Org.Shared + ( isImageFilename, rundocBlockClass, toRundocAttrib + , translateLang ) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines ) @@ -47,35 +49,12 @@ import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline ) import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) ) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Arrow ( first ) import Control.Monad ( guard, mplus, mzero, when ) import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( isPrefixOf, isSuffixOf ) +import Data.List ( isPrefixOf ) import Data.Maybe ( fromMaybe ) import qualified Data.Map as M --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - -toRundocAttrib :: (String, String) -> (String, String) -toRundocAttrib = first ("rundoc-" ++) - -translateLang :: String -> String -translateLang "C" = "c" -translateLang "C++" = "cpp" -translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported -translateLang "js" = "javascript" -translateLang "lisp" = "commonlisp" -translateLang "R" = "r" -translateLang "sh" = "bash" -translateLang "sqlite" = "sql" -translateLang cs = cs - -- -- Functions acting on the parser state -- @@ -129,10 +108,7 @@ inline = , inlineCodeBlock , str , endline - , emph - , strong - , strikeout - , underline + , emphasizedText , code , math , displayMath @@ -405,15 +381,6 @@ cleanLinkString s = in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme && not (null path) -isImageFilename :: String -> Bool -isImageFilename filename = - any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - (any (\x -> (x++":") `isPrefixOf` filename) protocols || - ':' `notElem` filename) - where - imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] - protocols = [ "file", "http", "https" ] - internalLink :: String -> Inlines -> F Inlines internalLink link title = do anchorB <- (link `elem`) <$> asksF orgStateAnchorIds @@ -473,6 +440,16 @@ inlineCodeBlock = try $ do <* skipSpaces +emphasizedText :: OrgParser (F Inlines) +emphasizedText = do + state <- getState + guard . exportEmphasizedText . orgStateExportSettings $ state + try $ choice + [ emph + , strong + , strikeout + , underline + ] enclosedByPair :: Char -- ^ opening char -> Char -- ^ closing char @@ -751,8 +728,12 @@ smart = do doubleQuoted <|> singleQuoted <|> choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) where - orgDash = dash <* updatePositions '-' - orgEllipses = ellipses <* updatePositions '.' + orgDash = do + guard =<< getExportSetting exportSpecialStrings + dash <* updatePositions '-' + orgEllipses = do + guard =<< getExportSetting exportSpecialStrings + ellipses <* updatePositions '.' orgApostrophe = (char '\'' <|> char '\8217') <* updateLastPreCharPos <* updateLastForbiddenCharPos @@ -760,6 +741,7 @@ smart = do singleQuoted :: OrgParser (F Inlines) singleQuoted = try $ do + guard =<< getExportSetting exportSmartQuotes singleQuoteStart updatePositions '\'' withQuoteContext InSingleQuote $ @@ -771,6 +753,7 @@ singleQuoted = try $ do -- in the same paragraph. doubleQuoted :: OrgParser (F Inlines) doubleQuoted = try $ do + guard =<< getExportSetting exportSmartQuotes doubleQuoteStart updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index e648a883e..0c58183f9 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -42,9 +42,13 @@ module Text.Pandoc.Readers.Org.ParserState , returnF , ExportSettingSetter , ExportSettings (..) - , setExportSubSuperscripts , setExportDrawers + , setExportEmphasizedText + , setExportSmartQuotes + , setExportSpecialStrings + , setExportSubSuperscripts , modifyExportSettings + , optionsToParserState ) where import Control.Monad (liftM, liftM2) @@ -54,8 +58,7 @@ import Data.Default (Default(..)) import qualified Data.Map as M import qualified Data.Set as Set -import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), - trimInlines ) +import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) import Text.Pandoc.Definition ( Meta(..), nullMeta ) import Text.Pandoc.Options ( ReaderOptions(..) ) import Text.Pandoc.Parsing ( HasHeaderMap(..) @@ -78,30 +81,32 @@ type OrgLinkFormatters = M.Map String (String -> String) -- | Export settings <http://orgmode.org/manual/Export-settings.html> -- These settings can be changed via OPTIONS statements. data ExportSettings = ExportSettings - { exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts - , exportDrawers :: Either [String] [String] + { exportDrawers :: Either [String] [String] -- ^ Specify drawer names which should be exported. @Left@ names are -- explicitly excluded from the resulting output while @Right@ means that -- only the listed drawer names should be included. + , exportEmphasizedText :: Bool -- ^ Parse emphasized text + , exportSmartQuotes :: Bool -- ^ Parse quotes smartly + , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly + , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts } -- | Org-mode parser state data OrgParserState = OrgParserState - { orgStateOptions :: ReaderOptions - , orgStateAnchorIds :: [String] + { orgStateAnchorIds :: [String] , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int , orgStateExportSettings :: ExportSettings + , orgStateHeaderMap :: M.Map Inlines String + , orgStateIdentifiers :: Set.Set String , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters - , orgStateMeta :: Meta - , orgStateMeta' :: F Meta + , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable + , orgStateOptions :: ReaderOptions , orgStateParserContext :: ParserContext - , orgStateIdentifiers :: Set.Set String - , orgStateHeaderMap :: M.Map Inlines String } data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } @@ -112,12 +117,6 @@ instance Default OrgParserLocal where instance HasReaderOptions OrgParserState where extractReaderOptions = orgStateOptions -instance HasMeta OrgParserState where - setMeta field val st = - st{ orgStateMeta = setMeta field val $ orgStateMeta st } - deleteMeta field st = - st{ orgStateMeta = deleteMeta field $ orgStateMeta st } - instance HasLastStrPosition OrgParserState where getLastStrPos = orgStateLastStrPos setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } @@ -142,45 +141,64 @@ instance Default OrgParserState where defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState - { orgStateOptions = def - , orgStateAnchorIds = [] + { orgStateAnchorIds = [] , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing , orgStateExportSettings = def + , orgStateHeaderMap = M.empty + , orgStateIdentifiers = Set.empty , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty - , orgStateMeta = nullMeta - , orgStateMeta' = return nullMeta + , orgStateMeta = return nullMeta , orgStateNotes' = [] + , orgStateOptions = def , orgStateParserContext = NullState - , orgStateIdentifiers = Set.empty - , orgStateHeaderMap = M.empty } defaultExportSettings :: ExportSettings defaultExportSettings = ExportSettings - { exportSubSuperscripts = True - , exportDrawers = Left ["LOGBOOK"] + { exportDrawers = Left ["LOGBOOK"] + , exportEmphasizedText = True + , exportSmartQuotes = True + , exportSpecialStrings = True + , exportSubSuperscripts = True } +optionsToParserState :: ReaderOptions -> OrgParserState +optionsToParserState opts = + def { orgStateOptions = opts } + -- -- Setter for exporting options -- type ExportSettingSetter a = a -> ExportSettings -> ExportSettings --- | Set export options for sub/superscript parsing. The short syntax will --- not be parsed if this is set set to @False@. -setExportSubSuperscripts :: ExportSettingSetter Bool -setExportSubSuperscripts val es = es { exportSubSuperscripts = val } - -- | Set export options for drawers. See the @exportDrawers@ in ADT -- @ExportSettings@ for details. setExportDrawers :: ExportSettingSetter (Either [String] [String]) setExportDrawers val es = es { exportDrawers = val } +-- | Set export options for emphasis parsing. +setExportEmphasizedText :: ExportSettingSetter Bool +setExportEmphasizedText val es = es { exportEmphasizedText = val } + +-- | Set export options for parsing of smart quotes. +setExportSmartQuotes :: ExportSettingSetter Bool +setExportSmartQuotes val es = es { exportSmartQuotes = val } + +-- | Set export options for parsing of special strings (like em/en dashes or +-- ellipses). +setExportSpecialStrings :: ExportSettingSetter Bool +setExportSpecialStrings val es = es { exportSpecialStrings = val } + +-- | Set export options for sub/superscript parsing. The short syntax will +-- not be parsed if this is set set to @False@. +setExportSubSuperscripts :: ExportSettingSetter Bool +setExportSubSuperscripts val es = es { exportSubSuperscripts = val } + -- | Modify a parser state modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParserState modifyExportSettings setter val state = diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 0b6b876d8..8cf0c696c 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -37,6 +37,7 @@ module Text.Pandoc.Readers.Org.Parsing , skipSpaces1 , inList , withContext + , getExportSetting , updateLastForbiddenCharPos , updateLastPreCharPos , orgArgKey @@ -174,9 +175,13 @@ withContext context parser = do return result -- --- Parser state update functions +-- Parser state functions -- +-- | Get an export setting. +getExportSetting :: (ExportSettings -> a) -> OrgParser a +getExportSetting s = s . orgStateExportSettings <$> getState + -- | Set the current position as the last position at which a forbidden char -- was found (i.e. a character which is not allowed at the inner border of -- markup). @@ -190,13 +195,20 @@ updateLastPreCharPos :: OrgParser () updateLastPreCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastPreCharPos = Just p} +-- +-- Org key-value parsing +-- + +-- | Read the key of a plist style key-value list. orgArgKey :: OrgParser String orgArgKey = try $ skipSpaces *> char ':' *> many1 orgArgWordChar +-- | Read the value of a plist style key-value list. orgArgWord :: OrgParser String orgArgWord = many1 orgArgWordChar +-- | Chars treated as part of a word in plists. orgArgWordChar :: OrgParser Char orgArgWordChar = alphaNum <|> oneOf "-_" diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs new file mode 100644 index 000000000..3ba46b9e4 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -0,0 +1,76 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org.Options + Copyright : Copyright (C) 2014-2016 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +Utility functions used in other Pandoc Org modules. +-} +module Text.Pandoc.Readers.Org.Shared + ( isImageFilename + , rundocBlockClass + , toRundocAttrib + , translateLang + ) where + +import Control.Arrow ( first ) +import Data.List ( isPrefixOf, isSuffixOf ) + + +-- | Check whether the given string looks like the path to of URL of an image. +isImageFilename :: String -> Bool +isImageFilename filename = + any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && + (any (\x -> (x++":") `isPrefixOf` filename) protocols || + ':' `notElem` filename) + where + imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] + protocols = [ "file", "http", "https" ] + +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + +-- | The class-name used to mark rundoc blocks. +rundocBlockClass :: String +rundocBlockClass = rundocPrefix ++ "block" + +-- | Prefix the name of a attribute, marking it as a code execution parameter. +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first (rundocPrefix ++) + +-- | Translate from Org-mode's programming language identifiers to those used +-- by Pandoc. This is useful to allow for proper syntax highlighting in +-- Pandoc output. +translateLang :: String -> String +translateLang cs = + case cs of + "C" -> "c" + "C++" -> "cpp" + "emacs-lisp" -> "commonlisp" -- emacs lisp is not supported + "js" -> "javascript" + "lisp" -> "commonlisp" + "R" -> "r" + "sh" -> "bash" + "sqlite" -> "sql" + _ -> cs |