diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 45 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Lists.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 81 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/ExportSettings.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Meta.hs | 181 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/ParserState.hs | 6 |
7 files changed, 233 insertions, 96 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 2bc17c069..b51572783 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -1,7 +1,7 @@ -{-# LANGUAGE PatternGuards, OverloadedStrings #-} +{-# LANGUAGE PatternGuards, OverloadedStrings, CPP #-} {- -Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx - Copyright : Copyright (C) 2014 Jesse Rosenthal + Copyright : Copyright (C) 2014-2016 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> @@ -83,7 +83,7 @@ import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Combine import Text.Pandoc.Shared import Text.Pandoc.MediaBag (insertMedia, MediaBag) -import Data.List (delete, (\\), intersect) +import Data.List (delete, intersect) import Text.TeXMath (writeTeX) import Data.Default (Default) import qualified Data.ByteString.Lazy as B @@ -93,6 +93,9 @@ import Control.Monad.Reader import Control.Monad.State import Data.Sequence (ViewL(..), viewl) import qualified Data.Sequence as Seq (null) +#if !(MIN_VERSION_base(4,8,0)) +import Data.Traversable (traverse) +#endif import Text.Pandoc.Error import Text.Pandoc.Compat.Except @@ -412,39 +415,39 @@ parPartToInlines (PlainOMath exps) = do return $ math $ writeTeX exps isAnchorSpan :: Inline -> Bool -isAnchorSpan (Span (_, classes, kvs) ils) = +isAnchorSpan (Span (_, classes, kvs) _) = classes == ["anchor"] && - null kvs && - null ils + null kvs isAnchorSpan _ = False dummyAnchors :: [String] dummyAnchors = ["_GoBack"] makeHeaderAnchor :: Blocks -> DocxContext Blocks -makeHeaderAnchor bs = case viewl $ unMany bs of - (x :< xs) -> do - x' <- (makeHeaderAnchor' x) - xs' <- (makeHeaderAnchor $ Many xs) - return $ (singleton x') <> xs' - EmptyL -> return mempty +makeHeaderAnchor bs = traverse makeHeaderAnchor' bs makeHeaderAnchor' :: Block -> DocxContext Block -- If there is an anchor already there (an anchor span in the header, -- to be exact), we rename and associate the new id with the old one. -makeHeaderAnchor' (Header n (_, classes, kvs) ils) - | (c:cs) <- filter isAnchorSpan ils - , (Span (ident, ["anchor"], _) _) <- c = do +makeHeaderAnchor' (Header n (ident, classes, kvs) ils) + | (c:_) <- filter isAnchorSpan ils + , (Span (anchIdent, ["anchor"], _) cIls) <- c = do hdrIDMap <- gets docxAnchorMap - let newIdent = uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) - modify $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap} - return $ Header n (newIdent, classes, kvs) (ils \\ (c:cs)) + let newIdent = if null ident + then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) + else ident + newIls = concatMap f ils where f il | il == c = cIls + | otherwise = [il] + modify $ \s -> s {docxAnchorMap = M.insert anchIdent newIdent hdrIDMap} + makeHeaderAnchor' $ Header n (newIdent, classes, kvs) newIls -- Otherwise we just give it a name, and register that name (associate -- it with itself.) -makeHeaderAnchor' (Header n (_, classes, kvs) ils) = +makeHeaderAnchor' (Header n (ident, classes, kvs) ils) = do hdrIDMap <- gets docxAnchorMap - let newIdent = uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) + let newIdent = if null ident + then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) + else ident modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) ils makeHeaderAnchor' blk = return blk diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index c265ad074..395a53907 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx.Lists - Copyright : Copyright (C) 2014 Jesse Rosenthal + Copyright : Copyright (C) 2014-2016 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 9ae7f22ec..3efdb70a8 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,7 +1,7 @@ {-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-} {- -Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx.Parse - Copyright : Copyright (C) 2014 Jesse Rosenthal + Copyright : Copyright (C) 2014-2016 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 6a8bb8b28..f5873d55f 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -34,8 +34,8 @@ module Text.Pandoc.Readers.Org.Blocks ) where import Text.Pandoc.Readers.Org.BlockStarts -import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings ) import Text.Pandoc.Readers.Org.Inlines +import Text.Pandoc.Readers.Org.Meta ( metaExport, metaLine ) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Shared @@ -52,9 +52,7 @@ import Text.Pandoc.Shared ( compactify', compactify'DL ) import Control.Monad ( foldM, guard, mzero, void ) import Data.Char ( isSpace, toLower, toUpper) import Data.List ( foldl', intersperse, isPrefixOf ) -import qualified Data.Map as M import Data.Maybe ( fromMaybe, isNothing ) -import Network.HTTP ( urlEncode ) -- -- Org headers @@ -82,6 +80,10 @@ newtype PropertyValue = PropertyValue { fromValue :: String } toPropertyValue :: String -> PropertyValue toPropertyValue = PropertyValue +-- | Check whether the property value is non-nil (i.e. truish). +isNonNil :: PropertyValue -> Bool +isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] + -- | Key/value pairs from a PROPERTIES drawer type Properties = [(PropertyKey, PropertyValue)] @@ -202,12 +204,16 @@ propertiesToAttr properties = toStringPair prop = (fromKey (fst prop), fromValue (snd prop)) customIdKey = toPropertyKey "custom_id" classKey = toPropertyKey "class" + unnumberedKey = toPropertyKey "unnumbered" + specialProperties = [customIdKey, classKey, unnumberedKey] id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` [customIdKey, classKey]) . fst) + kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) $ properties + isUnnumbered = + fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties in - (id', words cls, kvs') + (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs') tagTitle :: Inlines -> [Tag] -> Inlines tagTitle title tags = title <> (mconcat $ map tagToInline tags) @@ -232,8 +238,8 @@ blockList = do -- | Get the meta information safed in the state. meta :: OrgParser Meta meta = do - st <- getState - return $ runF (orgStateMeta st) st + meta' <- metaExport + runF meta' <$> getState blocks :: OrgParser (F Blocks) blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof) @@ -631,67 +637,9 @@ exampleCode = B.codeBlockWith ("", ["example"], []) specialLine :: OrgParser (F Blocks) specialLine = fmap return . try $ metaLine <|> commentLine --- The order, in which blocks are tried, makes sure that we're not looking at --- the beginning of a block, so we don't need to check for it -metaLine :: OrgParser Blocks -metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) - commentLine :: OrgParser Blocks commentLine = commentLineStart *> anyLine *> pure mempty -declarationLine :: OrgParser () -declarationLine = try $ do - key <- metaKey - value <- metaInlines - updateState $ \st -> - let meta' = B.setMeta key <$> value <*> pure nullMeta - in st { orgStateMeta = orgStateMeta st <> meta' } - -metaInlines :: OrgParser (F MetaValue) -metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline - -metaKey :: OrgParser String -metaKey = map toLower <$> many1 (noneOf ": \n\r") - <* char ':' - <* skipSpaces - -optionLine :: OrgParser () -optionLine = try $ do - key <- metaKey - case key of - "link" -> parseLinkFormat >>= uncurry addLinkFormat - "options" -> exportSettings - _ -> mzero - -addLinkFormat :: String - -> (String -> String) - -> OrgParser () -addLinkFormat key formatter = updateState $ \s -> - let fs = orgStateLinkFormatters s - in s{ orgStateLinkFormatters = M.insert key formatter fs } - -parseLinkFormat :: OrgParser ((String, String -> String)) -parseLinkFormat = try $ do - linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces - linkSubst <- parseFormat - return (linkType, linkSubst) - --- | An ad-hoc, single-argument-only implementation of a printf-style format --- parser. -parseFormat :: OrgParser (String -> String) -parseFormat = try $ do - replacePlain <|> replaceUrl <|> justAppend - where - -- inefficient, but who cares - replacePlain = try $ (\x -> concat . flip intersperse x) - <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) - <$> sequence [tillSpecifier 'h', rest] - justAppend = try $ (++) <$> rest - - rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") - tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) - -- -- Tables @@ -868,9 +816,6 @@ paraOrPlain = try $ do *> return (B.para <$> ils)) <|> (return (B.plain <$> ils)) -inlinesTillNewline :: OrgParser (F Inlines) -inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline - -- -- list blocks diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index b48acc9c4..283cfa998 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -54,13 +54,15 @@ exportSetting = choice , ignoredSetting "<" , ignoredSetting "\\n" , archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val }) - , ignoredSetting "author" + , booleanSetting "author" (\val es -> es { exportWithAuthor = val }) , ignoredSetting "c" - , ignoredSetting "creator" + -- org-mode allows the special value `comment` for creator, which we'll + -- interpret as true as it doesn't make sense in the context of Pandoc. + , booleanSetting "creator" (\val es -> es { exportWithCreator = val }) , complementableListSetting "d" (\val es -> es { exportDrawers = val }) , ignoredSetting "date" , ignoredSetting "e" - , ignoredSetting "email" + , booleanSetting "email" (\val es -> es { exportWithEmail = val }) , ignoredSetting "f" , integerSetting "H" (\val es -> es { exportHeadlineLevels = val }) , ignoredSetting "inline" diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs new file mode 100644 index 000000000..11eb18e36 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -0,0 +1,181 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{- +Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org.Meta + Copyright : Copyright (C) 2014-2016 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +Parsers for Org-mode meta declarations. +-} +module Text.Pandoc.Readers.Org.Meta + ( metaLine + , metaExport + ) where + +import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings ) +import Text.Pandoc.Readers.Org.Inlines +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing + +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder ( Blocks, Inlines ) +import Text.Pandoc.Definition +import Text.Pandoc.Compat.Monoid ((<>)) + +import Control.Monad ( mzero ) +import Data.Char ( toLower ) +import Data.List ( intersperse ) +import qualified Data.Map as M +import Network.HTTP ( urlEncode ) + +-- | Returns the current meta, respecting export options. +metaExport :: OrgParser (F Meta) +metaExport = do + st <- getState + let settings = orgStateExportSettings st + return $ (if exportWithAuthor settings then id else removeMeta "author") + . (if exportWithCreator settings then id else removeMeta "creator") + . (if exportWithEmail settings then id else removeMeta "email") + <$> orgStateMeta st + +removeMeta :: String -> Meta -> Meta +removeMeta key meta' = + let metaMap = unMeta meta' + in Meta $ M.delete key metaMap + +-- | Parse and handle a single line containing meta information +-- The order, in which blocks are tried, makes sure that we're not looking at +-- the beginning of a block, so we don't need to check for it +metaLine :: OrgParser Blocks +metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) + +declarationLine :: OrgParser () +declarationLine = try $ do + key <- map toLower <$> metaKey + (key', value) <- metaValue key + updateState $ \st -> + let meta' = B.setMeta key' <$> value <*> pure nullMeta + in st { orgStateMeta = meta' <> orgStateMeta st } + +metaKey :: OrgParser String +metaKey = map toLower <$> many1 (noneOf ": \n\r") + <* char ':' + <* skipSpaces + +metaValue :: String -> OrgParser (String, (F MetaValue)) +metaValue key = + let inclKey = "header-includes" + in case key of + "author" -> (key,) <$> metaInlinesCommaSeparated + "title" -> (key,) <$> metaInlines + "date" -> (key,) <$> metaInlines + "header-includes" -> (key,) <$> accumulatingList key metaInlines + "latex_header" -> (inclKey,) <$> + accumulatingList inclKey (metaExportSnippet "latex") + "latex_class" -> ("documentclass",) <$> metaString + -- Org-mode expects class options to contain the surrounding brackets, + -- pandoc does not. + "latex_class_options" -> ("classoption",) <$> + metaModifiedString (filter (`notElem` "[]")) + "html_head" -> (inclKey,) <$> + accumulatingList inclKey (metaExportSnippet "html") + _ -> (key,) <$> metaString + +metaInlines :: OrgParser (F MetaValue) +metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline + +metaInlinesCommaSeparated :: OrgParser (F MetaValue) +metaInlinesCommaSeparated = do + authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',') + newline + authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs + let toMetaInlines = MetaInlines . B.toList + return $ MetaList . map toMetaInlines <$> sequence authors + +metaString :: OrgParser (F MetaValue) +metaString = metaModifiedString id + +metaModifiedString :: (String -> String) -> OrgParser (F MetaValue) +metaModifiedString f = return . MetaString . f <$> anyLine + +-- | Read an format specific meta definition +metaExportSnippet :: String -> OrgParser (F MetaValue) +metaExportSnippet format = + return . MetaInlines . B.toList . B.rawInline format <$> anyLine + +-- | Accumulate the result of the @parser@ in a list under @key@. +accumulatingList :: String + -> OrgParser (F MetaValue) + -> OrgParser (F MetaValue) +accumulatingList key p = do + value <- p + meta' <- orgStateMeta <$> getState + return $ (\m v -> MetaList (curList m ++ [v])) <$> meta' <*> value + where curList m = case lookupMeta key m of + Just (MetaList ms) -> ms + Just x -> [x] + _ -> [] + +-- +-- export options +-- +optionLine :: OrgParser () +optionLine = try $ do + key <- metaKey + case key of + "link" -> parseLinkFormat >>= uncurry addLinkFormat + "options" -> exportSettings + _ -> mzero + +addLinkFormat :: String + -> (String -> String) + -> OrgParser () +addLinkFormat key formatter = updateState $ \s -> + let fs = orgStateLinkFormatters s + in s{ orgStateLinkFormatters = M.insert key formatter fs } + +parseLinkFormat :: OrgParser ((String, String -> String)) +parseLinkFormat = try $ do + linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkSubst <- parseFormat + return (linkType, linkSubst) + +-- | An ad-hoc, single-argument-only implementation of a printf-style format +-- parser. +parseFormat :: OrgParser (String -> String) +parseFormat = try $ do + replacePlain <|> replaceUrl <|> justAppend + where + -- inefficient, but who cares + replacePlain = try $ (\x -> concat . flip intersperse x) + <$> sequence [tillSpecifier 's', rest] + replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + <$> sequence [tillSpecifier 'h', rest] + justAppend = try $ (++) <$> rest + + rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + +inlinesTillNewline :: OrgParser (F Inlines) +inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 48e7717cd..84dbe9d33 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -163,6 +163,9 @@ data ExportSettings = ExportSettings , exportSmartQuotes :: Bool -- ^ Parse quotes smartly , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts + , exportWithAuthor :: Bool -- ^ Include author in final meta-data + , exportWithCreator :: Bool -- ^ Include creator in final meta-data + , exportWithEmail :: Bool -- ^ Include email in final meta-data } instance Default ExportSettings where @@ -177,6 +180,9 @@ defaultExportSettings = ExportSettings , exportSmartQuotes = True , exportSpecialStrings = True , exportSubSuperscripts = True + , exportWithAuthor = True + , exportWithCreator = True + , exportWithEmail = True } |