diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 17 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 20 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 9 |
11 files changed, 66 insertions, 38 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index a4d963221..d7311d978 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -320,14 +320,14 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = plainExtensions -getDefaultExtensions "org" = Set.fromList [Ext_citations] +getDefaultExtensions "org" = Set.fromList [Ext_citations, + Ext_auto_identifiers] getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers] getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers, Ext_native_divs, Ext_native_spans] getDefaultExtensions "html5" = getDefaultExtensions "html" -getDefaultExtensions "epub" = Set.fromList [Ext_auto_identifiers, - Ext_raw_html, +getDefaultExtensions "epub" = Set.fromList [Ext_raw_html, Ext_native_divs, Ext_native_spans, Ext_epub_html_exts] diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 3cc2a4479..352b94496 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -194,7 +194,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] indexterm - A wrapper for terms to be indexed [x] info - A wrapper for information about a component or other block. (DocBook v5) [x] informalequation - A displayed mathematical equation without a title -[ ] informalexample - A displayed example without a title +[x] informalexample - A displayed example without a title [ ] informalfigure - A untitled figure [ ] informaltable - A table without a title [ ] initializer - The initializer for a FieldSynopsis @@ -611,6 +611,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags "important","caution","note","tip","warning","qandadiv", "question","answer","abstract","itemizedlist","orderedlist", "variablelist","article","book","table","informaltable", + "informalexample", "screen","programlisting","example","calloutlist"] isBlockElement _ = False @@ -766,6 +767,8 @@ parseBlock (Elem e) = "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e "table" -> parseTable "informaltable" -> parseTable + "informalexample" -> divWith ("", ["informalexample"], []) <$> + getBlocks e "literallayout" -> codeBlockWithLang "screen" -> codeBlockWithLang "programlisting" -> codeBlockWithLang diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 338540533..aefc32e0e 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -181,7 +181,6 @@ getManifest archive = do fixInternalReferences :: FilePath -> Pandoc -> Pandoc fixInternalReferences pathToFile = (walk $ renameImages root) - . (walk normalisePath) . (walk $ fixBlockIRs filename) . (walk $ fixInlineIRs filename) where @@ -196,12 +195,6 @@ fixInlineIRs s (Link t ('#':url, tit)) = Link t (addHash s url, tit) fixInlineIRs _ v = v -normalisePath :: Inline -> Inline -normalisePath (Link t (url, tit)) = - let (path, uid) = span (/= '#') url in - Link t (takeFileName path ++ uid, tit) -normalisePath s = s - prependHash :: [String] -> Inline -> Inline prependHash ps l@(Link is (url, tit)) | or [s `isPrefixOf` url | s <- ps] = @@ -223,7 +216,7 @@ fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEP addHash :: String -> String -> String addHash _ "" = "" -addHash s ident = s ++ "#" ++ ident +addHash s ident = takeFileName s ++ "#" ++ ident removeEPUBAttrs :: [(String, String)] -> [(String, String)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 02bfcb2bb..b32264d61 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -50,6 +50,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) Ext_native_divs, Ext_native_spans)) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk +import qualified Data.Map as M import Data.Maybe ( fromMaybe, isJust) import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf ) import Data.Char ( isDigit ) @@ -75,8 +76,9 @@ readHtml :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readHtml opts inp = mapLeft (ParseFailure . getError) . flip runReader def $ - runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing) - "source" tags + runParserT parseDoc + (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty) + "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -101,7 +103,9 @@ data HTMLState = HTMLState { parserState :: ParserState, noteTable :: [(String, Blocks)], - baseHref :: Maybe String + baseHref :: Maybe String, + identifiers :: [String], + headerMap :: M.Map Inlines String } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -398,9 +402,10 @@ pHeader = try $ do let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] + attr' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle then mempty -- skip a representation of the title in the body - else B.headerWith (ident, classes, keyvals) level contents + else B.headerWith attr' level contents pHrule :: TagParser Blocks pHrule = do @@ -983,6 +988,14 @@ isSpace _ = False -- Instances +instance HasIdentifierList HTMLState where + extractIdentifierList = identifiers + updateIdentifierList f s = s{ identifiers = f (identifiers s) } + +instance HasHeaderMap HTMLState where + extractHeaderMap = headerMap + updateHeaderMap f s = s{ headerMap = f (headerMap s) } + -- This signature should be more general -- MonadReader HTMLLocal m => HasQuoteContext st m instance HasQuoteContext st (Reader HTMLLocal) where @@ -992,9 +1005,6 @@ instance HasQuoteContext st (Reader HTMLLocal) where instance HasReaderOptions HTMLState where extractReaderOptions = extractReaderOptions . parserState -instance Default HTMLState where - def = HTMLState def [] Nothing - instance HasMeta HTMLState where setMeta s b st = st {parserState = setMeta s b $ parserState st} deleteMeta s st = st {parserState = deleteMeta s $ parserState st} diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 0da912ea6..16d387dc4 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -524,6 +524,7 @@ inlineCommands = M.fromList $ , ("includegraphics", skipopts *> (unescapeURL <$> braced) >>= mkImage) , ("enquote", enquote) , ("cite", citation "cite" AuthorInText False) + , ("Cite", citation "cite" AuthorInText False) , ("citep", citation "citep" NormalCitation False) , ("citep*", citation "citep*" NormalCitation False) , ("citeal", citation "citeal" NormalCitation False) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 2a5adab22..b23b44544 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -252,8 +252,8 @@ parseAttr = try $ do skipMany spaceChar k <- many1 letter char '=' - char '"' - v <- many1Till (satisfy (/='\n')) (char '"') + v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) + <|> many1 nonspaceChar return (k,v) tableStart :: MWParser () diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 980f63504..55ac92bcb 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -70,6 +70,14 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) +instance HasIdentifierList OrgParserState where + extractIdentifierList = orgStateIdentifiers + updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) } + +instance HasHeaderMap OrgParserState where + extractHeaderMap = orgStateHeaderMap + updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) } + parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks @@ -135,6 +143,8 @@ data OrgParserState = OrgParserState , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable + , orgStateIdentifiers :: [String] + , orgStateHeaderMap :: M.Map Inlines String } instance Default OrgParserLocal where @@ -174,6 +184,8 @@ defaultOrgParserState = OrgParserState , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] + , orgStateIdentifiers = [] + , orgStateHeaderMap = M.empty } recordAnchorId :: String -> OrgParser () @@ -668,7 +680,10 @@ header = try $ do title <- manyTill inline (lookAhead headerEnd) tags <- headerEnd let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags - return $ B.header level <$> inlns + st <- getState + let inlines = runF inlns st + attr <- registerHeader nullAttr inlines + return $ pure (B.headerWith attr level inlines) where tagToInlineF :: String -> F Inlines tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 678eecc52..f9663b19a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -614,20 +614,22 @@ directive' = do return mempty -- TODO: --- - Silently ignores illegal fields -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: String -> [(String, String)] -> RSTParser Blocks addNewRole roleString fields = do (role, parentRole) <- parseFromString inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState - let (baseRole, baseFmt, baseAttr) = - maybe (parentRole, Nothing, nullAttr) id $ - M.lookup parentRole customRoles + let getBaseRole (r, f, a) roles = + case M.lookup r roles of + Just (r', f', a') -> getBaseRole (r', f', a') roles + Nothing -> (r, f, a) + (baseRole, baseFmt, baseAttr) = + getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt annotate :: [String] -> [String] annotate = maybe id (:) $ - if parentRole == "code" + if baseRole == "code" then lookup "language" fields else Nothing attr = let (ident, classes, keyValues) = baseAttr @@ -636,12 +638,12 @@ addNewRole roleString fields = do -- warn about syntax we ignore flip mapM_ fields $ \(key, _) -> case key of - "language" -> when (parentRole /= "code") $ addWarning Nothing $ + "language" -> when (baseRole /= "code") $ addWarning Nothing $ "ignoring :language: field because the parent of role :" ++ - role ++ ": is :" ++ parentRole ++ ": not :code:" - "format" -> when (parentRole /= "raw") $ addWarning Nothing $ + role ++ ": is :" ++ baseRole ++ ": not :code:" + "format" -> when (baseRole /= "raw") $ addWarning Nothing $ "ignoring :format: field because the parent of role :" ++ - role ++ ": is :" ++ parentRole ++ ": not :raw:" + role ++ ": is :" ++ baseRole ++ ": not :raw:" _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++ ": in definition of role :" ++ role ++ ": in" when (parentRole == "raw" && countKeys "format" > 1) $ diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 07a7e962c..c44133e12 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -540,6 +540,7 @@ stringify = query go . walk deNote go (Str x) = x go (Code _ x) = x go (Math _ x) = x + go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105 go LineBreak = " " go _ = "" deNote (Note _) = Str "" diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index da4c78cef..0cb313e7b 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -181,8 +181,8 @@ renumIds f renumMap = map (renumId f renumMap) -- | Certain characters are invalid in XML even if escaped. -- See #1992 -stripInvalidChars :: Pandoc -> Pandoc -stripInvalidChars = bottomUp (filter isValidChar) +stripInvalidChars :: String -> String +stripInvalidChars = filter isValidChar -- | See XML reference isValidChar :: Char -> Bool @@ -208,7 +208,7 @@ writeDocx :: WriterOptions -- ^ Writer options -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = stripInvalidChars . walk fixDisplayMath $ doc + let doc' = walk fixDisplayMath $ doc username <- lookup "USERNAME" <$> getEnvironment utctime <- getCurrentTime distArchive <- getDefaultReferenceDocx Nothing @@ -974,7 +974,7 @@ formattedString str = do return [ mknode "w:r" [] $ props ++ [ mknode (if inDel then "w:delText" else "w:t") - [("xml:space","preserve")] str ] ] + [("xml:space","preserve")] (stripInvalidChars str) ] ] setFirstPara :: WS () setFirstPara = modify $ \s -> s { stFirstPara = True } diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 151d3c2ae..fae908f30 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -333,7 +333,8 @@ blockListToRST = blockListToRST' False -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc inlineListToRST lst = - mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat + mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= + return . hcat where -- remove spaces after displaymath, as they screw up indentation: removeSpaceAfterDisplayMath (Math DisplayMath x : zs) = Math DisplayMath x : dropWhile (==Space) zs @@ -341,8 +342,8 @@ inlineListToRST lst = removeSpaceAfterDisplayMath [] = [] insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed insertBS (x:y:z:zs) - | isComplex y && surroundComplex x z = - x : y : RawInline "rst" "\\ " : insertBS (z:zs) + | isComplex y && (surroundComplex x z) = + x : y : insertBS (z : zs) insertBS (x:y:zs) | isComplex x && not (okAfterComplex y) = x : RawInline "rst" "\\ " : insertBS (y : zs) @@ -383,6 +384,8 @@ inlineListToRST lst = isComplex (Image _ _) = True isComplex (Code _ _) = True isComplex (Math _ _) = True + isComplex (Cite _ (x:_)) = isComplex x + isComplex (Span _ (x:_)) = isComplex x isComplex _ = False -- | Convert Pandoc inline element to RST. |