diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 22 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 45 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 33 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Shared.hs | 25 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 5 |
5 files changed, 79 insertions, 51 deletions
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index d3cee08e2..dcf0c5f4a 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -253,7 +253,7 @@ parseAttr = try $ do k <- many1 letter char '=' v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) - <|> many1 nonspaceChar + <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) tableStart :: MWParser () @@ -376,11 +376,17 @@ preformatted = try $ do spacesStr _ = False if F.all spacesStr contents then return mempty - else return $ B.para $ walk strToCode contents - -strToCode :: Inline -> Inline -strToCode (Str s) = Code ("",[],[]) s -strToCode x = x + else return $ B.para $ encode contents + +encode :: Inlines -> Inlines +encode = B.fromList . normalizeCode . B.toList . walk strToCode + where strToCode (Str s) = Code ("",[],[]) s + strToCode Space = Code ("",[],[]) " " + strToCode x = x + normalizeCode [] = [] + normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 = + normalizeCode $ (Code a1 (x ++ y)) : zs + normalizeCode (x:xs) = x : normalizeCode xs header :: MWParser Blocks header = try $ do @@ -545,8 +551,8 @@ inlineTag = do TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub" TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup" - TagOpen "code" _ -> walk strToCode <$> inlinesInTags "code" - TagOpen "tt" _ -> walk strToCode <$> inlinesInTags "tt" + TagOpen "code" _ -> encode <$> inlinesInTags "code" + TagOpen "tt" _ -> encode <$> inlinesInTags "tt" TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 0bd82ce2f..6a8bb8b28 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -39,8 +39,8 @@ import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Shared - ( isImageFilename, rundocBlockClass, toRundocAttrib - , translateLang ) + ( cleanLinkString, isImageFilename, rundocBlockClass + , toRundocAttrib, translateLang ) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines, Blocks ) @@ -422,7 +422,16 @@ verseBlock blockType = try $ do ignHeaders content <- rawBlockContent blockType fmap B.para . mconcat . intersperse (pure B.linebreak) - <$> mapM (parseFromString inlines) (map (++ "\n") . lines $ content) + <$> mapM parseVerseLine (lines content) + where + -- replace initial spaces with nonbreaking spaces to preserve + -- indentation, parse the rest as normal inline + parseVerseLine :: String -> OrgParser (F Inlines) + parseVerseLine cs = do + let (initialSpaces, indentedLine) = span isSpace cs + let nbspIndent = B.str $ map (const '\160') initialSpaces + line <- parseFromString inlines (indentedLine ++ "\n") + return (pure nbspIndent <> line) -- | Read a code block and the associated results block if present. Which of -- boths blocks is included in the output is determined using the "exports" @@ -571,23 +580,33 @@ figure :: OrgParser (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* newline - guard . not . isNothing . blockAttrCaption $ figAttrs - guard (isImageFilename src) - let figName = fromMaybe mempty $ blockAttrName figAttrs - let figLabel = fromMaybe mempty $ blockAttrLabel figAttrs - let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs - let figKeyVals = blockAttrKeyValues figAttrs - let attr = (figLabel, mempty, figKeyVals) - return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption) + case cleanLinkString src of + Nothing -> mzero + Just imgSrc -> do + guard (not . isNothing . blockAttrCaption $ figAttrs) + guard (isImageFilename imgSrc) + return $ figureBlock figAttrs imgSrc where + selfTarget :: OrgParser String + selfTarget = try $ char '[' *> linkTarget <* char ']' + + figureBlock :: BlockAttributes -> String -> (F Blocks) + figureBlock figAttrs imgSrc = + let + figName = fromMaybe mempty $ blockAttrName figAttrs + figLabel = fromMaybe mempty $ blockAttrLabel figAttrs + figCaption = fromMaybe mempty $ blockAttrCaption figAttrs + figKeyVals = blockAttrKeyValues figAttrs + attr = (figLabel, mempty, figKeyVals) + in + B.para . B.imageWith attr imgSrc (withFigPrefix figName) <$> figCaption + withFigPrefix :: String -> String withFigPrefix cs = if "fig:" `isPrefixOf` cs then cs else "fig:" ++ cs - selfTarget :: OrgParser String - selfTarget = try $ char '[' *> linkTarget <* char ']' -- -- Examples diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index e1a66a8c7..31f098d27 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -37,8 +37,8 @@ import Text.Pandoc.Readers.Org.BlockStarts import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Shared - ( isImageFilename, rundocBlockClass, toRundocAttrib - , translateLang ) + ( cleanLinkString, isImageFilename, rundocBlockClass + , toRundocAttrib, translateLang ) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines ) @@ -52,7 +52,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Prelude hiding (sequence) import Control.Monad ( guard, mplus, mzero, when, void ) import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( intersperse, isPrefixOf ) +import Data.List ( intersperse ) import Data.Maybe ( fromMaybe ) import qualified Data.Map as M import Data.Traversable (sequence) @@ -435,9 +435,11 @@ explicitOrImageLink = try $ do char ']' return $ do src <- srcF - if isImageFilename title - then pure $ B.link src "" $ B.image title mempty mempty - else linkToInlinesF src =<< title' + case cleanLinkString title of + Just imgSrc | isImageFilename imgSrc -> + pure $ B.link src "" $ B.image imgSrc mempty mempty + _ -> + linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do @@ -482,25 +484,6 @@ linkToInlinesF linkStr = else pure . B.link cleanedLink "" Nothing -> internalLink linkStr -- other internal link --- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if --- the string does not appear to be a link. -cleanLinkString :: String -> Maybe String -cleanLinkString s = - case s of - '/':_ -> Just $ "file://" ++ s -- absolute path - '.':'/':_ -> Just s -- relative path - '.':'.':'/':_ -> Just s -- relative path - -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' - _ | isUrl s -> Just s -- URL - _ -> Nothing - where - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) - internalLink :: String -> Inlines -> F Inlines internalLink link title = do anchorB <- (link `elem`) <$> asksF orgStateAnchorIds diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 3ba46b9e4..8c87cfa25 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -27,13 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Utility functions used in other Pandoc Org modules. -} module Text.Pandoc.Readers.Org.Shared - ( isImageFilename + ( cleanLinkString + , isImageFilename , rundocBlockClass , toRundocAttrib , translateLang ) where import Control.Arrow ( first ) +import Data.Char ( isAlphaNum ) import Data.List ( isPrefixOf, isSuffixOf ) @@ -41,12 +43,31 @@ import Data.List ( isPrefixOf, isSuffixOf ) isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - (any (\x -> (x++":") `isPrefixOf` filename) protocols || + (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols || ':' `notElem` filename) where imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] +-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if +-- the string does not appear to be a link. +cleanLinkString :: String -> Maybe String +cleanLinkString s = + case s of + '/':_ -> Just $ "file://" ++ s -- absolute path + '.':'/':_ -> Just s -- relative path + '.':'.':'/':_ -> Just s -- relative path + -- Relative path or URL (file schema) + 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' + _ | isUrl s -> Just s -- URL + _ -> Nothing + where + isUrl :: String -> Bool + isUrl cs = + let (scheme, path) = break (== ':') cs + in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme + && not (null path) + -- | Prefix used for Rundoc classes and arguments. rundocPrefix :: String rundocPrefix = "rundoc-" diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 4ab0243fe..a4de85dfb 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -60,7 +60,7 @@ import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag ) import Text.Pandoc.Shared (trim) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) -import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) +import Text.HTML.TagSoup (fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match import Data.List ( intercalate, transpose, intersperse ) import Data.Char ( digitToInt, isUpper ) @@ -182,8 +182,7 @@ trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse codeBlockPre :: Parser [Char] ParserState Blocks codeBlockPre = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) - result' <- (innerText . parseTags) `fmap` -- remove internal tags - manyTill anyChar (htmlTag (tagClose (=="pre"))) + result' <- manyTill anyChar (htmlTag (tagClose (=="pre"))) optional blanklines -- drop leading newline if any let result'' = case result' of |