diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
21 files changed, 439 insertions, 252 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 51a35c8ad..71c7d05b2 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -103,7 +103,7 @@ addInline (Node _ (TEXT t) _) = (map toinl clumps ++) toinl (' ':_) = Space toinl xs = Str xs addInline (Node _ LINEBREAK _) = (LineBreak :) -addInline (Node _ SOFTBREAK _) = (Space :) +addInline (Node _ SOFTBREAK _) = (SoftBreak :) addInline (Node _ (INLINE_HTML t) _) = (RawInline (Format "html") (unpack t) :) addInline (Node _ (CODE t) _) = @@ -113,7 +113,7 @@ addInline (Node _ EMPH nodes) = addInline (Node _ STRONG nodes) = (Strong (addInlines nodes) :) addInline (Node _ (LINK url title) nodes) = - (Link (addInlines nodes) (unpack url, unpack title) :) + (Link nullAttr (addInlines nodes) (unpack url, unpack title) :) addInline (Node _ (IMAGE url title) nodes) = - (Image (addInlines nodes) (unpack url, unpack title) :) + (Image nullAttr (addInlines nodes) (unpack url, unpack title) :) addInline _ = id diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index f679ddb57..e8fe92e27 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -635,11 +635,20 @@ addToStart toadd bs = -- A DocBook mediaobject is a wrapper around a set of alternative presentations getMediaobject :: Element -> DB Inlines getMediaobject e = do - imageUrl <- case filterChild (named "imageobject") e of - Nothing -> return mempty - Just z -> case filterChild (named "imagedata") z of - Nothing -> return mempty - Just i -> return $ attrValue "fileref" i + (imageUrl, attr) <- + case filterChild (named "imageobject") e of + Nothing -> return (mempty, nullAttr) + Just z -> case filterChild (named "imagedata") z of + Nothing -> return (mempty, nullAttr) + Just i -> let atVal a = attrValue a i + w = case atVal "width" of + "" -> [] + d -> [("width", d)] + h = case atVal "depth" of + "" -> [] + d -> [("height", d)] + atr = (atVal "id", words $ atVal "role", w ++ h) + in return (atVal "fileref", atr) let getCaption el = case filterChild (\x -> named "caption" x || named "textobject" x || named "alt" x) el of @@ -649,7 +658,7 @@ getMediaobject e = do let (caption, title) = if isNull figTitle then (getCaption e, "") else (return figTitle, "fig:") - liftM (image imageUrl title) caption + liftM (imageWith attr imageUrl title) caption getBlocks :: Element -> DB Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -968,7 +977,8 @@ parseInline (Elem e) = Just h -> h _ -> ('#' : attrValue "linkend" e) let ils' = if ils == mempty then str href else ils - return $ link href "" ils' + let attr = (attrValue "id" e, words $ attrValue "role" e, []) + return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of "bold" -> strong <$> innerInlines diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index ab49bf002..44f67ce75 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -298,10 +298,17 @@ runToInlines (Footnote bps) = do runToInlines (Endnote bps) = do blksList <- concatReduce <$> (mapM bodyPartToBlocks bps) return $ note blksList -runToInlines (InlineDrawing fp bs) = do +runToInlines (InlineDrawing fp bs ext) = do mediaBag <- gets docxMediaBag modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } - return $ image fp "" "" + return $ imageWith (extentToAttr ext) fp "" "" + +extentToAttr :: Extent -> Attr +extentToAttr (Just (w, h)) = + ("", [], [("width", showDim w), ("height", showDim h)] ) + where + showDim d = show (d / 914400) ++ "in" +extentToAttr _ = nullAttr parPartToInlines :: ParPart -> DocxContext Inlines parPartToInlines (PlainRun r) = runToInlines r @@ -348,10 +355,10 @@ parPartToInlines (BookMark _ anchor) = unless inHdrBool (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) return $ spanWith (newAnchor, ["anchor"], []) mempty -parPartToInlines (Drawing fp bs) = do +parPartToInlines (Drawing fp bs ext) = do mediaBag <- gets docxMediaBag modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } - return $ image fp "" "" + return $ imageWith (extentToAttr ext) fp "" "" parPartToInlines (InternalHyperLink anchor runs) = do ils <- concatReduce <$> mapM runToInlines runs return $ link ('#' : anchor) "" ils @@ -486,7 +493,7 @@ bodyPartToBlocks (Paragraph pPr parparts) return $ case isNull ils' of True -> mempty _ -> parStyleToTransform pPr $ para ils' -bodyPartToBlocks (ListItem pPr numId lvl levelInfo parparts) = do +bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do let kvs = case levelInfo of (_, fmt, txt, Just start) -> [ ("level", lvl) @@ -503,6 +510,10 @@ bodyPartToBlocks (ListItem pPr numId lvl levelInfo parparts) = do ] blks <- bodyPartToBlocks (Paragraph pPr parparts) return $ divWith ("", ["list-item"], kvs) blks +bodyPartToBlocks (ListItem pPr _ _ _ parparts) = + let pPr' = pPr {pStyle = "ListParagraph": (pStyle pPr)} + in + bodyPartToBlocks $ Paragraph pPr' parparts bodyPartToBlocks (Tbl _ _ _ []) = return $ para mempty bodyPartToBlocks (Tbl cap _ look (r:rs)) = do @@ -535,10 +546,10 @@ bodyPartToBlocks (OMathPara e) = do -- replace targets with generated anchors. rewriteLink' :: Inline -> DocxContext Inline -rewriteLink' l@(Link ils ('#':target, title)) = do +rewriteLink' l@(Link attr ils ('#':target, title)) = do anchorMap <- gets docxAnchorMap return $ case M.lookup target anchorMap of - Just newTarget -> (Link ils ('#':newTarget, title)) + Just newTarget -> (Link attr ils ('#':newTarget, title)) Nothing -> l rewriteLink' il = return il diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 432965d49..eec8b12c9 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -35,6 +35,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , Body(..) , BodyPart(..) , TblLook(..) + , Extent , ParPart(..) , Run(..) , RunElem(..) @@ -62,6 +63,7 @@ import Control.Monad.Reader import Control.Applicative ((<|>)) import qualified Data.Map as M import Text.Pandoc.Compat.Except +import Text.Pandoc.Shared (safeRead) import Text.TeXMath.Readers.OMML (readOMML) import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) import Text.TeXMath (Exp) @@ -75,6 +77,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes , envFont :: Maybe Font , envCharStyles :: CharStyleMap , envParStyles :: ParStyleMap + , envLocation :: DocumentLocation } deriving Show @@ -87,7 +90,7 @@ instance Error DocxError where type D = ExceptT DocxError (Reader ReaderEnv) runD :: D a -> ReaderEnv -> Either DocxError a -runD dx re = runReader (runExceptT dx ) re +runD dx re = runReader (runExceptT dx) re maybeToD :: Maybe a -> D a maybeToD (Just a) = return a @@ -140,7 +143,10 @@ data AbstractNumb = AbstractNumb String [Level] -- (ilvl, format, string, start) type Level = (String, String, String, Maybe Integer) -data Relationship = Relationship (RelId, Target) +data DocumentLocation = InDocument | InFootnote | InEndnote + deriving (Eq,Show) + +data Relationship = Relationship DocumentLocation RelId Target deriving Show data Notes = Notes NameSpaces @@ -173,7 +179,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String Level [ParPart] + | ListItem ParagraphStyle String String (Maybe Level) [ParPart] | Tbl String TblGrid TblLook [Row] | OMathPara [Exp] deriving Show @@ -192,20 +198,23 @@ data Row = Row [Cell] data Cell = Cell [BodyPart] deriving Show +-- (width, height) in EMUs +type Extent = Maybe (Double, Double) + data ParPart = PlainRun Run | Insertion ChangeId Author ChangeDate [Run] | Deletion ChangeId Author ChangeDate [Run] | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink URL [Run] - | Drawing FilePath B.ByteString + | Drawing FilePath B.ByteString Extent | PlainOMath [Exp] deriving Show data Run = Run RunStyle [RunElem] | Footnote [BodyPart] | Endnote [BodyPart] - | InlineDrawing FilePath B.ByteString + | InlineDrawing FilePath B.ByteString Extent deriving Show data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen @@ -238,7 +247,6 @@ defaultRunStyle = RunStyle { isBold = Nothing , rUnderline = Nothing , rStyle = Nothing} - type Target = String type Anchor = String type URL = String @@ -255,7 +263,8 @@ archiveToDocx archive = do rels = archiveToRelationships archive media = archiveToMedia archive (styles, parstyles) = archiveToStyles archive - rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles + rEnv = + ReaderEnv notes numbering rels media Nothing styles parstyles InDocument doc <- runD (archiveToDocument archive) rEnv return $ Docx doc @@ -362,29 +371,30 @@ archiveToNotes zf = in Notes ns fn en -filePathIsRel :: FilePath -> Bool -filePathIsRel fp = - let (dir, name) = splitFileName fp - in - (dir == "word/_rels/") && ((takeExtension name) == ".rels") +filePathToRelType :: FilePath -> Maybe DocumentLocation +filePathToRelType "word/_rels/document.xml.rels" = Just InDocument +filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote +filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote +filePathToRelType _ = Nothing -relElemToRelationship :: Element -> Maybe Relationship -relElemToRelationship element | qName (elName element) == "Relationship" = +relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship +relElemToRelationship relType element | qName (elName element) == "Relationship" = do relId <- findAttr (QName "Id" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship (relId, target) -relElemToRelationship _ = Nothing - - + return $ Relationship relType relId target +relElemToRelationship _ _ = Nothing + +filePathToRelationships :: Archive -> FilePath -> [Relationship] +filePathToRelationships ar fp | Just relType <- filePathToRelType fp + , Just entry <- findEntryByPath fp ar + , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = + mapMaybe (relElemToRelationship relType) $ elChildren relElems +filePathToRelationships _ _ = [] + archiveToRelationships :: Archive -> [Relationship] archiveToRelationships archive = - let relPaths = filter filePathIsRel (filesInArchive archive) - entries = mapMaybe (\f -> findEntryByPath f archive) relPaths - relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries - rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems - in - rels + concatMap (filePathToRelationships archive) $ filesInArchive archive filePathIsMedia :: FilePath -> Bool filePathIsMedia fp = @@ -409,6 +419,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls return lvl + numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | qName (elName element) == "num" && @@ -558,9 +569,8 @@ elemToBodyPart ns element let parstyle = elemToParagraphStyle ns element sty parparts <- mapD (elemToParPart ns) (elChildren element) num <- asks envNumbering - case lookupLevel numId lvl num of - Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> throwError WrongElem + let levelInfo = lookupLevel numId lvl num + return $ ListItem parstyle numId lvl levelInfo parparts elemToBodyPart ns element | isElem ns "w" "p" element = do sty <- asks envParStyles @@ -569,11 +579,8 @@ elemToBodyPart ns element case pNumInfo parstyle of Just (numId, lvl) -> do num <- asks envNumbering - case lookupLevel numId lvl num of - Just levelInfo -> - return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> - throwError WrongElem + let levelInfo = lookupLevel numId lvl num + return $ ListItem parstyle numId lvl levelInfo parparts Nothing -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do @@ -596,13 +603,16 @@ elemToBodyPart ns element return $ Tbl caption grid tblLook rows elemToBodyPart _ _ = throwError WrongElem -lookupRelationship :: RelId -> [Relationship] -> Maybe Target -lookupRelationship relid rels = - lookup relid (map (\(Relationship pair) -> pair) rels) +lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target +lookupRelationship docLocation relid rels = + lookup (docLocation, relid) pairs + where + pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels expandDrawingId :: String -> D (FilePath, B.ByteString) expandDrawingId s = do - target <- asks (lookupRelationship s . envRelationships) + location <- asks envLocation + target <- asks (lookupRelationship location s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -614,13 +624,13 @@ expandDrawingId s = do elemToParPart :: NameSpaces -> Element -> D ParPart elemToParPart ns element | isElem ns "w" "r" element - , Just _ <- findChild (elemName ns "w" "drawing") element = + , Just drawingElem <- findChild (elemName ns "w" "drawing") element = let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of - Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs) + Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs $ elemToExtent drawingElem) Nothing -> throwError WrongElem -- The below is an attempt to deal with images in deprecated vml format. elemToParPart ns element @@ -630,7 +640,7 @@ elemToParPart ns element >>= findAttr (elemName ns "r" "id") in case drawing of - Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs) + Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs Nothing) Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "r" element = @@ -657,9 +667,10 @@ elemToParPart ns element elemToParPart ns element | isElem ns "w" "hyperlink" element , Just relId <- findAttr (elemName ns "r" "id") element = do + location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships - case lookupRelationship relId rels of + case lookupRelationship location relId rels of Just target -> do case findAttr (elemName ns "w" "anchor") element of Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs @@ -681,6 +692,16 @@ lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s) lookupEndnote :: String -> Notes -> Maybe Element lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s) +elemToExtent :: Element -> Extent +elemToExtent drawingElem = + case (getDim "cx", getDim "cy") of + (Just w, Just h) -> Just (w, h) + _ -> Nothing + where + wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" + getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem + >>= findAttr (QName at Nothing Nothing) >>= safeRead + elemToRun :: NameSpaces -> Element -> D Run elemToRun ns element | isElem ns "w" "r" element @@ -691,7 +712,7 @@ elemToRun ns element in case drawing of Just s -> expandDrawingId s >>= - (\(fp, bs) -> return $ InlineDrawing fp bs) + (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent drawingElem) Nothing -> throwError WrongElem elemToRun ns element | isElem ns "w" "r" element @@ -699,7 +720,7 @@ elemToRun ns element , Just fnId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes case lookupFootnote fnId notes of - Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Footnote bps Nothing -> return $ Footnote [] elemToRun ns element @@ -708,7 +729,7 @@ elemToRun ns element , Just enId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes case lookupEndnote enId notes of - Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Endnote bps Nothing -> return $ Endnote [] elemToRun ns element diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 2da5e9e18..79aa540f6 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -16,6 +16,7 @@ import Text.Pandoc.Options ( ReaderOptions(..), readerTrace) import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField) import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.MIME (MimeType) import qualified Text.Pandoc.Builder as B import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry @@ -99,12 +100,12 @@ fetchImages mimes root arc (query iq -> links) = <$> findEntryByPath abslink arc iq :: Inline -> [FilePath] -iq (Image _ (url, _)) = [url] +iq (Image _ _ (url, _)) = [url] iq _ = [] -- Remove relative paths renameImages :: FilePath -> Inline -> Inline -renameImages root (Image a (url, b)) = Image a (collapseFilePath (root </> url), b) +renameImages root (Image attr a (url, b)) = Image attr a (collapseFilePath (root </> url), b) renameImages _ x = x imageToPandoc :: FilePath -> Pandoc @@ -189,14 +190,14 @@ fixInlineIRs s (Span as v) = Span (fixAttrs s as) v fixInlineIRs s (Code as code) = Code (fixAttrs s as) code -fixInlineIRs s (Link t ('#':url, tit)) = - Link t (addHash s url, tit) +fixInlineIRs s (Link attr t ('#':url, tit)) = + Link attr t (addHash s url, tit) fixInlineIRs _ v = v prependHash :: [String] -> Inline -> Inline -prependHash ps l@(Link is (url, tit)) +prependHash ps l@(Link attr is (url, tit)) | or [s `isPrefixOf` url | s <- ps] = - Link is ('#':url, tit) + Link attr is ('#':url, tit) | otherwise = l prependHash _ i = i diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 8b66d2d3d..a34e2fb5c 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -66,7 +66,7 @@ import Control.Monad.Reader (Reader,ask, asks, local, runReader) import Network.URI (isURI) import Text.Pandoc.Error import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) - +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Parsec.Error @@ -601,16 +601,8 @@ pLineBreak = do return B.linebreak pLink :: TagParser Inlines -pLink = pRelLink <|> pAnchor - -pAnchor :: TagParser Inlines -pAnchor = try $ do - tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "id")) - return $ B.spanWith (fromAttrib "id" tag , [], []) mempty - -pRelLink :: TagParser Inlines -pRelLink = try $ do - tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href")) +pLink = try $ do + tag <- pSatisfy $ tagOpenLit "a" (const True) mbBaseHref <- baseHref <$> getState let url' = fromAttrib "href" tag let url = case (isURI url', mbBaseHref) of @@ -618,11 +610,9 @@ pRelLink = try $ do _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag - let spanC = case uid of - [] -> id - s -> B.spanWith (s, [], []) + let cls = words $ fromAttrib "class" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - return $ spanC $ B.link (escapeURI url) title lab + return $ B.linkWith (uid, cls, []) (escapeURI url) title lab pImage :: TagParser Inlines pImage = do @@ -634,7 +624,13 @@ pImage = do _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag - return $ B.image (escapeURI url) title (B.text alt) + let uid = fromAttrib "id" tag + let cls = words $ fromAttrib "class" tag + let getAtt k = case fromAttrib k tag of + "" -> [] + v -> [(k, v)] + let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"] + return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) pCode :: TagParser Inlines pCode = try $ do @@ -803,7 +799,10 @@ pBad = do return $ B.str [c'] pSpace :: InlinesParser Inlines -pSpace = many1 (satisfy isSpace) >> return B.space +pSpace = many1 (satisfy isSpace) >>= \xs -> + if '\n' `elem` xs + then return B.softbreak + else return B.space -- -- Constants @@ -948,6 +947,7 @@ htmlTag f = try $ do parseOptions{ optTagWarning = True } inp guard $ f next case next of + TagWarning _ -> fail "encountered TagWarning" TagComment s | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 2b74f5f62..16f3d7ef3 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -16,6 +16,7 @@ module Text.Pandoc.Readers.Haddock import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Shared (trim, splitBy) import Data.List (intersperse, stripPrefix) import Data.Maybe (fromMaybe) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 359661c3e..5a4612862 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -54,6 +54,7 @@ import Data.List (intercalate) import qualified Data.Map as M import qualified Control.Exception as E import Text.Pandoc.Highlighting (fromListingsLanguage) +import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Error -- | Parse LaTeX from string and return 'Pandoc' document. @@ -99,8 +100,13 @@ dimenarg = try $ do return $ ch ++ num ++ dim sp :: LP () -sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') - <|> try (newline <* lookAhead anyChar <* notFollowedBy blankline) +sp = whitespace <|> endline + +whitespace :: LP () +whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') + +endline :: LP () +endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline) isLowerHex :: Char -> Bool isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' @@ -166,24 +172,37 @@ mathChars = (concat <$>) $ quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines quoted' f starter ender = do startchs <- starter - try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs + smart <- getOption readerSmart + if smart + then do + ils <- many (notFollowedBy ender >> inline) + (ender >> return (f (mconcat ils))) <|> + lit (case startchs of + "``" -> "“" + "`" -> "‘" + _ -> startchs) + else lit startchs doubleQuote :: LP Inlines -doubleQuote = - quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") - <|> quoted' doubleQuoted (string "“") (void $ char '”') - -- the following is used by babel for localized quotes: - <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") - <|> quoted' doubleQuoted (string "\"") (void $ char '"') +doubleQuote = do + quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") + <|> quoted' doubleQuoted (string "“") (void $ char '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") + <|> quoted' doubleQuoted (string "\"") (void $ char '"') singleQuote :: LP Inlines -singleQuote = - quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) - <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) +singleQuote = do + smart <- getOption readerSmart + if smart + then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) + <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) + else str <$> many1 (oneOf "`\'‘’") inline :: LP Inlines inline = (mempty <$ comment) - <|> (space <$ sp) + <|> (space <$ whitespace) + <|> (softbreak <$ endline) <|> inlineText <|> inlineCommand <|> inlineEnvironment @@ -392,7 +411,8 @@ inlineCommand = try $ do star <- option "" (string "*") let name' = name ++ star let raw = do - rawcommand <- getRawCommand name' + rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) + let rawcommand = '\\' : name ++ star ++ snd rawargs transformed <- applyMacros' rawcommand if transformed /= rawcommand then parseFromString inlines transformed @@ -522,7 +542,9 @@ inlineCommands = M.fromList $ , ("href", (unescapeURL <$> braced <* optional sp) >>= \url -> tok >>= \lab -> pure (link url "" lab)) - , ("includegraphics", skipopts *> (unescapeURL <$> braced) >>= mkImage) + , ("includegraphics", do options <- option [] keyvals + src <- unescapeURL <$> braced + mkImage options src) , ("enquote", enquote) , ("cite", citation "cite" AuthorInText False) , ("Cite", citation "cite" AuthorInText False) @@ -584,14 +606,19 @@ inlineCommands = M.fromList $ -- in which case they will appear as raw latex blocks: [ "index" ] -mkImage :: String -> LP Inlines -mkImage src = do +mkImage :: [(String, String)] -> String -> LP Inlines +mkImage options src = do + let replaceTextwidth (k,v) = case numUnit v of + Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + _ -> (k, v) + let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options + let attr = ("",[], kvs) let alt = str "image" case takeExtension src of "" -> do defaultExt <- getOption readerDefaultImageExtension - return $ image (addExtension src defaultExt) "" alt - _ -> return $ image src "" alt + return $ imageWith attr (addExtension src defaultExt) "" alt + _ -> return $ imageWith attr src "" alt inNote :: Inlines -> Inlines inNote ils = @@ -886,7 +913,7 @@ verbatimEnv' = fmap snd <$> string "\\begin" name <- braced' guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", - "minted", "alltt"] + "minted", "alltt", "comment"] manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}") blob' :: IncludeParser @@ -972,7 +999,7 @@ readFileFromDirs (d:ds) f = keyval :: LP (String, String) keyval = try $ do key <- many1 alphaNum - val <- option "" $ char '=' >> many1 alphaNum + val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\') skipMany spaceChar optional (char ',') skipMany spaceChar @@ -999,11 +1026,11 @@ rawLaTeXInline = do addImageCaption :: Blocks -> LP Blocks addImageCaption = walkM go - where go (Image alt (src,tit)) = do + where go (Image attr alt (src,tit)) = do mbcapt <- stateCaption <$> getState return $ case mbcapt of - Just ils -> Image (toList ils) (src, "fig:") - Nothing -> Image alt (src,tit) + Just ils -> Image attr (toList ils) (src, "fig:") + Nothing -> Image attr alt (src,tit) go x = return x addTableCaption :: Blocks -> LP Blocks @@ -1039,6 +1066,7 @@ environments = M.fromList , ("code", guardEnabled Ext_literate_haskell *> (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code")) + , ("comment", mempty <$ verbEnv "comment") , ("verbatim", codeBlock <$> verbEnv "verbatim") , ("Verbatim", do options <- option [] keyvals let kvs = [ (if k == "firstnumber" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index f2b0872bb..7b1341af4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -39,6 +39,8 @@ import Data.Ord ( comparing ) import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition +import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Generic (bottomUp) import qualified Data.Text as T import Data.Text (Text) import qualified Data.Yaml as Yaml @@ -50,6 +52,7 @@ import qualified Data.Vector as V import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import Text.Pandoc.Options import Text.Pandoc.Shared +import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) @@ -62,6 +65,7 @@ import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Error type MarkdownParser = Parser [Char] ParserState @@ -326,23 +330,22 @@ stopLine = try $ (string "---" <|> string "...") >> blankline >> return () mmdTitleBlock :: MarkdownParser () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block - kvPairs <- many1 kvPair + firstPair <- kvPair False + restPairs <- many (kvPair True) + let kvPairs = firstPair : restPairs blanklines updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: MarkdownParser (String, MetaValue) -kvPair = try $ do +kvPair :: Bool -> MarkdownParser (String, MetaValue) +kvPair allowEmpty = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - skipMany1 spaceNoNewline - val <- manyTill anyChar + val <- trim <$> manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ not . null . trim $ val + guard $ allowEmpty || not (null val) let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val + let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val return (key',val') - where - spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r')) parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do @@ -355,7 +358,19 @@ parseMarkdown = do st <- getState let meta = runF (stateMeta' st) st let Pandoc _ bs = B.doc $ runF blocks st - return $ Pandoc meta bs + eastAsianLineBreaks <- option False $ + True <$ guardEnabled Ext_east_asian_line_breaks + return $ (if eastAsianLineBreaks + then bottomUp softBreakFilter + else id) $ Pandoc meta bs + +softBreakFilter :: [Inline] -> [Inline] +softBreakFilter (x:SoftBreak:y:zs) = + case (stringify x, stringify y) of + (xs@(_:_), (c:_)) + | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs + _ -> x:SoftBreak:y:zs +softBreakFilter xs = xs referenceKey :: MarkdownParser (F Blocks) referenceKey = try $ do @@ -367,23 +382,26 @@ referenceKey = try $ do let sourceURL = liftM unwords $ many $ try $ do skipMany spaceChar notFollowedBy' referenceTitle + notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes notFollowedBy' (() <$ reference) many1 $ notFollowedBy space >> litChar let betweenAngles = try $ char '<' >> manyTill litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle - -- currently we just ignore MMD-style link/image attributes - _kvs <- option [] $ guardEnabled Ext_link_attributes - >> many (try $ spnl >> keyValAttr) + attr <- option nullAttr $ try $ + guardEnabled Ext_link_attributes >> skipSpaces >> attributes + addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes + >> many (try $ spnl >> keyValAttr) blanklines - let target = (escapeURI $ trimr src, tit) + let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs + target = (escapeURI $ trimr src, tit) st <- getState let oldkeys = stateKeys st let key = toKey raw case M.lookup key oldkeys of Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'" Nothing -> return () - updateState $ \s -> s { stateKeys = M.insert key target oldkeys } + updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty referenceTitle :: MarkdownParser String @@ -466,7 +484,6 @@ block = do res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock - , guardEnabled Ext_latex_macros *> (macro >>= return . return) -- note: bulletList needs to be before header because of -- the possibility of empty list items: - , bulletList @@ -476,6 +493,7 @@ block = do , htmlBlock , table , codeBlockIndented + , guardEnabled Ext_latex_macros *> (macro >>= return . return) , rawTeXBlock , lineBlock , blockQuote @@ -516,9 +534,9 @@ atxHeader = try $ do (text, raw) <- withRaw $ trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + attr' <- registerHeader attr (runF text defaultParserState) guardDisabled Ext_implicit_header_references - <|> registerImplicitHeader raw ident + <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text atxClosing :: MarkdownParser Attr @@ -559,16 +577,16 @@ setextHeader = try $ do many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + attr' <- registerHeader attr (runF text defaultParserState) guardDisabled Ext_implicit_header_references - <|> registerImplicitHeader raw ident + <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -registerImplicitHeader :: String -> String -> MarkdownParser () -registerImplicitHeader raw ident = do +registerImplicitHeader :: String -> Attr -> MarkdownParser () +registerImplicitHeader raw attr@(ident, _, _) = do let key = toKey $ "[" ++ raw ++ "]" updateState (\s -> s { stateHeaderKeys = - M.insert key ('#':ident,"") (stateHeaderKeys s) }) + M.insert key (('#':ident,""), attr) (stateHeaderKeys s) }) -- -- hrule block @@ -979,11 +997,11 @@ para = try $ do return $ do result' <- result case B.toList result' of - [Image alt (src,tit)] + [Image attr alt (src,tit)] | Ext_implicit_figures `Set.member` exts -> -- the fig: at beginning of title indicates a figure return $ B.para $ B.singleton - $ Image alt (src,'f':'i':'g':':':tit) + $ Image attr alt (src,'f':'i':'g':':':tit) _ -> return $ B.para result' plain :: MarkdownParser (F Blocks) @@ -1321,7 +1339,7 @@ removeOneLeadingSpace xs = gridTableFooter :: MarkdownParser [Char] gridTableFooter = blanklines -pipeBreak :: MarkdownParser [Alignment] +pipeBreak :: MarkdownParser ([Alignment], [Int]) pipeBreak = try $ do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1331,16 +1349,23 @@ pipeBreak = try $ do guard $ not (null rest && not openPipe) optional (char '|') blankline - return (first:rest) + return $ unzip (first:rest) pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar - (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak - lines' <- sequence <$> many pipeTableRow - let widths = replicate (length aligns) 0.0 - return $ (aligns, widths, heads, lines') + (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak + lines' <- many pipeTableRow + let maxlength = maximum $ + map (\x -> length . stringify $ runF x def) (heads : lines') + numColumns <- getOption readerColumns + let widths = if maxlength > numColumns + then map (\len -> + fromIntegral (len + 1) / fromIntegral numColumns) + seplengths + else replicate (length aligns) 0.0 + return $ (aligns, widths, heads, sequence lines') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1369,19 +1394,20 @@ pipeTableRow = do ils' | B.isNull ils' -> mempty | otherwise -> B.plain $ ils') cells' -pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') - many1 (char '-') + pipe <- many1 (char '-') right <- optionMaybe (char ':') skipMany spaceChar + let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right return $ - case (left,right) of - (Nothing,Nothing) -> AlignDefault - (Just _,Nothing) -> AlignLeft - (Nothing,Just _) -> AlignRight - (Just _,Just _) -> AlignCenter + ((case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter), len) -- Succeed only if current line contains a pipe. scanForPipe :: Parser [Char] st () @@ -1460,6 +1486,7 @@ inline = choice [ whitespace , exampleRef , smart , return . B.singleton <$> charRef + , emoji , symbol , ltSign ] <?> "inline" @@ -1666,7 +1693,7 @@ endline = try $ do (eof >> return mempty) <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) - <|> (return $ return B.space) + <|> (skipMany spaceChar >> return (return B.softbreak)) -- -- links @@ -1710,16 +1737,18 @@ link = try $ do setState $ st{ stateAllowLinks = False } (lab,raw) <- reference setState $ st{ stateAllowLinks = True } - regLink B.link lab <|> referenceLink B.link (lab,raw) + regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) -regLink :: (String -> String -> Inlines -> Inlines) +regLink :: (Attr -> String -> String -> Inlines -> Inlines) -> F Inlines -> MarkdownParser (F Inlines) regLink constructor lab = try $ do (src, tit) <- source - return $ constructor src tit <$> lab + attr <- option nullAttr $ + guardEnabled Ext_link_attributes >> attributes + return $ constructor attr src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: (String -> String -> Inlines -> Inlines) +referenceLink :: (Attr -> String -> String -> Inlines -> Inlines) -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False @@ -1731,7 +1760,7 @@ referenceLink constructor (lab, raw) = do let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' parsedRaw <- parseFromString (mconcat <$> many inline) raw' - fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references let makeFallback = do @@ -1748,10 +1777,10 @@ referenceLink constructor (lab, raw) = do then do headerKeys <- asksF stateHeaderKeys case M.lookup key headerKeys of - Just (src, tit) -> constructor src tit <$> lab - Nothing -> makeFallback + Just ((src, tit), _) -> constructor nullAttr src tit <$> lab + Nothing -> makeFallback else makeFallback - Just (src,tit) -> constructor src tit <$> lab + Just ((src,tit), attr) -> constructor attr src tit <$> lab dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB @@ -1785,9 +1814,9 @@ image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor src = case takeExtension src of - "" -> B.image (addExtension src defaultExt) - _ -> B.image src + let constructor attr' src = case takeExtension src of + "" -> B.imageWith attr' (addExtension src defaultExt) + _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) note :: MarkdownParser (F Inlines) @@ -1891,6 +1920,21 @@ rawHtmlInline = do else not . isTextTag return $ return $ B.rawInline "html" result +-- Emoji + +emojiChars :: [Char] +emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] + +emoji :: MarkdownParser (F Inlines) +emoji = try $ do + guardEnabled Ext_emoji + char ':' + emojikey <- many1 (oneOf emojiChars) + char ':' + case M.lookup emojikey emojis of + Just s -> return (return (B.str s)) + Nothing -> mzero + -- Citations cite :: MarkdownParser (F Inlines) @@ -1923,7 +1967,7 @@ textualCite = try $ do spc | null spaces' = mempty | otherwise = B.space lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw' - fallback <- referenceLink B.link (lab,raw') + fallback <- referenceLink B.linkWith (lab,raw') return $ do fallback' <- fallback cs' <- cs diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 4f5f9c293..e423720df 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -39,6 +39,7 @@ module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Options import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) @@ -554,7 +555,8 @@ inlineHtml :: MWParser Inlines inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' whitespace :: MWParser Inlines -whitespace = B.space <$ (skipMany1 spaceChar <|> endline <|> htmlComment) +whitespace = B.space <$ (skipMany1 spaceChar <|> htmlComment) + <|> B.softbreak <$ endline endline :: MWParser () endline = () <$ try (newline <* @@ -575,21 +577,29 @@ image = try $ do sym "[[" choice imageIdentifiers fname <- many1 (noneOf "|]") - _ <- many (try $ char '|' *> imageOption) + _ <- many imageOption + dims <- try (char '|' *> (sepBy (many digit) (char 'x')) <* string "px") + <|> return [] + _ <- many imageOption + let kvs = case dims of + w:[] -> [("width", w)] + w:(h:[]) -> [("width", w), ("height", h)] + _ -> [] + let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.image fname ("fig:" ++ stringify caption) caption + return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption imageOption :: MWParser String -imageOption = - try (oneOfStrings [ "border", "thumbnail", "frameless" - , "thumb", "upright", "left", "right" - , "center", "none", "baseline", "sub" - , "super", "top", "text-top", "middle" - , "bottom", "text-bottom" ]) - <|> try (string "frame") - <|> try (many1 (oneOf "x0123456789") <* string "px") - <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) +imageOption = try $ char '|' *> opt + where + opt = try (oneOfStrings [ "border", "thumbnail", "frameless" + , "thumb", "upright", "left", "right" + , "center", "none", "baseline", "sub" + , "super", "top", "text-top", "middle" + , "bottom", "text-bottom" ]) + <|> try (string "frame") + <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) collapseUnderscores :: String -> String collapseUnderscores [] = [] diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs index 2cc83183f..30f96c557 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -46,6 +46,7 @@ import Control.Monad import Data.Foldable +import Text.Pandoc.Compat.Monoid import Text.Pandoc.Readers.Odt.Arrows.Utils import Text.Pandoc.Readers.Odt.Generic.Fallible diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs index d4dcf5be2..8c9ee0539 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs @@ -46,7 +46,7 @@ import qualified Data.Foldable as F import Text.Pandoc.Readers.Odt.Generic.Fallible import Text.Pandoc.Readers.Odt.Generic.Utils - +import Text.Pandoc.Compat.Monoid and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c') and2 = (&&&) @@ -129,24 +129,23 @@ joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z joinOn = arr.uncurry -- | Applies a function to the uncurried result-pair of an arrow-application. --- (The §-symbol was chosen to evoke an association with pairs through the --- shared first character) -(>>§) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d -a >>§ f = a >>^ uncurry f +-- (The %-symbol was chosen to evoke an association with pairs.) +(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d +a >>% f = a >>^ uncurry f --- | '(>>§)' with its arguments flipped -(§<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d -(§<<) = flip (>>§) +-- | '(>>%)' with its arguments flipped +(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d +(%<<) = flip (>>%) -- | Precomposition with an uncurried function -(§>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r -f §>> a = uncurry f ^>> a +(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r +f %>> a = uncurry f ^>> a -- | Precomposition with an uncurried function (right to left variant) -(<<§) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r -(<<§) = flip (§>>) +(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r +(<<%) = flip (%>>) -infixr 2 >>§, §<<, §>>, <<§ +infixr 2 >>%, %<<, %>>, <<% -- | Duplicate a value and apply an arrow to the second instance. @@ -271,7 +270,7 @@ newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c } instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where mempty = CoEval $ returnV mempty - (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>§ mappend + (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend -- | Evaluates a collection of arrows in a parallel fashion. -- @@ -433,29 +432,29 @@ a ^>>?^? f = a ^>> Left ^|||^ f a >>?! f = a >>> right f --- -(>>?§) :: (ArrowChoice a, Monoid f) +(>>?%) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f (b,b') -> (b -> b' -> c) -> FallibleArrow a x f c -a >>?§ f = a >>?^ (uncurry f) +a >>?% f = a >>?^ (uncurry f) --- -(^>>?§) :: (ArrowChoice a, Monoid f) +(^>>?%) :: (ArrowChoice a, Monoid f) => (x -> Either f (b,b')) -> (b -> b' -> c) -> FallibleArrow a x f c -a ^>>?§ f = arr a >>?^ (uncurry f) +a ^>>?% f = arr a >>?^ (uncurry f) --- -(>>?§?) :: (ArrowChoice a, Monoid f) +(>>?%?) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f (b,b') -> (b -> b' -> (Either f c)) -> FallibleArrow a x f c -a >>?§? f = a >>?^? (uncurry f) +a >>?%? f = a >>?^? (uncurry f) infixr 1 >>?, >>?^, >>?^? infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?! -infixr 1 >>?§, ^>>?§, >>?§? +infixr 1 >>?%, ^>>?%, >>?%? -- | Keep values that are Right, replace Left values by a constant. ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 9ff3532e1..1f1c57646 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -145,7 +145,7 @@ type OdtReaderSafe a b = XMLReaderSafe ReaderState a b fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b fromStyles f = keepingTheValue (getExtraState >>^ styleSet) - >>§ f + >>% f -- getStyleByName :: OdtReader StyleName Style @@ -162,7 +162,7 @@ lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice -- switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle) switchCurrentListStyle = keepingTheValue getExtraState - >>§ swapCurrentListStyle + >>% swapCurrentListStyle >>> first setExtraState >>^ snd @@ -170,7 +170,7 @@ switchCurrentListStyle = keepingTheValue getExtraState pushStyle :: OdtReaderSafe Style Style pushStyle = keepingTheValue ( ( keepingTheValue getExtraState - >>§ pushStyle' + >>% pushStyle' ) >>> setExtraState ) @@ -470,7 +470,7 @@ matchingElement :: (Monoid e) matchingElement ns name reader = (ns, name, asResultAccumulator reader) where asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m) - asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>§ (<>) + asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>% (<>) -- matchChildContent' :: (Monoid result) @@ -497,14 +497,14 @@ matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback -- -- | Open Document allows several consecutive spaces if they are marked up read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines -read_plain_text = fst ^&&& read_plain_text' >>§ recover +read_plain_text = fst ^&&& read_plain_text' >>% recover where -- fallible version read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines read_plain_text' = ( second ( arr extractText ) >>^ spreadChoice >>?! second text ) - >>?§ (<>) + >>?% (<>) -- extractText :: XML.Content -> Fallible String extractText (XML.Text cData) = succeedWith (XML.cdData cData) diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs index 343ec14ee..d0fdc228f 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs @@ -41,7 +41,7 @@ module Text.Pandoc.Readers.Odt.Generic.Fallible where import Control.Applicative import Control.Monad - +import Text.Pandoc.Compat.Monoid ((<>)) import qualified Data.Foldable as F -- | Default for now. Will probably become a class at some point. diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 7c1764889..8c03d1a09 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -331,7 +331,7 @@ convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA where setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v modifyWithA = keepingTheValue (moreState ^>> a) - >>^ spreadChoice >>?§ flip replaceExtraState + >>^ spreadChoice >>?% flip replaceExtraState -- | First sets the extra state to the new value. Then produces a new -- extra state with a converter that uses the new state. Finally, the @@ -413,14 +413,14 @@ elemName :: (NameSpaceID nsID) -> XMLConverter nsID extraState x XML.QName elemName nsID name = lookupNSiri nsID &&& lookupNSprefix nsID - >>§ XML.QName name + >>% XML.QName name -- | Checks if a given element matches both a specified namespace id -- and a specified element name elemNameIs :: (NameSpaceID nsID) => nsID -> ElementName -> XMLConverter nsID extraState XML.Element Bool -elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>§ hasThatName +elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName where hasThatName e iri = let elName = XML.elName e in XML.qName elName == name && XML.qURI elName == iri @@ -461,8 +461,8 @@ currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>> (XML.qName >>^ (&&).(== name) ) ^&&&^ (XML.qIRI >>^ (==) ) - ) >>§ (.) - ) &&& lookupNSiri nsID >>§ ($) + ) >>% (.) + ) &&& lookupNSiri nsID >>% ($) -} -- @@ -487,7 +487,7 @@ findChildren :: (NameSpaceID nsID) -> XMLConverter nsID extraState x [XML.Element] findChildren nsID name = elemName nsID name &&& getCurrentElement - >>§ XML.findChildren + >>% XML.findChildren -- filterChildren :: (XML.Element -> Bool) @@ -508,7 +508,7 @@ findChild' :: (NameSpaceID nsID) -> XMLConverter nsID extraState x (Maybe XML.Element) findChild' nsID name = elemName nsID name &&& getCurrentElement - >>§ XML.findChild + >>% XML.findChild -- findChild :: (NameSpaceID nsID) @@ -596,7 +596,7 @@ isThatTheAttrValue :: (NameSpaceID nsID) isThatTheAttrValue nsID attrName = keepingTheValue (findAttr nsID attrName) - >>§ right.(==) + >>% right.(==) -- | Lookup value in a dictionary, fail if no attribute found or value -- not in dictionary @@ -669,7 +669,7 @@ findAttr' :: (NameSpaceID nsID) -> XMLConverter nsID extraState x (Maybe AttributeValue) findAttr' nsID attrName = elemName nsID attrName &&& getCurrentElement - >>§ XML.findAttr + >>% XML.findAttr -- | Return value as string or fail findAttr :: (NameSpaceID nsID) @@ -787,7 +787,7 @@ prepareIteration :: (NameSpaceID nsID) -> XMLConverter nsID extraState b [(b, XML.Element)] prepareIteration nsID name = keepingTheValue (findChildren nsID name) - >>§ distributeValue + >>% distributeValue -- | Applies a converter to every child element of a specific type. -- Collects results in a 'Monoid'. @@ -877,9 +877,9 @@ makeMatcherE nsID name c = ( second ( elemNameIs nsID name >>^ bool Nothing (Just tryC) ) - >>§ (<|>) + >>% (<|>) ) &&&^ snd - where tryC = (fst ^&&& executeThere c >>§ recover) &&&^ snd + where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd -- Helper function: The @c@ is actually a converter that is to be selected by -- matching XML content to the first two parameters. @@ -899,14 +899,14 @@ makeMatcherC nsID name c = ( second ( contentToElem >>^ bool Nothing (Just cWithJump) ) ) - >>§ (<|>) + >>% (<|>) ) &&&^ snd where cWithJump = ( fst ^&&& ( second contentToElem >>> spreadChoice ^>>? executeThere c ) - >>§ recover) + >>% recover) &&&^ snd contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element contentToElem = arr $ \e -> case e of diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs index e28056814..deb009998 100644 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -107,4 +107,4 @@ nsIDs = [ ("http://www.w3.org/1999/xhtml" , NsXHtml ), ("http://www.w3.org/2002/xforms" , NsXForms ), ("http://www.w3.org/1999/xlink" , NsXLink ) - ]
\ No newline at end of file + ] diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index e403424f6..96cfed0b3 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -174,7 +174,7 @@ findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch) findPitch = ( lookupAttr NsStyle "font-pitch" `ifFailedDo` findAttr NsStyle "font-name" >>? ( keepingTheValue getExtraState - >>§ M.lookup + >>% M.lookup >>^ maybeToChoice ) ) @@ -447,7 +447,7 @@ readAllStyles :: StyleReader _x Styles readAllStyles = ( readFontPitches >>?! ( readAutomaticStyles &&& readStyles )) - >>?§? chooseMax + >>?%? chooseMax -- all top elements are always on the same hierarchy level -- diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 27a8fe957..99a6927e2 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -2,7 +2,7 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-} {- -Copyright (C) 2014-2015 Albert Krewinkel <tarleb@moltkeplatz.de> +Copyright (C) 2014-2015 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,10 +21,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Org - Copyright : Copyright (C) 2014 Albert Krewinkel + Copyright : Copyright (C) 2014-2015 Albert Krewinkel License : GNU GPL, version 2 or above - Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de> + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Conversion of org-mode formatted plain text to 'Pandoc' document. -} @@ -34,6 +34,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), trimInlines ) import Text.Pandoc.Definition +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF @@ -140,6 +141,7 @@ data OrgParserState = OrgParserState , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable + , orgStateParserContext :: ParserContext , orgStateIdentifiers :: [String] , orgStateHeaderMap :: M.Map Inlines String } @@ -181,6 +183,7 @@ defaultOrgParserState = OrgParserState , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] + , orgStateParserContext = NullState , orgStateIdentifiers = [] , orgStateHeaderMap = M.empty } @@ -291,6 +294,23 @@ blanklines = <* updateLastPreCharPos <* updateLastForbiddenCharPos +-- | Succeeds when we're in list context. +inList :: OrgParser () +inList = do + ctx <- orgStateParserContext <$> getState + guard (ctx == ListItemState) + +-- | Parse in different context +withContext :: ParserContext -- ^ New parser context + -> OrgParser a -- ^ Parser to run in that context + -> OrgParser a +withContext context parser = do + oldContext <- orgStateParserContext <$> getState + updateState $ \s -> s{ orgStateParserContext = context } + result <- parser + updateState $ \s -> s{ orgStateParserContext = oldContext } + return result + -- -- parsing blocks -- @@ -513,10 +533,16 @@ rundocBlockClass :: String rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgParamValue +blockOption = try $ do + argKey <- orgArgKey + paramValue <- option "yes" orgParamValue + return (argKey, paramValue) inlineBlockOption :: OrgParser (String, String) -inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue +inlineBlockOption = try $ do + argKey <- orgArgKey + paramValue <- option "yes" orgInlineParamValue + return (argKey, paramValue) orgArgKey :: OrgParser String orgArgKey = try $ @@ -525,11 +551,17 @@ orgArgKey = try $ orgParamValue :: OrgParser String orgParamValue = try $ - skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces + skipSpaces + *> notFollowedBy (char ':' ) + *> many1 (noneOf "\t\n\r ") + <* skipSpaces orgInlineParamValue :: OrgParser String orgInlineParamValue = try $ - skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces + skipSpaces + *> notFollowedBy (char ':') + *> many1 (noneOf "\t\n\r ]") + <* skipSpaces orgArgWordChar :: OrgParser Char orgArgWordChar = alphaNum <|> oneOf "-_" @@ -699,7 +731,7 @@ headerTags = try $ headerStart :: OrgParser Int headerStart = try $ - (length <$> many1 (char '*')) <* many1 (char ' ') + (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos -- Don't use (or need) the reader wrapper here, we want hline to be @@ -891,9 +923,13 @@ noteBlock = try $ do paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ do ils <- parseInlines - nl <- option False (newline >> return True) - try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >> - return (B.para <$> ils)) + nl <- option False (newline *> return True) + -- Read block as paragraph, except if we are in a list context and the block + -- is directly followed by a list item, in which case the block is read as + -- plain text. + try (guard nl + *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart)) + *> return (B.para <$> ils)) <|> (return (B.plain <$> ils)) inlinesTillNewline :: OrgParser (F Inlines) @@ -958,19 +994,22 @@ definitionListItem :: OrgParser Int -> OrgParser (F (Inlines, [Blocks])) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength - term <- manyTill (noneOf "\n\r") (try $ string "::") + term <- manyTill (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString parseInlines term contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' + where + definitionMarker = + spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline) -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser (F Blocks) -listItem start = try $ do +listItem start = try . withContext ListItemState $ do markerLength <- try start firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) @@ -1064,7 +1103,7 @@ endline = try $ do decEmphasisNewlinesCount guard =<< newlinesCountWithinLimits updateLastPreCharPos - return . return $ B.space + return . return $ B.softbreak cite :: OrgParser (F Inlines) cite = try $ do @@ -1549,8 +1588,11 @@ smart :: OrgParser (F Inlines) smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, dash, ellipses]) - where orgApostrophe = + choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) + where + orgDash = dash <* updatePositions '-' + orgEllipses = ellipses <* updatePositions '.' + orgApostrophe = (char '\'' <|> char '\8217') <* updateLastPreCharPos <* updateLastForbiddenCharPos *> return (B.str "\x2019") @@ -1558,9 +1600,10 @@ smart = do singleQuoted :: OrgParser (F Inlines) singleQuoted = try $ do singleQuoteStart + updatePositions '\'' withQuoteContext InSingleQuote $ fmap B.singleQuoted . trimInlinesF . mconcat <$> - many1Till inline singleQuoteEnd + many1Till inline (singleQuoteEnd <* updatePositions '\'') -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote @@ -1568,6 +1611,7 @@ singleQuoted = try $ do doubleQuoted :: OrgParser (F Inlines) doubleQuoted = try $ do doubleQuoteStart + updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return (fmap B.doubleQuoted . trimInlinesF $ contents)) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 4fb30e6c4..7be0cd392 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -48,7 +48,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import qualified Text.Pandoc.Builder as B import Data.Sequence (viewr, ViewR(..)) import Data.Char (toLower, isHexDigit, isSpace) - +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Error -- | Parse reStructuredText string and return Pandoc document. @@ -541,6 +541,12 @@ directive' = do body <- option "" $ try $ blanklines >> indentedBlock optional blanklines let body' = body ++ "\n\n" + imgAttr cl = ("", classes, getAtt "width" ++ getAtt "height") + where + classes = words $ maybe "" trim $ lookup cl fields + getAtt k = case lookup k fields of + Just v -> [(k, filter (not . isSpace) v)] + Nothing -> [] case label of "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields @@ -590,15 +596,16 @@ directive' = do "figure" -> do (caption, legend) <- parseFromString extractCaption body' let src = escapeURI $ trim top - return $ B.para (B.image src "fig:" caption) <> legend + return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend "image" -> do let src = escapeURI $ trim top let alt = B.str $ maybe "image" trim $ lookup "alt" fields + let attr = imgAttr "class" return $ B.para $ case lookup "target" fields of Just t -> B.link (escapeURI $ trim t) "" - $ B.image src "" alt - Nothing -> B.image src "" alt + $ B.imageWith attr src "" alt + Nothing -> B.imageWith attr src "" alt "class" -> do let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) -- directive content or the first immediately following element @@ -812,10 +819,10 @@ substKey = try $ do res <- B.toList <$> directive' il <- case res of -- use alt unless :alt: attribute on image: - [Para [Image [Str "image"] (src,tit)]] -> - return $ B.image src tit alt - [Para [Link [Image [Str "image"] (src,tit)] (src',tit')]] -> - return $ B.link src' tit' (B.image src tit alt) + [Para [Image attr [Str "image"] (src,tit)]] -> + return $ B.imageWith attr src tit alt + [Para [Link _ [Image attr [Str "image"] (src,tit)] (src',tit')]] -> + return $ B.link src' tit' (B.imageWith attr src tit alt) [Para ils] -> return $ B.fromList ils _ -> mzero let key = toKey $ stripFirstAndLast ref @@ -827,7 +834,8 @@ anonymousKey = try $ do src <- targetURI pos <- getPosition let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) - updateState $ \s -> s { stateKeys = M.insert key (src,"") $ stateKeys s } + --TODO: parse width, height, class and name attributes + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } stripTicks :: String -> String stripTicks = reverse . stripTick . reverse . stripTick @@ -841,7 +849,8 @@ regularKey = try $ do char ':' src <- targetURI let key = toKey $ stripTicks ref - updateState $ \s -> s { stateKeys = M.insert key (src,"") $ stateKeys s } + --TODO: parse width, height, class and name attributes + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } -- -- tables @@ -1096,7 +1105,7 @@ endline = try $ do then notFollowedBy (anyOrderedListMarker >> spaceChar) >> notFollowedBy' bulletListStart else return () - return B.space + return B.softbreak -- -- links @@ -1131,12 +1140,12 @@ referenceLink = try $ do if null anonKeys then mzero else return (head anonKeys) - (src,tit) <- case M.lookup key keyTable of - Nothing -> fail "no corresponding key" - Just target -> return target + ((src,tit), attr) <- case M.lookup key keyTable of + Nothing -> fail "no corresponding key" + Just val -> return val -- if anonymous link, remove key so it won't be used again when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } - return $ B.link src tit label' + return $ B.linkWith attr src tit label' autoURI :: RSTParser Inlines autoURI = do diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 83280aa2e..355285f54 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -51,6 +51,7 @@ TODO : refactor common patterns across readers : module Text.Pandoc.Readers.Textile ( readTextile) where +import Text.Pandoc.CSS import Text.Pandoc.Definition import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import qualified Text.Pandoc.Builder as B @@ -64,6 +65,7 @@ import Text.HTML.TagSoup.Match import Data.List ( intercalate ) import Data.Char ( digitToInt, isUpper) import Control.Monad ( guard, liftM, when ) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Printf import Debug.Trace (trace) import Text.Pandoc.Error @@ -79,11 +81,12 @@ readTextile opts s = -- | Generate a Pandoc ADT from a textile document parseTextile :: Parser [Char] ParserState Pandoc parseTextile = do - -- textile allows raw HTML and does smart punctuation by default + -- textile allows raw HTML and does smart punctuation by default, + -- but we do not enable smart punctuation unless it is explicitly + -- asked for, for better conversion to other light markup formats oldOpts <- stateOptions `fmap` getState updateState $ \state -> state{ stateOptions = - oldOpts{ readerSmart = True - , readerParseRaw = True + oldOpts{ readerParseRaw = True , readerOldDashes = True } } many blankline @@ -533,10 +536,14 @@ link = try $ do image :: Parser [Char] ParserState Inlines image = try $ do char '!' >> notFollowedBy space + (ident, cls, kvs) <- attributes + let attr = case lookup "style" kvs of + Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls) + Nothing -> (ident, cls, kvs) src <- manyTill anyChar' (lookAhead $ oneOf "!(") alt <- option "" (try $ (char '(' >> manyTill anyChar' (char ')'))) char '!' - return $ B.image src alt (B.str alt) + return $ B.imageWith attr src alt (B.str alt) escapedInline :: Parser [Char] ParserState Inlines escapedInline = escapedEqs <|> escapedTag diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 1c868f5f0..c28ce1653 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -34,6 +34,7 @@ module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL) @@ -549,7 +550,7 @@ endline = try $ do notFollowedBy quote notFollowedBy list notFollowedBy table - return $ B.space + return $ B.softbreak str :: T2T Inlines str = try $ do |