diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
25 files changed, 425 insertions, 265 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index db438e26d..e8fe92e27 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -8,16 +8,15 @@ import Text.XML.Light import Text.Pandoc.Compat.TagSoupEntity (lookupEntity) import Data.Either (rights) import Data.Generics -import Data.Monoid import Data.Char (isSpace) import Control.Monad.State -import Control.Applicative ((<$>)) import Data.List (intersperse) import Data.Maybe (fromMaybe) import Text.TeXMath (readMathML, writeTeX) import Text.Pandoc.Error (PandocError) import Text.Pandoc.Compat.Except import Data.Default +import Data.Foldable (asum) {- @@ -194,7 +193,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] indexterm - A wrapper for terms to be indexed [x] info - A wrapper for information about a component or other block. (DocBook v5) [x] informalequation - A displayed mathematical equation without a title -[ ] informalexample - A displayed example without a title +[x] informalexample - A displayed example without a title [ ] informalfigure - A untitled figure [ ] informaltable - A table without a title [ ] initializer - The initializer for a FieldSynopsis @@ -498,7 +497,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] warning - An admonition set off from the text [x] wordasword - A word meant specifically as a word and not representing anything else -[ ] xref - A cross reference to another part of the document +[x] xref - A cross reference to another part of the document [ ] year - The year of publication of a document [x] ?asciidoc-br? - line break from asciidoc docbook output -} @@ -511,6 +510,7 @@ data DBState = DBState{ dbSectionLevel :: Int , dbAcceptsMeta :: Bool , dbBook :: Bool , dbFigureTitle :: Inlines + , dbContent :: [Content] } deriving Show instance Default DBState where @@ -519,13 +519,14 @@ instance Default DBState where , dbMeta = mempty , dbAcceptsMeta = False , dbBook = False - , dbFigureTitle = mempty } + , dbFigureTitle = mempty + , dbContent = [] } readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs - where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp' - inp' = handleInstructions inp + where (bs , st') = flip runState (def{ dbContent = tree }) . runExceptT . mapM parseBlock $ tree + tree = normalizeTree . parseXML . handleInstructions $ inp -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. @@ -611,6 +612,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags "important","caution","note","tip","warning","qandadiv", "question","answer","abstract","itemizedlist","orderedlist", "variablelist","article","book","table","informaltable", + "informalexample", "screen","programlisting","example","calloutlist"] isBlockElement _ = False @@ -656,7 +658,7 @@ getMediaobject e = do let (caption, title) = if isNull figTitle then (getCaption e, "") else (return figTitle, "fig:") - liftM (imageWith imageUrl title attr) caption + liftM (imageWith attr imageUrl title) caption getBlocks :: Element -> DB Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -775,6 +777,8 @@ parseBlock (Elem e) = "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e "table" -> parseTable "informaltable" -> parseTable + "informalexample" -> divWith ("", ["informalexample"], []) <$> + getBlocks e "literallayout" -> codeBlockWithLang "screen" -> codeBlockWithLang "programlisting" -> codeBlockWithLang @@ -956,7 +960,13 @@ parseInline (Elem e) = "keycombo" -> keycombo <$> (mapM parseInline $ elContent e) "menuchoice" -> menuchoice <$> (mapM parseInline $ filter isGuiMenu $ elContent e) - "xref" -> return $ str "?" -- so at least you know something is there + "xref" -> do + content <- dbContent <$> get + let linkend = attrValue "linkend" e + let title = case attrValue "endterm" e of + "" -> maybe "???" xrefTitleByElem (findElementById linkend content) + endterm -> maybe "???" strContent (findElementById endterm content) + return $ link ('#' : linkend) "" (singleton (Str title)) "email" -> return $ link ("mailto:" ++ strContent e) "" $ str $ strContent e "uri" -> return $ link (strContent e) "" $ str $ strContent e @@ -968,7 +978,7 @@ parseInline (Elem e) = _ -> ('#' : attrValue "linkend" e) let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, words $ attrValue "role" e, []) - return $ linkWith href "" attr ils' + return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of "bold" -> strong <$> innerInlines @@ -1018,3 +1028,26 @@ parseInline (Elem e) = isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x || named "guimenuitem" x isGuiMenu _ = False + + findElementById idString content + = asum [filterElement (\x -> attrValue "id" x == idString) el | Elem el <- content] + + -- Use the 'xreflabel' attribute for getting the title of a xref link; + -- if there's no such attribute, employ some heuristics based on what + -- docbook-xsl does. + xrefTitleByElem el + | not (null xrefLabel) = xrefLabel + | otherwise = case qName (elName el) of + "chapter" -> descendantContent "title" el + "sect1" -> descendantContent "title" el + "sect2" -> descendantContent "title" el + "sect3" -> descendantContent "title" el + "sect4" -> descendantContent "title" el + "sect5" -> descendantContent "title" el + "cmdsynopsis" -> descendantContent "command" el + "funcsynopsis" -> descendantContent "function" el + _ -> qName (elName el) ++ "_title" + where + xrefLabel = attrValue "xreflabel" el + descendantContent name = maybe "???" strContent + . findElement (QName name Nothing Nothing) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index b80280553..439e2d3e4 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -85,14 +85,12 @@ import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Shared import Text.Pandoc.MediaBag (insertMedia, MediaBag) import Data.List (delete, (\\), intersect) -import Data.Monoid import Text.TeXMath (writeTeX) import Data.Default (Default) import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import Control.Monad.Reader import Control.Monad.State -import Control.Applicative ((<$>)) import Data.Sequence (ViewL(..), viewl) import qualified Data.Sequence as Seq (null) @@ -206,11 +204,15 @@ runElemToInlines :: RunElem -> Inlines runElemToInlines (TextRun s) = text s runElemToInlines (LnBrk) = linebreak runElemToInlines (Tab) = space +runElemToInlines (SoftHyphen) = text "\xad" +runElemToInlines (NoBreakHyphen) = text "\x2011" runElemToString :: RunElem -> String runElemToString (TextRun s) = s runElemToString (LnBrk) = ['\n'] runElemToString (Tab) = ['\t'] +runElemToString (SoftHyphen) = ['\xad'] +runElemToString (NoBreakHyphen) = ['\x2011'] runToString :: Run -> String runToString (Run _ runElems) = concatMap runElemToString runElems @@ -501,6 +503,10 @@ bodyPartToBlocks (ListItem pPr numId lvl levelInfo parparts) = do ] blks <- bodyPartToBlocks (Paragraph pPr parparts) return $ divWith ("", ["list-item"], kvs) blks +bodyPartToBlocks (DummyListItem pPr _ parparts) = + let pPr' = pPr {pStyle = "ListParagraph": (pStyle pPr)} + in + bodyPartToBlocks $ Paragraph pPr' parparts bodyPartToBlocks (Tbl _ _ _ []) = return $ para mempty bodyPartToBlocks (Tbl cap _ look (r:rs)) = do diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index cce80fb48..5910a476b 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -59,7 +59,7 @@ import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Reader -import Control.Applicative ((<$>), (<|>)) +import Control.Applicative ((<|>)) import qualified Data.Map as M import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) @@ -75,6 +75,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes , envFont :: Maybe Font , envCharStyles :: CharStyleMap , envParStyles :: ParStyleMap + , envLocation :: DocumentLocation } deriving Show @@ -87,7 +88,7 @@ instance Error DocxError where type D = ExceptT DocxError (Reader ReaderEnv) runD :: D a -> ReaderEnv -> Either DocxError a -runD dx re = runReader (runExceptT dx ) re +runD dx re = runReader (runExceptT dx) re maybeToD :: Maybe a -> D a maybeToD (Just a) = return a @@ -140,7 +141,10 @@ data AbstractNumb = AbstractNumb String [Level] -- (ilvl, format, string, start) type Level = (String, String, String, Maybe Integer) -data Relationship = Relationship (RelId, Target) +data DocumentLocation = InDocument | InFootnote | InEndnote + deriving (Eq,Show) + +data Relationship = Relationship DocumentLocation RelId Target deriving Show data Notes = Notes NameSpaces @@ -174,6 +178,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] | ListItem ParagraphStyle String String Level [ParPart] + | DummyListItem ParagraphStyle String [ParPart] | Tbl String TblGrid TblLook [Row] | OMathPara [Exp] deriving Show @@ -208,7 +213,7 @@ data Run = Run RunStyle [RunElem] | InlineDrawing FilePath B.ByteString deriving Show -data RunElem = TextRun String | LnBrk | Tab +data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show data VertAlign = BaseLn | SupScrpt | SubScrpt @@ -238,7 +243,6 @@ defaultRunStyle = RunStyle { isBold = Nothing , rUnderline = Nothing , rStyle = Nothing} - type Target = String type Anchor = String type URL = String @@ -255,7 +259,8 @@ archiveToDocx archive = do rels = archiveToRelationships archive media = archiveToMedia archive (styles, parstyles) = archiveToStyles archive - rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles + rEnv = + ReaderEnv notes numbering rels media Nothing styles parstyles InDocument doc <- runD (archiveToDocument archive) rEnv return $ Docx doc @@ -362,29 +367,30 @@ archiveToNotes zf = in Notes ns fn en -filePathIsRel :: FilePath -> Bool -filePathIsRel fp = - let (dir, name) = splitFileName fp - in - (dir == "word/_rels/") && ((takeExtension name) == ".rels") +filePathToRelType :: FilePath -> Maybe DocumentLocation +filePathToRelType "word/_rels/document.xml.rels" = Just InDocument +filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote +filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote +filePathToRelType _ = Nothing -relElemToRelationship :: Element -> Maybe Relationship -relElemToRelationship element | qName (elName element) == "Relationship" = +relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship +relElemToRelationship relType element | qName (elName element) == "Relationship" = do relId <- findAttr (QName "Id" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship (relId, target) -relElemToRelationship _ = Nothing - - + return $ Relationship relType relId target +relElemToRelationship _ _ = Nothing + +filePathToRelationships :: Archive -> FilePath -> [Relationship] +filePathToRelationships ar fp | Just relType <- filePathToRelType fp + , Just entry <- findEntryByPath fp ar + , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = + mapMaybe (relElemToRelationship relType) $ elChildren relElems +filePathToRelationships _ _ = [] + archiveToRelationships :: Archive -> [Relationship] archiveToRelationships archive = - let relPaths = filter filePathIsRel (filesInArchive archive) - entries = mapMaybe (\f -> findEntryByPath f archive) relPaths - relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries - rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems - in - rels + concatMap (filePathToRelationships archive) $ filesInArchive archive filePathIsMedia :: FilePath -> Bool filePathIsMedia fp = @@ -409,6 +415,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls return lvl + numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | qName (elName element) == "num" && @@ -560,7 +567,7 @@ elemToBodyPart ns element num <- asks envNumbering case lookupLevel numId lvl num of Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> throwError WrongElem + Nothing -> return $ DummyListItem parstyle lvl parparts elemToBodyPart ns element | isElem ns "w" "p" element = do sty <- asks envParStyles @@ -573,7 +580,7 @@ elemToBodyPart ns element Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts Nothing -> - throwError WrongElem + return $ DummyListItem parstyle lvl parparts Nothing -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do @@ -596,13 +603,16 @@ elemToBodyPart ns element return $ Tbl caption grid tblLook rows elemToBodyPart _ _ = throwError WrongElem -lookupRelationship :: RelId -> [Relationship] -> Maybe Target -lookupRelationship relid rels = - lookup relid (map (\(Relationship pair) -> pair) rels) +lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target +lookupRelationship docLocation relid rels = + lookup (docLocation, relid) pairs + where + pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels expandDrawingId :: String -> D (FilePath, B.ByteString) expandDrawingId s = do - target <- asks (lookupRelationship s . envRelationships) + location <- asks envLocation + target <- asks (lookupRelationship location s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -657,9 +667,10 @@ elemToParPart ns element elemToParPart ns element | isElem ns "w" "hyperlink" element , Just relId <- findAttr (elemName ns "r" "id") element = do + location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships - case lookupRelationship relId rels of + case lookupRelationship location relId rels of Just target -> do case findAttr (elemName ns "w" "anchor") element of Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs @@ -699,7 +710,7 @@ elemToRun ns element , Just fnId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes case lookupFootnote fnId notes of - Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Footnote bps Nothing -> return $ Footnote [] elemToRun ns element @@ -708,7 +719,7 @@ elemToRun ns element , Just enId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes case lookupEndnote enId notes of - Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Endnote bps Nothing -> return $ Endnote [] elemToRun ns element @@ -877,6 +888,8 @@ elemToRunElem ns element map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab + | isElem ns "w" "softHyphen" element = return SoftHyphen + | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen | isElem ns "w" "sym" element = return (getSymChar ns element) | otherwise = throwError WrongElem where diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs index 8269ca88d..c93b40119 100644 --- a/src/Text/Pandoc/Readers/Docx/Reducible.hs +++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs @@ -8,7 +8,6 @@ module Text.Pandoc.Readers.Docx.Reducible ( concatReduce import Text.Pandoc.Builder -import Data.Monoid import Data.List import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) import qualified Data.Sequence as Seq (null) diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index fb86f1286..79aa540f6 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -16,6 +16,7 @@ import Text.Pandoc.Options ( ReaderOptions(..), readerTrace) import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField) import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.MIME (MimeType) import qualified Text.Pandoc.Builder as B import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry @@ -25,9 +26,7 @@ import System.FilePath ( takeFileName, (</>), dropFileName, normalise , dropFileName , splitFileName ) import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy) -import Control.Applicative ((<$>)) import Control.Monad (guard, liftM, when) -import Data.Monoid (mempty, (<>)) import Data.List (isPrefixOf, isInfixOf) import Data.Maybe (mapMaybe, fromMaybe) import qualified Data.Map as M (Map, lookup, fromList, elems) @@ -181,7 +180,6 @@ getManifest archive = do fixInternalReferences :: FilePath -> Pandoc -> Pandoc fixInternalReferences pathToFile = (walk $ renameImages root) - . (walk normalisePath) . (walk $ fixBlockIRs filename) . (walk $ fixInlineIRs filename) where @@ -196,12 +194,6 @@ fixInlineIRs s (Link attr t ('#':url, tit)) = Link attr t (addHash s url, tit) fixInlineIRs _ v = v -normalisePath :: Inline -> Inline -normalisePath (Link attr t (url, tit)) = - let (path, uid) = span (/= '#') url in - Link attr t (takeFileName path ++ uid, tit) -normalisePath s = s - prependHash :: [String] -> Inline -> Inline prependHash ps l@(Link attr is (url, tit)) | or [s `isPrefixOf` url | s <- ps] = @@ -223,7 +215,7 @@ fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEP addHash :: String -> String -> String addHash _ "" = "" -addHash s ident = s ++ "#" ++ ident +addHash s ident = takeFileName s ++ "#" ++ ident removeEPUBAttrs :: [(String, String)] -> [(String, String)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 5a93e0d5b..85e9a0743 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -50,13 +50,14 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) Ext_native_divs, Ext_native_spans)) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk +import qualified Data.Map as M import Data.Maybe ( fromMaybe, isJust) import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero, void, unless ) import Control.Arrow ((***)) -import Control.Applicative ( (<$>), (<$), (<*), (*>), (<|>)) -import Data.Monoid (mconcat, Monoid, mempty, (<>), First (..)) +import Control.Applicative ( (<|>) ) +import Data.Monoid (First (..)) import Text.Printf (printf) import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) @@ -64,7 +65,8 @@ import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) import Network.URI (isURI) import Text.Pandoc.Error - +import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Parsec.Error @@ -74,8 +76,9 @@ readHtml :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readHtml opts inp = mapLeft (ParseFailure . getError) . flip runReader def $ - runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing) - "source" tags + runParserT parseDoc + (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty) + "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -100,7 +103,9 @@ data HTMLState = HTMLState { parserState :: ParserState, noteTable :: [(String, Blocks)], - baseHref :: Maybe String + baseHref :: Maybe String, + identifiers :: [String], + headerMap :: M.Map Inlines String } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -252,6 +257,22 @@ pListItem nonItem = do let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr) (liDiv <>) <$> pInTags "li" block <* skipMany nonItem +parseListStyleType :: String -> ListNumberStyle +parseListStyleType "lower-roman" = LowerRoman +parseListStyleType "upper-roman" = UpperRoman +parseListStyleType "lower-alpha" = LowerAlpha +parseListStyleType "upper-alpha" = UpperAlpha +parseListStyleType "decimal" = Decimal +parseListStyleType _ = DefaultStyle + +parseTypeAttr :: String -> ListNumberStyle +parseTypeAttr "i" = LowerRoman +parseTypeAttr "I" = UpperRoman +parseTypeAttr "a" = LowerAlpha +parseTypeAttr "A" = UpperAlpha +parseTypeAttr "1" = Decimal +parseTypeAttr _ = DefaultStyle + pOrderedList :: TagParser Blocks pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) @@ -261,23 +282,19 @@ pOrderedList = try $ do sta' = if all isDigit sta then read sta else 1 - sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> - case lookup "type" attribs of - Just "1" -> Decimal - Just "I" -> UpperRoman - Just "i" -> LowerRoman - Just "A" -> UpperAlpha - Just "a" -> LowerAlpha - _ -> DefaultStyle + + pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"] + + typeAttr = fromMaybe "" $ lookup "type" attribs + classAttr = fromMaybe "" $ lookup "class" attribs + styleAttr = fromMaybe "" $ lookup "style" attribs + listStyle = fromMaybe "" $ pickListStyle styleAttr + + sty' = foldOrElse DefaultStyle + [ parseTypeAttr typeAttr + , parseListStyleType classAttr + , parseListStyleType listStyle + ] let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && not (t ~== TagClose "ol")) @@ -330,9 +347,16 @@ pRawTag = do pDiv :: TagParser Blocks pDiv = try $ do guardEnabled Ext_native_divs - TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True) - contents <- pInTags "div" block - return $ B.divWith (mkAttr attr) contents + let isDivLike "div" = True + isDivLike "section" = True + isDivLike _ = False + TagOpen tag attr <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) + contents <- pInTags tag block + let (ident, classes, kvs) = mkAttr attr + let classes' = if tag == "section" + then "section":classes + else classes + return $ B.divWith (ident, classes', kvs) contents pRawHtmlBlock :: TagParser Blocks pRawHtmlBlock = do @@ -385,9 +409,10 @@ pHeader = try $ do let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] + attr' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle then mempty -- skip a representation of the title in the body - else B.headerWith (ident, classes, keyvals) level contents + else B.headerWith attr' level contents pHrule :: TagParser Blocks pHrule = do @@ -587,7 +612,7 @@ pLink = try $ do let uid = fromAttrib "id" tag let cls = words $ fromAttrib "class" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - return $ B.linkWith (escapeURI url) title (uid, cls, []) lab + return $ B.linkWith (uid, cls, []) (escapeURI url) title lab pImage :: TagParser Inlines pImage = do @@ -605,7 +630,7 @@ pImage = do "" -> [] v -> [(k, v)] let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"] - return $ B.imageWith (escapeURI url) title (uid, cls, kvs) (B.text alt) + return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) pCode :: TagParser Inlines pCode = try $ do @@ -618,12 +643,11 @@ pSpan = try $ do guardEnabled Ext_native_spans TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) contents <- pInTags "span" inline - let attr' = mkAttr attr - return $ case attr' of - ("",[],[("style",s)]) - | filter (`notElem` " \t;") s == "font-variant:small-caps" -> - B.smallcaps contents - _ -> B.spanWith (mkAttr attr) contents + let isSmallCaps = fontVariant == "small-caps" + where styleAttr = fromMaybe "" $ lookup "style" attr + fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr + let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) + return $ tag contents pRawHtmlInline :: TagParser Inlines pRawHtmlInline = do @@ -920,6 +944,7 @@ htmlTag f = try $ do parseOptions{ optTagWarning = True } inp guard $ f next case next of + TagWarning _ -> fail "encountered TagWarning" TagComment s | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar @@ -967,6 +992,14 @@ isSpace _ = False -- Instances +instance HasIdentifierList HTMLState where + extractIdentifierList = identifiers + updateIdentifierList f s = s{ identifiers = f (identifiers s) } + +instance HasHeaderMap HTMLState where + extractHeaderMap = headerMap + updateHeaderMap f s = s{ headerMap = f (headerMap s) } + -- This signature should be more general -- MonadReader HTMLLocal m => HasQuoteContext st m instance HasQuoteContext st (Reader HTMLLocal) where @@ -976,9 +1009,6 @@ instance HasQuoteContext st (Reader HTMLLocal) where instance HasReaderOptions HTMLState where extractReaderOptions = extractReaderOptions . parserState -instance Default HTMLState where - def = HTMLState def [] Nothing - instance HasMeta HTMLState where setMeta s b st = st {parserState = setMeta s b $ parserState st} deleteMeta s st = st {parserState = deleteMeta s $ parserState st} diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index aa2534afc..16f3d7ef3 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -16,8 +16,8 @@ module Text.Pandoc.Readers.Haddock import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Shared (trim, splitBy) -import Data.Monoid import Data.List (intersperse, stripPrefix) import Data.Maybe (fromMaybe) import Text.Pandoc.Definition diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index def429232..673deba14 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -46,8 +46,7 @@ import Data.Char ( chr, ord, isLetter, isAlphaNum ) import Control.Monad.Trans (lift) import Control.Monad import Text.Pandoc.Builder -import Control.Applicative -import Data.Monoid +import Control.Applicative ((<|>), many, optional) import Data.Maybe (fromMaybe, maybeToList) import System.Environment (getEnv) import System.FilePath (replaceExtension, (</>), takeExtension, addExtension) @@ -171,17 +170,23 @@ quoted' f starter ender = do try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs doubleQuote :: LP Inlines -doubleQuote = - quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") - <|> quoted' doubleQuoted (string "“") (void $ char '”') - -- the following is used by babel for localized quotes: - <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") - <|> quoted' doubleQuoted (string "\"") (void $ char '"') +doubleQuote = do + smart <- getOption readerSmart + if smart + then quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") + <|> quoted' doubleQuoted (string "“") (void $ char '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") + <|> quoted' doubleQuoted (string "\"") (void $ char '"') + else str <$> many1 (oneOf "`'“”\"") singleQuote :: LP Inlines -singleQuote = - quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) - <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) +singleQuote = do + smart <- getOption readerSmart + if smart + then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) + <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) + else str <$> many1 (oneOf "`\'‘’") inline :: LP Inlines inline = (mempty <$ comment) @@ -235,7 +240,9 @@ blocks = mconcat <$> many block getRawCommand :: String -> LP String getRawCommand name' = do - rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) + rawargs <- withRaw (many (try (optional sp *> opt)) *> + option "" (try (optional sp *> dimenarg)) *> + many braced) return $ '\\' : name' ++ snd rawargs lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v @@ -528,6 +535,7 @@ inlineCommands = M.fromList $ mkImage options src) , ("enquote", enquote) , ("cite", citation "cite" AuthorInText False) + , ("Cite", citation "cite" AuthorInText False) , ("citep", citation "citep" NormalCitation False) , ("citep*", citation "citep*" NormalCitation False) , ("citeal", citation "citeal" NormalCitation False) @@ -597,8 +605,8 @@ mkImage options src = do case takeExtension src of "" -> do defaultExt <- getOption readerDefaultImageExtension - return $ imageWith (addExtension src defaultExt) "" attr alt - _ -> return $ imageWith src "" attr alt + return $ imageWith attr (addExtension src defaultExt) "" alt + _ -> return $ imageWith attr src "" alt inNote :: Inlines -> Inlines inNote ils = @@ -824,10 +832,10 @@ tok :: LP Inlines tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar opt :: LP Inlines -opt = bracketed inline <* optional sp +opt = bracketed inline skipopts :: LP () -skipopts = skipMany opt +skipopts = skipMany (opt *> optional sp) inlineText :: LP Inlines inlineText = str <$> many1 inlineChar @@ -893,7 +901,7 @@ verbatimEnv' = fmap snd <$> string "\\begin" name <- braced' guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", - "minted", "alltt"] + "minted", "alltt", "comment"] manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}") blob' :: IncludeParser @@ -1030,6 +1038,8 @@ environments = M.fromList , ("figure", env "figure" $ resetCaption *> skipopts *> blocks >>= addImageCaption) , ("center", env "center" blocks) + , ("longtable", env "longtable" $ + resetCaption *> skipopts *> blocks >>= addTableCaption) , ("table", env "table" $ resetCaption *> skipopts *> blocks >>= addTableCaption) , ("tabular*", env "tabular" $ simpTable True) @@ -1044,6 +1054,7 @@ environments = M.fromList , ("code", guardEnabled Ext_literate_haskell *> (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code")) + , ("comment", mempty <$ verbEnv "comment") , ("verbatim", codeBlock <$> verbEnv "verbatim") , ("Verbatim", do options <- option [] keyvals let kvs = [ (if k == "firstnumber" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ebca7e83d..fd16a5f75 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -39,6 +39,7 @@ import Data.Ord ( comparing ) import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition +import Text.Pandoc.Emoji (emojis) import qualified Data.Text as T import Data.Text (Text) import qualified Data.Yaml as Yaml @@ -47,7 +48,7 @@ import qualified Data.HashMap.Strict as H import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.Vector as V -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.XML (fromEntities) @@ -55,8 +56,6 @@ import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) -import Data.Monoid (mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>)) import Control.Monad import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup @@ -64,6 +63,7 @@ import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Error type MarkdownParser = Parser [Char] ParserState @@ -328,23 +328,22 @@ stopLine = try $ (string "---" <|> string "...") >> blankline >> return () mmdTitleBlock :: MarkdownParser () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block - kvPairs <- many1 kvPair + firstPair <- kvPair False + restPairs <- many (kvPair True) + let kvPairs = firstPair : restPairs blanklines updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: MarkdownParser (String, MetaValue) -kvPair = try $ do +kvPair :: Bool -> MarkdownParser (String, MetaValue) +kvPair allowEmpty = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - skipMany1 spaceNoNewline - val <- manyTill anyChar + val <- trim <$> manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ not . null . trim $ val + guard $ allowEmpty || not (null val) let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val + let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val return (key',val') - where - spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r')) parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do @@ -506,9 +505,15 @@ block = do header :: MarkdownParser (F Blocks) header = setextHeader <|> atxHeader <?> "header" +atxChar :: MarkdownParser Char +atxChar = do + exts <- getOption readerExtensions + return $ if Set.member Ext_literate_haskell exts + then '=' else '#' + atxHeader :: MarkdownParser (F Blocks) atxHeader = try $ do - level <- many1 (char '#') >>= return . length + level <- atxChar >>= many1 . char >>= return . length notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces @@ -524,7 +529,7 @@ atxClosing :: MarkdownParser Attr atxClosing = try $ do attr' <- option nullAttr (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) - skipMany (char '#') + skipMany . char =<< atxChar skipSpaces attr <- option attr' (guardEnabled Ext_header_attributes >> attributes) @@ -636,7 +641,11 @@ keyValAttr = try $ do val <- enclosed (char '"') (char '"') litChar <|> enclosed (char '\'') (char '\'') litChar <|> many (escapedChar' <|> noneOf " \t\n\r}") - return $ \(id',cs,kvs) -> (id',cs,kvs ++ [(key,val)]) + return $ \(id',cs,kvs) -> + case key of + "id" -> (val,cs,kvs) + "class" -> (id',cs ++ words val,kvs) + _ -> (id',cs,kvs ++ [(key,val)]) specialAttr :: MarkdownParser (Attr -> Attr) specialAttr = do @@ -1316,7 +1325,7 @@ removeOneLeadingSpace xs = gridTableFooter :: MarkdownParser [Char] gridTableFooter = blanklines -pipeBreak :: MarkdownParser [Alignment] +pipeBreak :: MarkdownParser ([Alignment], [Int]) pipeBreak = try $ do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1326,14 +1335,22 @@ pipeBreak = try $ do guard $ not (null rest && not openPipe) optional (char '|') blankline - return (first:rest) + return $ unzip (first:rest) pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do - (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak - lines' <- sequence <$> many pipeTableRow - let widths = replicate (length aligns) 0.0 - return $ (aligns, widths, heads, lines') + nonindentSpaces + lookAhead nonspaceChar + (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak + (lines', rawRows) <- unzip <$> many (withRaw pipeTableRow) + let maxlength = maximum $ map length rawRows + numColumns <- getOption readerColumns + let widths = if maxlength > numColumns + then map (\len -> + fromIntegral (len + 1) / fromIntegral numColumns) + seplengths + else replicate (length aligns) 0.0 + return $ (aligns, widths, heads, sequence lines') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1343,7 +1360,7 @@ sepPipe = try $ do -- parse a row, also returning probable alignments for org-table cells pipeTableRow :: MarkdownParser (F [Blocks]) pipeTableRow = do - nonindentSpaces + skipMany spaceChar openPipe <- (True <$ char '|') <|> return False let cell = mconcat <$> many (notFollowedBy (blankline <|> char '|') >> inline) @@ -1362,19 +1379,20 @@ pipeTableRow = do ils' | B.isNull ils' -> mempty | otherwise -> B.plain $ ils') cells' -pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') - many1 (char '-') + pipe <- many1 (char '-') right <- optionMaybe (char ':') skipMany spaceChar + let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right return $ - case (left,right) of - (Nothing,Nothing) -> AlignDefault - (Just _,Nothing) -> AlignLeft - (Nothing,Just _) -> AlignRight - (Just _,Just _) -> AlignCenter + ((case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter), len) -- Succeed only if current line contains a pipe. scanForPipe :: Parser [Char] st () @@ -1453,6 +1471,7 @@ inline = choice [ whitespace , exampleRef , smart , return . B.singleton <$> charRef + , emoji , symbol , ltSign ] <?> "inline" @@ -1652,7 +1671,7 @@ endline = try $ do notFollowedBy (inList >> listStart) guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart - guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header + guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) notFollowedByHtmlCloser @@ -1705,16 +1724,16 @@ link = try $ do setState $ st{ stateAllowLinks = True } regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) -regLink :: (String -> String -> Attr -> Inlines -> Inlines) +regLink :: (Attr -> String -> String -> Inlines -> Inlines) -> F Inlines -> MarkdownParser (F Inlines) regLink constructor lab = try $ do (src, tit) <- source attr <- option nullAttr $ guardEnabled Ext_common_link_attributes >> attributes - return $ constructor src tit attr <$> lab + return $ constructor attr src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: (String -> String -> Attr -> Inlines -> Inlines) +referenceLink :: (Attr -> String -> String -> Inlines -> Inlines) -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False @@ -1743,10 +1762,10 @@ referenceLink constructor (lab, raw) = do then do headerKeys <- asksF stateHeaderKeys case M.lookup key headerKeys of - Just ((src, tit), _) -> constructor src tit nullAttr <$> lab + Just ((src, tit), _) -> constructor nullAttr src tit <$> lab Nothing -> makeFallback else makeFallback - Just ((src,tit), attr) -> constructor src tit attr <$> lab + Just ((src,tit), attr) -> constructor attr src tit <$> lab dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB @@ -1780,9 +1799,9 @@ image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor src = case takeExtension src of - "" -> B.imageWith (addExtension src defaultExt) - _ -> B.imageWith src + let constructor attr' src = case takeExtension src of + "" -> B.imageWith attr' (addExtension src defaultExt) + _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) note :: MarkdownParser (F Inlines) @@ -1886,6 +1905,21 @@ rawHtmlInline = do else not . isTextTag return $ return $ B.rawInline "html" result +-- Emoji + +emojiChars :: [Char] +emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] + +emoji :: MarkdownParser (F Inlines) +emoji = try $ do + guardEnabled Ext_emoji + char ':' + emojikey <- many1 (oneOf emojiChars) + char ':' + case M.lookup emojikey emojis of + Just s -> return (return (B.str s)) + Nothing -> mzero + -- Citations cite :: MarkdownParser (F Inlines) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 6f7da2586..24b3f5c7e 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -38,15 +38,14 @@ module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Options import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) import Text.Pandoc.Parsing hiding ( nested ) import Text.Pandoc.Walk ( walk ) import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim ) -import Data.Monoid (mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad import Data.List (intersperse, intercalate, isPrefixOf ) import Text.HTML.TagSoup @@ -252,8 +251,8 @@ parseAttr = try $ do skipMany spaceChar k <- many1 letter char '=' - char '"' - v <- many1Till (satisfy (/='\n')) (char '"') + v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) + <|> many1 nonspaceChar return (k,v) tableStart :: MWParser () @@ -588,7 +587,7 @@ image = try $ do let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.imageWith fname ("fig:" ++ stringify caption) attr caption + return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption imageOption :: MWParser String imageOption = try $ char '|' *> opt diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 94ea9e3a2..4ec164e19 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -34,7 +34,6 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) import Text.Pandoc.Error -import Control.Applicative -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 19ddba36b..b2e5f2e67 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -9,9 +9,7 @@ import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.XML.Light import Text.Pandoc.Compat.TagSoupEntity (lookupEntity) import Data.Generics -import Data.Monoid import Control.Monad.State -import Control.Applicative ((<$>), (<$)) import Data.Default import Text.Pandoc.Compat.Except import Text.Pandoc.Error diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 1c8ec51bc..a925c1d84 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -36,7 +36,6 @@ import Codec.Archive.Zip import qualified Text.XML.Light as XML import qualified Data.ByteString.Lazy as B -import Data.Monoid ( mempty ) import Text.Pandoc.Definition import Text.Pandoc.Error diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs index 310ca028e..30f96c557 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -44,9 +44,9 @@ import qualified Control.Category as Cat import Control.Arrow import Control.Monad -import Data.Monoid import Data.Foldable +import Text.Pandoc.Compat.Monoid import Text.Pandoc.Readers.Odt.Arrows.Utils import Text.Pandoc.Readers.Odt.Generic.Fallible diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs index 9710973b3..8c9ee0539 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs @@ -42,12 +42,11 @@ module Text.Pandoc.Readers.Odt.Arrows.Utils where import Control.Arrow import Control.Monad ( join, MonadPlus(..) ) -import Data.Monoid import qualified Data.Foldable as F import Text.Pandoc.Readers.Odt.Generic.Fallible import Text.Pandoc.Readers.Odt.Generic.Utils - +import Text.Pandoc.Compat.Monoid and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c') and2 = (&&&) @@ -130,24 +129,23 @@ joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z joinOn = arr.uncurry -- | Applies a function to the uncurried result-pair of an arrow-application. --- (The §-symbol was chosen to evoke an association with pairs through the --- shared first character) -(>>§) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d -a >>§ f = a >>^ uncurry f +-- (The %-symbol was chosen to evoke an association with pairs.) +(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d +a >>% f = a >>^ uncurry f --- | '(>>§)' with its arguments flipped -(§<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d -(§<<) = flip (>>§) +-- | '(>>%)' with its arguments flipped +(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d +(%<<) = flip (>>%) -- | Precomposition with an uncurried function -(§>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r -f §>> a = uncurry f ^>> a +(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r +f %>> a = uncurry f ^>> a -- | Precomposition with an uncurried function (right to left variant) -(<<§) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r -(<<§) = flip (§>>) +(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r +(<<%) = flip (%>>) -infixr 2 >>§, §<<, §>>, <<§ +infixr 2 >>%, %<<, %>>, <<% -- | Duplicate a value and apply an arrow to the second instance. @@ -272,7 +270,7 @@ newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c } instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where mempty = CoEval $ returnV mempty - (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>§ mappend + (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend -- | Evaluates a collection of arrows in a parallel fashion. -- @@ -434,29 +432,29 @@ a ^>>?^? f = a ^>> Left ^|||^ f a >>?! f = a >>> right f --- -(>>?§) :: (ArrowChoice a, Monoid f) +(>>?%) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f (b,b') -> (b -> b' -> c) -> FallibleArrow a x f c -a >>?§ f = a >>?^ (uncurry f) +a >>?% f = a >>?^ (uncurry f) --- -(^>>?§) :: (ArrowChoice a, Monoid f) +(^>>?%) :: (ArrowChoice a, Monoid f) => (x -> Either f (b,b')) -> (b -> b' -> c) -> FallibleArrow a x f c -a ^>>?§ f = arr a >>?^ (uncurry f) +a ^>>?% f = arr a >>?^ (uncurry f) --- -(>>?§?) :: (ArrowChoice a, Monoid f) +(>>?%?) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f (b,b') -> (b -> b' -> (Either f c)) -> FallibleArrow a x f c -a >>?§? f = a >>?^? (uncurry f) +a >>?%? f = a >>?^? (uncurry f) infixr 1 >>?, >>?^, >>?^? infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?! -infixr 1 >>?§, ^>>?§, >>?§? +infixr 1 >>?%, ^>>?%, >>?%? -- | Keep values that are Right, replace Left values by a constant. ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 9bb585b8e..1f1c57646 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -44,7 +44,6 @@ import Control.Applicative hiding ( liftA, liftA2, liftA3 ) import qualified Data.Map as M import Data.List ( find ) -import Data.Monoid import Data.Maybe import qualified Text.XML.Light as XML @@ -146,7 +145,7 @@ type OdtReaderSafe a b = XMLReaderSafe ReaderState a b fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b fromStyles f = keepingTheValue (getExtraState >>^ styleSet) - >>§ f + >>% f -- getStyleByName :: OdtReader StyleName Style @@ -163,7 +162,7 @@ lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice -- switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle) switchCurrentListStyle = keepingTheValue getExtraState - >>§ swapCurrentListStyle + >>% swapCurrentListStyle >>> first setExtraState >>^ snd @@ -171,7 +170,7 @@ switchCurrentListStyle = keepingTheValue getExtraState pushStyle :: OdtReaderSafe Style Style pushStyle = keepingTheValue ( ( keepingTheValue getExtraState - >>§ pushStyle' + >>% pushStyle' ) >>> setExtraState ) @@ -471,7 +470,7 @@ matchingElement :: (Monoid e) matchingElement ns name reader = (ns, name, asResultAccumulator reader) where asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m) - asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>§ (<>) + asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>% (<>) -- matchChildContent' :: (Monoid result) @@ -498,14 +497,14 @@ matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback -- -- | Open Document allows several consecutive spaces if they are marked up read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines -read_plain_text = fst ^&&& read_plain_text' >>§ recover +read_plain_text = fst ^&&& read_plain_text' >>% recover where -- fallible version read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines read_plain_text' = ( second ( arr extractText ) >>^ spreadChoice >>?! second text ) - >>?§ (<>) + >>?% (<>) -- extractText :: XML.Content -> Fallible String extractText (XML.Text cData) = succeedWith (XML.cdData cData) diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs index 5922164c9..d0fdc228f 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs @@ -41,9 +41,8 @@ module Text.Pandoc.Readers.Odt.Generic.Fallible where import Control.Applicative import Control.Monad - +import Text.Pandoc.Compat.Monoid ((<>)) import qualified Data.Foldable as F -import Data.Monoid -- | Default for now. Will probably become a class at some point. type Failure = () diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index ec7e0ea5e..8c03d1a09 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -123,7 +123,6 @@ import Control.Arrow import qualified Data.Map as M import qualified Data.Foldable as F import Data.Default -import Data.Monoid ( Monoid ) import Data.Maybe import qualified Text.XML.Light as XML @@ -332,7 +331,7 @@ convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA where setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v modifyWithA = keepingTheValue (moreState ^>> a) - >>^ spreadChoice >>?§ flip replaceExtraState + >>^ spreadChoice >>?% flip replaceExtraState -- | First sets the extra state to the new value. Then produces a new -- extra state with a converter that uses the new state. Finally, the @@ -414,14 +413,14 @@ elemName :: (NameSpaceID nsID) -> XMLConverter nsID extraState x XML.QName elemName nsID name = lookupNSiri nsID &&& lookupNSprefix nsID - >>§ XML.QName name + >>% XML.QName name -- | Checks if a given element matches both a specified namespace id -- and a specified element name elemNameIs :: (NameSpaceID nsID) => nsID -> ElementName -> XMLConverter nsID extraState XML.Element Bool -elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>§ hasThatName +elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName where hasThatName e iri = let elName = XML.elName e in XML.qName elName == name && XML.qURI elName == iri @@ -462,8 +461,8 @@ currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>> (XML.qName >>^ (&&).(== name) ) ^&&&^ (XML.qIRI >>^ (==) ) - ) >>§ (.) - ) &&& lookupNSiri nsID >>§ ($) + ) >>% (.) + ) &&& lookupNSiri nsID >>% ($) -} -- @@ -488,7 +487,7 @@ findChildren :: (NameSpaceID nsID) -> XMLConverter nsID extraState x [XML.Element] findChildren nsID name = elemName nsID name &&& getCurrentElement - >>§ XML.findChildren + >>% XML.findChildren -- filterChildren :: (XML.Element -> Bool) @@ -509,7 +508,7 @@ findChild' :: (NameSpaceID nsID) -> XMLConverter nsID extraState x (Maybe XML.Element) findChild' nsID name = elemName nsID name &&& getCurrentElement - >>§ XML.findChild + >>% XML.findChild -- findChild :: (NameSpaceID nsID) @@ -597,7 +596,7 @@ isThatTheAttrValue :: (NameSpaceID nsID) isThatTheAttrValue nsID attrName = keepingTheValue (findAttr nsID attrName) - >>§ right.(==) + >>% right.(==) -- | Lookup value in a dictionary, fail if no attribute found or value -- not in dictionary @@ -670,7 +669,7 @@ findAttr' :: (NameSpaceID nsID) -> XMLConverter nsID extraState x (Maybe AttributeValue) findAttr' nsID attrName = elemName nsID attrName &&& getCurrentElement - >>§ XML.findAttr + >>% XML.findAttr -- | Return value as string or fail findAttr :: (NameSpaceID nsID) @@ -788,7 +787,7 @@ prepareIteration :: (NameSpaceID nsID) -> XMLConverter nsID extraState b [(b, XML.Element)] prepareIteration nsID name = keepingTheValue (findChildren nsID name) - >>§ distributeValue + >>% distributeValue -- | Applies a converter to every child element of a specific type. -- Collects results in a 'Monoid'. @@ -878,9 +877,9 @@ makeMatcherE nsID name c = ( second ( elemNameIs nsID name >>^ bool Nothing (Just tryC) ) - >>§ (<|>) + >>% (<|>) ) &&&^ snd - where tryC = (fst ^&&& executeThere c >>§ recover) &&&^ snd + where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd -- Helper function: The @c@ is actually a converter that is to be selected by -- matching XML content to the first two parameters. @@ -900,14 +899,14 @@ makeMatcherC nsID name c = ( second ( contentToElem >>^ bool Nothing (Just cWithJump) ) ) - >>§ (<|>) + >>% (<|>) ) &&&^ snd where cWithJump = ( fst ^&&& ( second contentToElem >>> spreadChoice ^>>? executeThere c ) - >>§ recover) + >>% recover) &&&^ snd contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element contentToElem = arr $ \e -> case e of diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs index e28056814..deb009998 100644 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -107,4 +107,4 @@ nsIDs = [ ("http://www.w3.org/1999/xhtml" , NsXHtml ), ("http://www.w3.org/2002/xforms" , NsXForms ), ("http://www.w3.org/1999/xlink" , NsXLink ) - ]
\ No newline at end of file + ] diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 1cf87cc59..96cfed0b3 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -78,7 +78,6 @@ import qualified Data.Map as M import qualified Data.Set as S import Data.List ( unfoldr ) import Data.Default -import Data.Monoid import Data.Maybe import qualified Text.XML.Light as XML @@ -175,7 +174,7 @@ findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch) findPitch = ( lookupAttr NsStyle "font-pitch" `ifFailedDo` findAttr NsStyle "font-name" >>? ( keepingTheValue getExtraState - >>§ M.lookup + >>% M.lookup >>^ maybeToChoice ) ) @@ -362,11 +361,11 @@ instance Read XslUnit where estimateInMillimeter :: Int -> XslUnit -> Int estimateInMillimeter n XslUnitMM = n estimateInMillimeter n XslUnitCM = n * 10 -estimateInMillimeter n XslUnitInch = n * 25 -- * 25.4 -estimateInMillimeter n XslUnitPoints = n `div` 3 -- * 1/72 * 25.4 -estimateInMillimeter n XslUnitPica = n * 4 -- * 12 * 1/72 * 25.4 -estimateInMillimeter n XslUnitPixel = n `div`3 -- * 1/72 * 25.4 -estimateInMillimeter n XslUnitEM = n * 7 -- * 16 * 1/72 * 25.4 +estimateInMillimeter n XslUnitInch = n * 25 -- \* 25.4 +estimateInMillimeter n XslUnitPoints = n `div` 3 -- \* 1/72 * 25.4 +estimateInMillimeter n XslUnitPica = n * 4 -- \* 12 * 1/72 * 25.4 +estimateInMillimeter n XslUnitPixel = n `div`3 -- \* 1/72 * 25.4 +estimateInMillimeter n XslUnitEM = n * 7 -- \* 16 * 1/72 * 25.4 ---- @@ -385,7 +384,7 @@ getListLevelStyle level ListStyle{..} = let (lower , exactHit , _) = M.splitLookup level levelStyles in exactHit <|> fmap fst (M.maxView lower) -- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1] - -- ^ simpler, but in general less efficient + -- \^ simpler, but in general less efficient data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType , listItemPrefix :: Maybe String @@ -448,7 +447,7 @@ readAllStyles :: StyleReader _x Styles readAllStyles = ( readFontPitches >>?! ( readAutomaticStyles &&& readStyles )) - >>?§? chooseMax + >>?%? chooseMax -- all top elements are always on the same hierarchy level -- diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 980f63504..3be47cfd4 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -2,7 +2,7 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-} {- -Copyright (C) 2014-2015 Albert Krewinkel <tarleb@moltkeplatz.de> +Copyright (C) 2014-2015 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,19 +21,20 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Org - Copyright : Copyright (C) 2014 Albert Krewinkel + Copyright : Copyright (C) 2014-2015 Albert Krewinkel License : GNU GPL, version 2 or above - Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de> + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Conversion of org-mode formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Org ( readOrg ) where import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>) - , trimInlines ) +import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), + trimInlines ) import Text.Pandoc.Definition +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF @@ -45,8 +46,6 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Text.TeXMath (readTeX, writePandoc, DisplayType(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Applicative ( Applicative, pure - , (<$>), (<$), (<*>), (<*), (*>) ) import Control.Arrow (first) import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks, local) @@ -55,7 +54,6 @@ import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust) -import Data.Monoid (Monoid, mconcat, mempty, mappend) import Network.HTTP (urlEncode) import Text.Pandoc.Error @@ -70,6 +68,14 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) +instance HasIdentifierList OrgParserState where + extractIdentifierList = orgStateIdentifiers + updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) } + +instance HasHeaderMap OrgParserState where + extractHeaderMap = orgStateHeaderMap + updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) } + parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks @@ -135,6 +141,9 @@ data OrgParserState = OrgParserState , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable + , orgStateParserContext :: ParserContext + , orgStateIdentifiers :: [String] + , orgStateHeaderMap :: M.Map Inlines String } instance Default OrgParserLocal where @@ -174,6 +183,9 @@ defaultOrgParserState = OrgParserState , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] + , orgStateParserContext = NullState + , orgStateIdentifiers = [] + , orgStateHeaderMap = M.empty } recordAnchorId :: String -> OrgParser () @@ -282,6 +294,23 @@ blanklines = <* updateLastPreCharPos <* updateLastForbiddenCharPos +-- | Succeeds when we're in list context. +inList :: OrgParser () +inList = do + ctx <- orgStateParserContext <$> getState + guard (ctx == ListItemState) + +-- | Parse in different context +withContext :: ParserContext -- ^ New parser context + -> OrgParser a -- ^ Parser to run in that context + -> OrgParser a +withContext context parser = do + oldContext <- orgStateParserContext <$> getState + updateState $ \s -> s{ orgStateParserContext = context } + result <- parser + updateState $ \s -> s{ orgStateParserContext = oldContext } + return result + -- -- parsing blocks -- @@ -397,7 +426,7 @@ verseBlock blkProp = try $ do ignHeaders content <- rawBlockContent blkProp fmap B.para . mconcat . intersperse (pure B.linebreak) - <$> mapM (parseFromString parseInlines) (lines content) + <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content) exportsCode :: [(String, String)] -> Bool exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs @@ -504,10 +533,16 @@ rundocBlockClass :: String rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgParamValue +blockOption = try $ do + argKey <- orgArgKey + paramValue <- option "yes" orgParamValue + return (argKey, paramValue) inlineBlockOption :: OrgParser (String, String) -inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue +inlineBlockOption = try $ do + argKey <- orgArgKey + paramValue <- option "yes" orgInlineParamValue + return (argKey, paramValue) orgArgKey :: OrgParser String orgArgKey = try $ @@ -516,11 +551,17 @@ orgArgKey = try $ orgParamValue :: OrgParser String orgParamValue = try $ - skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces + skipSpaces + *> notFollowedBy (char ':' ) + *> many1 (noneOf "\t\n\r ") + <* skipSpaces orgInlineParamValue :: OrgParser String orgInlineParamValue = try $ - skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces + skipSpaces + *> notFollowedBy (char ':') + *> many1 (noneOf "\t\n\r ]") + <* skipSpaces orgArgWordChar :: OrgParser Char orgArgWordChar = alphaNum <|> oneOf "-_" @@ -668,7 +709,10 @@ header = try $ do title <- manyTill inline (lookAhead headerEnd) tags <- headerEnd let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags - return $ B.header level <$> inlns + st <- getState + let inlines = runF inlns st + attr <- registerHeader nullAttr inlines + return $ pure (B.headerWith attr level inlines) where tagToInlineF :: String -> F Inlines tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty @@ -687,7 +731,7 @@ headerTags = try $ headerStart :: OrgParser Int headerStart = try $ - (length <$> many1 (char '*')) <* many1 (char ' ') + (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos -- Don't use (or need) the reader wrapper here, we want hline to be @@ -879,9 +923,13 @@ noteBlock = try $ do paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ do ils <- parseInlines - nl <- option False (newline >> return True) - try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >> - return (B.para <$> ils)) + nl <- option False (newline *> return True) + -- Read block as paragraph, except if we are in a list context and the block + -- is directly followed by a list item, in which case the block is read as + -- plain text. + try (guard nl + *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart)) + *> return (B.para <$> ils)) <|> (return (B.plain <$> ils)) inlinesTillNewline :: OrgParser (F Inlines) @@ -946,19 +994,22 @@ definitionListItem :: OrgParser Int -> OrgParser (F (Inlines, [Blocks])) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength - term <- manyTill (noneOf "\n\r") (try $ string "::") + term <- manyTill (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString parseInlines term contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' + where + definitionMarker = + spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline) -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser (F Blocks) -listItem start = try $ do +listItem start = try . withContext ListItemState $ do markerLength <- try start firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) @@ -1537,8 +1588,11 @@ smart :: OrgParser (F Inlines) smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, dash, ellipses]) - where orgApostrophe = + choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) + where + orgDash = dash <* updatePositions '-' + orgEllipses = ellipses <* updatePositions '.' + orgApostrophe = (char '\'' <|> char '\8217') <* updateLastPreCharPos <* updateLastForbiddenCharPos *> return (B.str "\x2019") @@ -1546,9 +1600,10 @@ smart = do singleQuoted :: OrgParser (F Inlines) singleQuoted = try $ do singleQuoteStart + updatePositions '\'' withQuoteContext InSingleQuote $ fmap B.singleQuoted . trimInlinesF . mconcat <$> - many1Till inline singleQuoteEnd + many1Till inline (singleQuoteEnd <* updatePositions '\'') -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote @@ -1556,6 +1611,7 @@ singleQuoted = try $ do doubleQuoted :: OrgParser (F Inlines) doubleQuoted = try $ do doubleQuoteStart + updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return (fmap B.doubleQuoted . trimInlinesF $ contents)) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 4138d65ea..0e5bb2a87 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -44,13 +44,11 @@ import Data.List ( findIndex, intersperse, intercalate, import Data.Maybe (fromMaybe) import qualified Data.Map as M import Text.Printf ( printf ) -import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>), pure) -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import qualified Text.Pandoc.Builder as B -import Data.Monoid (mconcat, mempty) import Data.Sequence (viewr, ViewR(..)) import Data.Char (toLower, isHexDigit, isSpace) - +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Error -- | Parse reStructuredText string and return Pandoc document. @@ -614,20 +612,22 @@ directive' = do return mempty -- TODO: --- - Silently ignores illegal fields -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: String -> [(String, String)] -> RSTParser Blocks addNewRole roleString fields = do (role, parentRole) <- parseFromString inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState - let (baseRole, baseFmt, baseAttr) = - maybe (parentRole, Nothing, nullAttr) id $ - M.lookup parentRole customRoles + let getBaseRole (r, f, a) roles = + case M.lookup r roles of + Just (r', f', a') -> getBaseRole (r', f', a') roles + Nothing -> (r, f, a) + (baseRole, baseFmt, baseAttr) = + getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt annotate :: [String] -> [String] annotate = maybe id (:) $ - if parentRole == "code" + if baseRole == "code" then lookup "language" fields else Nothing attr = let (ident, classes, keyValues) = baseAttr @@ -636,12 +636,12 @@ addNewRole roleString fields = do -- warn about syntax we ignore flip mapM_ fields $ \(key, _) -> case key of - "language" -> when (parentRole /= "code") $ addWarning Nothing $ + "language" -> when (baseRole /= "code") $ addWarning Nothing $ "ignoring :language: field because the parent of role :" ++ - role ++ ": is :" ++ parentRole ++ ": not :code:" - "format" -> when (parentRole /= "raw") $ addWarning Nothing $ + role ++ ": is :" ++ baseRole ++ ": not :code:" + "format" -> when (baseRole /= "raw") $ addWarning Nothing $ "ignoring :format: field because the parent of role :" ++ - role ++ ": is :" ++ parentRole ++ ": not :raw:" + role ++ ": is :" ++ baseRole ++ ": not :raw:" _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++ ": in definition of role :" ++ role ++ ": in" when (parentRole == "raw" && countKeys "format" > 1) $ @@ -1138,7 +1138,7 @@ referenceLink = try $ do Just val -> return val -- if anonymous link, remove key so it won't be used again when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } - return $ B.linkWith src tit attr label' + return $ B.linkWith attr src tit label' autoURI :: RSTParser Inlines autoURI = do diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 07b414431..fc2bdc069 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -38,8 +38,6 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, macro, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Data.Monoid (Monoid, mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad import Text.Printf (printf) import Debug.Trace (trace) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index ec1da896d..3db01faf4 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -64,9 +64,8 @@ import Text.HTML.TagSoup.Match import Data.List ( intercalate ) import Data.Char ( digitToInt, isUpper) import Control.Monad ( guard, liftM, when ) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Printf -import Control.Applicative ((<$>), (*>), (<*), (<$)) -import Data.Monoid import Debug.Trace (trace) import Text.Pandoc.Error @@ -81,11 +80,12 @@ readTextile opts s = -- | Generate a Pandoc ADT from a textile document parseTextile :: Parser [Char] ParserState Pandoc parseTextile = do - -- textile allows raw HTML and does smart punctuation by default + -- textile allows raw HTML and does smart punctuation by default, + -- but we do not enable smart punctuation unless it is explicitly + -- asked for, for better conversion to other light markup formats oldOpts <- stateOptions `fmap` getState updateState $ \state -> state{ stateOptions = - oldOpts{ readerSmart = True - , readerParseRaw = True + oldOpts{ readerParseRaw = True , readerOldDashes = True } } many blankline @@ -535,6 +535,7 @@ link = try $ do image :: Parser [Char] ParserState Inlines image = try $ do char '!' >> notFollowedBy space + _ <- attributes -- ignore for now, until we have image attributes src <- manyTill anyChar' (lookAhead $ oneOf "!(") alt <- option "" (try $ (char '(' >> manyTill anyChar' (char ')'))) char '!' diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 304d6d4c5..58841f2ce 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -33,17 +33,15 @@ module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags where import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks, (<>) - , trimInlines ) +import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL) import Text.Pandoc.Parsing hiding (space, spaces, uri, macro) -import Control.Applicative ((<$>), (<$), (<*>), (<*), (*>)) import Data.Char (toLower) import Data.List (transpose, intersperse, intercalate) import Data.Maybe (fromMaybe) -import Data.Monoid (Monoid, mconcat, mempty, mappend) --import Network.URI (isURI) -- Not sure whether to use this function import Control.Monad (void, guard, when) import Data.Default @@ -53,7 +51,7 @@ import Text.Pandoc.Error import Data.Time.LocalTime (getZonedTime) import Text.Pandoc.Compat.Directory(getModificationTime) import Data.Time.Format (formatTime) -import Text.Pandoc.Compat.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Time (defaultTimeLocale) import System.IO.Error (catchIOError) type T2T = ParserT String ParserState (Reader T2TMeta) |