aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs53
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs10
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs77
-rw-r--r--src/Text/Pandoc/Readers/Docx/Reducible.hs1
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs12
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs106
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs2
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs45
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs110
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs11
-rw-r--r--src/Text/Pandoc/Readers/Native.hs1
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs2
-rw-r--r--src/Text/Pandoc/Readers/Odt.hs1
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/State.hs2
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs42
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs13
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs3
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs29
-rw-r--r--src/Text/Pandoc/Readers/Odt/Namespaces.hs2
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs17
-rw-r--r--src/Text/Pandoc/Readers/Org.hs102
-rw-r--r--src/Text/Pandoc/Readers/RST.hs28
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs2
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs11
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs8
25 files changed, 425 insertions, 265 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index db438e26d..e8fe92e27 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -8,16 +8,15 @@ import Text.XML.Light
import Text.Pandoc.Compat.TagSoupEntity (lookupEntity)
import Data.Either (rights)
import Data.Generics
-import Data.Monoid
import Data.Char (isSpace)
import Control.Monad.State
-import Control.Applicative ((<$>))
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import Text.TeXMath (readMathML, writeTeX)
import Text.Pandoc.Error (PandocError)
import Text.Pandoc.Compat.Except
import Data.Default
+import Data.Foldable (asum)
{-
@@ -194,7 +193,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] indexterm - A wrapper for terms to be indexed
[x] info - A wrapper for information about a component or other block. (DocBook v5)
[x] informalequation - A displayed mathematical equation without a title
-[ ] informalexample - A displayed example without a title
+[x] informalexample - A displayed example without a title
[ ] informalfigure - A untitled figure
[ ] informaltable - A table without a title
[ ] initializer - The initializer for a FieldSynopsis
@@ -498,7 +497,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] warning - An admonition set off from the text
[x] wordasword - A word meant specifically as a word and not representing
anything else
-[ ] xref - A cross reference to another part of the document
+[x] xref - A cross reference to another part of the document
[ ] year - The year of publication of a document
[x] ?asciidoc-br? - line break from asciidoc docbook output
-}
@@ -511,6 +510,7 @@ data DBState = DBState{ dbSectionLevel :: Int
, dbAcceptsMeta :: Bool
, dbBook :: Bool
, dbFigureTitle :: Inlines
+ , dbContent :: [Content]
} deriving Show
instance Default DBState where
@@ -519,13 +519,14 @@ instance Default DBState where
, dbMeta = mempty
, dbAcceptsMeta = False
, dbBook = False
- , dbFigureTitle = mempty }
+ , dbFigureTitle = mempty
+ , dbContent = [] }
readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc
readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs
- where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp'
- inp' = handleInstructions inp
+ where (bs , st') = flip runState (def{ dbContent = tree }) . runExceptT . mapM parseBlock $ tree
+ tree = normalizeTree . parseXML . handleInstructions $ inp
-- We treat <?asciidoc-br?> specially (issue #1236), converting it
-- to <br/>, since xml-light doesn't parse the instruction correctly.
@@ -611,6 +612,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags
"important","caution","note","tip","warning","qandadiv",
"question","answer","abstract","itemizedlist","orderedlist",
"variablelist","article","book","table","informaltable",
+ "informalexample",
"screen","programlisting","example","calloutlist"]
isBlockElement _ = False
@@ -656,7 +658,7 @@ getMediaobject e = do
let (caption, title) = if isNull figTitle
then (getCaption e, "")
else (return figTitle, "fig:")
- liftM (imageWith imageUrl title attr) caption
+ liftM (imageWith attr imageUrl title) caption
getBlocks :: Element -> DB Blocks
getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
@@ -775,6 +777,8 @@ parseBlock (Elem e) =
"book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e
"table" -> parseTable
"informaltable" -> parseTable
+ "informalexample" -> divWith ("", ["informalexample"], []) <$>
+ getBlocks e
"literallayout" -> codeBlockWithLang
"screen" -> codeBlockWithLang
"programlisting" -> codeBlockWithLang
@@ -956,7 +960,13 @@ parseInline (Elem e) =
"keycombo" -> keycombo <$> (mapM parseInline $ elContent e)
"menuchoice" -> menuchoice <$> (mapM parseInline $
filter isGuiMenu $ elContent e)
- "xref" -> return $ str "?" -- so at least you know something is there
+ "xref" -> do
+ content <- dbContent <$> get
+ let linkend = attrValue "linkend" e
+ let title = case attrValue "endterm" e of
+ "" -> maybe "???" xrefTitleByElem (findElementById linkend content)
+ endterm -> maybe "???" strContent (findElementById endterm content)
+ return $ link ('#' : linkend) "" (singleton (Str title))
"email" -> return $ link ("mailto:" ++ strContent e) ""
$ str $ strContent e
"uri" -> return $ link (strContent e) "" $ str $ strContent e
@@ -968,7 +978,7 @@ parseInline (Elem e) =
_ -> ('#' : attrValue "linkend" e)
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, words $ attrValue "role" e, [])
- return $ linkWith href "" attr ils'
+ return $ linkWith attr href "" ils'
"foreignphrase" -> emph <$> innerInlines
"emphasis" -> case attrValue "role" e of
"bold" -> strong <$> innerInlines
@@ -1018,3 +1028,26 @@ parseInline (Elem e) =
isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x ||
named "guimenuitem" x
isGuiMenu _ = False
+
+ findElementById idString content
+ = asum [filterElement (\x -> attrValue "id" x == idString) el | Elem el <- content]
+
+ -- Use the 'xreflabel' attribute for getting the title of a xref link;
+ -- if there's no such attribute, employ some heuristics based on what
+ -- docbook-xsl does.
+ xrefTitleByElem el
+ | not (null xrefLabel) = xrefLabel
+ | otherwise = case qName (elName el) of
+ "chapter" -> descendantContent "title" el
+ "sect1" -> descendantContent "title" el
+ "sect2" -> descendantContent "title" el
+ "sect3" -> descendantContent "title" el
+ "sect4" -> descendantContent "title" el
+ "sect5" -> descendantContent "title" el
+ "cmdsynopsis" -> descendantContent "command" el
+ "funcsynopsis" -> descendantContent "function" el
+ _ -> qName (elName el) ++ "_title"
+ where
+ xrefLabel = attrValue "xreflabel" el
+ descendantContent name = maybe "???" strContent
+ . findElement (QName name Nothing Nothing)
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index b80280553..439e2d3e4 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -85,14 +85,12 @@ import Text.Pandoc.Readers.Docx.Reducible
import Text.Pandoc.Shared
import Text.Pandoc.MediaBag (insertMedia, MediaBag)
import Data.List (delete, (\\), intersect)
-import Data.Monoid
import Text.TeXMath (writeTeX)
import Data.Default (Default)
import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
import Control.Monad.Reader
import Control.Monad.State
-import Control.Applicative ((<$>))
import Data.Sequence (ViewL(..), viewl)
import qualified Data.Sequence as Seq (null)
@@ -206,11 +204,15 @@ runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s) = text s
runElemToInlines (LnBrk) = linebreak
runElemToInlines (Tab) = space
+runElemToInlines (SoftHyphen) = text "\xad"
+runElemToInlines (NoBreakHyphen) = text "\x2011"
runElemToString :: RunElem -> String
runElemToString (TextRun s) = s
runElemToString (LnBrk) = ['\n']
runElemToString (Tab) = ['\t']
+runElemToString (SoftHyphen) = ['\xad']
+runElemToString (NoBreakHyphen) = ['\x2011']
runToString :: Run -> String
runToString (Run _ runElems) = concatMap runElemToString runElems
@@ -501,6 +503,10 @@ bodyPartToBlocks (ListItem pPr numId lvl levelInfo parparts) = do
]
blks <- bodyPartToBlocks (Paragraph pPr parparts)
return $ divWith ("", ["list-item"], kvs) blks
+bodyPartToBlocks (DummyListItem pPr _ parparts) =
+ let pPr' = pPr {pStyle = "ListParagraph": (pStyle pPr)}
+ in
+ bodyPartToBlocks $ Paragraph pPr' parparts
bodyPartToBlocks (Tbl _ _ _ []) =
return $ para mempty
bodyPartToBlocks (Tbl cap _ look (r:rs)) = do
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index cce80fb48..5910a476b 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -59,7 +59,7 @@ import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
import qualified Text.Pandoc.UTF8 as UTF8
import Control.Monad.Reader
-import Control.Applicative ((<$>), (<|>))
+import Control.Applicative ((<|>))
import qualified Data.Map as M
import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML)
@@ -75,6 +75,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
, envFont :: Maybe Font
, envCharStyles :: CharStyleMap
, envParStyles :: ParStyleMap
+ , envLocation :: DocumentLocation
}
deriving Show
@@ -87,7 +88,7 @@ instance Error DocxError where
type D = ExceptT DocxError (Reader ReaderEnv)
runD :: D a -> ReaderEnv -> Either DocxError a
-runD dx re = runReader (runExceptT dx ) re
+runD dx re = runReader (runExceptT dx) re
maybeToD :: Maybe a -> D a
maybeToD (Just a) = return a
@@ -140,7 +141,10 @@ data AbstractNumb = AbstractNumb String [Level]
-- (ilvl, format, string, start)
type Level = (String, String, String, Maybe Integer)
-data Relationship = Relationship (RelId, Target)
+data DocumentLocation = InDocument | InFootnote | InEndnote
+ deriving (Eq,Show)
+
+data Relationship = Relationship DocumentLocation RelId Target
deriving Show
data Notes = Notes NameSpaces
@@ -174,6 +178,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
data BodyPart = Paragraph ParagraphStyle [ParPart]
| ListItem ParagraphStyle String String Level [ParPart]
+ | DummyListItem ParagraphStyle String [ParPart]
| Tbl String TblGrid TblLook [Row]
| OMathPara [Exp]
deriving Show
@@ -208,7 +213,7 @@ data Run = Run RunStyle [RunElem]
| InlineDrawing FilePath B.ByteString
deriving Show
-data RunElem = TextRun String | LnBrk | Tab
+data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show
data VertAlign = BaseLn | SupScrpt | SubScrpt
@@ -238,7 +243,6 @@ defaultRunStyle = RunStyle { isBold = Nothing
, rUnderline = Nothing
, rStyle = Nothing}
-
type Target = String
type Anchor = String
type URL = String
@@ -255,7 +259,8 @@ archiveToDocx archive = do
rels = archiveToRelationships archive
media = archiveToMedia archive
(styles, parstyles) = archiveToStyles archive
- rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles
+ rEnv =
+ ReaderEnv notes numbering rels media Nothing styles parstyles InDocument
doc <- runD (archiveToDocument archive) rEnv
return $ Docx doc
@@ -362,29 +367,30 @@ archiveToNotes zf =
in
Notes ns fn en
-filePathIsRel :: FilePath -> Bool
-filePathIsRel fp =
- let (dir, name) = splitFileName fp
- in
- (dir == "word/_rels/") && ((takeExtension name) == ".rels")
+filePathToRelType :: FilePath -> Maybe DocumentLocation
+filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
+filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
+filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
+filePathToRelType _ = Nothing
-relElemToRelationship :: Element -> Maybe Relationship
-relElemToRelationship element | qName (elName element) == "Relationship" =
+relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
+relElemToRelationship relType element | qName (elName element) == "Relationship" =
do
relId <- findAttr (QName "Id" Nothing Nothing) element
target <- findAttr (QName "Target" Nothing Nothing) element
- return $ Relationship (relId, target)
-relElemToRelationship _ = Nothing
-
-
+ return $ Relationship relType relId target
+relElemToRelationship _ _ = Nothing
+
+filePathToRelationships :: Archive -> FilePath -> [Relationship]
+filePathToRelationships ar fp | Just relType <- filePathToRelType fp
+ , Just entry <- findEntryByPath fp ar
+ , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
+ mapMaybe (relElemToRelationship relType) $ elChildren relElems
+filePathToRelationships _ _ = []
+
archiveToRelationships :: Archive -> [Relationship]
archiveToRelationships archive =
- let relPaths = filter filePathIsRel (filesInArchive archive)
- entries = mapMaybe (\f -> findEntryByPath f archive) relPaths
- relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
- rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems
- in
- rels
+ concatMap (filePathToRelationships archive) $ filesInArchive archive
filePathIsMedia :: FilePath -> Bool
filePathIsMedia fp =
@@ -409,6 +415,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
return lvl
+
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element |
qName (elName element) == "num" &&
@@ -560,7 +567,7 @@ elemToBodyPart ns element
num <- asks envNumbering
case lookupLevel numId lvl num of
Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
- Nothing -> throwError WrongElem
+ Nothing -> return $ DummyListItem parstyle lvl parparts
elemToBodyPart ns element
| isElem ns "w" "p" element = do
sty <- asks envParStyles
@@ -573,7 +580,7 @@ elemToBodyPart ns element
Just levelInfo ->
return $ ListItem parstyle numId lvl levelInfo parparts
Nothing ->
- throwError WrongElem
+ return $ DummyListItem parstyle lvl parparts
Nothing -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
@@ -596,13 +603,16 @@ elemToBodyPart ns element
return $ Tbl caption grid tblLook rows
elemToBodyPart _ _ = throwError WrongElem
-lookupRelationship :: RelId -> [Relationship] -> Maybe Target
-lookupRelationship relid rels =
- lookup relid (map (\(Relationship pair) -> pair) rels)
+lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
+lookupRelationship docLocation relid rels =
+ lookup (docLocation, relid) pairs
+ where
+ pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
expandDrawingId :: String -> D (FilePath, B.ByteString)
expandDrawingId s = do
- target <- asks (lookupRelationship s . envRelationships)
+ location <- asks envLocation
+ target <- asks (lookupRelationship location s . envRelationships)
case target of
Just filepath -> do
bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
@@ -657,9 +667,10 @@ elemToParPart ns element
elemToParPart ns element
| isElem ns "w" "hyperlink" element
, Just relId <- findAttr (elemName ns "r" "id") element = do
+ location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
- case lookupRelationship relId rels of
+ case lookupRelationship location relId rels of
Just target -> do
case findAttr (elemName ns "w" "anchor") element of
Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
@@ -699,7 +710,7 @@ elemToRun ns element
, Just fnId <- findAttr (elemName ns "w" "id") ref = do
notes <- asks envNotes
case lookupFootnote fnId notes of
- Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e)
+ Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
return $ Footnote bps
Nothing -> return $ Footnote []
elemToRun ns element
@@ -708,7 +719,7 @@ elemToRun ns element
, Just enId <- findAttr (elemName ns "w" "id") ref = do
notes <- asks envNotes
case lookupEndnote enId notes of
- Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e)
+ Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
return $ Endnote bps
Nothing -> return $ Endnote []
elemToRun ns element
@@ -877,6 +888,8 @@ elemToRunElem ns element
map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
| isElem ns "w" "br" element = return LnBrk
| isElem ns "w" "tab" element = return Tab
+ | isElem ns "w" "softHyphen" element = return SoftHyphen
+ | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen
| isElem ns "w" "sym" element = return (getSymChar ns element)
| otherwise = throwError WrongElem
where
diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
index 8269ca88d..c93b40119 100644
--- a/src/Text/Pandoc/Readers/Docx/Reducible.hs
+++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs
@@ -8,7 +8,6 @@ module Text.Pandoc.Readers.Docx.Reducible ( concatReduce
import Text.Pandoc.Builder
-import Data.Monoid
import Data.List
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
import qualified Data.Sequence as Seq (null)
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index fb86f1286..79aa540f6 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -16,6 +16,7 @@ import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except)
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.MIME (MimeType)
import qualified Text.Pandoc.Builder as B
import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry
@@ -25,9 +26,7 @@ import System.FilePath ( takeFileName, (</>), dropFileName, normalise
, dropFileName
, splitFileName )
import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
-import Control.Applicative ((<$>))
import Control.Monad (guard, liftM, when)
-import Data.Monoid (mempty, (<>))
import Data.List (isPrefixOf, isInfixOf)
import Data.Maybe (mapMaybe, fromMaybe)
import qualified Data.Map as M (Map, lookup, fromList, elems)
@@ -181,7 +180,6 @@ getManifest archive = do
fixInternalReferences :: FilePath -> Pandoc -> Pandoc
fixInternalReferences pathToFile =
(walk $ renameImages root)
- . (walk normalisePath)
. (walk $ fixBlockIRs filename)
. (walk $ fixInlineIRs filename)
where
@@ -196,12 +194,6 @@ fixInlineIRs s (Link attr t ('#':url, tit)) =
Link attr t (addHash s url, tit)
fixInlineIRs _ v = v
-normalisePath :: Inline -> Inline
-normalisePath (Link attr t (url, tit)) =
- let (path, uid) = span (/= '#') url in
- Link attr t (takeFileName path ++ uid, tit)
-normalisePath s = s
-
prependHash :: [String] -> Inline -> Inline
prependHash ps l@(Link attr is (url, tit))
| or [s `isPrefixOf` url | s <- ps] =
@@ -223,7 +215,7 @@ fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEP
addHash :: String -> String -> String
addHash _ "" = ""
-addHash s ident = s ++ "#" ++ ident
+addHash s ident = takeFileName s ++ "#" ++ ident
removeEPUBAttrs :: [(String, String)] -> [(String, String)]
removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 5a93e0d5b..85e9a0743 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -50,13 +50,14 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
Ext_native_divs, Ext_native_spans))
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
+import qualified Data.Map as M
import Data.Maybe ( fromMaybe, isJust)
import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf )
import Data.Char ( isDigit )
import Control.Monad ( liftM, guard, when, mzero, void, unless )
import Control.Arrow ((***))
-import Control.Applicative ( (<$>), (<$), (<*), (*>), (<|>))
-import Data.Monoid (mconcat, Monoid, mempty, (<>), First (..))
+import Control.Applicative ( (<|>) )
+import Data.Monoid (First (..))
import Text.Printf (printf)
import Debug.Trace (trace)
import Text.TeXMath (readMathML, writeTeX)
@@ -64,7 +65,8 @@ import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
import Network.URI (isURI)
import Text.Pandoc.Error
-
+import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Parsec.Error
@@ -74,8 +76,9 @@ readHtml :: ReaderOptions -- ^ Reader options
-> Either PandocError Pandoc
readHtml opts inp =
mapLeft (ParseFailure . getError) . flip runReader def $
- runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing)
- "source" tags
+ runParserT parseDoc
+ (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty)
+ "source" tags
where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
@@ -100,7 +103,9 @@ data HTMLState =
HTMLState
{ parserState :: ParserState,
noteTable :: [(String, Blocks)],
- baseHref :: Maybe String
+ baseHref :: Maybe String,
+ identifiers :: [String],
+ headerMap :: M.Map Inlines String
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -252,6 +257,22 @@ pListItem nonItem = do
let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
(liDiv <>) <$> pInTags "li" block <* skipMany nonItem
+parseListStyleType :: String -> ListNumberStyle
+parseListStyleType "lower-roman" = LowerRoman
+parseListStyleType "upper-roman" = UpperRoman
+parseListStyleType "lower-alpha" = LowerAlpha
+parseListStyleType "upper-alpha" = UpperAlpha
+parseListStyleType "decimal" = Decimal
+parseListStyleType _ = DefaultStyle
+
+parseTypeAttr :: String -> ListNumberStyle
+parseTypeAttr "i" = LowerRoman
+parseTypeAttr "I" = UpperRoman
+parseTypeAttr "a" = LowerAlpha
+parseTypeAttr "A" = UpperAlpha
+parseTypeAttr "1" = Decimal
+parseTypeAttr _ = DefaultStyle
+
pOrderedList :: TagParser Blocks
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
@@ -261,23 +282,19 @@ pOrderedList = try $ do
sta' = if all isDigit sta
then read sta
else 1
- sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ ->
- case lookup "type" attribs of
- Just "1" -> Decimal
- Just "I" -> UpperRoman
- Just "i" -> LowerRoman
- Just "A" -> UpperAlpha
- Just "a" -> LowerAlpha
- _ -> DefaultStyle
+
+ pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"]
+
+ typeAttr = fromMaybe "" $ lookup "type" attribs
+ classAttr = fromMaybe "" $ lookup "class" attribs
+ styleAttr = fromMaybe "" $ lookup "style" attribs
+ listStyle = fromMaybe "" $ pickListStyle styleAttr
+
+ sty' = foldOrElse DefaultStyle
+ [ parseTypeAttr typeAttr
+ , parseListStyleType classAttr
+ , parseListStyleType listStyle
+ ]
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (t ~== TagClose "ol"))
@@ -330,9 +347,16 @@ pRawTag = do
pDiv :: TagParser Blocks
pDiv = try $ do
guardEnabled Ext_native_divs
- TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
- contents <- pInTags "div" block
- return $ B.divWith (mkAttr attr) contents
+ let isDivLike "div" = True
+ isDivLike "section" = True
+ isDivLike _ = False
+ TagOpen tag attr <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
+ contents <- pInTags tag block
+ let (ident, classes, kvs) = mkAttr attr
+ let classes' = if tag == "section"
+ then "section":classes
+ else classes
+ return $ B.divWith (ident, classes', kvs) contents
pRawHtmlBlock :: TagParser Blocks
pRawHtmlBlock = do
@@ -385,9 +409,10 @@ pHeader = try $ do
let ident = fromMaybe "" $ lookup "id" attr
let classes = maybe [] words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
+ attr' <- registerHeader (ident, classes, keyvals) contents
return $ if bodyTitle
then mempty -- skip a representation of the title in the body
- else B.headerWith (ident, classes, keyvals) level contents
+ else B.headerWith attr' level contents
pHrule :: TagParser Blocks
pHrule = do
@@ -587,7 +612,7 @@ pLink = try $ do
let uid = fromAttrib "id" tag
let cls = words $ fromAttrib "class" tag
lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
- return $ B.linkWith (escapeURI url) title (uid, cls, []) lab
+ return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
pImage :: TagParser Inlines
pImage = do
@@ -605,7 +630,7 @@ pImage = do
"" -> []
v -> [(k, v)]
let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"]
- return $ B.imageWith (escapeURI url) title (uid, cls, kvs) (B.text alt)
+ return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
pCode :: TagParser Inlines
pCode = try $ do
@@ -618,12 +643,11 @@ pSpan = try $ do
guardEnabled Ext_native_spans
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
contents <- pInTags "span" inline
- let attr' = mkAttr attr
- return $ case attr' of
- ("",[],[("style",s)])
- | filter (`notElem` " \t;") s == "font-variant:small-caps" ->
- B.smallcaps contents
- _ -> B.spanWith (mkAttr attr) contents
+ let isSmallCaps = fontVariant == "small-caps"
+ where styleAttr = fromMaybe "" $ lookup "style" attr
+ fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
+ let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
+ return $ tag contents
pRawHtmlInline :: TagParser Inlines
pRawHtmlInline = do
@@ -920,6 +944,7 @@ htmlTag f = try $ do
parseOptions{ optTagWarning = True } inp
guard $ f next
case next of
+ TagWarning _ -> fail "encountered TagWarning"
TagComment s
| "<!--" `isPrefixOf` inp -> do
count (length s + 4) anyChar
@@ -967,6 +992,14 @@ isSpace _ = False
-- Instances
+instance HasIdentifierList HTMLState where
+ extractIdentifierList = identifiers
+ updateIdentifierList f s = s{ identifiers = f (identifiers s) }
+
+instance HasHeaderMap HTMLState where
+ extractHeaderMap = headerMap
+ updateHeaderMap f s = s{ headerMap = f (headerMap s) }
+
-- This signature should be more general
-- MonadReader HTMLLocal m => HasQuoteContext st m
instance HasQuoteContext st (Reader HTMLLocal) where
@@ -976,9 +1009,6 @@ instance HasQuoteContext st (Reader HTMLLocal) where
instance HasReaderOptions HTMLState where
extractReaderOptions = extractReaderOptions . parserState
-instance Default HTMLState where
- def = HTMLState def [] Nothing
-
instance HasMeta HTMLState where
setMeta s b st = st {parserState = setMeta s b $ parserState st}
deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index aa2534afc..16f3d7ef3 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -16,8 +16,8 @@ module Text.Pandoc.Readers.Haddock
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Shared (trim, splitBy)
-import Data.Monoid
import Data.List (intersperse, stripPrefix)
import Data.Maybe (fromMaybe)
import Text.Pandoc.Definition
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index def429232..673deba14 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -46,8 +46,7 @@ import Data.Char ( chr, ord, isLetter, isAlphaNum )
import Control.Monad.Trans (lift)
import Control.Monad
import Text.Pandoc.Builder
-import Control.Applicative
-import Data.Monoid
+import Control.Applicative ((<|>), many, optional)
import Data.Maybe (fromMaybe, maybeToList)
import System.Environment (getEnv)
import System.FilePath (replaceExtension, (</>), takeExtension, addExtension)
@@ -171,17 +170,23 @@ quoted' f starter ender = do
try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs
doubleQuote :: LP Inlines
-doubleQuote =
- quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
- <|> quoted' doubleQuoted (string "“") (void $ char '”')
- -- the following is used by babel for localized quotes:
- <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
- <|> quoted' doubleQuoted (string "\"") (void $ char '"')
+doubleQuote = do
+ smart <- getOption readerSmart
+ if smart
+ then quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
+ <|> quoted' doubleQuoted (string "“") (void $ char '”')
+ -- the following is used by babel for localized quotes:
+ <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
+ <|> quoted' doubleQuoted (string "\"") (void $ char '"')
+ else str <$> many1 (oneOf "`'“”\"")
singleQuote :: LP Inlines
-singleQuote =
- quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
- <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
+singleQuote = do
+ smart <- getOption readerSmart
+ if smart
+ then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
+ <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
+ else str <$> many1 (oneOf "`\'‘’")
inline :: LP Inlines
inline = (mempty <$ comment)
@@ -235,7 +240,9 @@ blocks = mconcat <$> many block
getRawCommand :: String -> LP String
getRawCommand name' = do
- rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced)
+ rawargs <- withRaw (many (try (optional sp *> opt)) *>
+ option "" (try (optional sp *> dimenarg)) *>
+ many braced)
return $ '\\' : name' ++ snd rawargs
lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
@@ -528,6 +535,7 @@ inlineCommands = M.fromList $
mkImage options src)
, ("enquote", enquote)
, ("cite", citation "cite" AuthorInText False)
+ , ("Cite", citation "cite" AuthorInText False)
, ("citep", citation "citep" NormalCitation False)
, ("citep*", citation "citep*" NormalCitation False)
, ("citeal", citation "citeal" NormalCitation False)
@@ -597,8 +605,8 @@ mkImage options src = do
case takeExtension src of
"" -> do
defaultExt <- getOption readerDefaultImageExtension
- return $ imageWith (addExtension src defaultExt) "" attr alt
- _ -> return $ imageWith src "" attr alt
+ return $ imageWith attr (addExtension src defaultExt) "" alt
+ _ -> return $ imageWith attr src "" alt
inNote :: Inlines -> Inlines
inNote ils =
@@ -824,10 +832,10 @@ tok :: LP Inlines
tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
opt :: LP Inlines
-opt = bracketed inline <* optional sp
+opt = bracketed inline
skipopts :: LP ()
-skipopts = skipMany opt
+skipopts = skipMany (opt *> optional sp)
inlineText :: LP Inlines
inlineText = str <$> many1 inlineChar
@@ -893,7 +901,7 @@ verbatimEnv' = fmap snd <$>
string "\\begin"
name <- braced'
guard $ name `elem` ["verbatim", "Verbatim", "lstlisting",
- "minted", "alltt"]
+ "minted", "alltt", "comment"]
manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}")
blob' :: IncludeParser
@@ -1030,6 +1038,8 @@ environments = M.fromList
, ("figure", env "figure" $
resetCaption *> skipopts *> blocks >>= addImageCaption)
, ("center", env "center" blocks)
+ , ("longtable", env "longtable" $
+ resetCaption *> skipopts *> blocks >>= addTableCaption)
, ("table", env "table" $
resetCaption *> skipopts *> blocks >>= addTableCaption)
, ("tabular*", env "tabular" $ simpTable True)
@@ -1044,6 +1054,7 @@ environments = M.fromList
, ("code", guardEnabled Ext_literate_haskell *>
(codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
verbEnv "code"))
+ , ("comment", mempty <$ verbEnv "comment")
, ("verbatim", codeBlock <$> verbEnv "verbatim")
, ("Verbatim", do options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index ebca7e83d..fd16a5f75 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -39,6 +39,7 @@ import Data.Ord ( comparing )
import Data.Char ( isSpace, isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
+import Text.Pandoc.Emoji (emojis)
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Yaml as Yaml
@@ -47,7 +48,7 @@ import qualified Data.HashMap.Strict as H
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.Vector as V
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import Text.Pandoc.Options
import Text.Pandoc.Shared
import Text.Pandoc.XML (fromEntities)
@@ -55,8 +56,6 @@ import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
-import Data.Monoid (mconcat, mempty)
-import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>))
import Control.Monad
import System.FilePath (takeExtension, addExtension)
import Text.HTML.TagSoup
@@ -64,6 +63,7 @@ import Text.HTML.TagSoup.Match (tagOpen)
import qualified Data.Set as Set
import Text.Printf (printf)
import Debug.Trace (trace)
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Error
type MarkdownParser = Parser [Char] ParserState
@@ -328,23 +328,22 @@ stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
mmdTitleBlock :: MarkdownParser ()
mmdTitleBlock = try $ do
guardEnabled Ext_mmd_title_block
- kvPairs <- many1 kvPair
+ firstPair <- kvPair False
+ restPairs <- many (kvPair True)
+ let kvPairs = firstPair : restPairs
blanklines
updateState $ \st -> st{ stateMeta' = stateMeta' st <>
return (Meta $ M.fromList kvPairs) }
-kvPair :: MarkdownParser (String, MetaValue)
-kvPair = try $ do
+kvPair :: Bool -> MarkdownParser (String, MetaValue)
+kvPair allowEmpty = try $ do
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
- skipMany1 spaceNoNewline
- val <- manyTill anyChar
+ val <- trim <$> manyTill anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
- guard $ not . null . trim $ val
+ guard $ allowEmpty || not (null val)
let key' = concat $ words $ map toLower key
- let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val
+ let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val
return (key',val')
- where
- spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r'))
parseMarkdown :: MarkdownParser Pandoc
parseMarkdown = do
@@ -506,9 +505,15 @@ block = do
header :: MarkdownParser (F Blocks)
header = setextHeader <|> atxHeader <?> "header"
+atxChar :: MarkdownParser Char
+atxChar = do
+ exts <- getOption readerExtensions
+ return $ if Set.member Ext_literate_haskell exts
+ then '=' else '#'
+
atxHeader :: MarkdownParser (F Blocks)
atxHeader = try $ do
- level <- many1 (char '#') >>= return . length
+ level <- atxChar >>= many1 . char >>= return . length
notFollowedBy $ guardEnabled Ext_fancy_lists >>
(char '.' <|> char ')') -- this would be a list
skipSpaces
@@ -524,7 +529,7 @@ atxClosing :: MarkdownParser Attr
atxClosing = try $ do
attr' <- option nullAttr
(guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
- skipMany (char '#')
+ skipMany . char =<< atxChar
skipSpaces
attr <- option attr'
(guardEnabled Ext_header_attributes >> attributes)
@@ -636,7 +641,11 @@ keyValAttr = try $ do
val <- enclosed (char '"') (char '"') litChar
<|> enclosed (char '\'') (char '\'') litChar
<|> many (escapedChar' <|> noneOf " \t\n\r}")
- return $ \(id',cs,kvs) -> (id',cs,kvs ++ [(key,val)])
+ return $ \(id',cs,kvs) ->
+ case key of
+ "id" -> (val,cs,kvs)
+ "class" -> (id',cs ++ words val,kvs)
+ _ -> (id',cs,kvs ++ [(key,val)])
specialAttr :: MarkdownParser (Attr -> Attr)
specialAttr = do
@@ -1316,7 +1325,7 @@ removeOneLeadingSpace xs =
gridTableFooter :: MarkdownParser [Char]
gridTableFooter = blanklines
-pipeBreak :: MarkdownParser [Alignment]
+pipeBreak :: MarkdownParser ([Alignment], [Int])
pipeBreak = try $ do
nonindentSpaces
openPipe <- (True <$ char '|') <|> return False
@@ -1326,14 +1335,22 @@ pipeBreak = try $ do
guard $ not (null rest && not openPipe)
optional (char '|')
blankline
- return (first:rest)
+ return $ unzip (first:rest)
pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
pipeTable = try $ do
- (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak
- lines' <- sequence <$> many pipeTableRow
- let widths = replicate (length aligns) 0.0
- return $ (aligns, widths, heads, lines')
+ nonindentSpaces
+ lookAhead nonspaceChar
+ (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
+ (lines', rawRows) <- unzip <$> many (withRaw pipeTableRow)
+ let maxlength = maximum $ map length rawRows
+ numColumns <- getOption readerColumns
+ let widths = if maxlength > numColumns
+ then map (\len ->
+ fromIntegral (len + 1) / fromIntegral numColumns)
+ seplengths
+ else replicate (length aligns) 0.0
+ return $ (aligns, widths, heads, sequence lines')
sepPipe :: MarkdownParser ()
sepPipe = try $ do
@@ -1343,7 +1360,7 @@ sepPipe = try $ do
-- parse a row, also returning probable alignments for org-table cells
pipeTableRow :: MarkdownParser (F [Blocks])
pipeTableRow = do
- nonindentSpaces
+ skipMany spaceChar
openPipe <- (True <$ char '|') <|> return False
let cell = mconcat <$>
many (notFollowedBy (blankline <|> char '|') >> inline)
@@ -1362,19 +1379,20 @@ pipeTableRow = do
ils' | B.isNull ils' -> mempty
| otherwise -> B.plain $ ils') cells'
-pipeTableHeaderPart :: Parser [Char] st Alignment
+pipeTableHeaderPart :: Parser [Char] st (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
- many1 (char '-')
+ pipe <- many1 (char '-')
right <- optionMaybe (char ':')
skipMany spaceChar
+ let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right
return $
- case (left,right) of
- (Nothing,Nothing) -> AlignDefault
- (Just _,Nothing) -> AlignLeft
- (Nothing,Just _) -> AlignRight
- (Just _,Just _) -> AlignCenter
+ ((case (left,right) of
+ (Nothing,Nothing) -> AlignDefault
+ (Just _,Nothing) -> AlignLeft
+ (Nothing,Just _) -> AlignRight
+ (Just _,Just _) -> AlignCenter), len)
-- Succeed only if current line contains a pipe.
scanForPipe :: Parser [Char] st ()
@@ -1453,6 +1471,7 @@ inline = choice [ whitespace
, exampleRef
, smart
, return . B.singleton <$> charRef
+ , emoji
, symbol
, ltSign
] <?> "inline"
@@ -1652,7 +1671,7 @@ endline = try $ do
notFollowedBy (inList >> listStart)
guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
- guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
+ guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header
guardDisabled Ext_backtick_code_blocks <|>
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
notFollowedByHtmlCloser
@@ -1705,16 +1724,16 @@ link = try $ do
setState $ st{ stateAllowLinks = True }
regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
-regLink :: (String -> String -> Attr -> Inlines -> Inlines)
+regLink :: (Attr -> String -> String -> Inlines -> Inlines)
-> F Inlines -> MarkdownParser (F Inlines)
regLink constructor lab = try $ do
(src, tit) <- source
attr <- option nullAttr $
guardEnabled Ext_common_link_attributes >> attributes
- return $ constructor src tit attr <$> lab
+ return $ constructor attr src tit <$> lab
-- a link like [this][ref] or [this][] or [this]
-referenceLink :: (String -> String -> Attr -> Inlines -> Inlines)
+referenceLink :: (Attr -> String -> String -> Inlines -> Inlines)
-> (F Inlines, String) -> MarkdownParser (F Inlines)
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
@@ -1743,10 +1762,10 @@ referenceLink constructor (lab, raw) = do
then do
headerKeys <- asksF stateHeaderKeys
case M.lookup key headerKeys of
- Just ((src, tit), _) -> constructor src tit nullAttr <$> lab
+ Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
Nothing -> makeFallback
else makeFallback
- Just ((src,tit), attr) -> constructor src tit attr <$> lab
+ Just ((src,tit), attr) -> constructor attr src tit <$> lab
dropBrackets :: String -> String
dropBrackets = reverse . dropRB . reverse . dropLB
@@ -1780,9 +1799,9 @@ image = try $ do
char '!'
(lab,raw) <- reference
defaultExt <- getOption readerDefaultImageExtension
- let constructor src = case takeExtension src of
- "" -> B.imageWith (addExtension src defaultExt)
- _ -> B.imageWith src
+ let constructor attr' src = case takeExtension src of
+ "" -> B.imageWith attr' (addExtension src defaultExt)
+ _ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
note :: MarkdownParser (F Inlines)
@@ -1886,6 +1905,21 @@ rawHtmlInline = do
else not . isTextTag
return $ return $ B.rawInline "html" result
+-- Emoji
+
+emojiChars :: [Char]
+emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
+
+emoji :: MarkdownParser (F Inlines)
+emoji = try $ do
+ guardEnabled Ext_emoji
+ char ':'
+ emojikey <- many1 (oneOf emojiChars)
+ char ':'
+ case M.lookup emojikey emojis of
+ Just s -> return (return (B.str s))
+ Nothing -> mzero
+
-- Citations
cite :: MarkdownParser (F Inlines)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 6f7da2586..24b3f5c7e 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -38,15 +38,14 @@ module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Options
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
import Text.Pandoc.XML ( fromEntities )
import Text.Pandoc.Parsing hiding ( nested )
import Text.Pandoc.Walk ( walk )
import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim )
-import Data.Monoid (mconcat, mempty)
-import Control.Applicative ((<$>), (<*), (*>), (<$))
import Control.Monad
import Data.List (intersperse, intercalate, isPrefixOf )
import Text.HTML.TagSoup
@@ -252,8 +251,8 @@ parseAttr = try $ do
skipMany spaceChar
k <- many1 letter
char '='
- char '"'
- v <- many1Till (satisfy (/='\n')) (char '"')
+ v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
+ <|> many1 nonspaceChar
return (k,v)
tableStart :: MWParser ()
@@ -588,7 +587,7 @@ image = try $ do
let attr = ("", [], kvs)
caption <- (B.str fname <$ sym "]]")
<|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
- return $ B.imageWith fname ("fig:" ++ stringify caption) attr caption
+ return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption
imageOption :: MWParser String
imageOption = try $ char '|' *> opt
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 94ea9e3a2..4ec164e19 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -34,7 +34,6 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
import Text.Pandoc.Error
-import Control.Applicative
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 19ddba36b..b2e5f2e67 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -9,9 +9,7 @@ import Text.Pandoc.Readers.Markdown (readMarkdown)
import Text.XML.Light
import Text.Pandoc.Compat.TagSoupEntity (lookupEntity)
import Data.Generics
-import Data.Monoid
import Control.Monad.State
-import Control.Applicative ((<$>), (<$))
import Data.Default
import Text.Pandoc.Compat.Except
import Text.Pandoc.Error
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 1c8ec51bc..a925c1d84 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -36,7 +36,6 @@ import Codec.Archive.Zip
import qualified Text.XML.Light as XML
import qualified Data.ByteString.Lazy as B
-import Data.Monoid ( mempty )
import Text.Pandoc.Definition
import Text.Pandoc.Error
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
index 310ca028e..30f96c557 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -44,9 +44,9 @@ import qualified Control.Category as Cat
import Control.Arrow
import Control.Monad
-import Data.Monoid
import Data.Foldable
+import Text.Pandoc.Compat.Monoid
import Text.Pandoc.Readers.Odt.Arrows.Utils
import Text.Pandoc.Readers.Odt.Generic.Fallible
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
index 9710973b3..8c9ee0539 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
@@ -42,12 +42,11 @@ module Text.Pandoc.Readers.Odt.Arrows.Utils where
import Control.Arrow
import Control.Monad ( join, MonadPlus(..) )
-import Data.Monoid
import qualified Data.Foldable as F
import Text.Pandoc.Readers.Odt.Generic.Fallible
import Text.Pandoc.Readers.Odt.Generic.Utils
-
+import Text.Pandoc.Compat.Monoid
and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c')
and2 = (&&&)
@@ -130,24 +129,23 @@ joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z
joinOn = arr.uncurry
-- | Applies a function to the uncurried result-pair of an arrow-application.
--- (The §-symbol was chosen to evoke an association with pairs through the
--- shared first character)
-(>>§) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d
-a >>§ f = a >>^ uncurry f
+-- (The %-symbol was chosen to evoke an association with pairs.)
+(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d
+a >>% f = a >>^ uncurry f
--- | '(>>§)' with its arguments flipped
-(§<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d
-(§<<) = flip (>>§)
+-- | '(>>%)' with its arguments flipped
+(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d
+(%<<) = flip (>>%)
-- | Precomposition with an uncurried function
-(§>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r
-f §>> a = uncurry f ^>> a
+(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r
+f %>> a = uncurry f ^>> a
-- | Precomposition with an uncurried function (right to left variant)
-(<<§) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r
-(<<§) = flip (§>>)
+(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r
+(<<%) = flip (%>>)
-infixr 2 >>§, §<<, §>>, <<§
+infixr 2 >>%, %<<, %>>, <<%
-- | Duplicate a value and apply an arrow to the second instance.
@@ -272,7 +270,7 @@ newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c }
instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where
mempty = CoEval $ returnV mempty
- (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>§ mappend
+ (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend
-- | Evaluates a collection of arrows in a parallel fashion.
--
@@ -434,29 +432,29 @@ a ^>>?^? f = a ^>> Left ^|||^ f
a >>?! f = a >>> right f
---
-(>>?§) :: (ArrowChoice a, Monoid f)
+(>>?%) :: (ArrowChoice a, Monoid f)
=> FallibleArrow a x f (b,b')
-> (b -> b' -> c)
-> FallibleArrow a x f c
-a >>?§ f = a >>?^ (uncurry f)
+a >>?% f = a >>?^ (uncurry f)
---
-(^>>?§) :: (ArrowChoice a, Monoid f)
+(^>>?%) :: (ArrowChoice a, Monoid f)
=> (x -> Either f (b,b'))
-> (b -> b' -> c)
-> FallibleArrow a x f c
-a ^>>?§ f = arr a >>?^ (uncurry f)
+a ^>>?% f = arr a >>?^ (uncurry f)
---
-(>>?§?) :: (ArrowChoice a, Monoid f)
+(>>?%?) :: (ArrowChoice a, Monoid f)
=> FallibleArrow a x f (b,b')
-> (b -> b' -> (Either f c))
-> FallibleArrow a x f c
-a >>?§? f = a >>?^? (uncurry f)
+a >>?%? f = a >>?^? (uncurry f)
infixr 1 >>?, >>?^, >>?^?
infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?!
-infixr 1 >>?§, ^>>?§, >>?§?
+infixr 1 >>?%, ^>>?%, >>?%?
-- | Keep values that are Right, replace Left values by a constant.
ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 9bb585b8e..1f1c57646 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -44,7 +44,6 @@ import Control.Applicative hiding ( liftA, liftA2, liftA3 )
import qualified Data.Map as M
import Data.List ( find )
-import Data.Monoid
import Data.Maybe
import qualified Text.XML.Light as XML
@@ -146,7 +145,7 @@ type OdtReaderSafe a b = XMLReaderSafe ReaderState a b
fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b
fromStyles f = keepingTheValue
(getExtraState >>^ styleSet)
- >>§ f
+ >>% f
--
getStyleByName :: OdtReader StyleName Style
@@ -163,7 +162,7 @@ lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice
--
switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle)
switchCurrentListStyle = keepingTheValue getExtraState
- >>§ swapCurrentListStyle
+ >>% swapCurrentListStyle
>>> first setExtraState
>>^ snd
@@ -171,7 +170,7 @@ switchCurrentListStyle = keepingTheValue getExtraState
pushStyle :: OdtReaderSafe Style Style
pushStyle = keepingTheValue (
( keepingTheValue getExtraState
- >>§ pushStyle'
+ >>% pushStyle'
)
>>> setExtraState
)
@@ -471,7 +470,7 @@ matchingElement :: (Monoid e)
matchingElement ns name reader = (ns, name, asResultAccumulator reader)
where
asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m)
- asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>§ (<>)
+ asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>% (<>)
--
matchChildContent' :: (Monoid result)
@@ -498,14 +497,14 @@ matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback
--
-- | Open Document allows several consecutive spaces if they are marked up
read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines
-read_plain_text = fst ^&&& read_plain_text' >>§ recover
+read_plain_text = fst ^&&& read_plain_text' >>% recover
where
-- fallible version
read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines
read_plain_text' = ( second ( arr extractText )
>>^ spreadChoice >>?! second text
)
- >>?§ (<>)
+ >>?% (<>)
--
extractText :: XML.Content -> Fallible String
extractText (XML.Text cData) = succeedWith (XML.cdData cData)
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
index 5922164c9..d0fdc228f 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
@@ -41,9 +41,8 @@ module Text.Pandoc.Readers.Odt.Generic.Fallible where
import Control.Applicative
import Control.Monad
-
+import Text.Pandoc.Compat.Monoid ((<>))
import qualified Data.Foldable as F
-import Data.Monoid
-- | Default for now. Will probably become a class at some point.
type Failure = ()
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index ec7e0ea5e..8c03d1a09 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -123,7 +123,6 @@ import Control.Arrow
import qualified Data.Map as M
import qualified Data.Foldable as F
import Data.Default
-import Data.Monoid ( Monoid )
import Data.Maybe
import qualified Text.XML.Light as XML
@@ -332,7 +331,7 @@ convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA
where
setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v
modifyWithA = keepingTheValue (moreState ^>> a)
- >>^ spreadChoice >>?§ flip replaceExtraState
+ >>^ spreadChoice >>?% flip replaceExtraState
-- | First sets the extra state to the new value. Then produces a new
-- extra state with a converter that uses the new state. Finally, the
@@ -414,14 +413,14 @@ elemName :: (NameSpaceID nsID)
-> XMLConverter nsID extraState x XML.QName
elemName nsID name = lookupNSiri nsID
&&& lookupNSprefix nsID
- >>§ XML.QName name
+ >>% XML.QName name
-- | Checks if a given element matches both a specified namespace id
-- and a specified element name
elemNameIs :: (NameSpaceID nsID)
=> nsID -> ElementName
-> XMLConverter nsID extraState XML.Element Bool
-elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>§ hasThatName
+elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName
where hasThatName e iri = let elName = XML.elName e
in XML.qName elName == name
&& XML.qURI elName == iri
@@ -462,8 +461,8 @@ currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>>
(XML.qName >>^ (&&).(== name) )
^&&&^
(XML.qIRI >>^ (==) )
- ) >>§ (.)
- ) &&& lookupNSiri nsID >>§ ($)
+ ) >>% (.)
+ ) &&& lookupNSiri nsID >>% ($)
-}
--
@@ -488,7 +487,7 @@ findChildren :: (NameSpaceID nsID)
-> XMLConverter nsID extraState x [XML.Element]
findChildren nsID name = elemName nsID name
&&& getCurrentElement
- >>§ XML.findChildren
+ >>% XML.findChildren
--
filterChildren :: (XML.Element -> Bool)
@@ -509,7 +508,7 @@ findChild' :: (NameSpaceID nsID)
-> XMLConverter nsID extraState x (Maybe XML.Element)
findChild' nsID name = elemName nsID name
&&& getCurrentElement
- >>§ XML.findChild
+ >>% XML.findChild
--
findChild :: (NameSpaceID nsID)
@@ -597,7 +596,7 @@ isThatTheAttrValue :: (NameSpaceID nsID)
isThatTheAttrValue nsID attrName
= keepingTheValue
(findAttr nsID attrName)
- >>§ right.(==)
+ >>% right.(==)
-- | Lookup value in a dictionary, fail if no attribute found or value
-- not in dictionary
@@ -670,7 +669,7 @@ findAttr' :: (NameSpaceID nsID)
-> XMLConverter nsID extraState x (Maybe AttributeValue)
findAttr' nsID attrName = elemName nsID attrName
&&& getCurrentElement
- >>§ XML.findAttr
+ >>% XML.findAttr
-- | Return value as string or fail
findAttr :: (NameSpaceID nsID)
@@ -788,7 +787,7 @@ prepareIteration :: (NameSpaceID nsID)
-> XMLConverter nsID extraState b [(b, XML.Element)]
prepareIteration nsID name = keepingTheValue
(findChildren nsID name)
- >>§ distributeValue
+ >>% distributeValue
-- | Applies a converter to every child element of a specific type.
-- Collects results in a 'Monoid'.
@@ -878,9 +877,9 @@ makeMatcherE nsID name c = ( second (
elemNameIs nsID name
>>^ bool Nothing (Just tryC)
)
- >>§ (<|>)
+ >>% (<|>)
) &&&^ snd
- where tryC = (fst ^&&& executeThere c >>§ recover) &&&^ snd
+ where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd
-- Helper function: The @c@ is actually a converter that is to be selected by
-- matching XML content to the first two parameters.
@@ -900,14 +899,14 @@ makeMatcherC nsID name c = ( second ( contentToElem
>>^ bool Nothing (Just cWithJump)
)
)
- >>§ (<|>)
+ >>% (<|>)
) &&&^ snd
where cWithJump = ( fst
^&&& ( second contentToElem
>>> spreadChoice
^>>? executeThere c
)
- >>§ recover)
+ >>% recover)
&&&^ snd
contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element
contentToElem = arr $ \e -> case e of
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
index e28056814..deb009998 100644
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -107,4 +107,4 @@ nsIDs = [
("http://www.w3.org/1999/xhtml" , NsXHtml ),
("http://www.w3.org/2002/xforms" , NsXForms ),
("http://www.w3.org/1999/xlink" , NsXLink )
- ] \ No newline at end of file
+ ]
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 1cf87cc59..96cfed0b3 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -78,7 +78,6 @@ import qualified Data.Map as M
import qualified Data.Set as S
import Data.List ( unfoldr )
import Data.Default
-import Data.Monoid
import Data.Maybe
import qualified Text.XML.Light as XML
@@ -175,7 +174,7 @@ findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch)
findPitch = ( lookupAttr NsStyle "font-pitch"
`ifFailedDo` findAttr NsStyle "font-name"
>>? ( keepingTheValue getExtraState
- >>§ M.lookup
+ >>% M.lookup
>>^ maybeToChoice
)
)
@@ -362,11 +361,11 @@ instance Read XslUnit where
estimateInMillimeter :: Int -> XslUnit -> Int
estimateInMillimeter n XslUnitMM = n
estimateInMillimeter n XslUnitCM = n * 10
-estimateInMillimeter n XslUnitInch = n * 25 -- * 25.4
-estimateInMillimeter n XslUnitPoints = n `div` 3 -- * 1/72 * 25.4
-estimateInMillimeter n XslUnitPica = n * 4 -- * 12 * 1/72 * 25.4
-estimateInMillimeter n XslUnitPixel = n `div`3 -- * 1/72 * 25.4
-estimateInMillimeter n XslUnitEM = n * 7 -- * 16 * 1/72 * 25.4
+estimateInMillimeter n XslUnitInch = n * 25 -- \* 25.4
+estimateInMillimeter n XslUnitPoints = n `div` 3 -- \* 1/72 * 25.4
+estimateInMillimeter n XslUnitPica = n * 4 -- \* 12 * 1/72 * 25.4
+estimateInMillimeter n XslUnitPixel = n `div`3 -- \* 1/72 * 25.4
+estimateInMillimeter n XslUnitEM = n * 7 -- \* 16 * 1/72 * 25.4
----
@@ -385,7 +384,7 @@ getListLevelStyle level ListStyle{..} =
let (lower , exactHit , _) = M.splitLookup level levelStyles
in exactHit <|> fmap fst (M.maxView lower)
-- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1]
- -- ^ simpler, but in general less efficient
+ -- \^ simpler, but in general less efficient
data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType
, listItemPrefix :: Maybe String
@@ -448,7 +447,7 @@ readAllStyles :: StyleReader _x Styles
readAllStyles = ( readFontPitches
>>?! ( readAutomaticStyles
&&& readStyles ))
- >>?§? chooseMax
+ >>?%? chooseMax
-- all top elements are always on the same hierarchy level
--
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 980f63504..3be47cfd4 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -2,7 +2,7 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
{-
-Copyright (C) 2014-2015 Albert Krewinkel <tarleb@moltkeplatz.de>
+Copyright (C) 2014-2015 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -21,19 +21,20 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Org
- Copyright : Copyright (C) 2014 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2015 Albert Krewinkel
License : GNU GPL, version 2 or above
- Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de>
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Conversion of org-mode formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Org ( readOrg ) where
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
- , trimInlines )
+import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
+ trimInlines )
import Text.Pandoc.Definition
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
@@ -45,8 +46,6 @@ import Text.Pandoc.Shared (compactify', compactify'DL)
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-import Control.Applicative ( Applicative, pure
- , (<$>), (<$), (<*>), (<*), (*>) )
import Control.Arrow (first)
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
import Control.Monad.Reader (Reader, runReader, ask, asks, local)
@@ -55,7 +54,6 @@ import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust)
-import Data.Monoid (Monoid, mconcat, mempty, mappend)
import Network.HTTP (urlEncode)
import Text.Pandoc.Error
@@ -70,6 +68,14 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
+instance HasIdentifierList OrgParserState where
+ extractIdentifierList = orgStateIdentifiers
+ updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
+
+instance HasHeaderMap OrgParserState where
+ extractHeaderMap = orgStateHeaderMap
+ updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
+
parseOrg :: OrgParser Pandoc
parseOrg = do
blocks' <- parseBlocks
@@ -135,6 +141,9 @@ data OrgParserState = OrgParserState
, orgStateMeta :: Meta
, orgStateMeta' :: F Meta
, orgStateNotes' :: OrgNoteTable
+ , orgStateParserContext :: ParserContext
+ , orgStateIdentifiers :: [String]
+ , orgStateHeaderMap :: M.Map Inlines String
}
instance Default OrgParserLocal where
@@ -174,6 +183,9 @@ defaultOrgParserState = OrgParserState
, orgStateMeta = nullMeta
, orgStateMeta' = return nullMeta
, orgStateNotes' = []
+ , orgStateParserContext = NullState
+ , orgStateIdentifiers = []
+ , orgStateHeaderMap = M.empty
}
recordAnchorId :: String -> OrgParser ()
@@ -282,6 +294,23 @@ blanklines =
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
+-- | Succeeds when we're in list context.
+inList :: OrgParser ()
+inList = do
+ ctx <- orgStateParserContext <$> getState
+ guard (ctx == ListItemState)
+
+-- | Parse in different context
+withContext :: ParserContext -- ^ New parser context
+ -> OrgParser a -- ^ Parser to run in that context
+ -> OrgParser a
+withContext context parser = do
+ oldContext <- orgStateParserContext <$> getState
+ updateState $ \s -> s{ orgStateParserContext = context }
+ result <- parser
+ updateState $ \s -> s{ orgStateParserContext = oldContext }
+ return result
+
--
-- parsing blocks
--
@@ -397,7 +426,7 @@ verseBlock blkProp = try $ do
ignHeaders
content <- rawBlockContent blkProp
fmap B.para . mconcat . intersperse (pure B.linebreak)
- <$> mapM (parseFromString parseInlines) (lines content)
+ <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content)
exportsCode :: [(String, String)] -> Bool
exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
@@ -504,10 +533,16 @@ rundocBlockClass :: String
rundocBlockClass = rundocPrefix ++ "block"
blockOption :: OrgParser (String, String)
-blockOption = try $ (,) <$> orgArgKey <*> orgParamValue
+blockOption = try $ do
+ argKey <- orgArgKey
+ paramValue <- option "yes" orgParamValue
+ return (argKey, paramValue)
inlineBlockOption :: OrgParser (String, String)
-inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue
+inlineBlockOption = try $ do
+ argKey <- orgArgKey
+ paramValue <- option "yes" orgInlineParamValue
+ return (argKey, paramValue)
orgArgKey :: OrgParser String
orgArgKey = try $
@@ -516,11 +551,17 @@ orgArgKey = try $
orgParamValue :: OrgParser String
orgParamValue = try $
- skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces
+ skipSpaces
+ *> notFollowedBy (char ':' )
+ *> many1 (noneOf "\t\n\r ")
+ <* skipSpaces
orgInlineParamValue :: OrgParser String
orgInlineParamValue = try $
- skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces
+ skipSpaces
+ *> notFollowedBy (char ':')
+ *> many1 (noneOf "\t\n\r ]")
+ <* skipSpaces
orgArgWordChar :: OrgParser Char
orgArgWordChar = alphaNum <|> oneOf "-_"
@@ -668,7 +709,10 @@ header = try $ do
title <- manyTill inline (lookAhead headerEnd)
tags <- headerEnd
let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags
- return $ B.header level <$> inlns
+ st <- getState
+ let inlines = runF inlns st
+ attr <- registerHeader nullAttr inlines
+ return $ pure (B.headerWith attr level inlines)
where
tagToInlineF :: String -> F Inlines
tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
@@ -687,7 +731,7 @@ headerTags = try $
headerStart :: OrgParser Int
headerStart = try $
- (length <$> many1 (char '*')) <* many1 (char ' ')
+ (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
-- Don't use (or need) the reader wrapper here, we want hline to be
@@ -879,9 +923,13 @@ noteBlock = try $ do
paraOrPlain :: OrgParser (F Blocks)
paraOrPlain = try $ do
ils <- parseInlines
- nl <- option False (newline >> return True)
- try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >>
- return (B.para <$> ils))
+ nl <- option False (newline *> return True)
+ -- Read block as paragraph, except if we are in a list context and the block
+ -- is directly followed by a list item, in which case the block is read as
+ -- plain text.
+ try (guard nl
+ *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart))
+ *> return (B.para <$> ils))
<|> (return (B.plain <$> ils))
inlinesTillNewline :: OrgParser (F Inlines)
@@ -946,19 +994,22 @@ definitionListItem :: OrgParser Int
-> OrgParser (F (Inlines, [Blocks]))
definitionListItem parseMarkerGetLength = try $ do
markerLength <- parseMarkerGetLength
- term <- manyTill (noneOf "\n\r") (try $ string "::")
+ term <- manyTill (noneOf "\n\r") (try definitionMarker)
line1 <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
cont <- concat <$> many (listContinuation markerLength)
term' <- parseFromString parseInlines term
contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
return $ (,) <$> term' <*> fmap (:[]) contents'
+ where
+ definitionMarker =
+ spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline)
-- parse raw text for one list item, excluding start marker and continuations
listItem :: OrgParser Int
-> OrgParser (F Blocks)
-listItem start = try $ do
+listItem start = try . withContext ListItemState $ do
markerLength <- try start
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
@@ -1537,8 +1588,11 @@ smart :: OrgParser (F Inlines)
smart = do
getOption readerSmart >>= guard
doubleQuoted <|> singleQuoted <|>
- choice (map (return <$>) [orgApostrophe, dash, ellipses])
- where orgApostrophe =
+ choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
+ where
+ orgDash = dash <* updatePositions '-'
+ orgEllipses = ellipses <* updatePositions '.'
+ orgApostrophe =
(char '\'' <|> char '\8217') <* updateLastPreCharPos
<* updateLastForbiddenCharPos
*> return (B.str "\x2019")
@@ -1546,9 +1600,10 @@ smart = do
singleQuoted :: OrgParser (F Inlines)
singleQuoted = try $ do
singleQuoteStart
+ updatePositions '\''
withQuoteContext InSingleQuote $
fmap B.singleQuoted . trimInlinesF . mconcat <$>
- many1Till inline singleQuoteEnd
+ many1Till inline (singleQuoteEnd <* updatePositions '\'')
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
@@ -1556,6 +1611,7 @@ singleQuoted = try $ do
doubleQuoted :: OrgParser (F Inlines)
doubleQuoted = try $ do
doubleQuoteStart
+ updatePositions '"'
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
(withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
(fmap B.doubleQuoted . trimInlinesF $ contents))
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 4138d65ea..0e5bb2a87 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -44,13 +44,11 @@ import Data.List ( findIndex, intersperse, intercalate,
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import Text.Printf ( printf )
-import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>), pure)
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import qualified Text.Pandoc.Builder as B
-import Data.Monoid (mconcat, mempty)
import Data.Sequence (viewr, ViewR(..))
import Data.Char (toLower, isHexDigit, isSpace)
-
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Error
-- | Parse reStructuredText string and return Pandoc document.
@@ -614,20 +612,22 @@ directive' = do
return mempty
-- TODO:
--- - Silently ignores illegal fields
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: String -> [(String, String)] -> RSTParser Blocks
addNewRole roleString fields = do
(role, parentRole) <- parseFromString inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
- let (baseRole, baseFmt, baseAttr) =
- maybe (parentRole, Nothing, nullAttr) id $
- M.lookup parentRole customRoles
+ let getBaseRole (r, f, a) roles =
+ case M.lookup r roles of
+ Just (r', f', a') -> getBaseRole (r', f', a') roles
+ Nothing -> (r, f, a)
+ (baseRole, baseFmt, baseAttr) =
+ getBaseRole (parentRole, Nothing, nullAttr) customRoles
fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
annotate :: [String] -> [String]
annotate = maybe id (:) $
- if parentRole == "code"
+ if baseRole == "code"
then lookup "language" fields
else Nothing
attr = let (ident, classes, keyValues) = baseAttr
@@ -636,12 +636,12 @@ addNewRole roleString fields = do
-- warn about syntax we ignore
flip mapM_ fields $ \(key, _) -> case key of
- "language" -> when (parentRole /= "code") $ addWarning Nothing $
+ "language" -> when (baseRole /= "code") $ addWarning Nothing $
"ignoring :language: field because the parent of role :" ++
- role ++ ": is :" ++ parentRole ++ ": not :code:"
- "format" -> when (parentRole /= "raw") $ addWarning Nothing $
+ role ++ ": is :" ++ baseRole ++ ": not :code:"
+ "format" -> when (baseRole /= "raw") $ addWarning Nothing $
"ignoring :format: field because the parent of role :" ++
- role ++ ": is :" ++ parentRole ++ ": not :raw:"
+ role ++ ": is :" ++ baseRole ++ ": not :raw:"
_ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
": in definition of role :" ++ role ++ ": in"
when (parentRole == "raw" && countKeys "format" > 1) $
@@ -1138,7 +1138,7 @@ referenceLink = try $ do
Just val -> return val
-- if anonymous link, remove key so it won't be used again
when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
- return $ B.linkWith src tit attr label'
+ return $ B.linkWith attr src tit label'
autoURI :: RSTParser Inlines
autoURI = do
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 07b414431..fc2bdc069 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -38,8 +38,6 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Data.Monoid (Monoid, mconcat, mempty)
-import Control.Applicative ((<$>), (<*), (*>), (<$))
import Control.Monad
import Text.Printf (printf)
import Debug.Trace (trace)
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index ec1da896d..3db01faf4 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -64,9 +64,8 @@ import Text.HTML.TagSoup.Match
import Data.List ( intercalate )
import Data.Char ( digitToInt, isUpper)
import Control.Monad ( guard, liftM, when )
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Printf
-import Control.Applicative ((<$>), (*>), (<*), (<$))
-import Data.Monoid
import Debug.Trace (trace)
import Text.Pandoc.Error
@@ -81,11 +80,12 @@ readTextile opts s =
-- | Generate a Pandoc ADT from a textile document
parseTextile :: Parser [Char] ParserState Pandoc
parseTextile = do
- -- textile allows raw HTML and does smart punctuation by default
+ -- textile allows raw HTML and does smart punctuation by default,
+ -- but we do not enable smart punctuation unless it is explicitly
+ -- asked for, for better conversion to other light markup formats
oldOpts <- stateOptions `fmap` getState
updateState $ \state -> state{ stateOptions =
- oldOpts{ readerSmart = True
- , readerParseRaw = True
+ oldOpts{ readerParseRaw = True
, readerOldDashes = True
} }
many blankline
@@ -535,6 +535,7 @@ link = try $ do
image :: Parser [Char] ParserState Inlines
image = try $ do
char '!' >> notFollowedBy space
+ _ <- attributes -- ignore for now, until we have image attributes
src <- manyTill anyChar' (lookAhead $ oneOf "!(")
alt <- option "" (try $ (char '(' >> manyTill anyChar' (char ')')))
char '!'
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 304d6d4c5..58841f2ce 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -33,17 +33,15 @@ module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags
where
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines, Blocks, (<>)
- , trimInlines )
+import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines )
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL)
import Text.Pandoc.Parsing hiding (space, spaces, uri, macro)
-import Control.Applicative ((<$>), (<$), (<*>), (<*), (*>))
import Data.Char (toLower)
import Data.List (transpose, intersperse, intercalate)
import Data.Maybe (fromMaybe)
-import Data.Monoid (Monoid, mconcat, mempty, mappend)
--import Network.URI (isURI) -- Not sure whether to use this function
import Control.Monad (void, guard, when)
import Data.Default
@@ -53,7 +51,7 @@ import Text.Pandoc.Error
import Data.Time.LocalTime (getZonedTime)
import Text.Pandoc.Compat.Directory(getModificationTime)
import Data.Time.Format (formatTime)
-import Text.Pandoc.Compat.Locale (defaultTimeLocale)
+import Text.Pandoc.Compat.Time (defaultTimeLocale)
import System.IO.Error (catchIOError)
type T2T = ParserT String ParserState (Reader T2TMeta)