aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs119
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs116
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs64
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs256
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs106
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs26
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs37
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs124
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs12
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs335
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs219
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs24
-rw-r--r--src/Text/Pandoc/Readers/Native.hs44
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs48
-rw-r--r--src/Text/Pandoc/Readers/Odt.hs86
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/State.hs253
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs497
-rw-r--r--src/Text/Pandoc/Readers/Odt/Base.hs43
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs790
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs260
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs62
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs48
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Utils.hs171
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs1064
-rw-r--r--src/Text/Pandoc/Readers/Odt/Namespaces.hs110
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs737
-rw-r--r--src/Text/Pandoc/Readers/Org.hs257
-rw-r--r--src/Text/Pandoc/Readers/RST.hs150
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs527
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs4
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs41
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs8
32 files changed, 6020 insertions, 618 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
new file mode 100644
index 000000000..51a35c8ad
--- /dev/null
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -0,0 +1,119 @@
+{-
+Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.CommonMark
+ Copyright : Copyright (C) 2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of CommonMark-formatted plain text to 'Pandoc' document.
+
+CommonMark is a strongly specified variant of Markdown: http://commonmark.org.
+-}
+module Text.Pandoc.Readers.CommonMark (readCommonMark)
+where
+
+import CMark
+import Data.Text (unpack, pack)
+import Data.List (groupBy)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Error
+
+-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
+readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc
+readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack
+ where opts' = if readerSmart opts
+ then [optNormalize, optSmart]
+ else [optNormalize]
+
+nodeToPandoc :: Node -> Pandoc
+nodeToPandoc (Node _ DOCUMENT nodes) =
+ Pandoc nullMeta $ foldr addBlock [] nodes
+nodeToPandoc n = -- shouldn't happen
+ Pandoc nullMeta $ foldr addBlock [] [n]
+
+addBlocks :: [Node] -> [Block]
+addBlocks = foldr addBlock []
+
+addBlock :: Node -> [Block] -> [Block]
+addBlock (Node _ PARAGRAPH nodes) =
+ (Para (addInlines nodes) :)
+addBlock (Node _ HRULE _) =
+ (HorizontalRule :)
+addBlock (Node _ BLOCK_QUOTE nodes) =
+ (BlockQuote (addBlocks nodes) :)
+addBlock (Node _ (HTML t) _) =
+ (RawBlock (Format "html") (unpack t) :)
+addBlock (Node _ (CODE_BLOCK info t) _) =
+ (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
+addBlock (Node _ (HEADER lev) nodes) =
+ (Header lev ("",[],[]) (addInlines nodes) :)
+addBlock (Node _ (LIST listAttrs) nodes) =
+ (constructor (map (setTightness . addBlocks . children) nodes) :)
+ where constructor = case listType listAttrs of
+ BULLET_LIST -> BulletList
+ ORDERED_LIST -> OrderedList
+ (start, DefaultStyle, delim)
+ start = listStart listAttrs
+ setTightness = if listTight listAttrs
+ then map paraToPlain
+ else id
+ paraToPlain (Para xs) = Plain (xs)
+ paraToPlain x = x
+ delim = case listDelim listAttrs of
+ PERIOD_DELIM -> Period
+ PAREN_DELIM -> OneParen
+addBlock (Node _ ITEM _) = id -- handled in LIST
+addBlock _ = id
+
+children :: Node -> [Node]
+children (Node _ _ ns) = ns
+
+addInlines :: [Node] -> [Inline]
+addInlines = foldr addInline []
+
+addInline :: Node -> [Inline] -> [Inline]
+addInline (Node _ (TEXT t) _) = (map toinl clumps ++)
+ where raw = unpack t
+ clumps = groupBy samekind raw
+ samekind ' ' ' ' = True
+ samekind ' ' _ = False
+ samekind _ ' ' = False
+ samekind _ _ = True
+ toinl (' ':_) = Space
+ toinl xs = Str xs
+addInline (Node _ LINEBREAK _) = (LineBreak :)
+addInline (Node _ SOFTBREAK _) = (Space :)
+addInline (Node _ (INLINE_HTML t) _) =
+ (RawInline (Format "html") (unpack t) :)
+addInline (Node _ (CODE t) _) =
+ (Code ("",[],[]) (unpack t) :)
+addInline (Node _ EMPH nodes) =
+ (Emph (addInlines nodes) :)
+addInline (Node _ STRONG nodes) =
+ (Strong (addInlines nodes) :)
+addInline (Node _ (LINK url title) nodes) =
+ (Link (addInlines nodes) (unpack url, unpack title) :)
+addInline (Node _ (IMAGE url title) nodes) =
+ (Image (addInlines nodes) (unpack url, unpack title) :)
+addInline _ = id
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 59ff3e717..3cc2a4479 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -15,6 +15,9 @@ import Control.Applicative ((<$>))
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import Text.TeXMath (readMathML, writeTeX)
+import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.Compat.Except
+import Data.Default
{-
@@ -70,8 +73,8 @@ List of all DocBook tags, with [x] indicating implemented,
[x] book - A book
[x] bookinfo - Meta-information for a Book
[x] bridgehead - A free-floating heading
-[ ] callout - A “called out” description of a marked Area
-[ ] calloutlist - A list of Callouts
+[x] callout - A “called out” description of a marked Area
+[x] calloutlist - A list of Callouts
[x] caption - A caption
[x] caution - A note of caution
[x] chapter - A chapter, as of a book
@@ -81,7 +84,7 @@ List of all DocBook tags, with [x] indicating implemented,
[ ] citerefentry - A citation to a reference page
[ ] citetitle - The title of a cited work
[ ] city - The name of a city in an address
-[ ] classname - The name of a class, in the object-oriented programming sense
+[x] classname - The name of a class, in the object-oriented programming sense
[ ] classsynopsis - The syntax summary for a class definition
[ ] classsynopsisinfo - Information supplementing the contents of
a ClassSynopsis
@@ -164,21 +167,24 @@ List of all DocBook tags, with [x] indicating implemented,
[x] glossseealso - A cross-reference from one GlossEntry to another
[x] glossterm - A glossary term
[ ] graphic - A displayed graphical object (not an inline)
+ Note: in DocBook v5 `graphic` is discarded
[ ] graphicco - A graphic that contains callout areas
+ Note: in DocBook v5 `graphicco` is discarded
[ ] group - A group of elements in a CmdSynopsis
[ ] guibutton - The text on a button in a GUI
[ ] guiicon - Graphic and/or text appearing as a icon in a GUI
[ ] guilabel - The text of a label in a GUI
-[ ] guimenu - The name of a menu in a GUI
-[ ] guimenuitem - The name of a terminal menu item in a GUI
-[ ] guisubmenu - The name of a submenu in a GUI
+[x] guimenu - The name of a menu in a GUI
+[x] guimenuitem - The name of a terminal menu item in a GUI
+[x] guisubmenu - The name of a submenu in a GUI
[ ] hardware - A physical part of a computer system
[ ] highlights - A summary of the main points of the discussed component
[ ] holder - The name of the individual or organization that holds a copyright
[o] honorific - The title of a person
[ ] html:form - An HTML form
-[ ] imagedata - Pointer to external image data
-[ ] imageobject - A wrapper for image data and its associated meta-information
+[x] imagedata - Pointer to external image data (only `fileref` attribute
+ implemented but not `entityref` which would require parsing of the DTD)
+[x] imageobject - A wrapper for image data and its associated meta-information
[ ] imageobjectco - A wrapper for an image object with callouts
[x] important - An admonition set off from the text
[x] index - An index
@@ -206,10 +212,10 @@ List of all DocBook tags, with [x] indicating implemented,
other dingbat
[ ] itermset - A set of index terms in the meta-information of a document
[ ] jobtitle - The title of an individual in an organization
-[ ] keycap - The text printed on a key on a keyboard
+[x] keycap - The text printed on a key on a keyboard
[ ] keycode - The internal, frequently numeric, identifier for a key
on a keyboard
-[ ] keycombo - A combination of input actions
+[x] keycombo - A combination of input actions
[ ] keysym - The symbolic name of a key on a keyboard
[ ] keyword - One of a set of keywords describing the content of a document
[ ] keywordset - A set of keywords describing the content of a document
@@ -237,7 +243,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] mediaobject - A displayed media object (video, audio, image, etc.)
[ ] mediaobjectco - A media object that contains callouts
[x] member - An element of a simple list
-[ ] menuchoice - A selection or series of selections from a menu
+[x] menuchoice - A selection or series of selections from a menu
[ ] methodname - The name of a method
[ ] methodparam - Parameters to a method
[ ] methodsynopsis - A syntax summary for a method
@@ -471,7 +477,7 @@ List of all DocBook tags, with [x] indicating implemented,
[ ] token - A unit of information
[x] tr - A row in an HTML table
[ ] trademark - A trademark
-[ ] type - The classification of a value
+[x] type - The classification of a value
[x] ulink - A link that addresses its target by means of a URL
(Uniform Resource Locator)
[x] uri - A Uniform Resource Identifier
@@ -497,7 +503,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] ?asciidoc-br? - line break from asciidoc docbook output
-}
-type DB = State DBState
+type DB = ExceptT PandocError (State DBState)
data DBState = DBState{ dbSectionLevel :: Int
, dbQuoteType :: QuoteType
@@ -507,16 +513,18 @@ data DBState = DBState{ dbSectionLevel :: Int
, dbFigureTitle :: Inlines
} deriving Show
-readDocBook :: ReaderOptions -> String -> Pandoc
-readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs)
- where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp')
- DBState{ dbSectionLevel = 0
- , dbQuoteType = DoubleQuote
- , dbMeta = mempty
- , dbAcceptsMeta = False
- , dbBook = False
- , dbFigureTitle = mempty
- }
+instance Default DBState where
+ def = DBState{ dbSectionLevel = 0
+ , dbQuoteType = DoubleQuote
+ , dbMeta = mempty
+ , dbAcceptsMeta = False
+ , dbBook = False
+ , dbFigureTitle = mempty }
+
+
+readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc
+readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs
+ where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp'
inp' = handleInstructions inp
-- We treat <?asciidoc-br?> specially (issue #1236), converting it
@@ -603,7 +611,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags
"important","caution","note","tip","warning","qandadiv",
"question","answer","abstract","itemizedlist","orderedlist",
"variablelist","article","book","table","informaltable",
- "screen","programlisting","example"]
+ "screen","programlisting","example","calloutlist"]
isBlockElement _ = False
-- Trim leading and trailing newline characters
@@ -622,18 +630,24 @@ addToStart toadd bs =
-- function that is used by both mediaobject (in parseBlock)
-- and inlinemediaobject (in parseInline)
-getImage :: Element -> DB Inlines
-getImage e = do
+-- A DocBook mediaobject is a wrapper around a set of alternative presentations
+getMediaobject :: Element -> DB Inlines
+getMediaobject e = do
imageUrl <- case filterChild (named "imageobject") e of
Nothing -> return mempty
Just z -> case filterChild (named "imagedata") z of
Nothing -> return mempty
Just i -> return $ attrValue "fileref" i
- caption <- case filterChild
- (\x -> named "caption" x || named "textobject" x) e of
- Nothing -> gets dbFigureTitle
- Just z -> mconcat <$> (mapM parseInline $ elContent z)
- return $ image imageUrl "" caption
+ let getCaption el = case filterChild (\x -> named "caption" x
+ || named "textobject" x
+ || named "alt" x) el of
+ Nothing -> return mempty
+ Just z -> mconcat <$> (mapM parseInline $ elContent z)
+ figTitle <- gets dbFigureTitle
+ let (caption, title) = if isNull figTitle
+ then (getCaption e, "")
+ else (return figTitle, "fig:")
+ liftM (image imageUrl title) caption
getBlocks :: Element -> DB Blocks
getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
@@ -712,6 +726,7 @@ parseBlock (Elem e) =
"question" -> addToStart (strong (str "Q:") <> str " ") <$> getBlocks e
"answer" -> addToStart (strong (str "A:") <> str " ") <$> getBlocks e
"abstract" -> blockQuote <$> getBlocks e
+ "calloutlist" -> bulletList <$> callouts
"itemizedlist" -> bulletList <$> listitems
"orderedlist" -> do
let listStyle = case attrValue "numeration" e of
@@ -728,7 +743,7 @@ parseBlock (Elem e) =
<$> listitems
"variablelist" -> definitionList <$> deflistitems
"figure" -> getFigure e
- "mediaobject" -> para <$> getImage e
+ "mediaobject" -> para <$> getMediaobject e
"caption" -> return mempty
"info" -> metaBlock
"articleinfo" -> metaBlock
@@ -772,11 +787,6 @@ parseBlock (Elem e) =
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
$ trimNl $ strContentRecursive e
- strContentRecursive = strContent . (\e' -> e'{ elContent =
- map elementToStr $ elContent e' })
- elementToStr :: Content -> Content
- elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
- elementToStr x = x
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -785,6 +795,7 @@ parseBlock (Elem e) =
contents <- getBlocks e
return $ blockQuote (contents <> attrib)
listitems = mapM getBlocks $ filterChildren (named "listitem") e
+ callouts = mapM getBlocks $ filterChildren (named "callout") e
deflistitems = mapM parseVarListEntry $ filterChildren
(named "varlistentry") e
parseVarListEntry e' = do
@@ -866,18 +877,29 @@ parseBlock (Elem e) =
parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry
sect n = do isbook <- gets dbBook
let n' = if isbook || n == 0 then n + 1 else n
- headerText <- case filterChild (named "title") e of
+ headerText <- case filterChild (named "title") e `mplus`
+ (filterChild (named "info") e >>=
+ filterChild (named "title")) of
Just t -> getInlines t
Nothing -> return mempty
modify $ \st -> st{ dbSectionLevel = n }
b <- getBlocks e
+ let ident = attrValue "id" e
modify $ \st -> st{ dbSectionLevel = n - 1 }
- return $ header n' headerText <> b
+ return $ headerWith (ident,[],[]) n' headerText <> b
metaBlock = acceptingMetadata (getBlocks e) >> return mempty
getInlines :: Element -> DB Inlines
getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e')
+strContentRecursive :: Element -> String
+strContentRecursive = strContent .
+ (\e' -> e'{ elContent = map elementToStr $ elContent e' })
+
+elementToStr :: Content -> Content
+elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
+elementToStr x = x
+
parseInline :: Content -> DB Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
@@ -889,7 +911,7 @@ parseInline (Elem e) =
"inlineequation" -> equation math
"subscript" -> subscript <$> innerInlines
"superscript" -> superscript <$> innerInlines
- "inlinemediaobject" -> getImage e
+ "inlinemediaobject" -> getMediaobject e
"quote" -> do
qt <- gets dbQuoteType
let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote
@@ -901,6 +923,7 @@ parseInline (Elem e) =
else doubleQuoted contents
"simplelist" -> simpleList
"segmentedlist" -> segmentedList
+ "classname" -> codeWithLang
"code" -> codeWithLang
"filename" -> codeWithLang
"literal" -> codeWithLang
@@ -920,6 +943,10 @@ parseInline (Elem e) =
"constant" -> codeWithLang
"userinput" -> codeWithLang
"varargs" -> return $ code "(...)"
+ "keycap" -> return (str $ strContent e)
+ "keycombo" -> keycombo <$> (mapM parseInline $ elContent e)
+ "menuchoice" -> menuchoice <$> (mapM parseInline $
+ filter isGuiMenu $ elContent e)
"xref" -> return $ str "?" -- so at least you know something is there
"email" -> return $ link ("mailto:" ++ strContent e) ""
$ str $ strContent e
@@ -959,7 +986,7 @@ parseInline (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ strContent e
+ return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines
(filterChildren (named "member") e)
segmentedList = do
@@ -974,3 +1001,10 @@ parseInline (Elem e) =
then mempty
else strong tit <> linebreak
return $ linebreak <> tit' <> segs
+ keycombo = spanWith ("",["keycombo"],[]) .
+ mconcat . intersperse (str "+")
+ menuchoice = spanWith ("",["menuchoice"],[]) .
+ mconcat . intersperse (text " > ")
+ isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x ||
+ named "guimenuitem" x
+ isGuiMenu _ = False
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 4b5fbfdfc..67a97ae85 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -84,8 +84,7 @@ import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Reducible
import Text.Pandoc.Shared
import Text.Pandoc.MediaBag (insertMedia, MediaBag)
-import Data.Maybe (isJust)
-import Data.List (delete, stripPrefix, (\\), intersect, isPrefixOf)
+import Data.List (delete, (\\), intersect)
import Data.Monoid
import Text.TeXMath (writeTeX)
import Data.Default (Default)
@@ -97,14 +96,17 @@ import Control.Applicative ((<$>))
import Data.Sequence (ViewL(..), viewl)
import qualified Data.Sequence as Seq (null)
+import Text.Pandoc.Error
+import Text.Pandoc.Compat.Except
+
readDocx :: ReaderOptions
-> B.ByteString
- -> (Pandoc, MediaBag)
+ -> Either PandocError (Pandoc, MediaBag)
readDocx opts bytes =
case archiveToDocx (toArchive bytes) of
- Right docx -> (Pandoc meta blks, mediaBag) where
- (meta, blks, mediaBag) = (docxToOutput opts docx)
- Left _ -> error $ "couldn't parse docx file"
+ Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag))
+ <$> (docxToOutput opts docx)
+ Left _ -> Left (ParseFailure "couldn't parse docx file")
data DState = DState { docxAnchorMap :: M.Map String String
, docxMediaBag :: MediaBag
@@ -123,10 +125,10 @@ data DEnv = DEnv { docxOptions :: ReaderOptions
instance Default DEnv where
def = DEnv def False
-type DocxContext = ReaderT DEnv (State DState)
+type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState))
-evalDocxContext :: DocxContext a -> DEnv -> DState -> a
-evalDocxContext ctx env st = evalState (runReaderT ctx env) st
+evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a
+evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx
-- This is empty, but we put it in for future-proofing.
spansToKeep :: [String]
@@ -197,19 +199,9 @@ fixAuthors mv = mv
codeStyles :: [String]
codeStyles = ["VerbatimChar"]
-blockQuoteDivs :: [String]
-blockQuoteDivs = ["Quote", "BlockQuote", "BlockQuotation"]
-
codeDivs :: [String]
codeDivs = ["SourceCode"]
-
--- For the moment, we have English, Danish, German, and French. This
--- is fairly ad-hoc, and there might be a more systematic way to do
--- it, but it's better than nothing.
-headerPrefixes :: [String]
-headerPrefixes = ["Heading", "Overskrift", "berschrift", "Titre"]
-
runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s) = text s
runElemToInlines (LnBrk) = linebreak
@@ -288,7 +280,13 @@ runToInlines :: Run -> DocxContext Inlines
runToInlines (Run rs runElems)
| Just (s, _) <- rStyle rs
, s `elem` codeStyles =
- return $ code $ concatMap runElemToString runElems
+ let rPr = resolveDependentRunStyle rs
+ codeString = code $ concatMap runElemToString runElems
+ in
+ return $ case rVertAlign rPr of
+ Just SupScrpt -> superscript codeString
+ Just SubScrpt -> subscript codeString
+ _ -> codeString
| otherwise = do
let ils = concatReduce (map runElemToInlines runElems)
return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
@@ -408,7 +406,9 @@ singleParaToPlain blks
singleParaToPlain blks = blks
cellToBlocks :: Cell -> DocxContext Blocks
-cellToBlocks (Cell bps) = concatReduce <$> mapM bodyPartToBlocks bps
+cellToBlocks (Cell bps) = do
+ blks <- concatReduce <$> mapM bodyPartToBlocks bps
+ return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
rowToBlocksList :: Row -> DocxContext [Blocks]
rowToBlocksList (Row cells) = do
@@ -434,9 +434,9 @@ parStyleToTransform pPr
let pPr' = pPr { pStyle = cs, indentation = Nothing}
in
(divWith ("", [c], [])) . (parStyleToTransform pPr')
- | (c:cs) <- pStyle pPr
- , c `elem` blockQuoteDivs =
- let pPr' = pPr { pStyle = cs \\ blockQuoteDivs }
+ | (_:cs) <- pStyle pPr
+ , Just True <- pBlockQuote pPr =
+ let pPr' = pPr { pStyle = cs }
in
blockQuote . (parStyleToTransform pPr')
| (_:cs) <- pStyle pPr =
@@ -467,12 +467,11 @@ bodyPartToBlocks (Paragraph pPr parparts)
$ parStyleToTransform pPr
$ codeBlock
$ concatMap parPartToString parparts
- | (c : cs) <- filter (isJust . isHeaderClass) $ pStyle pPr
- , Just (prefix, n) <- isHeaderClass c = do
+ | Just (style, n) <- pHeading pPr = do
ils <- local (\s-> s{docxInHeaderBlock=True}) $
(concatReduce <$> mapM parPartToInlines parparts)
makeHeaderAnchor $
- headerWith ("", delete (prefix ++ show n) cs, []) n ils
+ headerWith ("", delete style (pStyle pPr), []) n ils
| otherwise = do
ils <- concatReduce <$> mapM parPartToInlines parparts >>=
(return . fromList . trimLineBreaks . normalizeSpaces . toList)
@@ -555,16 +554,7 @@ bodyToOutput (Body bps) = do
blks',
mediaBag)
-docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
+docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag)
docxToOutput opts (Docx (Document _ body)) =
let dEnv = def { docxOptions = opts} in
evalDocxContext (bodyToOutput body) dEnv def
-
-isHeaderClass :: String -> Maybe (String, Int)
-isHeaderClass s | (pref:_) <- filter (\h -> isPrefixOf h s) headerPrefixes
- , Just s' <- stripPrefix pref s =
- case reads s' :: [(Int, String)] of
- [] -> Nothing
- ((n, "") : []) -> Just (pref, n)
- _ -> Nothing
-isHeaderClass _ = Nothing
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 2945a1eda..cce80fb48 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards, ViewPatterns #-}
+{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-}
{-
Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -65,7 +65,8 @@ import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML)
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
import Text.TeXMath (Exp)
-import Data.Char (readLitChar, ord, chr)
+import Text.Pandoc.Readers.Docx.Util
+import Data.Char (readLitChar, ord, chr, isDigit)
data ReaderEnv = ReaderEnv { envNotes :: Notes
, envNumbering :: Numbering
@@ -73,6 +74,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
, envMedia :: Media
, envFont :: Maybe Font
, envCharStyles :: CharStyleMap
+ , envParStyles :: ParStyleMap
}
deriving Show
@@ -107,8 +109,6 @@ mapD f xs =
in
concatMapM handler xs
-type NameSpaces = [(String, String)]
-
data Docx = Docx Document
deriving Show
@@ -122,8 +122,12 @@ type Media = [(FilePath, B.ByteString)]
type CharStyle = (String, RunStyle)
+type ParStyle = (String, ParStyleData)
+
type CharStyleMap = M.Map String RunStyle
+type ParStyleMap = M.Map String ParStyleData
+
data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
deriving Show
@@ -152,6 +156,9 @@ data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
data ParagraphStyle = ParagraphStyle { pStyle :: [String]
, indentation :: Maybe ParIndentation
, dropCap :: Bool
+ , pHeading :: Maybe (String, Int)
+ , pNumInfo :: Maybe (String, String)
+ , pBlockQuote :: Maybe Bool
}
deriving Show
@@ -159,6 +166,9 @@ defaultParagraphStyle :: ParagraphStyle
defaultParagraphStyle = ParagraphStyle { pStyle = []
, indentation = Nothing
, dropCap = False
+ , pHeading = Nothing
+ , pNumInfo = Nothing
+ , pBlockQuote = Nothing
}
@@ -213,6 +223,12 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
, rStyle :: Maybe CharStyle}
deriving Show
+data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
+ , isBlockQuote :: Maybe Bool
+ , numInfo :: Maybe (String, String)
+ , psStyle :: Maybe ParStyle}
+ deriving Show
+
defaultRunStyle :: RunStyle
defaultRunStyle = RunStyle { isBold = Nothing
, isItalic = Nothing
@@ -232,18 +248,14 @@ type ChangeId = String
type Author = String
type ChangeDate = String
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
-
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = do
let notes = archiveToNotes archive
numbering = archiveToNumbering archive
rels = archiveToRelationships archive
media = archiveToMedia archive
- styles = archiveToStyles archive
- rEnv = ReaderEnv notes numbering rels media Nothing styles
+ (styles, parstyles) = archiveToStyles archive
+ rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles
doc <- runD (archiveToDocument archive) rEnv
return $ Docx doc
@@ -252,7 +264,7 @@ archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
entry <- maybeToD $ findEntryByPath "word/document.xml" zf
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
+ let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
body <- elemToBody namespaces bodyElem
return $ Document namespaces body
@@ -263,47 +275,69 @@ elemToBody ns element | isElem ns "w" "body" element =
(\bps -> return $ Body bps)
elemToBody _ _ = throwError WrongElem
-archiveToStyles :: Archive -> CharStyleMap
+archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
archiveToStyles zf =
let stylesElem = findEntryByPath "word/styles.xml" zf >>=
(parseXMLDoc . UTF8.toStringLazy . fromEntry)
in
case stylesElem of
- Nothing -> M.empty
+ Nothing -> (M.empty, M.empty)
Just styElem ->
- let namespaces = mapMaybe attrToNSPair (elAttribs styElem)
+ let namespaces = elemToNameSpaces styElem
in
- M.fromList $ buildBasedOnList namespaces styElem Nothing
+ ( M.fromList $ buildBasedOnList namespaces styElem
+ (Nothing :: Maybe CharStyle),
+ M.fromList $ buildBasedOnList namespaces styElem
+ (Nothing :: Maybe ParStyle) )
-isBasedOnStyle :: NameSpaces -> Element -> Maybe CharStyle -> Bool
+isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool
isBasedOnStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just styleType <- findAttr (elemName ns "w" "type") element
+ , styleType == cStyleType parentStyle
, Just basedOnVal <- findChild (elemName ns "w" "basedOn") element >>=
findAttr (elemName ns "w" "val")
- , Just (parentId, _) <- parentStyle = (basedOnVal == parentId)
+ , Just ps <- parentStyle = (basedOnVal == getStyleId ps)
| isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just styleType <- findAttr (elemName ns "w" "type") element
+ , styleType == cStyleType parentStyle
, Nothing <- findChild (elemName ns "w" "basedOn") element
, Nothing <- parentStyle = True
| otherwise = False
-elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle
-elemToCharStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
- , Just styleId <- findAttr (elemName ns "w" "styleId") element =
- Just (styleId, elemToRunStyle ns element parentStyle)
- | otherwise = Nothing
-
-getStyleChildren :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle]
+class ElemToStyle a where
+ cStyleType :: Maybe a -> String
+ elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
+ getStyleId :: a -> String
+
+instance ElemToStyle CharStyle where
+ cStyleType _ = "character"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ Just (styleId, elemToRunStyle ns element parentStyle)
+ | otherwise = Nothing
+ getStyleId s = fst s
+
+instance ElemToStyle ParStyle where
+ cStyleType _ = "paragraph"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "paragraph" <- findAttr (elemName ns "w" "type") element
+ , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ Just (styleId, elemToParStyleData ns element parentStyle)
+ | otherwise = Nothing
+ getStyleId s = fst s
+
+getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
getStyleChildren ns element parentStyle
| isElem ns "w" "styles" element =
- mapMaybe (\e -> elemToCharStyle ns e parentStyle) $
+ mapMaybe (\e -> elemToStyle ns e parentStyle) $
filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
| otherwise = []
-buildBasedOnList :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle]
+buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
buildBasedOnList ns element rootStyle =
case (getStyleChildren ns element rootStyle) of
[] -> []
@@ -317,10 +351,10 @@ archiveToNotes zf =
enElem = findEntryByPath "word/endnotes.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
fn_namespaces = case fnElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Just e -> elemToNameSpaces e
Nothing -> []
en_namespaces = case enElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Just e -> elemToNameSpaces e
Nothing -> []
ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
fn = fnElem >>= (elemToNotes ns "footnote")
@@ -420,7 +454,7 @@ archiveToNumbering' zf = do
Nothing -> Just $ Numbering [] [] []
Just entry -> do
numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem)
+ let namespaces = elemToNameSpaces numberingElem
numElems = findChildren
(QName "num" (lookup "w" namespaces) (Just "w"))
numberingElem
@@ -449,15 +483,6 @@ elemToNotes _ _ _ = Nothing
---------------------------------------------
---------------------------------------------
-elemName :: NameSpaces -> String -> String -> QName
-elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix))
-
-isElem :: NameSpaces -> String -> String -> Element -> Bool
-isElem ns prefix name element =
- qName (elName element) == name &&
- qURI (elName element) == (lookup prefix ns)
-
-
elemToTblGrid :: NameSpaces -> Element -> D TblGrid
elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
let cols = findChildren (elemName ns "w" "gridCol") element
@@ -510,20 +535,6 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
stringToInteger}
elemToParIndentation _ _ = Nothing
-
-elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
-elemToNumInfo ns element | isElem ns "w" "p" element = do
- let pPr = findChild (elemName ns "w" "pPr") element
- numPr = pPr >>= findChild (elemName ns "w" "numPr")
- lvl <- numPr >>=
- findChild (elemName ns "w" "ilvl") >>=
- findAttr (elemName ns "w" "val")
- numId <- numPr >>=
- findChild (elemName ns "w" "numId") >>=
- findAttr (elemName ns "w" "val")
- return (numId, lvl)
-elemToNumInfo _ _ = Nothing
-
testBitMask :: String -> Int -> Bool
testBitMask bitMaskS n =
case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
@@ -542,18 +553,28 @@ elemToBodyPart ns element
return $ OMathPara expsLst
elemToBodyPart ns element
| isElem ns "w" "p" element
- , Just (numId, lvl) <- elemToNumInfo ns element = do
- let parstyle = elemToParagraphStyle ns element
+ , Just (numId, lvl) <- getNumInfo ns element = do
+ sty <- asks envParStyles
+ let parstyle = elemToParagraphStyle ns element sty
parparts <- mapD (elemToParPart ns) (elChildren element)
num <- asks envNumbering
case lookupLevel numId lvl num of
- Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
- Nothing -> throwError WrongElem
+ Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
+ Nothing -> throwError WrongElem
elemToBodyPart ns element
| isElem ns "w" "p" element = do
- let parstyle = elemToParagraphStyle ns element
- parparts <- mapD (elemToParPart ns) (elChildren element)
- return $ Paragraph parstyle parparts
+ sty <- asks envParStyles
+ let parstyle = elemToParagraphStyle ns element sty
+ parparts <- mapD (elemToParPart ns) (elChildren element)
+ case pNumInfo parstyle of
+ Just (numId, lvl) -> do
+ num <- asks envNumbering
+ case lookupLevel numId lvl num of
+ Just levelInfo ->
+ return $ ListItem parstyle numId lvl levelInfo parparts
+ Nothing ->
+ throwError WrongElem
+ Nothing -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let caption' = findChild (elemName ns "w" "tblPr") element
@@ -601,6 +622,16 @@ elemToParPart ns element
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs)
Nothing -> throwError WrongElem
+-- The below is an attempt to deal with images in deprecated vml format.
+elemToParPart ns element
+ | isElem ns "w" "r" element
+ , Just _ <- findChild (elemName ns "w" "pict") element =
+ let drawing = findElement (elemName ns "v" "imagedata") element
+ >>= findAttr (elemName ns "r" "id")
+ in
+ case drawing of
+ Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs)
+ Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "r" element =
elemToRun ns element >>= (\r -> return $ PlainRun r)
@@ -625,17 +656,20 @@ elemToParPart ns element
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttr (elemName ns "w" "anchor") element = do
+ , Just relId <- findAttr (elemName ns "r" "id") element = do
runs <- mapD (elemToRun ns) (elChildren element)
- return $ InternalHyperLink anchor runs
+ rels <- asks envRelationships
+ case lookupRelationship relId rels of
+ Just target -> do
+ case findAttr (elemName ns "w" "anchor") element of
+ Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
+ Nothing -> return $ ExternalHyperLink target runs
+ Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttr (elemName ns "r" "id") element = do
+ , Just anchor <- findAttr (elemName ns "w" "anchor") element = do
runs <- mapD (elemToRun ns) (elChildren element)
- rels <- asks envRelationships
- return $ case lookupRelationship relId rels of
- Just target -> ExternalHyperLink target runs
- Nothing -> ExternalHyperLink "" runs
+ return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "m" "oMath" element =
(eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
@@ -684,14 +718,30 @@ elemToRun ns element
return $ Run runStyle runElems
elemToRun _ _ = throwError WrongElem
-elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
-elemToParagraphStyle ns element
+getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a
+getParentStyleValue field style
+ | Just value <- field style = Just value
+ | Just parentStyle <- psStyle style
+ = getParentStyleValue field (snd parentStyle)
+getParentStyleValue _ _ = Nothing
+
+getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] ->
+ Maybe a
+getParStyleField field stylemap styles
+ | x <- mapMaybe (\x -> M.lookup x stylemap) styles
+ , (y:_) <- mapMaybe (getParentStyleValue field) x
+ = Just y
+getParStyleField _ _ _ = Nothing
+
+elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle
+elemToParagraphStyle ns element sty
| Just pPr <- findChild (elemName ns "w" "pPr") element =
- ParagraphStyle
- {pStyle =
+ let style =
mapMaybe
(findAttr (elemName ns "w" "val"))
(findChildren (elemName ns "w" "pStyle") pPr)
+ in ParagraphStyle
+ {pStyle = style
, indentation =
findChild (elemName ns "w" "ind") pPr >>=
elemToParIndentation ns
@@ -703,8 +753,11 @@ elemToParagraphStyle ns element
Just "none" -> False
Just _ -> True
Nothing -> False
+ , pHeading = getParStyleField headingLev sty style
+ , pNumInfo = getParStyleField numInfo sty style
+ , pBlockQuote = getParStyleField isBlockQuote sty style
}
-elemToParagraphStyle _ _ = defaultParagraphStyle
+elemToParagraphStyle _ _ _ = defaultParagraphStyle
checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
checkOnOff ns rPr tag
@@ -758,6 +811,59 @@ elemToRunStyle ns element parentStyle
}
elemToRunStyle _ _ _ = defaultRunStyle
+isNumericNotNull :: String -> Bool
+isNumericNotNull str = (str /= []) && (all isDigit str)
+
+getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int)
+getHeaderLevel ns element
+ | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ , Just index <- stripPrefix "Heading" styleId
+ , isNumericNotNull index = Just (styleId, read index)
+ | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ , Just index <- findChild (elemName ns "w" "name") element >>=
+ findAttr (elemName ns "w" "val") >>=
+ stripPrefix "heading "
+ , isNumericNotNull index = Just (styleId, read index)
+getHeaderLevel _ _ = Nothing
+
+blockQuoteStyleIds :: [String]
+blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"]
+
+blockQuoteStyleNames :: [String]
+blockQuoteStyleNames = ["Quote", "Block Text"]
+
+getBlockQuote :: NameSpaces -> Element -> Maybe Bool
+getBlockQuote ns element
+ | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ , styleId `elem` blockQuoteStyleIds = Just True
+ | Just styleName <- findChild (elemName ns "w" "name") element >>=
+ findAttr (elemName ns "w" "val")
+ , styleName `elem` blockQuoteStyleNames = Just True
+getBlockQuote _ _ = Nothing
+
+getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo ns element = do
+ let numPr = findChild (elemName ns "w" "pPr") element >>=
+ findChild (elemName ns "w" "numPr")
+ lvl = fromMaybe "0" (numPr >>=
+ findChild (elemName ns "w" "ilvl") >>=
+ findAttr (elemName ns "w" "val"))
+ numId <- numPr >>=
+ findChild (elemName ns "w" "numId") >>=
+ findAttr (elemName ns "w" "val")
+ return (numId, lvl)
+
+
+elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData
+elemToParStyleData ns element parentStyle =
+ ParStyleData
+ {
+ headingLev = getHeaderLevel ns element
+ , isBlockQuote = getBlockQuote ns element
+ , numInfo = getNumInfo ns element
+ , psStyle = parentStyle
+ }
+
elemToRunElem :: NameSpaces -> Element -> D RunElem
elemToRunElem ns element
| isElem ns "w" "t" element
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
new file mode 100644
index 000000000..2901ea2a3
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
@@ -0,0 +1,106 @@
+module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
+ , defaultStyleMaps
+ , getStyleMaps
+ , getStyleId
+ , hasStyleName
+ ) where
+
+import Text.XML.Light
+import Text.Pandoc.Readers.Docx.Util
+import Control.Monad.State
+import Data.Char (toLower)
+import qualified Data.Map as M
+
+newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
+newtype CharStyleMap = CharStyleMap ( M.Map String String )
+
+class StyleMap a where
+ alterMap :: (M.Map String String -> M.Map String String) -> a -> a
+ getMap :: a -> M.Map String String
+
+instance StyleMap ParaStyleMap where
+ alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
+ getMap (ParaStyleMap m) = m
+
+instance StyleMap CharStyleMap where
+ alterMap f (CharStyleMap m) = CharStyleMap $ f m
+ getMap (CharStyleMap m) = m
+
+insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a
+insert (Just k) (Just v) m = alterMap (M.insert k v) m
+insert _ _ m = m
+
+getStyleId :: (StyleMap a) => String -> a -> String
+getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
+
+hasStyleName :: (StyleMap a) => String -> a -> Bool
+hasStyleName styleName = M.member (map toLower styleName) . getMap
+
+data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
+ , sParaStyleMap :: ParaStyleMap
+ , sCharStyleMap :: CharStyleMap
+ }
+
+data StyleType = ParaStyle | CharStyle
+
+defaultStyleMaps :: StyleMaps
+defaultStyleMaps = StyleMaps { sNameSpaces = []
+ , sParaStyleMap = ParaStyleMap M.empty
+ , sCharStyleMap = CharStyleMap M.empty
+ }
+
+type StateM a = State StyleMaps a
+
+getStyleMaps :: Element -> StyleMaps
+getStyleMaps docElem = execState genStyleMap state'
+ where
+ state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
+ genStyleItem e = do
+ styleType <- getStyleType e
+ styleId <- getAttrStyleId e
+ nameValLowercase <- fmap (map toLower) `fmap` getNameVal e
+ case styleType of
+ Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId
+ Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId
+ _ -> return ()
+ genStyleMap = do
+ style <- elemName' "style"
+ let styles = findChildren style docElem
+ forM_ styles genStyleItem
+
+modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM ()
+modParaStyleMap f = modify $ \s ->
+ s {sParaStyleMap = f $ sParaStyleMap s}
+
+modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM ()
+modCharStyleMap f = modify $ \s ->
+ s {sCharStyleMap = f $ sCharStyleMap s}
+
+getStyleType :: Element -> StateM (Maybe StyleType)
+getStyleType e = do
+ styleTypeStr <- getAttrType e
+ case styleTypeStr of
+ Just "paragraph" -> return $ Just ParaStyle
+ Just "character" -> return $ Just CharStyle
+ _ -> return Nothing
+
+getAttrType :: Element -> StateM (Maybe String)
+getAttrType el = do
+ name <- elemName' "type"
+ return $ findAttr name el
+
+getAttrStyleId :: Element -> StateM (Maybe String)
+getAttrStyleId el = do
+ name <- elemName' "styleId"
+ return $ findAttr name el
+
+getNameVal :: Element -> StateM (Maybe String)
+getNameVal el = do
+ name <- elemName' "name"
+ val <- elemName' "val"
+ return $ findChild name el >>= findAttr val
+
+elemName' :: String -> StateM QName
+elemName' name = do
+ namespaces <- gets sNameSpaces
+ return $ elemName namespaces "w" name
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
new file mode 100644
index 000000000..891f107b0
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -0,0 +1,26 @@
+module Text.Pandoc.Readers.Docx.Util (
+ NameSpaces
+ , elemName
+ , isElem
+ , elemToNameSpaces
+ ) where
+
+import Text.XML.Light
+import Data.Maybe (mapMaybe)
+
+type NameSpaces = [(String, String)]
+
+elemToNameSpaces :: Element -> NameSpaces
+elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+elemName :: NameSpaces -> String -> String -> QName
+elemName ns prefix name = QName name (lookup prefix ns) (Just prefix)
+
+isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem ns prefix name element =
+ qName (elName element) == name &&
+ qURI (elName element) == lookup prefix ns
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index b061d8683..338540533 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -35,18 +35,20 @@ import Control.DeepSeq.Generics (deepseq, NFData)
import Debug.Trace (trace)
+import Text.Pandoc.Error
+
type Items = M.Map String (FilePath, MimeType)
-readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)
+readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)
readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes)
-runEPUB :: Except String a -> a
-runEPUB = either error id . runExcept
+runEPUB :: Except PandocError a -> Either PandocError a
+runEPUB = runExcept
-- Note that internal reference are aggresively normalised so that all ids
-- are of the form "filename#id"
--
-archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
+archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
archiveToEPUB os archive = do
-- root is path to folder with manifest file in
(root, content) <- getManifest archive
@@ -64,19 +66,20 @@ archiveToEPUB os archive = do
return $ (ast, mediaBag)
where
os' = os {readerParseRaw = True}
- parseSpineElem :: MonadError String m => FilePath -> (FilePath, MimeType) -> m Pandoc
+ parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
when (readerTrace os) (traceM path)
doc <- mimeToReader mime r path
let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
return $ docSpan <> doc
- mimeToReader :: MonadError String m => MimeType -> FilePath -> FilePath -> m Pandoc
+ mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc
mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do
fname <- findEntryByPathE (root </> path) archive
- return $ fixInternalReferences path .
+ html <- either throwError return .
readHtml os' .
UTF8.toStringLazy $
fromEntry fname
+ return $ fixInternalReferences path html
mimeToReader s _ path
| s `elem` imageMimes = return $ imageToPandoc path
| otherwise = return $ mempty
@@ -114,7 +117,7 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"]
type CoverImage = FilePath
-parseManifest :: (MonadError String m) => Element -> m (Maybe CoverImage, Items)
+parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items)
parseManifest content = do
manifest <- findElementE (dfName "manifest") content
let items = findChildren (dfName "item") manifest
@@ -130,7 +133,7 @@ parseManifest content = do
mime <- findAttrE (emptyName "media-type") e
return (uid, (href, mime))
-parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MimeType)]
+parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
spine <- findElementE (dfName "spine") e
let itemRefs = findChildren (dfName "itemref") spine
@@ -141,7 +144,7 @@ parseSpine is e = do
guard linear
findAttr (emptyName "idref") ref
-parseMeta :: MonadError String m => Element -> m Meta
+parseMeta :: MonadError PandocError m => Element -> m Meta
parseMeta content = do
meta <- findElementE (dfName "metadata") content
let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
@@ -159,7 +162,7 @@ renameMeta :: String -> String
renameMeta "creator" = "author"
renameMeta s = s
-getManifest :: MonadError String m => Archive -> m (String, Element)
+getManifest :: MonadError PandocError m => Archive -> m (String, Element)
getManifest archive = do
metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
@@ -266,18 +269,18 @@ emptyName s = QName s Nothing Nothing
-- Convert Maybe interface to Either
-findAttrE :: MonadError String m => QName -> Element -> m String
+findAttrE :: MonadError PandocError m => QName -> Element -> m String
findAttrE q e = mkE "findAttr" $ findAttr q e
-findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry
+findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry
findEntryByPathE (normalise -> path) a =
mkE ("No entry on path: " ++ path) $ findEntryByPath path a
-parseXMLDocE :: MonadError String m => String -> m Element
+parseXMLDocE :: MonadError PandocError m => String -> m Element
parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
-findElementE :: MonadError String m => QName -> Element -> m Element
+findElementE :: MonadError PandocError m => QName -> Element -> m Element
findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
-mkE :: MonadError String m => String -> Maybe a -> m a
-mkE s = maybe (throwError s) return
+mkE :: MonadError PandocError m => String -> Maybe a -> m a
+mkE s = maybe (throwError . ParseFailure $ s) return
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 4e0bb375a..fcba16e04 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
+{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses,
+ViewPatterns#-}
{-
-Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.HTML
- Copyright : Copyright (C) 2006-2014 John MacFarlane
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -43,14 +44,14 @@ import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, renderTags'
- , escapeURI, safeRead )
+ , escapeURI, safeRead, mapLeft )
import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
, Extension (Ext_epub_html_exts,
Ext_native_divs, Ext_native_spans))
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
import Data.Maybe ( fromMaybe, isJust)
-import Data.List ( intercalate, isInfixOf )
+import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf )
import Data.Char ( isDigit )
import Control.Monad ( liftM, guard, when, mzero, void, unless )
import Control.Arrow ((***))
@@ -61,16 +62,20 @@ import Debug.Trace (trace)
import Text.TeXMath (readMathML, writeTeX)
import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
+import Network.URI (isURI)
+import Text.Pandoc.Error
+
+import Text.Parsec.Error
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readHtml opts inp =
- case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags of
- Left err' -> error $ "\nError at " ++ show err'
- Right result -> result
+ mapLeft (ParseFailure . getError) . flip runReader def $
+ runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing)
+ "source" tags
where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
@@ -78,6 +83,9 @@ readHtml opts inp =
meta <- stateMeta . parserState <$> getState
bs' <- replaceNotes (B.toList blocks)
return $ Pandoc meta bs'
+ getError (errorMessages -> ms) = case ms of
+ [] -> ""
+ (m:_) -> messageString m
replaceNotes :: [Block] -> TagParser [Block]
replaceNotes = walkM replaceNotes'
@@ -91,7 +99,8 @@ replaceNotes' x = return x
data HTMLState =
HTMLState
{ parserState :: ParserState,
- noteTable :: [(String, Blocks)]
+ noteTable :: [(String, Blocks)],
+ baseHref :: Maybe String
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -113,7 +122,7 @@ pBody :: TagParser Blocks
pBody = pInTags "body" block
pHead :: TagParser Blocks
-pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag)
+pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag)
where pTitle = pInTags "title" inline >>= setTitle . trimInlines
setTitle t = mempty <$ (updateState $ B.setMeta "title" t)
pMetaTag = do
@@ -125,6 +134,17 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag)
let content = fromAttrib "content" mt
updateState $ B.setMeta name (B.text content)
return mempty
+ pBaseTag = do
+ bt <- pSatisfy (~== TagOpen "base" [])
+ let baseH = fromAttrib "href" bt
+ if null baseH
+ then return mempty
+ else do
+ let baseH' = case reverse baseH of
+ '/':_ -> baseH
+ _ -> baseH ++ "/"
+ updateState $ \st -> st{ baseHref = Just baseH' }
+ return mempty
block :: TagParser Blocks
block = do
@@ -250,7 +270,14 @@ pOrderedList = try $ do
"lower-alpha" -> LowerAlpha
"upper-alpha" -> UpperAlpha
"decimal" -> Decimal
- _ -> DefaultStyle
+ _ ->
+ case lookup "type" attribs of
+ Just "1" -> Decimal
+ Just "I" -> UpperRoman
+ Just "i" -> LowerRoman
+ Just "A" -> UpperAlpha
+ Just "a" -> LowerAlpha
+ _ -> DefaultStyle
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (t ~== TagClose "ol"))
@@ -373,13 +400,21 @@ pTable = try $ do
skipMany pBlank
caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
-- TODO actually read these and take width information from them
- widths' <- pColgroup <|> many pCol
- head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
- skipMany pBlank
- rows <- pOptInTag "tbody"
- $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
- skipMany pBlank
+ widths' <- (mconcat <$> many1 pColgroup) <|> many pCol
+ let pTh = option [] $ pInTags "tr" (pCell "th")
+ pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th")
+ pTBody = do pOptInTag "tbody" $ many1 pTr
+ head'' <- pOptInTag "thead" pTh
+ head' <- pOptInTag "tbody" $ do
+ if null head''
+ then pTh
+ else return head''
+ rowsLs <- many pTBody
+ rows' <- pOptInTag "tfoot" $ many pTr
TagClose _ <- pSatisfy (~== TagClose "table")
+ let rows = (concat rowsLs) ++ rows'
+ -- fail on empty table
+ guard $ not $ null head' && null rows
let isSinglePlain x = case B.toList x of
[Plain _] -> True
_ -> False
@@ -551,7 +586,11 @@ pAnchor = try $ do
pRelLink :: TagParser Inlines
pRelLink = try $ do
tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href"))
- let url = fromAttrib "href" tag
+ mbBaseHref <- baseHref <$> getState
+ let url' = fromAttrib "href" tag
+ let url = case (isURI url', mbBaseHref) of
+ (False, Just h) -> h ++ url'
+ _ -> url'
let title = fromAttrib "title" tag
let uid = fromAttrib "id" tag
let spanC = case uid of
@@ -563,7 +602,11 @@ pRelLink = try $ do
pImage :: TagParser Inlines
pImage = do
tag <- pSelfClosing (=="img") (isJust . lookup "src")
- let url = fromAttrib "src" tag
+ mbBaseHref <- baseHref <$> getState
+ let url' = fromAttrib "src" tag
+ let url = case (isURI url', mbBaseHref) of
+ (False, Just h) -> h ++ url'
+ _ -> url'
let title = fromAttrib "title" tag
let alt = fromAttrib "alt" tag
return $ B.image (escapeURI url) title (B.text alt)
@@ -624,14 +667,17 @@ pInTags tagtype parser = try $ do
pSatisfy (~== TagOpen tagtype [])
mconcat <$> manyTill parser (pCloses tagtype <|> eof)
-pOptInTag :: String -> TagParser a
- -> TagParser a
-pOptInTag tagtype parser = try $ do
- open <- option False (pSatisfy (~== TagOpen tagtype []) >> return True)
+-- parses p, preceeded by an optional opening tag
+-- and followed by an optional closing tags
+pOptInTag :: String -> TagParser a -> TagParser a
+pOptInTag tagtype p = try $ do
+ skipMany pBlank
+ optional $ pSatisfy (~== TagOpen tagtype [])
+ skipMany pBlank
+ x <- p
skipMany pBlank
- x <- parser
+ optional $ pSatisfy (~== TagClose tagtype)
skipMany pBlank
- when open $ pCloses tagtype
return x
pCloses :: String -> TagParser ()
@@ -740,7 +786,7 @@ pSpace = many1 (satisfy isSpace) >> return B.space
--
eitherBlockOrInline :: [String]
-eitherBlockOrInline = ["audio", "applet", "button", "iframe",
+eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed",
"del", "ins",
"progress", "map", "area", "noscript", "script",
"object", "svg", "video", "source"]
@@ -758,7 +804,7 @@ blockHtmlTags :: [String]
blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside",
"blockquote", "body", "button", "canvas",
"caption", "center", "col", "colgroup", "dd", "dir", "div",
- "dl", "dt", "embed", "fieldset", "figcaption", "figure",
+ "dl", "dt", "fieldset", "figcaption", "figure",
"footer", "form", "h1", "h2", "h3", "h4",
"h5", "h6", "head", "header", "hgroup", "hr", "html",
"isindex", "menu", "noframes", "ol", "output", "p", "pre",
@@ -815,6 +861,7 @@ isCommentTag = tagComment (const True)
closes :: String -> String -> Bool
_ `closes` "body" = False
_ `closes` "html" = False
+"body" `closes` "head" = True
"a" `closes` "a" = True
"li" `closes` "li" = True
"th" `closes` t | t `elem` ["th","td"] = True
@@ -856,7 +903,7 @@ htmlInBalanced :: (Monad m)
-> ParserT String st m String
htmlInBalanced f = try $ do
(TagOpen t _, tag) <- htmlTag f
- guard $ '/' `notElem` tag -- not a self-closing tag
+ guard $ not $ "/>" `isSuffixOf` tag -- not a self-closing tag
let stopper = htmlTag (~== TagClose t)
let anytag = snd <$> htmlTag (const True)
contents <- many $ notFollowedBy' stopper >>
@@ -869,17 +916,26 @@ htmlTag :: Monad m
=> (Tag String -> Bool)
-> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
- lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
- (next : _) <- getInput >>= return . canonicalizeTags . parseTags
+ lookAhead (char '<')
+ inp <- getInput
+ let hasTagWarning (TagWarning _:_) = True
+ hasTagWarning _ = False
+ let (next : rest) = canonicalizeTags $ parseTagsOptions
+ parseOptions{ optTagWarning = True } inp
guard $ f next
- -- advance the parser
case next of
- TagComment s -> do
+ TagComment s
+ | "<!--" `isPrefixOf` inp -> do
count (length s + 4) anyChar
skipMany (satisfy (/='>'))
char '>'
return (next, "<!--" ++ s ++ "-->")
+ | otherwise -> fail "bogus comment mode, HTML5 parse error"
_ -> do
+ -- we get a TagWarning on things like
+ -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>
+ -- which should NOT be parsed as an HTML tag, see #2277
+ guard $ not $ hasTagWarning rest
rendered <- manyTill anyChar (char '>')
return (next, rendered ++ ">")
@@ -925,7 +981,7 @@ instance HasReaderOptions HTMLState where
extractReaderOptions = extractReaderOptions . parserState
instance Default HTMLState where
- def = HTMLState def []
+ def = HTMLState def [] Nothing
instance HasMeta HTMLState where
setMeta s b st = st {parserState = setMeta s b $ parserState st}
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 4b46c869d..aa2534afc 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE CPP #-}
{- |
Module : Text.Pandoc.Readers.Haddock
Copyright : Copyright (C) 2013 David Lazar
@@ -25,11 +26,18 @@ import Documentation.Haddock.Parser
import Documentation.Haddock.Types
import Debug.Trace (trace)
+import Text.Pandoc.Error
+
-- | Parse Haddock markup and return a 'Pandoc' document.
readHaddock :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse
- -> Pandoc
-readHaddock opts = B.doc . docHToBlocks . trace' . parseParas
+ -> Either PandocError Pandoc
+readHaddock opts =
+#if MIN_VERSION_haddock_library(1,2,0)
+ Right . B.doc . docHToBlocks . trace' . _doc . parseParas
+#else
+ Right . B.doc . docHToBlocks . trace' . parseParas
+#endif
where trace' x = if readerTrace opts
then trace (show x) x
else x
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9f51e9a8f..0da912ea6 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-}
{-
-Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.LaTeX
- Copyright : Copyright (C) 2006-2014 John MacFarlane
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -42,26 +42,25 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
mathDisplay, mathInline)
import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Char ( chr, ord )
+import Data.Char ( chr, ord, isLetter, isAlphaNum )
import Control.Monad.Trans (lift)
import Control.Monad
import Text.Pandoc.Builder
-import Data.Char (isLetter, isAlphaNum)
import Control.Applicative
import Data.Monoid
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, maybeToList)
import System.Environment (getEnv)
-import System.FilePath (replaceExtension, (</>))
-import Data.List (intercalate, intersperse)
+import System.FilePath (replaceExtension, (</>), takeExtension, addExtension)
+import Data.List (intercalate)
import qualified Data.Map as M
import qualified Control.Exception as E
-import System.FilePath (takeExtension, addExtension)
import Text.Pandoc.Highlighting (fromListingsLanguage)
+import Text.Pandoc.Error
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts }
parseLaTeX :: LP Pandoc
@@ -73,17 +72,16 @@ parseLaTeX = do
let (Pandoc _ bs') = doc bs
return $ Pandoc meta bs'
-type LP = Parser [Char] ParserState
+type LP = Parser String ParserState
anyControlSeq :: LP String
anyControlSeq = do
char '\\'
next <- option '\n' anyChar
- name <- case next of
- '\n' -> return ""
- c | isLetter c -> (c:) <$> (many letter <* optional sp)
- | otherwise -> return [c]
- return name
+ case next of
+ '\n' -> return ""
+ c | isLetter c -> (c:) <$> (many letter <* optional sp)
+ | otherwise -> return [c]
controlSeq :: String -> LP String
controlSeq name = try $ do
@@ -103,7 +101,7 @@ dimenarg = try $ do
sp :: LP ()
sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
- <|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline)
+ <|> try (newline <* lookAhead anyChar <* notFollowedBy blankline)
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
@@ -161,50 +159,47 @@ mathInline :: LP String -> LP Inlines
mathInline p = math <$> (try p >>= applyMacros')
mathChars :: LP String
-mathChars = concat <$>
- many ( many1 (satisfy (\c -> c /= '$' && c /='\\'))
- <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar)
- )
+mathChars = (concat <$>) $
+ many $
+ many1 (satisfy (\c -> c /= '$' && c /='\\'))
+ <|> (\c -> ['\\',c]) <$> try (char '\\' *> anyChar)
quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines
quoted' f starter ender = do
startchs <- starter
try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs
-double_quote :: LP Inlines
-double_quote =
- ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
+doubleQuote :: LP Inlines
+doubleQuote =
+ quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
<|> quoted' doubleQuoted (string "“") (void $ char '”')
-- the following is used by babel for localized quotes:
<|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
<|> quoted' doubleQuoted (string "\"") (void $ char '"')
- )
-single_quote :: LP Inlines
-single_quote =
- ( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
+singleQuote :: LP Inlines
+singleQuote =
+ quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
<|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
- )
inline :: LP Inlines
inline = (mempty <$ comment)
<|> (space <$ sp)
<|> inlineText
<|> inlineCommand
+ <|> inlineEnvironment
<|> inlineGroup
<|> (char '-' *> option (str "-")
- ((char '-') *> option (str "–") (str "—" <$ char '-')))
- <|> double_quote
- <|> single_quote
+ (char '-' *> option (str "–") (str "—" <$ char '-')))
+ <|> doubleQuote
+ <|> singleQuote
<|> (str "”" <$ try (string "''"))
<|> (str "”" <$ char '”')
<|> (str "’" <$ char '\'')
<|> (str "’" <$ char '’')
<|> (str "\160" <$ char '~')
- <|> (mathDisplay $ string "$$" *> mathChars <* string "$$")
- <|> (mathInline $ char '$' *> mathChars <* char '$')
- <|> (superscript <$> (char '^' *> tok))
- <|> (subscript <$> (char '_' *> tok))
+ <|> mathDisplay (string "$$" *> mathChars <* string "$$")
+ <|> mathInline (char '$' *> mathChars <* char '$')
<|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
<|> (str . (:[]) <$> tildeEscape)
<|> (str . (:[]) <$> oneOf "[]")
@@ -237,20 +232,32 @@ block = (mempty <$ comment)
blocks :: LP Blocks
blocks = mconcat <$> many block
+getRawCommand :: String -> LP String
+getRawCommand name' = do
+ rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced)
+ return $ '\\' : name' ++ snd rawargs
+
+lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
+lookupListDefault d = (fromMaybe d .) . lookupList
+ where
+ lookupList l m = msum $ map (`M.lookup` m) l
+
blockCommand :: LP Blocks
blockCommand = try $ do
name <- anyControlSeq
guard $ name /= "begin" && name /= "end"
star <- option "" (string "*" <* optional sp)
let name' = name ++ star
- case M.lookup name' blockCommands of
- Just p -> p
- Nothing -> case M.lookup name blockCommands of
- Just p -> p
- Nothing -> mzero
+ let raw = do
+ rawcommand <- getRawCommand name'
+ transformed <- applyMacros' rawcommand
+ guard $ transformed /= rawcommand
+ notFollowedBy $ parseFromString inlines transformed
+ parseFromString blocks transformed
+ lookupListDefault raw [name',name] blockCommands
inBrackets :: Inlines -> Inlines
-inBrackets x = (str "[") <> x <> (str "]")
+inBrackets x = str "[" <> x <> str "]"
-- eat an optional argument and one or more arguments in braces
ignoreInlines :: String -> (String, LP Inlines)
@@ -258,19 +265,21 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawInline "latex" . (contseq ++) . snd) <$>
- (getOption readerParseRaw >>= guard >> (withRaw optargs))
+ (getOption readerParseRaw >>= guard >> withRaw optargs)
ignoreBlocks :: String -> (String, LP Blocks)
ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
- (getOption readerParseRaw >>= guard >> (withRaw optargs))
+ (getOption readerParseRaw >>= guard >> withRaw optargs)
blockCommands :: M.Map String (LP Blocks)
blockCommands = M.fromList $
[ ("par", mempty <$ skipopts)
- , ("title", mempty <$ (skipopts *> tok >>= addMeta "title"))
+ , ("title", mempty <$ (skipopts *>
+ (grouped inline >>= addMeta "title")
+ <|> (grouped block >>= addMeta "title")))
, ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
, ("author", mempty <$ (skipopts *> authors))
-- -- in letter class, temp. store address & sig as title, author
@@ -301,10 +310,10 @@ blockCommands = M.fromList $
--
, ("hrule", pure horizontalRule)
, ("rule", skipopts *> tok *> tok *> pure horizontalRule)
- , ("item", skipopts *> loose_item)
+ , ("item", skipopts *> looseItem)
, ("documentclass", skipopts *> braced *> preamble)
, ("centerline", (para . trimInlines) <$> (skipopts *> tok))
- , ("caption", skipopts *> tok >>= setCaption)
+ , ("caption", skipopts *> setCaption)
, ("PandocStartInclude", startInclude)
, ("PandocEndInclude", endInclude)
, ("bibliography", mempty <$ (skipopts *> braced >>=
@@ -327,6 +336,7 @@ blockCommands = M.fromList $
, "hyperdef"
, "markboth", "markright", "markleft"
, "hspace", "vspace"
+ , "newpage"
]
addMeta :: ToMetaValue a => String -> a -> LP ()
@@ -336,9 +346,16 @@ addMeta field val = updateState $ \st ->
splitBibs :: String -> [Inlines]
splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
-setCaption :: Inlines -> LP Blocks
-setCaption ils = do
- updateState $ \st -> st{ stateCaption = Just ils }
+setCaption :: LP Blocks
+setCaption = do
+ ils <- tok
+ mblabel <- option Nothing $
+ try $ spaces' >> controlSeq "label" >> (Just <$> tok)
+ let ils' = case mblabel of
+ Just lab -> ils <> spanWith
+ ("",[],[("data-label", stringify lab)]) mempty
+ Nothing -> ils
+ updateState $ \st -> st{ stateCaption = Just ils' }
return mempty
resetCaption :: LP ()
@@ -361,7 +378,7 @@ section (ident, classes, kvs) lvl = do
let lvl' = if hasChapters then lvl + 1 else lvl
skipopts
contents <- grouped inline
- lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced)
+ lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
attr' <- registerHeader (lab, classes, kvs) contents
return $ headerWith attr' lvl' contents
@@ -374,25 +391,39 @@ inlineCommand = try $ do
star <- option "" (string "*")
let name' = name ++ star
let raw = do
- rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced)
- let rawcommand = '\\' : name ++ star ++ snd rawargs
+ rawcommand <- getRawCommand name'
transformed <- applyMacros' rawcommand
if transformed /= rawcommand
then parseFromString inlines transformed
else if parseRaw
then return $ rawInline "latex" rawcommand
else return mempty
- case M.lookup name' inlineCommands of
- Just p -> p <|> raw
- Nothing -> case M.lookup name inlineCommands of
- Just p -> p <|> raw
- Nothing -> raw
+ lookupListDefault mzero [name',name] inlineCommands
+ <|> raw
unlessParseRaw :: LP ()
unlessParseRaw = getOption readerParseRaw >>= guard . not
isBlockCommand :: String -> Bool
-isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
+isBlockCommand s = s `M.member` blockCommands
+
+
+inlineEnvironments :: M.Map String (LP Inlines)
+inlineEnvironments = M.fromList
+ [ ("displaymath", mathEnv id Nothing "displaymath")
+ , ("equation", mathEnv id Nothing "equation")
+ , ("equation*", mathEnv id Nothing "equation*")
+ , ("gather", mathEnv id (Just "gathered") "gather")
+ , ("gather*", mathEnv id (Just "gathered") "gather*")
+ , ("multline", mathEnv id (Just "gathered") "multline")
+ , ("multline*", mathEnv id (Just "gathered") "multline*")
+ , ("eqnarray", mathEnv id (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*")
+ , ("align", mathEnv id (Just "aligned") "align")
+ , ("align*", mathEnv id (Just "aligned") "align*")
+ , ("alignat", mathEnv id (Just "aligned") "alignat")
+ , ("alignat*", mathEnv id (Just "aligned") "alignat*")
+ ]
inlineCommands :: M.Map String (LP Inlines)
inlineCommands = M.fromList $
@@ -414,9 +445,14 @@ inlineCommands = M.fromList $
, ("sim", lit "~")
, ("label", unlessParseRaw >> (inBrackets <$> tok))
, ("ref", unlessParseRaw >> (inBrackets <$> tok))
+ , ("noindent", unlessParseRaw >> return mempty)
+ , ("textgreek", tok)
+ , ("sep", lit ",")
+ , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty
, ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
, ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
- , ("ensuremath", mathInline $ braced)
+ , ("ensuremath", mathInline braced)
+ , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
, ("P", lit "¶")
, ("S", lit "§")
, ("$", lit "$")
@@ -464,7 +500,7 @@ inlineCommands = M.fromList $
, ("v", option (str "v") $ try $ tok >>= accent hacek)
, ("u", option (str "u") $ try $ tok >>= accent breve)
, ("i", lit "i")
- , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp))
+ , ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
, (",", pure mempty)
, ("@", pure mempty)
, (" ", lit "\160")
@@ -477,7 +513,7 @@ inlineCommands = M.fromList $
, ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("verb", doverb)
- , ("lstinline", doverb)
+ , ("lstinline", skipopts *> doverb)
, ("Verb", doverb)
, ("texttt", (code . stringify . toList) <$> tok)
, ("url", (unescapeURL <$> braced) >>= \url ->
@@ -494,6 +530,7 @@ inlineCommands = M.fromList $
, ("citealp", citation "citealp" NormalCitation False)
, ("citealp*", citation "citealp*" NormalCitation False)
, ("autocite", citation "autocite" NormalCitation False)
+ , ("smartcite", citation "smartcite" NormalCitation False)
, ("footcite", inNote <$> citation "footcite" NormalCitation False)
, ("parencite", citation "parencite" NormalCitation False)
, ("supercite", citation "supercite" NormalCitation False)
@@ -516,6 +553,7 @@ inlineCommands = M.fromList $
, ("supercites", citation "supercites" NormalCitation True)
, ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
, ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Smartcite", citation "Smartcite" NormalCitation False)
, ("Footcite", citation "Footcite" NormalCitation False)
, ("Parencite", citation "Parencite" NormalCitation False)
, ("Supercite", citation "Supercite" NormalCitation False)
@@ -542,7 +580,7 @@ inlineCommands = M.fromList $
] ++ map ignoreInlines
-- these commands will be ignored unless --parse-raw is specified,
-- in which case they will appear as raw latex blocks:
- [ "noindent", "index" ]
+ [ "index" ]
mkImage :: String -> LP Inlines
mkImage src = do
@@ -559,7 +597,7 @@ inNote ils =
unescapeURL :: String -> String
unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
- where isEscapable c = c `elem` "#$%&~_^\\{}"
+ where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
unescapeURL (x:xs) = x:unescapeURL xs
unescapeURL [] = ""
@@ -585,7 +623,7 @@ lit = pure . str
accent :: (Char -> String) -> Inlines -> LP Inlines
accent f ils =
case toList ils of
- (Str (x:xs) : ys) -> return $ fromList $ (Str (f x ++ xs) : ys)
+ (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
[] -> mzero
_ -> return ils
@@ -774,7 +812,7 @@ breve 'u' = "ŭ"
breve c = [c]
tok :: LP Inlines
-tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
+tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
opt :: LP Inlines
opt = bracketed inline <* optional sp
@@ -786,15 +824,20 @@ inlineText :: LP Inlines
inlineText = str <$> many1 inlineChar
inlineChar :: LP Char
-inlineChar = noneOf "\\$%^_&~#{}^'`\"‘’“”-[] \t\n"
+inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n"
environment :: LP Blocks
environment = do
controlSeq "begin"
name <- braced
- case M.lookup name environments of
- Just p -> p <|> rawEnv name
- Nothing -> rawEnv name
+ M.findWithDefault mzero name environments
+ <|> rawEnv name
+
+inlineEnvironment :: LP Inlines
+inlineEnvironment = try $ do
+ controlSeq "begin"
+ name <- braced
+ M.findWithDefault mzero name inlineEnvironments
rawEnv :: String -> LP Blocks
rawEnv name = do
@@ -807,15 +850,11 @@ rawEnv name = do
----
-type IncludeParser = ParserT [Char] [String] IO String
+type IncludeParser = ParserT String [String] IO String
-- | Replace "include" commands with file contents.
-handleIncludes :: String -> IO String
-handleIncludes s = do
- res <- runParserT includeParser' [] "input" s
- case res of
- Right s' -> return s'
- Left e -> error $ show e
+handleIncludes :: String -> IO (Either PandocError String)
+handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s
includeParser' :: IncludeParser
includeParser' =
@@ -857,6 +896,12 @@ backslash' = string "\\"
braced' :: IncludeParser
braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
+maybeAddExtension :: String -> FilePath -> FilePath
+maybeAddExtension ext fp =
+ if null (takeExtension fp)
+ then addExtension fp ext
+ else fp
+
include' :: IncludeParser
include' = do
fs' <- try $ do
@@ -865,11 +910,11 @@ include' = do
<|> try (string "input")
<|> string "usepackage"
-- skip options
- skipMany $ try $ char '[' *> (manyTill anyChar (char ']'))
+ skipMany $ try $ char '[' *> manyTill anyChar (char ']')
fs <- (map trim . splitBy (==',')) <$> braced'
return $ if name == "usepackage"
- then map (flip replaceExtension ".sty") fs
- else map (flip replaceExtension ".tex") fs
+ then map (maybeAddExtension ".sty") fs
+ else map (maybeAddExtension ".tex") fs
pos <- getPosition
containers <- getState
let fn = case containers of
@@ -938,14 +983,14 @@ keyvals = try $ char '[' *> manyTill keyval (char ']')
alltt :: String -> LP Blocks
alltt t = walk strToCode <$> parseFromString blocks
(substitute " " "\\ " $ substitute "%" "\\%" $
- concat $ intersperse "\\\\\n" $ lines t)
+ intercalate "\\\\\n" $ lines t)
where strToCode (Str s) = Code nullAttr s
strToCode x = x
-rawLaTeXBlock :: Parser [Char] ParserState String
+rawLaTeXBlock :: LP String
rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
-rawLaTeXInline :: Parser [Char] ParserState Inline
+rawLaTeXInline :: LP Inline
rawLaTeXInline = do
raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
RawInline "latex" <$> applyMacros' raw
@@ -954,41 +999,43 @@ addImageCaption :: Blocks -> LP Blocks
addImageCaption = walkM go
where go (Image alt (src,tit)) = do
mbcapt <- stateCaption <$> getState
- case mbcapt of
- Just ils -> return (Image (toList ils) (src, "fig:"))
- Nothing -> return (Image alt (src,tit))
+ return $ case mbcapt of
+ Just ils -> Image (toList ils) (src, "fig:")
+ Nothing -> Image alt (src,tit)
go x = return x
addTableCaption :: Blocks -> LP Blocks
addTableCaption = walkM go
where go (Table c als ws hs rs) = do
mbcapt <- stateCaption <$> getState
- case mbcapt of
- Just ils -> return (Table (toList ils) als ws hs rs)
- Nothing -> return (Table c als ws hs rs)
+ return $ case mbcapt of
+ Just ils -> Table (toList ils) als ws hs rs
+ Nothing -> Table c als ws hs rs
go x = return x
environments :: M.Map String (LP Blocks)
environments = M.fromList
[ ("document", env "document" blocks <* skipMany anyChar)
- , ("letter", env "letter" letter_contents)
+ , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
+ , ("letter", env "letter" letterContents)
, ("figure", env "figure" $
resetCaption *> skipopts *> blocks >>= addImageCaption)
, ("center", env "center" blocks)
, ("table", env "table" $
resetCaption *> skipopts *> blocks >>= addTableCaption)
- , ("tabular", env "tabular" simpTable)
+ , ("tabular*", env "tabular" $ simpTable True)
+ , ("tabular", env "tabular" $ simpTable False)
, ("quote", blockQuote <$> env "quote" blocks)
, ("quotation", blockQuote <$> env "quotation" blocks)
, ("verse", blockQuote <$> env "verse" blocks)
, ("itemize", bulletList <$> listenv "itemize" (many item))
, ("description", definitionList <$> listenv "description" (many descItem))
- , ("enumerate", ordered_list)
+ , ("enumerate", orderedList')
, ("alltt", alltt =<< verbEnv "alltt")
, ("code", guardEnabled Ext_literate_haskell *>
(codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
verbEnv "code"))
- , ("verbatim", codeBlock <$> (verbEnv "verbatim"))
+ , ("verbatim", codeBlock <$> verbEnv "verbatim")
, ("Verbatim", do options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
then "startFrom"
@@ -996,17 +1043,17 @@ environments = M.fromList
let classes = [ "numberLines" |
lookup "numbers" options == Just "left" ]
let attr = ("",classes,kvs)
- codeBlockWith attr <$> (verbEnv "Verbatim"))
+ codeBlockWith attr <$> verbEnv "Verbatim")
, ("lstlisting", do options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
let classes = [ "numberLines" |
lookup "numbers" options == Just "left" ]
- ++ maybe [] (:[]) (lookup "language" options
+ ++ maybeToList (lookup "language" options
>>= fromListingsLanguage)
let attr = (fromMaybe "" (lookup "label" options),classes,kvs)
- codeBlockWith attr <$> (verbEnv "lstlisting"))
+ codeBlockWith attr <$> verbEnv "lstlisting")
, ("minted", do options <- option [] keyvals
lang <- grouped (many1 $ satisfy (/='}'))
let kvs = [ (if k == "firstnumber"
@@ -1016,27 +1063,27 @@ environments = M.fromList
[ "numberLines" |
lookup "linenos" options == Just "true" ]
let attr = ("",classes,kvs)
- codeBlockWith attr <$> (verbEnv "minted"))
+ codeBlockWith attr <$> verbEnv "minted")
, ("obeylines", parseFromString
(para . trimInlines . mconcat <$> many inline) =<<
intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
- , ("displaymath", mathEnv Nothing "displaymath")
- , ("equation", mathEnv Nothing "equation")
- , ("equation*", mathEnv Nothing "equation*")
- , ("gather", mathEnv (Just "gathered") "gather")
- , ("gather*", mathEnv (Just "gathered") "gather*")
- , ("multline", mathEnv (Just "gathered") "multline")
- , ("multline*", mathEnv (Just "gathered") "multline*")
- , ("eqnarray", mathEnv (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnv (Just "aligned") "eqnarray*")
- , ("align", mathEnv (Just "aligned") "align")
- , ("align*", mathEnv (Just "aligned") "align*")
- , ("alignat", mathEnv (Just "aligned") "alignat")
- , ("alignat*", mathEnv (Just "aligned") "alignat*")
+ , ("displaymath", mathEnv para Nothing "displaymath")
+ , ("equation", mathEnv para Nothing "equation")
+ , ("equation*", mathEnv para Nothing "equation*")
+ , ("gather", mathEnv para (Just "gathered") "gather")
+ , ("gather*", mathEnv para (Just "gathered") "gather*")
+ , ("multline", mathEnv para (Just "gathered") "multline")
+ , ("multline*", mathEnv para (Just "gathered") "multline*")
+ , ("eqnarray", mathEnv para (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*")
+ , ("align", mathEnv para (Just "aligned") "align")
+ , ("align*", mathEnv para (Just "aligned") "align*")
+ , ("alignat", mathEnv para (Just "aligned") "alignat")
+ , ("alignat*", mathEnv para (Just "aligned") "alignat*")
]
-letter_contents :: LP Blocks
-letter_contents = do
+letterContents :: LP Blocks
+letterContents = do
bs <- blocks
st <- getState
-- add signature (author) and address (title)
@@ -1063,8 +1110,8 @@ closing = do
item :: LP Blocks
item = blocks *> controlSeq "item" *> skipopts *> blocks
-loose_item :: LP Blocks
-loose_item = do
+looseItem :: LP Blocks
+looseItem = do
ctx <- stateParserContext `fmap` getState
if ctx == ListItemState
then mzero
@@ -1092,8 +1139,8 @@ listenv name p = try $ do
updateState $ \st -> st{ stateParserContext = oldCtx }
return res
-mathEnv :: Maybe String -> String -> LP Blocks
-mathEnv innerEnv name = para <$> mathDisplay (inner <$> verbEnv name)
+mathEnv :: (Inlines -> a) -> Maybe String -> String -> LP a
+mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name)
where inner x = case innerEnv of
Nothing -> x
Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
@@ -1107,8 +1154,8 @@ verbEnv name = do
res <- manyTill anyChar endEnv
return $ stripTrailingNewlines res
-ordered_list :: LP Blocks
-ordered_list = do
+orderedList' :: LP Blocks
+orderedList' = do
optional sp
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
try $ char '[' *> anyOrderedListMarker <* char ']'
@@ -1120,7 +1167,7 @@ ordered_list = do
optional sp
num <- grouped (many1 digit)
spaces
- return $ (read num + 1 :: Int)
+ return (read num + 1 :: Int)
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
@@ -1134,14 +1181,14 @@ paragraph = do
preamble :: LP Blocks
preamble = mempty <$> manyTill preambleBlock beginDoc
where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
- preambleBlock = (void comment)
- <|> (void sp)
- <|> (void blanklines)
- <|> (void macro)
- <|> (void blockCommand)
- <|> (void anyControlSeq)
- <|> (void braced)
- <|> (void anyChar)
+ preambleBlock = void comment
+ <|> void sp
+ <|> void blanklines
+ <|> void macro
+ <|> void blockCommand
+ <|> void anyControlSeq
+ <|> void braced
+ <|> void anyChar
-------
@@ -1183,7 +1230,7 @@ citationLabel = optional sp *>
<* optional sp
<* optional (char ',')
<* optional sp)
- where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*"
+ where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*" :: String)
cites :: CitationMode -> Bool -> LP [Citation]
cites mode multi = try $ do
@@ -1217,7 +1264,7 @@ complexNatbibCitation mode = try $ do
suff <- ils
skipSpaces
optional $ char ';'
- return $ addPrefix pref $ addSuffix suff $ cits'
+ return $ addPrefix pref $ addSuffix suff cits'
(c:cits, raw) <- withRaw $ grouped parseOne
return $ cite (c{ citationMode = mode }:cits)
(rawInline "latex" $ "\\citetext" ++ raw)
@@ -1227,7 +1274,7 @@ complexNatbibCitation mode = try $ do
parseAligns :: LP [Alignment]
parseAligns = try $ do
char '{'
- let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ try (string "@{}")
+ let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
maybeBar
let cAlign = AlignCenter <$ char 'c'
let lAlign = AlignLeft <$ char 'l'
@@ -1241,13 +1288,22 @@ parseAligns = try $ do
return aligns'
hline :: LP ()
-hline = () <$ (try $ spaces >> controlSeq "hline")
+hline = try $ do
+ spaces'
+ controlSeq "hline" <|>
+ -- booktabs rules:
+ controlSeq "toprule" <|>
+ controlSeq "bottomrule" <|>
+ controlSeq "midrule"
+ spaces'
+ optional $ bracketed (many1 (satisfy (/=']')))
+ return ()
lbreak :: LP ()
-lbreak = () <$ (try $ spaces *> controlSeq "\\")
+lbreak = () <$ try (spaces' *> controlSeq "\\" <* spaces')
amp :: LP ()
-amp = () <$ (try $ spaces *> char '&')
+amp = () <$ try (spaces' *> char '&')
parseTableRow :: Int -- ^ number of columns
-> LP [Blocks]
@@ -1260,19 +1316,22 @@ parseTableRow cols = try $ do
guard $ cells' /= [mempty]
-- note: a & b in a three-column table leaves an empty 3rd cell:
let cells'' = cells' ++ replicate (cols - numcells) mempty
- spaces
+ spaces'
return cells''
-simpTable :: LP Blocks
-simpTable = try $ do
- spaces
+spaces' :: LP ()
+spaces' = spaces *> skipMany (comment *> spaces)
+
+simpTable :: Bool -> LP Blocks
+simpTable hasWidthParameter = try $ do
+ when hasWidthParameter $ () <$ (spaces' >> tok)
+ skipopts
aligns <- parseAligns
let cols = length aligns
optional hline
header' <- option [] $ try (parseTableRow cols <* lbreak <* hline)
rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline)
- spaces
- skipMany (comment *> spaces)
+ spaces'
let header'' = if null header'
then replicate cols mempty
else header'
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 25a303f52..ae81ae7fc 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Markdown
- Copyright : Copyright (C) 2006-2014 John MacFarlane
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -35,7 +36,7 @@ import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate )
import qualified Data.Map as M
import Data.Scientific (coefficient, base10Exponent)
import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum, toLower )
+import Data.Char ( isSpace, isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
import qualified Data.Text as T
@@ -55,7 +56,7 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
import Data.Monoid (mconcat, mempty)
-import Control.Applicative ((<$>), (<*), (*>), (<$))
+import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>))
import Control.Monad
import System.FilePath (takeExtension, addExtension)
import Text.HTML.TagSoup
@@ -63,13 +64,14 @@ import Text.HTML.TagSoup.Match (tagOpen)
import qualified Data.Set as Set
import Text.Printf (printf)
import Debug.Trace (trace)
+import Text.Pandoc.Error
type MarkdownParser = Parser [Char] ParserState
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readMarkdown opts s =
(readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
@@ -77,13 +79,9 @@ readMarkdown opts s =
-- and a list of warnings.
readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> (Pandoc, [String])
+ -> Either PandocError (Pandoc, [String])
readMarkdownWithWarnings opts s =
- (readWith parseMarkdownWithWarnings) def{ stateOptions = opts } (s ++ "\n\n")
- where parseMarkdownWithWarnings = do
- doc <- parseMarkdown
- warnings <- stateWarnings <$> getState
- return (doc, warnings)
+ (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
trimInlinesF :: F Inlines -> F Inlines
trimInlinesF = liftM trimInlines
@@ -117,6 +115,12 @@ isBlank _ = False
-- auxiliary functions
--
+-- | Succeeds when we're in list context.
+inList :: MarkdownParser ()
+inList = do
+ ctx <- stateParserContext <$> getState
+ guard (ctx == ListItemState)
+
isNull :: F Inlines -> Bool
isNull ils = B.isNull $ runF ils def
@@ -161,19 +165,23 @@ litChar = escapedChar'
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
inlinesInBalancedBrackets :: MarkdownParser (F Inlines)
-inlinesInBalancedBrackets = charsInBalancedBrackets >>=
- parseFromString (trimInlinesF . mconcat <$> many inline)
-
-charsInBalancedBrackets :: MarkdownParser [Char]
-charsInBalancedBrackets = do
+inlinesInBalancedBrackets = do
char '['
- result <- manyTill ( many1 (noneOf "`[]\n")
- <|> (snd <$> withRaw code)
- <|> ((\xs -> '[' : xs ++ "]") <$> charsInBalancedBrackets)
- <|> count 1 (satisfy (/='\n'))
- <|> (newline >> notFollowedBy blankline >> return "\n")
- ) (char ']')
- return $ concat result
+ (_, raw) <- withRaw $ charsInBalancedBrackets 1
+ guard $ not $ null raw
+ parseFromString (trimInlinesF . mconcat <$> many inline) (init raw)
+
+charsInBalancedBrackets :: Int -> MarkdownParser ()
+charsInBalancedBrackets 0 = return ()
+charsInBalancedBrackets openBrackets =
+ (char '[' >> charsInBalancedBrackets (openBrackets + 1))
+ <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1))
+ <|> (( (() <$ code)
+ <|> (() <$ (escapedChar'))
+ <|> (newline >> notFollowedBy blankline)
+ <|> skipMany1 (noneOf "[]`\n\\")
+ <|> (() <$ count 1 (oneOf "`\\"))
+ ) >> charsInBalancedBrackets openBrackets)
--
-- document structure
@@ -243,8 +251,9 @@ yamlMetaBlock = try $ do
H.foldrWithKey (\k v m ->
if ignorable k
then m
- else B.setMeta (T.unpack k)
- (yamlToMeta opts v) m)
+ else case yamlToMeta opts v of
+ Left _ -> m
+ Right v' -> B.setMeta (T.unpack k) v' m)
nullMeta hashmap
Right Yaml.Null -> return $ return nullMeta
Right _ -> do
@@ -276,33 +285,42 @@ yamlMetaBlock = try $ do
ignorable :: Text -> Bool
ignorable t = (T.pack "_") `T.isSuffixOf` t
-toMetaValue :: ReaderOptions -> Text -> MetaValue
-toMetaValue opts x =
- case readMarkdown opts (T.unpack x) of
- Pandoc _ [Plain xs] -> MetaInlines xs
- Pandoc _ [Para xs]
+toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue
+toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x)
+ where
+ toMeta p =
+ case p of
+ Pandoc _ [Plain xs] -> MetaInlines xs
+ Pandoc _ [Para xs]
| endsWithNewline x -> MetaBlocks [Para xs]
| otherwise -> MetaInlines xs
- Pandoc _ bs -> MetaBlocks bs
- where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t
-
-yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue
+ Pandoc _ bs -> MetaBlocks bs
+ endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
+ opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts}
+ meta_exts = Set.fromList [ Ext_pandoc_title_block
+ , Ext_mmd_title_block
+ , Ext_yaml_metadata_block
+ ]
+
+yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue
yamlToMeta opts (Yaml.String t) = toMetaValue opts t
yamlToMeta _ (Yaml.Number n)
-- avoid decimal points for numbers that don't need them:
- | base10Exponent n >= 0 = MetaString $ show
+ | base10Exponent n >= 0 = return $ MetaString $ show
$ coefficient n * (10 ^ base10Exponent n)
- | otherwise = MetaString $ show n
-yamlToMeta _ (Yaml.Bool b) = MetaBool b
-yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts)
- $ V.toList xs
-yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m ->
+ | otherwise = return $ MetaString $ show n
+yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b
+yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts)
+ (V.toList xs)
+yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m ->
if ignorable k
then m
- else M.insert (T.unpack k)
- (yamlToMeta opts v) m)
- M.empty o
-yamlToMeta _ _ = MetaString ""
+ else (do
+ v' <- yamlToMeta opts v
+ m' <- m
+ return (M.insert (T.unpack k) v' m')))
+ (return M.empty) o
+yamlToMeta _ _ = return $ MetaString ""
stopLine :: MarkdownParser ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
@@ -318,11 +336,15 @@ mmdTitleBlock = try $ do
kvPair :: MarkdownParser (String, MetaValue)
kvPair = try $ do
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
+ skipMany1 spaceNoNewline
val <- manyTill anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
+ guard $ not . null . trim $ val
let key' = concat $ words $ map toLower key
let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val
return (key',val')
+ where
+ spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r'))
parseMarkdown :: MarkdownParser Pandoc
parseMarkdown = do
@@ -337,12 +359,6 @@ parseMarkdown = do
let Pandoc _ bs = B.doc $ runF blocks st
return $ Pandoc meta bs
-addWarning :: Maybe SourcePos -> String -> MarkdownParser ()
-addWarning mbpos msg =
- updateState $ \st -> st{
- stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) :
- stateWarnings st }
-
referenceKey :: MarkdownParser (F Blocks)
referenceKey = try $ do
pos <- getPosition
@@ -458,12 +474,12 @@ block = do
, bulletList
, header
, lhsCodeBlock
- , rawTeXBlock
, divHtml
, htmlBlock
, table
- , lineBlock
, codeBlockIndented
+ , rawTeXBlock
+ , lineBlock
, blockQuote
, hrule
, orderedList
@@ -499,9 +515,12 @@ atxHeader = try $ do
notFollowedBy $ guardEnabled Ext_fancy_lists >>
(char '.' <|> char ')') -- this would be a list
skipSpaces
- text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
+ (text, raw) <- withRaw $
+ trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
attr <- atxClosing
- attr' <- registerHeader attr (runF text defaultParserState)
+ attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState)
+ guardDisabled Ext_implicit_header_references
+ <|> registerImplicitHeader raw ident
return $ B.headerWith attr' level <$> text
atxClosing :: MarkdownParser Attr
@@ -534,15 +553,25 @@ setextHeader = try $ do
-- This lookahead prevents us from wasting time parsing Inlines
-- unless necessary -- it gives a significant performance boost.
lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
- text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
+ skipSpaces
+ (text, raw) <- withRaw $
+ trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
attr <- setextHeaderEnd
underlineChar <- oneOf setextHChars
many (char underlineChar)
blanklines
let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
- attr' <- registerHeader attr (runF text defaultParserState)
+ attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState)
+ guardDisabled Ext_implicit_header_references
+ <|> registerImplicitHeader raw ident
return $ B.headerWith attr' level <$> text
+registerImplicitHeader :: String -> String -> MarkdownParser ()
+registerImplicitHeader raw ident = do
+ let key = toKey $ "[" ++ raw ++ "]"
+ updateState (\s -> s { stateHeaderKeys =
+ M.insert key ('#':ident,"") (stateHeaderKeys s) })
+
--
-- hrule block
--
@@ -741,9 +770,9 @@ anyOrderedListStart = try $ do
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
res <- do guardDisabled Ext_fancy_lists
- many1 digit
+ start <- many1 digit >>= safeRead
char '.'
- return (1, DefaultStyle, DefaultDelim)
+ return (start, DefaultStyle, DefaultDelim)
<|> do (num, style, delim) <- anyOrderedListMarker
-- if it could be an abbreviated first name,
-- insist on more than one space
@@ -865,7 +894,7 @@ defListMarker = do
tabStop <- getOption readerTabStop
let remaining = tabStop - (length sps + 1)
if remaining > 0
- then count remaining (char ' ') <|> string "\t"
+ then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar
else mzero
return ()
@@ -874,7 +903,7 @@ definitionListItem compact = try $ do
rawLine' <- anyLine
raw <- many1 $ defRawBlock compact
term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
- contents <- mapM (parseFromString parseBlocks) raw
+ contents <- mapM (parseFromString parseBlocks . (++"\n")) raw
optional blanklines
return $ liftM2 (,) term (sequence contents)
@@ -885,6 +914,7 @@ defRawBlock compact = try $ do
firstline <- anyLine
let dline = try
( do notFollowedBy blankline
+ notFollowedByHtmlCloser
if compact -- laziness not compatible with compact
then () <$ indentSpaces
else (() <$ indentSpaces)
@@ -901,7 +931,10 @@ defRawBlock compact = try $ do
definitionList :: MarkdownParser (F Blocks)
definitionList = try $ do
- lookAhead (anyLine >> optional blankline >> defListMarker)
+ lookAhead (anyLine >>
+ optional (blankline >> notFollowedBy (table >> return ())) >>
+ -- don't capture table caption as def list!
+ defListMarker)
compactDefinitionList <|> normalDefinitionList
compactDefinitionList :: MarkdownParser (F Blocks)
@@ -932,6 +965,8 @@ para = try $ do
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
<|> (guardEnabled Ext_lists_without_preceding_blankline >>
+ -- Avoid creating a paragraph in a nested list.
+ notFollowedBy' inList >>
() <$ lookAhead listStart)
<|> do guardEnabled Ext_native_divs
inHtmlBlock <- stateInHtmlBlock <$> getState
@@ -1062,7 +1097,9 @@ dashedLine :: Char
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
- return $ (length dashes, length $ dashes ++ sp)
+ let lengthDashes = length dashes
+ lengthSp = length sp
+ return (lengthDashes, lengthDashes + lengthSp)
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
@@ -1216,7 +1253,8 @@ gridPart :: Char -> Parser [Char] st (Int, Int)
gridPart ch = do
dashes <- many1 (char ch)
char '+'
- return (length dashes, length dashes + 1)
+ let lengthDashes = length dashes
+ return (lengthDashes, lengthDashes + 1)
gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
@@ -1295,12 +1333,8 @@ pipeBreak = try $ do
pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
pipeTable = try $ do
- (heads,aligns) <- try ( pipeBreak >>= \als ->
- return (return $ replicate (length als) mempty, als))
- <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als ->
-
- return (row, als) )
- lines' <- sequence <$> many1 pipeTableRow
+ (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak
+ lines' <- sequence <$> many pipeTableRow
let widths = replicate (length aligns) 0.0
return $ (aligns, widths, heads, lines')
@@ -1482,7 +1516,9 @@ code = try $ do
math :: MarkdownParser (F Inlines)
math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
- <|> (return . B.math <$> (mathInline >>= applyMacros'))
+ <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
+ ((getOption readerSmart >>= guard) *> (return <$> apostrophe)
+ <* notFollowedBy space)
-- Parses material enclosed in *s, **s, _s, or __s.
-- Designed to avoid backtracking.
@@ -1616,8 +1652,7 @@ endline = try $ do
newline
notFollowedBy blankline
-- parse potential list-starts differently if in a list:
- st <- getState
- when (stateParserContext st == ListItemState) $ notFollowedBy listStart
+ notFollowedBy (inList >> listStart)
guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header
@@ -1684,9 +1719,11 @@ referenceLink :: (String -> String -> Inlines -> Inlines)
-> (F Inlines, String) -> MarkdownParser (F Inlines)
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
- (ref,raw') <- try
- (skipSpaces >> optional (newline >> skipSpaces) >> reference)
- <|> return (mempty, "")
+ (_,raw') <- option (mempty, "") $
+ lookAhead (try (spnl >> normalCite >> return (mempty, "")))
+ <|>
+ try (spnl >> reference)
+ when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
parsedRaw <- parseFromString (mconcat <$> many inline) raw'
@@ -1702,13 +1739,13 @@ referenceLink constructor (lab, raw) = do
return $ do
keys <- asksF stateKeys
case M.lookup key keys of
- Nothing -> do
- headers <- asksF stateHeaders
- ref' <- if labIsRef then lab else ref
+ Nothing ->
if implicitHeaderRefs
- then case M.lookup ref' headers of
- Just ident -> constructor ('#':ident) "" <$> lab
- Nothing -> makeFallback
+ then do
+ headerKeys <- asksF stateHeaderKeys
+ case M.lookup key headerKeys of
+ Just (src, tit) -> constructor src tit <$> lab
+ Nothing -> makeFallback
else makeFallback
Just (src,tit) -> constructor src tit <$> lab
@@ -1722,12 +1759,14 @@ dropBrackets = reverse . dropRB . reverse . dropLB
bareURL :: MarkdownParser (F Inlines)
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
+ getState >>= guard . stateAllowLinks
(orig, src) <- uri <|> emailAddress
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
return $ return $ B.link src "" (B.str orig)
autoLink :: MarkdownParser (F Inlines)
autoLink = try $ do
+ getState >>= guard . stateAllowLinks
char '<'
(orig, src) <- uri <|> emailAddress
-- in rare cases, something may remain after the uri parser
@@ -1874,8 +1913,20 @@ textualCite = try $ do
return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
<$> rest
Nothing ->
- (do (cs, raw) <- withRaw $ bareloc first
- return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs)
+ (do
+ (cs, raw) <- withRaw $ bareloc first
+ let (spaces',raw') = span isSpace raw
+ spc | null spaces' = mempty
+ | otherwise = B.space
+ lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
+ fallback <- referenceLink B.link (lab,raw')
+ return $ do
+ fallback' <- fallback
+ cs' <- cs
+ return $
+ case B.toList fallback' of
+ Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback'
+ _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw))
<|> return (do st <- askF
return $ case M.lookup key (stateExamples st) of
Just n -> B.str (show n)
@@ -1885,10 +1936,12 @@ bareloc :: Citation -> MarkdownParser (F [Citation])
bareloc c = try $ do
spnl
char '['
+ notFollowedBy $ char '^'
suff <- suffix
rest <- option (return []) $ try $ char ';' >> citeList
spnl
char ']'
+ notFollowedBy $ oneOf "[("
return $ do
suff' <- suff
rest' <- rest
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index e43b8a86c..2a5adab22 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
{-
- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
+ Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.MediaWiki
- Copyright : Copyright (C) 2012-2014 John MacFarlane
+ Copyright : Copyright (C) 2012-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -58,21 +58,21 @@ import Data.Maybe (fromMaybe)
import Text.Printf (printf)
import Debug.Trace (trace)
+import Text.Pandoc.Error
+
-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readMediaWiki opts s =
- case runParser parseMediaWiki MWState{ mwOptions = opts
+ readWith parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
, mwCategoryLinks = []
, mwHeaderMap = M.empty
, mwIdentifierList = []
}
- "source" (s ++ "\n") of
- Left err' -> error $ "\nError:\n" ++ show err'
- Right result -> result
+ (s ++ "\n")
data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
@@ -593,11 +593,17 @@ imageOption =
<|> try (many1 (oneOf "x0123456789") <* string "px")
<|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
+collapseUnderscores :: String -> String
+collapseUnderscores [] = []
+collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs)
+collapseUnderscores (x:xs) = x : collapseUnderscores xs
+
+addUnderscores :: String -> String
+addUnderscores = collapseUnderscores . intercalate "_" . words
+
internalLink :: MWParser Inlines
internalLink = try $ do
sym "[["
- let addUnderscores x = let (pref,suff) = break (=='#') x
- in pref ++ intercalate "_" (words suff)
pagename <- unwords . words <$> many (noneOf "|]")
label <- option (B.text pagename) $ char '|' *>
( (mconcat <$> many1 (notFollowedBy (char ']') *> inline))
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index f4dfa62c1..94ea9e3a2 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,9 +1,9 @@
{-
-Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; Either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Native
- Copyright : Copyright (C) 2011-2014 John MacFarlane
+ Copyright : Copyright (C) 2011-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -33,6 +33,9 @@ module Text.Pandoc.Readers.Native ( readNative ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Error
+import Control.Applicative
+
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
-- an inline list, or an inline. Thus, for example,
@@ -44,33 +47,18 @@ import Text.Pandoc.Shared (safeRead)
-- > Pandoc nullMeta [Plain [Str "hi"]]
--
readNative :: String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
-readNative s =
- case safeRead s of
- Just d -> d
- Nothing -> Pandoc nullMeta $ readBlocks s
+ -> Either PandocError Pandoc
+readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s)
-readBlocks :: String -> [Block]
-readBlocks s =
- case safeRead s of
- Just d -> d
- Nothing -> [readBlock s]
+readBlocks :: String -> Either PandocError [Block]
+readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
-readBlock :: String -> Block
-readBlock s =
- case safeRead s of
- Just d -> d
- Nothing -> Plain $ readInlines s
+readBlock :: String -> Either PandocError Block
+readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s)
-readInlines :: String -> [Inline]
-readInlines s =
- case safeRead s of
- Just d -> d
- Nothing -> [readInline s]
+readInlines :: String -> Either PandocError [Inline]
+readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s)
-readInline :: String -> Inline
-readInline s =
- case safeRead s of
- Just d -> d
- Nothing -> error "Cannot parse document"
+readInline :: String -> Either PandocError Inline
+readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 35d01e877..19ddba36b 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleContexts #-}
module Text.Pandoc.Readers.OPML ( readOPML ) where
import Data.Char (toUpper)
import Text.Pandoc.Options
@@ -11,8 +12,11 @@ import Data.Generics
import Data.Monoid
import Control.Monad.State
import Control.Applicative ((<$>), (<$))
+import Data.Default
+import Text.Pandoc.Compat.Except
+import Text.Pandoc.Error
-type OPML = State OPMLState
+type OPML = ExceptT PandocError (State OPMLState)
data OPMLState = OPMLState{
opmlSectionLevel :: Int
@@ -21,17 +25,19 @@ data OPMLState = OPMLState{
, opmlDocDate :: Inlines
} deriving Show
-readOPML :: ReaderOptions -> String -> Pandoc
+instance Default OPMLState where
+ def = OPMLState{ opmlSectionLevel = 0
+ , opmlDocTitle = mempty
+ , opmlDocAuthors = []
+ , opmlDocDate = mempty
+ }
+
+readOPML :: ReaderOptions -> String -> Either PandocError Pandoc
readOPML _ inp = setTitle (opmlDocTitle st')
- $ setAuthors (opmlDocAuthors st')
- $ setDate (opmlDocDate st')
- $ doc $ mconcat bs
- where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp)
- OPMLState{ opmlSectionLevel = 0
- , opmlDocTitle = mempty
- , opmlDocAuthors = []
- , opmlDocDate = mempty
- }
+ . setAuthors (opmlDocAuthors st')
+ . setDate (opmlDocDate st')
+ . doc . mconcat <$> bs
+ where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp)
-- normalize input, consolidating adjacent Text and CRef elements
normalizeTree :: [Content] -> [Content]
@@ -58,14 +64,16 @@ attrValue attr elt =
Just z -> z
Nothing -> ""
-asHtml :: String -> Inlines
-asHtml s = case readHtml def s of
- Pandoc _ [Plain ils] -> fromList ils
- _ -> mempty
+exceptT :: Either PandocError a -> OPML a
+exceptT = either throwError return
+
+asHtml :: String -> OPML Inlines
+asHtml s = (\(Pandoc _ bs) -> case bs of
+ [Plain ils] -> fromList ils
+ _ -> mempty) <$> exceptT (readHtml def s)
-asMarkdown :: String -> Blocks
-asMarkdown s = fromList bs
- where Pandoc _ bs = readMarkdown def s
+asMarkdown :: String -> OPML Blocks
+asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s)
getBlocks :: Element -> OPML Blocks
getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
@@ -82,8 +90,8 @@ parseBlock (Elem e) =
"outline" -> gets opmlSectionLevel >>= sect . (+1)
"?xml" -> return mempty
_ -> getBlocks e
- where sect n = do let headerText = asHtml $ attrValue "text" e
- let noteBlocks = asMarkdown $ attrValue "_note" e
+ where sect n = do headerText <- asHtml $ attrValue "text" e
+ noteBlocks <- asMarkdown $ attrValue "_note" e
modify $ \st -> st{ opmlSectionLevel = n }
bs <- getBlocks e
modify $ \st -> st{ opmlSectionLevel = n - 1 }
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
new file mode 100644
index 000000000..1c8ec51bc
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -0,0 +1,86 @@
+{-# LANGUAGE PatternGuards #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Reader.Odt
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Entry point to the odt reader.
+-}
+
+module Text.Pandoc.Readers.Odt ( readOdt ) where
+
+import Codec.Archive.Zip
+import qualified Text.XML.Light as XML
+
+import qualified Data.ByteString.Lazy as B
+import Data.Monoid ( mempty )
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Options
+import Text.Pandoc.MediaBag
+import qualified Text.Pandoc.UTF8 as UTF8
+
+import Text.Pandoc.Readers.Odt.ContentReader
+import Text.Pandoc.Readers.Odt.StyleReader
+
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+
+--
+readOdt :: ReaderOptions
+ -> B.ByteString
+ -> Either PandocError (Pandoc, MediaBag)
+readOdt _ bytes = case bytesToOdt bytes of
+ Right pandoc -> Right (pandoc , mempty)
+ Left err -> Left err
+
+--
+bytesToOdt :: B.ByteString -> Either PandocError Pandoc
+bytesToOdt bytes = archiveToOdt $ toArchive bytes
+
+--
+archiveToOdt :: Archive -> Either PandocError Pandoc
+archiveToOdt archive
+ | Just contentEntry <- findEntryByPath "content.xml" archive
+ , Just stylesEntry <- findEntryByPath "styles.xml" archive
+ , Just contentElem <- entryToXmlElem contentEntry
+ , Just stylesElem <- entryToXmlElem stylesEntry
+ , Right styles <- chooseMax (readStylesAt stylesElem )
+ (readStylesAt contentElem)
+ , startState <- readerState styles
+ , Right pandoc <- runConverter' read_body
+ startState
+ contentElem
+ = Right pandoc
+
+ | otherwise
+ -- Not very detailed, but I don't think more information would be helpful
+ = Left $ ParseFailure "Couldn't parse odt file."
+
+--
+entryToXmlElem :: Entry -> Maybe XML.Element
+entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
new file mode 100644
index 000000000..310ca028e
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -0,0 +1,253 @@
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Arrows.State
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+An arrow that transports a state. It is in essence a more powerful version of
+the standard state monad. As it is such a simple extension, there are
+other version out there that do exactly the same.
+The implementation is duplicated, though, to add some useful features.
+Most of these might be implemented without access to innards, but it's much
+faster and easier to implement this way.
+-}
+
+module Text.Pandoc.Readers.Odt.Arrows.State where
+
+import Prelude hiding ( foldr, foldl )
+
+import qualified Control.Category as Cat
+import Control.Arrow
+import Control.Monad
+
+import Data.Monoid
+import Data.Foldable
+
+import Text.Pandoc.Readers.Odt.Arrows.Utils
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+
+
+newtype ArrowState state a b = ArrowState
+ { runArrowState :: (state, a) -> (state, b) }
+
+-- | Constructor
+withState :: (state -> a -> (state, b)) -> ArrowState state a b
+withState = ArrowState . uncurry
+
+-- | Constructor
+withState' :: ((state, a) -> (state, b)) -> ArrowState state a b
+withState' = ArrowState
+
+-- | Constructor
+modifyState :: (state -> state ) -> ArrowState state a a
+modifyState = ArrowState . first
+
+-- | Constructor
+ignoringState :: ( a -> b ) -> ArrowState state a b
+ignoringState = ArrowState . second
+
+-- | Constructor
+fromState :: (state -> (state, b)) -> ArrowState state a b
+fromState = ArrowState . (.fst)
+
+-- | Constructor
+extractFromState :: (state -> b ) -> ArrowState state x b
+extractFromState f = ArrowState $ \(state,_) -> (state, f state)
+
+-- | Constructor
+withUnchangedState :: (state -> a -> b ) -> ArrowState state a b
+withUnchangedState f = ArrowState $ \(state,a) -> (state, f state a)
+
+-- | Constructor
+tryModifyState :: (state -> Either f state)
+ -> ArrowState state a (Either f a)
+tryModifyState f = ArrowState $ \(state,a)
+ -> (state,).Left ||| (,Right a) $ f state
+
+instance Cat.Category (ArrowState s) where
+ id = ArrowState id
+ arrow2 . arrow1 = ArrowState $ (runArrowState arrow2).(runArrowState arrow1)
+
+instance Arrow (ArrowState state) where
+ arr = ignoringState
+ first a = ArrowState $ \(s,(aF,aS))
+ -> second (,aS) $ runArrowState a (s,aF)
+ second a = ArrowState $ \(s,(aF,aS))
+ -> second (aF,) $ runArrowState a (s,aS)
+
+instance ArrowChoice (ArrowState state) where
+ left a = ArrowState $ \(s,e) -> case e of
+ Left l -> second Left $ runArrowState a (s,l)
+ Right r -> (s, Right r)
+ right a = ArrowState $ \(s,e) -> case e of
+ Left l -> (s, Left l)
+ Right r -> second Right $ runArrowState a (s,r)
+
+instance ArrowLoop (ArrowState state) where
+ loop a = ArrowState $ \(s, x)
+ -> let (s', (x', _d)) = runArrowState a (s, (x, _d))
+ in (s', x')
+
+instance ArrowApply (ArrowState state) where
+ app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b)
+
+
+-- | Embedding of a state arrow in a state arrow with a different state type.
+switchState :: (s -> s') -> (s' -> s) -> ArrowState s' x y -> ArrowState s x y
+switchState there back a = ArrowState $ first there
+ >>> runArrowState a
+ >>> first back
+
+-- | Lift a state arrow to modify the state of an arrow
+-- with a different state type.
+liftToState :: (s -> s') -> ArrowState s' s s -> ArrowState s x x
+liftToState unlift a = modifyState $ unlift &&& id
+ >>> runArrowState a
+ >>> snd
+
+-- | Switches the type of the state temporarily.
+-- Drops the intermediate result state, behaving like the identity arrow,
+-- save for side effects in the state.
+withSubState :: ArrowState s x s2 -> ArrowState s2 s s -> ArrowState s x x
+withSubState unlift a = keepingTheValue (withSubState unlift a) >>^ fst
+
+-- | Switches the type of the state temporarily.
+-- Returns the resulting sub-state.
+withSubState' :: ArrowState s x s' -> ArrowState s' s s -> ArrowState s x s'
+withSubState' unlift a = ArrowState $ runArrowState unlift
+ >>> switch
+ >>> runArrowState a
+ >>> switch
+ where switch (x,y) = (y,x)
+
+-- | Switches the type of the state temporarily.
+-- Drops the intermediate result state, behaving like a fallible
+-- identity arrow, save for side effects in the state.
+withSubStateF :: ArrowState s x (Either f s')
+ -> ArrowState s' s (Either f s )
+ -> ArrowState s x (Either f x )
+withSubStateF unlift a = keepingTheValue (withSubStateF' unlift a)
+ >>^ spreadChoice
+ >>^ fmap fst
+
+-- | Switches the type of the state temporarily.
+-- Returns the resulting sub-state.
+withSubStateF' :: ArrowState s x (Either f s')
+ -> ArrowState s' s (Either f s )
+ -> ArrowState s x (Either f s')
+withSubStateF' unlift a = ArrowState go
+ where go p@(s,_) = tryRunning unlift
+ ( tryRunning a (second Right) )
+ p
+ where tryRunning a' b v = case runArrowState a' v of
+ (_ , Left f) -> (s, Left f)
+ (x , Right y) -> b (y,x)
+
+-- | Fold a state arrow through something 'Foldable'. Collect the results
+-- in a 'Monoid'.
+-- Intermediate form of a fold between one with "only" a 'Monoid'
+-- and one with any function.
+foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m
+foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f
+ where a' x (s',m) = second (m <>) $ runArrowState a (s',x)
+
+-- | Fold a state arrow through something 'Foldable'. Collect the results
+-- in a 'Monoid'.
+-- Intermediate form of a fold between one with "only" a 'Monoid'
+-- and one with any function.
+foldSL :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m
+foldSL a = ArrowState $ \(s,f) -> foldl a' (s,mempty) f
+ where a' (s',m) x = second (m <>) $ runArrowState a (s',x)
+
+-- | Fold a fallible state arrow through something 'Foldable'. Collect the
+-- results in a 'Monoid'.
+-- Intermediate form of a fold between one with "only" a 'Monoid'
+-- and one with any function.
+-- If the iteration fails, the state will be reset to the initial one.
+foldS' :: (Foldable f, Monoid m)
+ => ArrowState s x (Either e m)
+ -> ArrowState s (f x) (Either e m)
+foldS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mempty) f
+ where a' s x (s',Right m) = case runArrowState a (s',x) of
+ (s'',Right m') -> (s'', Right (m <> m'))
+ (_ ,Left e ) -> (s , Left e)
+ a' _ _ e = e
+
+-- | Fold a fallible state arrow through something 'Foldable'. Collect the
+-- results in a 'Monoid'.
+-- Intermediate form of a fold between one with "only" a 'Monoid'
+-- and one with any function.
+-- If the iteration fails, the state will be reset to the initial one.
+foldSL' :: (Foldable f, Monoid m)
+ => ArrowState s x (Either e m)
+ -> ArrowState s (f x) (Either e m)
+foldSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mempty) f
+ where a' s (s',Right m) x = case runArrowState a (s',x) of
+ (s'',Right m') -> (s'', Right (m <> m'))
+ (_ ,Left e ) -> (s , Left e)
+ a' _ e _ = e
+
+-- | Fold a state arrow through something 'Foldable'. Collect the results in a
+-- 'MonadPlus'.
+iterateS :: (Foldable f, MonadPlus m)
+ => ArrowState s x y
+ -> ArrowState s (f x) (m y)
+iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
+ where a' x (s',m) = second ((mplus m).return) $ runArrowState a (s',x)
+
+-- | Fold a state arrow through something 'Foldable'. Collect the results in a
+-- 'MonadPlus'.
+iterateSL :: (Foldable f, MonadPlus m)
+ => ArrowState s x y
+ -> ArrowState s (f x) (m y)
+iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
+ where a' (s',m) x = second ((mplus m).return) $ runArrowState a (s',x)
+
+
+-- | Fold a fallible state arrow through something 'Foldable'.
+-- Collect the results in a 'MonadPlus'.
+-- If the iteration fails, the state will be reset to the initial one.
+iterateS' :: (Foldable f, MonadPlus m)
+ => ArrowState s x (Either e y )
+ -> ArrowState s (f x) (Either e (m y))
+iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f
+ where a' s x (s',Right m) = case runArrowState a (s',x) of
+ (s'',Right m') -> (s'',Right $ mplus m $ return m')
+ (_ ,Left e ) -> (s ,Left e )
+ a' _ _ e = e
+
+-- | Fold a fallible state arrow through something 'Foldable'.
+-- Collect the results in a 'MonadPlus'.
+-- If the iteration fails, the state will be reset to the initial one.
+iterateSL' :: (Foldable f, MonadPlus m)
+ => ArrowState s x (Either e y )
+ -> ArrowState s (f x) (Either e (m y))
+iterateSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mzero) f
+ where a' s (s',Right m) x = case runArrowState a (s',x) of
+ (s'',Right m') -> (s'',Right $ mplus m $ return m')
+ (_ ,Left e ) -> (s ,Left e )
+ a' _ e _ = e
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
new file mode 100644
index 000000000..9710973b3
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
@@ -0,0 +1,497 @@
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Arrows.Utils
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Utility functions for Arrows (Kleisli monads).
+
+Some general notes on notation:
+
+* "^" is meant to stand for a pure function that is lifted into an arrow
+based on its usage for that purpose in "Control.Arrow".
+* "?" is meant to stand for the usage of a 'FallibleArrow' or a pure function
+with an equivalent return value.
+* "_" stands for the dropping of a value.
+-}
+
+-- We export everything
+module Text.Pandoc.Readers.Odt.Arrows.Utils where
+
+import Control.Arrow
+import Control.Monad ( join, MonadPlus(..) )
+
+import Data.Monoid
+import qualified Data.Foldable as F
+
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Readers.Odt.Generic.Utils
+
+
+and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c')
+and2 = (&&&)
+
+and3 :: (Arrow a)
+ => a b c0->a b c1->a b c2
+ -> a b (c0,c1,c2 )
+and4 :: (Arrow a)
+ => a b c0->a b c1->a b c2->a b c3
+ -> a b (c0,c1,c2,c3 )
+and5 :: (Arrow a)
+ => a b c0->a b c1->a b c2->a b c3->a b c4
+ -> a b (c0,c1,c2,c3,c4 )
+and6 :: (Arrow a)
+ => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5
+ -> a b (c0,c1,c2,c3,c4,c5 )
+and7 :: (Arrow a)
+ => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6
+ -> a b (c0,c1,c2,c3,c4,c5,c6 )
+and8 :: (Arrow a)
+ => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6->a b c7
+ -> a b (c0,c1,c2,c3,c4,c5,c6,c7)
+
+and3 a b c = (and2 a b ) &&& c
+ >>^ \((z,y ) , x) -> (z,y,x )
+and4 a b c d = (and3 a b c ) &&& d
+ >>^ \((z,y,x ) , w) -> (z,y,x,w )
+and5 a b c d e = (and4 a b c d ) &&& e
+ >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v )
+and6 a b c d e f = (and5 a b c d e ) &&& f
+ >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u )
+and7 a b c d e f g = (and6 a b c d e f ) &&& g
+ >>^ \((z,y,x,w,v,u ) , t) -> (z,y,x,w,v,u,t )
+and8 a b c d e f g h = (and7 a b c d e f g) &&& h
+ >>^ \((z,y,x,w,v,u,t) , s) -> (z,y,x,w,v,u,t,s)
+
+liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z
+liftA2 f a b = a &&& b >>^ uncurry f
+
+liftA3 :: (Arrow a) => (z->y->x -> r)
+ -> a b z->a b y->a b x
+ -> a b r
+liftA4 :: (Arrow a) => (z->y->x->w -> r)
+ -> a b z->a b y->a b x->a b w
+ -> a b r
+liftA5 :: (Arrow a) => (z->y->x->w->v -> r)
+ -> a b z->a b y->a b x->a b w->a b v
+ -> a b r
+liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r)
+ -> a b z->a b y->a b x->a b w->a b v->a b u
+ -> a b r
+liftA7 :: (Arrow a) => (z->y->x->w->v->u->t -> r)
+ -> a b z->a b y->a b x->a b w->a b v->a b u->a b t
+ -> a b r
+liftA8 :: (Arrow a) => (z->y->x->w->v->u->t->s -> r)
+ -> a b z->a b y->a b x->a b w->a b v->a b u->a b t->a b s
+ -> a b r
+
+liftA3 fun a b c = and3 a b c >>^ uncurry3 fun
+liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun
+liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun
+liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun
+liftA7 fun a b c d e f g = and7 a b c d e f g >>^ uncurry7 fun
+liftA8 fun a b c d e f g h = and8 a b c d e f g h >>^ uncurry8 fun
+
+liftA :: (Arrow a) => (y -> z) -> a b y -> a b z
+liftA fun a = a >>^ fun
+
+
+-- | Duplicate a value to subsequently feed it into different arrows.
+-- Can almost always be replaced with '(&&&)', 'keepingTheValue',
+-- or even '(|||)'.
+-- Aequivalent to
+-- > returnA &&& returnA
+duplicate :: (Arrow a) => a b (b,b)
+duplicate = arr $ join (,)
+
+-- | Lifts the combination of two values into an arrow.
+joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z
+joinOn = arr.uncurry
+
+-- | Applies a function to the uncurried result-pair of an arrow-application.
+-- (The §-symbol was chosen to evoke an association with pairs through the
+-- shared first character)
+(>>§) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d
+a >>§ f = a >>^ uncurry f
+
+-- | '(>>§)' with its arguments flipped
+(§<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d
+(§<<) = flip (>>§)
+
+-- | Precomposition with an uncurried function
+(§>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r
+f §>> a = uncurry f ^>> a
+
+-- | Precomposition with an uncurried function (right to left variant)
+(<<§) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r
+(<<§) = flip (§>>)
+
+infixr 2 >>§, §<<, §>>, <<§
+
+
+-- | Duplicate a value and apply an arrow to the second instance.
+-- Aequivalent to
+-- > \a -> duplicate >>> second a
+-- or
+-- > \a -> returnA &&& a
+keepingTheValue :: (Arrow a) => a b c -> a b (b,c)
+keepingTheValue a = returnA &&& a
+
+-- | Duplicate a value and apply an arrow to the first instance.
+-- Aequivalent to
+-- > \a -> duplicate >>> first a
+-- or
+-- > \a -> a &&& returnA
+keepingTheValue' :: (Arrow a) => a b c -> a b (c,b)
+keepingTheValue' a = a &&& returnA
+
+-- | 'bind' from the "Maybe"-Monad lifted into an 'ArrowChoice'.
+-- Actually, it's the more complex '(>=>)', because 'bind' alone does not
+-- combine as nicely in arrow form.
+-- The current implementation is not the most efficient one, because it can
+-- not return directly if a 'Nothing' is encountered. That in turn follows
+-- from the type system, as 'Nothing' has an "invisible" type parameter that
+-- can not be dropped early.
+--
+-- Also, there probably is a way to generalize this to other monads
+-- or applicatives, but I'm leaving that as an exercise to the reader.
+-- I have a feeling there is a new Arrow-typeclass to be found that is less
+-- restrictive than 'ArrowApply'. If it is already out there,
+-- I have not seen it yet. ('ArrowPlus' for example is not general enough.)
+(>>>=) :: (ArrowChoice a) => a x (Maybe b) -> a b (Maybe c) -> a x (Maybe c)
+a1 >>>= a2 = a1 >>> maybeToChoice >>> right a2 >>> choiceToMaybe >>^ join
+
+infixr 2 >>>=
+
+-- | 'mplus' Lifted into an arrow. No 'ArrowPlus' required.
+-- (But still different from a true bind)
+(>++<) :: (Arrow a, MonadPlus m) => a x (m b) -> a x (m b) -> a x (m b)
+(>++<) = liftA2 mplus
+
+-- | Left-compose with a pure function
+leftLift :: (ArrowChoice a) => (l -> l') -> a (Either l r) (Either l' r)
+leftLift = left.arr
+
+-- | Right-compose with a pure function
+rightLift :: (ArrowChoice a) => (r -> r') -> a (Either l r) (Either l r')
+rightLift = right.arr
+
+
+( ^+++ ) :: (ArrowChoice a) => (b -> c) -> a b' c' -> a (Either b b') (Either c c')
+( +++^ ) :: (ArrowChoice a) => a b c -> (b' -> c') -> a (Either b b') (Either c c')
+( ^+++^ ) :: (ArrowChoice a) => (b -> c) -> (b' -> c') -> a (Either b b') (Either c c')
+
+l ^+++ r = leftLift l >>> right r
+l +++^ r = left l >>> rightLift r
+l ^+++^ r = leftLift l >>> rightLift r
+
+infixr 2 ^+++, +++^, ^+++^
+
+( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d
+( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d
+( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d
+
+l ^||| r = arr l ||| r
+l |||^ r = l ||| arr r
+l ^|||^ r = arr l ||| arr r
+
+infixr 2 ^||| , |||^, ^|||^
+
+( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c')
+( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c')
+( ^&&&^ ) :: (Arrow a) => (b -> c) -> (b -> c') -> a b (c,c')
+
+l ^&&& r = arr l &&& r
+l &&&^ r = l &&& arr r
+l ^&&&^ r = arr l &&& arr r
+
+infixr 3 ^&&&, &&&^, ^&&&^
+
+( ^*** ) :: (Arrow a) => (b -> c) -> a b' c' -> a (b,b') (c,c')
+( ***^ ) :: (Arrow a) => a b c -> (b' -> c') -> a (b,b') (c,c')
+( ^***^ ) :: (Arrow a) => (b -> c) -> (b' -> c') -> a (b,b') (c,c')
+
+l ^*** r = arr l *** r
+l ***^ r = l *** arr r
+l ^***^ r = arr l *** arr r
+
+infixr 3 ^***, ***^, ^***^
+
+-- | A version of
+--
+-- >>> \p -> arr (\x -> if p x the Right x else Left x)
+--
+-- but with p being an arrow
+choose :: (ArrowChoice a) => a b Bool -> a b (Either b b)
+choose checkValue = keepingTheValue checkValue >>^ select
+ where select (x,True ) = Right x
+ select (x,False ) = Left x
+
+-- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@.
+choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r)
+choiceToMaybe = arr eitherToMaybe
+
+-- | Converts @Nothing@ into @Left ()@ and @Just a@ into @Right a@.
+maybeToChoice :: (ArrowChoice a) => a (Maybe b) (Fallible b)
+maybeToChoice = arr maybeToEither
+
+-- | Lifts a constant value into an arrow
+returnV :: (Arrow a) => c -> a x c
+returnV = arr.const
+
+-- | 'returnA' dropping everything
+returnA_ :: (Arrow a) => a _b ()
+returnA_ = returnV ()
+
+-- | Wrapper for an arrow that can be evaluated im parallel. All
+-- Arrows can be evaluated in parallel, as long as they return a
+-- monoid.
+newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c }
+ deriving (Eq, Ord, Show)
+
+instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where
+ mempty = CoEval $ returnV mempty
+ (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>§ mappend
+
+-- | Evaluates a collection of arrows in a parallel fashion.
+--
+-- This is in essence a fold of '(&&&)' over the collection,
+-- so the actual execution order and parallelity depends on the
+-- implementation of '(&&&)' in the arrow in question.
+-- The default implementation of '(&&&)' for example keeps the
+-- order as given in the collection.
+--
+-- This function can be seen as a generalization of
+-- 'Control.Applicative.sequenceA' to arrows or as an alternative to
+-- a fold with 'Control.Applicative.WrappedArrow', which
+-- substitutes the monoid with function application.
+--
+coEval :: (Arrow a, F.Foldable f, Monoid m) => f (a b m) -> a b m
+coEval = evalParallelArrow . (F.foldMap CoEval)
+
+-- | Defines Left as failure, Right as success
+type FallibleArrow a input failure success = a input (Either failure success)
+
+type ReFallibleArrow a failure success success'
+ = FallibleArrow a (Either failure success) failure success'
+
+-- | Wrapper for fallible arrows. Fallible arrows are all arrows that return
+-- an Either value where left is a faliure and right is a success value.
+newtype AlternativeArrow a input failure success
+ = TryArrow { evalAlternativeArrow :: FallibleArrow a input failure success }
+
+
+instance (ArrowChoice a, Monoid failure)
+ => Monoid (AlternativeArrow a input failure success) where
+ mempty = TryArrow $ returnV $ Left mempty
+ (TryArrow a) `mappend` (TryArrow b)
+ = TryArrow $ a &&& b
+ >>^ \(a',~b')
+ -> ( (\a'' -> left (mappend a'') b') ||| Right )
+ a'
+
+-- | Evaluates a collection of fallible arrows, trying each one in succession.
+-- Left values are interpreted as failures, right values as successes.
+--
+-- The evaluation is stopped once an arrow succeeds.
+-- Up to that point, all failures are collected in the failure-monoid.
+-- Note that '()' is a monoid, and thus can serve as a failure-collector if
+-- you are uninterested in the exact failures.
+--
+-- This is in essence a fold of '(&&&)' over the collection, enhanced with a
+-- little bit of repackaging, so the actual execution order depends on the
+-- implementation of '(&&&)' in the arrow in question.
+-- The default implementation of '(&&&)' for example keeps the
+-- order as given in the collection.
+--
+tryArrows :: (ArrowChoice a, F.Foldable f, Monoid failure)
+ => f (FallibleArrow a b failure success)
+ -> FallibleArrow a b failure success
+tryArrows = evalAlternativeArrow . (F.foldMap TryArrow)
+
+--
+liftSuccess :: (ArrowChoice a)
+ => (success -> success')
+ -> ReFallibleArrow a failure success success'
+liftSuccess = rightLift
+
+--
+liftAsSuccess :: (ArrowChoice a)
+ => a x success
+ -> FallibleArrow a x failure success
+liftAsSuccess a = a >>^ Right
+
+--
+asFallibleArrow :: (ArrowChoice a)
+ => a x success
+ -> FallibleArrow a x failure success
+asFallibleArrow a = a >>^ Right
+
+-- | Raises an error into a 'ReFallibleArrow' if the arrow is already in
+-- "error mode"
+liftError :: (ArrowChoice a, Monoid failure)
+ => failure
+ -> ReFallibleArrow a failure success success
+liftError e = leftLift (e <>)
+
+-- | Raises an error into a 'FallibleArrow', droping both the arrow input
+-- and any previously stored error value.
+_raiseA :: (ArrowChoice a)
+ => failure
+ -> FallibleArrow a x failure success
+_raiseA e = returnV (Left e)
+
+-- | Raises an empty error into a 'FallibleArrow', droping both the arrow input
+-- and any previously stored error value.
+_raiseAEmpty :: (ArrowChoice a, Monoid failure)
+ => FallibleArrow a x failure success
+_raiseAEmpty = _raiseA mempty
+
+-- | Raises an error into a 'ReFallibleArrow', possibly appending the new error
+-- to an existing one
+raiseA :: (ArrowChoice a, Monoid failure)
+ => failure
+ -> ReFallibleArrow a failure success success
+raiseA e = arr $ Left.(either (<> e) (const e))
+
+-- | Raises an empty error into a 'ReFallibleArrow'. If there already is an
+-- error, nothing changes.
+-- (Note that this function is only aequivalent to @raiseA mempty@ iff the
+-- failure monoid follows the monoid laws.)
+raiseAEmpty :: (ArrowChoice a, Monoid failure)
+ => ReFallibleArrow a failure success success
+raiseAEmpty = arr (fromRight (const mempty) >>> Left)
+
+
+-- | Execute the second arrow if the first succeeds
+(>>?) :: (ArrowChoice a, Monoid failure)
+ => FallibleArrow a x failure success
+ -> FallibleArrow a success failure success'
+ -> FallibleArrow a x failure success'
+a >>? b = a >>> Left ^||| b
+
+-- | Execute the lifted second arrow if the first succeeds
+(>>?^) :: (ArrowChoice a, Monoid failure)
+ => FallibleArrow a x failure success
+ -> (success -> success')
+ -> FallibleArrow a x failure success'
+a >>?^ f = a >>^ Left ^|||^ Right . f
+
+-- | Execute the lifted second arrow if the first succeeds
+(>>?^?) :: (ArrowChoice a, Monoid failure)
+ => FallibleArrow a x failure success
+ -> (success -> Either failure success')
+ -> FallibleArrow a x failure success'
+a >>?^? b = a >>> Left ^|||^ b
+
+-- | Execute the second arrow if the lifted first arrow succeeds
+(^>>?) :: (ArrowChoice a, Monoid failure)
+ => (x -> Either failure success)
+ -> FallibleArrow a success failure success'
+ -> FallibleArrow a x failure success'
+a ^>>? b = a ^>> Left ^||| b
+
+-- | Execute the lifted second arrow if the lifted first arrow succeeds
+(^>>?^) :: (ArrowChoice a, Monoid failure)
+ => (x -> Either failure success)
+ -> (success -> success')
+ -> FallibleArrow a x failure success'
+a ^>>?^ f = arr $ a >>> right f
+
+-- | Execute the lifted second arrow if the lifted first arrow succeeds
+(^>>?^?) :: (ArrowChoice a, Monoid failure)
+ => (x -> Either failure success)
+ -> (success -> Either failure success')
+ -> FallibleArrow a x failure success'
+a ^>>?^? f = a ^>> Left ^|||^ f
+
+-- | Execute the second, non-fallible arrow if the first arrow succeeds
+(>>?!) :: (ArrowChoice a, Monoid failure)
+ => FallibleArrow a x failure success
+ -> a success success'
+ -> FallibleArrow a x failure success'
+a >>?! f = a >>> right f
+
+---
+(>>?§) :: (ArrowChoice a, Monoid f)
+ => FallibleArrow a x f (b,b')
+ -> (b -> b' -> c)
+ -> FallibleArrow a x f c
+a >>?§ f = a >>?^ (uncurry f)
+
+---
+(^>>?§) :: (ArrowChoice a, Monoid f)
+ => (x -> Either f (b,b'))
+ -> (b -> b' -> c)
+ -> FallibleArrow a x f c
+a ^>>?§ f = arr a >>?^ (uncurry f)
+
+---
+(>>?§?) :: (ArrowChoice a, Monoid f)
+ => FallibleArrow a x f (b,b')
+ -> (b -> b' -> (Either f c))
+ -> FallibleArrow a x f c
+a >>?§? f = a >>?^? (uncurry f)
+
+infixr 1 >>?, >>?^, >>?^?
+infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?!
+infixr 1 >>?§, ^>>?§, >>?§?
+
+-- | Keep values that are Right, replace Left values by a constant.
+ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v
+ifFailedUse v = arr $ either (const v) id
+
+-- | '(&&)' lifted into an arrow
+(<&&>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool
+(<&&>) = liftA2 (&&)
+
+-- | '(||)' lifted into an arrow
+(<||>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool
+(<||>) = liftA2 (||)
+
+-- | An equivalent of '(&&)' in a fallible arrow
+(>&&<) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f s
+ -> FallibleArrow a x f s'
+ -> FallibleArrow a x f (s,s')
+(>&&<) = liftA2 chooseMin
+
+-- | An equivalent of '(||)' in some forms of fallible arrows
+(>||<) :: (ArrowChoice a, Monoid f, Monoid s) => FallibleArrow a x f s
+ -> FallibleArrow a x f s
+ -> FallibleArrow a x f s
+(>||<) = liftA2 chooseMax
+
+-- | An arrow version of a short-circuit (<|>)
+ifFailedDo :: (ArrowChoice a)
+ => FallibleArrow a x f y
+ -> FallibleArrow a x f y
+ -> FallibleArrow a x f y
+ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right)
+ where repackage (x , Left _) = Left x
+ repackage (_ , Right y) = Right y
+
+infixr 4 <&&>, <||>, >&&<, >||<
+infixr 1 `ifFailedDo`
+
+
diff --git a/src/Text/Pandoc/Readers/Odt/Base.hs b/src/Text/Pandoc/Readers/Odt/Base.hs
new file mode 100644
index 000000000..1f095bade
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Base.hs
@@ -0,0 +1,43 @@
+{-# LANGUAGE PatternGuards #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Base
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Core types of the odt reader.
+-}
+
+module Text.Pandoc.Readers.Odt.Base where
+
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+import Text.Pandoc.Readers.Odt.Namespaces
+
+type OdtConverterState s = XMLConverterState Namespace s
+
+type XMLReader s a b = FallibleXMLConverter Namespace s a b
+
+type XMLReaderSafe s a b = XMLConverter Namespace s a b
+
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
new file mode 100644
index 000000000..9bb585b8e
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -0,0 +1,790 @@
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE RecordWildCards #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.ContentReader
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+The core of the odt reader that converts odt features into Pandoc types.
+-}
+
+module Text.Pandoc.Readers.Odt.ContentReader
+( readerState
+, read_body
+) where
+
+import Control.Arrow
+import Control.Applicative hiding ( liftA, liftA2, liftA3 )
+
+import qualified Data.Map as M
+import Data.List ( find )
+import Data.Monoid
+import Data.Maybe
+
+import qualified Text.XML.Light as XML
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder
+import Text.Pandoc.Shared
+
+import Text.Pandoc.Readers.Odt.Base
+import Text.Pandoc.Readers.Odt.Namespaces
+import Text.Pandoc.Readers.Odt.StyleReader
+
+import Text.Pandoc.Readers.Odt.Arrows.Utils
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Readers.Odt.Generic.Utils
+
+
+--------------------------------------------------------------------------------
+-- State
+--------------------------------------------------------------------------------
+
+type Anchor = String
+
+data ReaderState
+ = ReaderState { -- | A collection of styles read somewhere else.
+ -- It is only queried here, not modified.
+ styleSet :: Styles
+ -- | A stack of the styles of parent elements.
+ -- Used to look up inherited style properties.
+ , styleTrace :: [Style]
+ -- | Keeps track of the current depth in nested lists
+ , currentListLevel :: ListLevel
+ -- | Lists may provide their own style, but they don't have
+ -- to. If they do not, the style of a parent list may be used
+ -- or even a default list style from the paragraph style.
+ -- This value keeps track of the closest list style there
+ -- currently is.
+ , currentListStyle :: Maybe ListStyle
+ -- | A map from internal anchor names to "pretty" ones.
+ -- The mapping is a purely cosmetic one.
+ , bookmarkAnchors :: M.Map Anchor Anchor
+
+-- , sequences
+-- , trackedChangeIDs
+ }
+ deriving ( Show )
+
+readerState :: Styles -> ReaderState
+readerState styles = ReaderState styles [] 0 Nothing M.empty
+
+--
+pushStyle' :: Style -> ReaderState -> ReaderState
+pushStyle' style state = state { styleTrace = style : styleTrace state }
+
+--
+popStyle' :: ReaderState -> ReaderState
+popStyle' state = case styleTrace state of
+ _:trace -> state { styleTrace = trace }
+ _ -> state
+
+--
+modifyListLevel :: (ListLevel -> ListLevel) -> (ReaderState -> ReaderState)
+modifyListLevel f state = state { currentListLevel = f (currentListLevel state) }
+
+--
+shiftListLevel :: ListLevel -> (ReaderState -> ReaderState)
+shiftListLevel diff = modifyListLevel (+ diff)
+
+--
+swapCurrentListStyle :: Maybe ListStyle -> ReaderState
+ -> (ReaderState, Maybe ListStyle)
+swapCurrentListStyle mListStyle state = ( state { currentListStyle = mListStyle }
+ , currentListStyle state
+ )
+
+--
+lookupPrettyAnchor :: Anchor -> ReaderState -> Maybe Anchor
+lookupPrettyAnchor anchor ReaderState{..} = M.lookup anchor bookmarkAnchors
+
+--
+putPrettyAnchor :: Anchor -> Anchor -> ReaderState -> ReaderState
+putPrettyAnchor ugly pretty state@ReaderState{..}
+ = state { bookmarkAnchors = M.insert ugly pretty bookmarkAnchors }
+
+--
+usedAnchors :: ReaderState -> [Anchor]
+usedAnchors ReaderState{..} = M.elems bookmarkAnchors
+
+--------------------------------------------------------------------------------
+-- Reader type and associated tools
+--------------------------------------------------------------------------------
+
+type OdtReader a b = XMLReader ReaderState a b
+
+type OdtReaderSafe a b = XMLReaderSafe ReaderState a b
+
+-- | Extract something from the styles
+fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b
+fromStyles f = keepingTheValue
+ (getExtraState >>^ styleSet)
+ >>§ f
+
+--
+getStyleByName :: OdtReader StyleName Style
+getStyleByName = fromStyles lookupStyle >>^ maybeToChoice
+
+--
+findStyleFamily :: OdtReader Style StyleFamily
+findStyleFamily = fromStyles getStyleFamily >>^ maybeToChoice
+
+--
+lookupListStyle :: OdtReader StyleName ListStyle
+lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice
+
+--
+switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle)
+switchCurrentListStyle = keepingTheValue getExtraState
+ >>§ swapCurrentListStyle
+ >>> first setExtraState
+ >>^ snd
+
+--
+pushStyle :: OdtReaderSafe Style Style
+pushStyle = keepingTheValue (
+ ( keepingTheValue getExtraState
+ >>§ pushStyle'
+ )
+ >>> setExtraState
+ )
+ >>^ fst
+
+--
+popStyle :: OdtReaderSafe x x
+popStyle = keepingTheValue (
+ getExtraState
+ >>> arr popStyle'
+ >>> setExtraState
+ )
+ >>^ fst
+
+--
+getCurrentListLevel :: OdtReaderSafe _x ListLevel
+getCurrentListLevel = getExtraState >>^ currentListLevel
+
+
+type AnchorPrefix = String
+
+-- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a
+-- unique identifier but without assuming that the id should be for a header.
+-- Second argument is a list of already used identifiers.
+uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor
+uniqueIdentFrom baseIdent usedIdents =
+ let numIdent n = baseIdent ++ "-" ++ show n
+ in if baseIdent `elem` usedIdents
+ then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
+ Just x -> numIdent x
+ Nothing -> baseIdent -- if we have more than 60,000, allow repeats
+ else baseIdent
+
+-- | First argument: basis for a new "pretty" anchor if none exists yet
+-- Second argument: a key ("ugly" anchor)
+-- Returns: saved "pretty" anchor or created new one
+getPrettyAnchor :: OdtReaderSafe (AnchorPrefix, Anchor) Anchor
+getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do
+ state <- getExtraState -< ()
+ case lookupPrettyAnchor uglyAnchor state of
+ Just prettyAnchor -> returnA -< prettyAnchor
+ Nothing -> do
+ let newPretty = uniqueIdentFrom baseIdent (usedAnchors state)
+ modifyExtraState (putPrettyAnchor uglyAnchor newPretty) -<< newPretty
+
+-- | Input: basis for a new header anchor
+-- Ouput: saved new anchor
+getHeaderAnchor :: OdtReaderSafe Inlines Anchor
+getHeaderAnchor = proc title -> do
+ state <- getExtraState -< ()
+ let anchor = uniqueIdent (toList title) (usedAnchors state)
+ modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor
+
+
+--------------------------------------------------------------------------------
+-- Working with styles
+--------------------------------------------------------------------------------
+
+--
+readStyleByName :: OdtReader _x Style
+readStyleByName = findAttr NsText "style-name" >>? getStyleByName
+
+--
+isStyleToTrace :: OdtReader Style Bool
+isStyleToTrace = findStyleFamily >>?^ (==FaText)
+
+--
+withNewStyle :: OdtReaderSafe x Inlines -> OdtReaderSafe x Inlines
+withNewStyle a = proc x -> do
+ fStyle <- readStyleByName -< ()
+ case fStyle of
+ Right style -> do
+ mFamily <- arr styleFamily -< style
+ fTextProps <- arr ( maybeToChoice
+ . textProperties
+ . styleProperties
+ ) -< style
+ case fTextProps of
+ Right textProps -> do
+ state <- getExtraState -< ()
+ let triple = (state, textProps, mFamily)
+ modifier <- arr modifierFromStyleDiff -< triple
+ fShouldTrace <- isStyleToTrace -< style
+ case fShouldTrace of
+ Right shouldTrace -> do
+ if shouldTrace
+ then do
+ pushStyle -< style
+ inlines <- a -< x
+ popStyle -< ()
+ arr modifier -<< inlines
+ else
+ -- In case anything goes wrong
+ a -< x
+ Left _ -> a -< x
+ Left _ -> a -< x
+ Left _ -> a -< x
+
+
+type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily)
+type InlineModifier = Inlines -> Inlines
+
+-- | Given data about the local style changes, calculates how to modify
+-- an instance of 'Inlines'
+modifierFromStyleDiff :: PropertyTriple -> InlineModifier
+modifierFromStyleDiff propertyTriple =
+ composition $
+ (getVPosModifier propertyTriple)
+ : map (first ($ propertyTriple) >>> ifThen_else ignore)
+ [ (hasEmphChanged , emph )
+ , (hasChanged isStrong , strong )
+ , (hasChanged strikethrough , strikeout )
+ ]
+ where
+ ifThen_else else' (if',then') = if if' then then' else else'
+
+ ignore = id :: InlineModifier
+
+ getVPosModifier :: PropertyTriple -> InlineModifier
+ getVPosModifier triple@(_,textProps,_) =
+ let getVPos = Just . verticalPosition
+ in case lookupPreviousValueM getVPos triple of
+ Nothing -> ignore
+ Just oldVPos -> getVPosModifier' (oldVPos,verticalPosition textProps)
+
+ getVPosModifier' (oldVPos , newVPos ) | oldVPos == newVPos = ignore
+ getVPosModifier' ( _ , VPosSub ) = subscript
+ getVPosModifier' ( _ , VPosSuper ) = superscript
+ getVPosModifier' ( _ , _ ) = ignore
+
+ hasEmphChanged :: PropertyTriple -> Bool
+ hasEmphChanged = swing any [ hasChanged isEmphasised
+ , hasChangedM pitch
+ , hasChanged underline
+ ]
+
+ hasChanged property triple@(_, property -> newProperty, _) =
+ maybe True (/=newProperty) (lookupPreviousValue property triple)
+
+ hasChangedM property triple@(_, textProps,_) =
+ fromMaybe False $ (/=) <$> property textProps <*> lookupPreviousValueM property triple
+
+ lookupPreviousValue f = lookupPreviousStyleValue ((fmap f).textProperties)
+
+ lookupPreviousValueM f = lookupPreviousStyleValue ((f =<<).textProperties)
+
+ lookupPreviousStyleValue f (ReaderState{..},_,mFamily)
+ = ( findBy f $ extendedStylePropertyChain styleTrace styleSet )
+ <|> ( f =<< fmap (lookupDefaultStyle' styleSet) mFamily )
+
+
+type ParaModifier = Blocks -> Blocks
+
+_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ :: Int
+_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ :: Int
+_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ = 5
+_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ = 5
+
+-- | Returns either 'id' or 'blockQuote' depending on the current indentation
+getParaModifier :: Style -> ParaModifier
+getParaModifier Style{..} | Just props <- paraProperties styleProperties
+ , isBlockQuote (indentation props)
+ (margin_left props)
+ = blockQuote
+ | otherwise
+ = id
+ where
+ isBlockQuote mIndent mMargin
+ | LengthValueMM indent <- mIndent
+ , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_
+ = True
+ | LengthValueMM margin <- mMargin
+ , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_
+ = True
+ | LengthValueMM indent <- mIndent
+ , LengthValueMM margin <- mMargin
+ = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_
+
+ | PercentValue indent <- mIndent
+ , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_
+ = True
+ | PercentValue margin <- mMargin
+ , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_
+ = True
+ | PercentValue indent <- mIndent
+ , PercentValue margin <- mMargin
+ = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_
+
+ | otherwise
+ = False
+
+--
+constructPara :: OdtReaderSafe Blocks Blocks -> OdtReaderSafe Blocks Blocks
+constructPara reader = proc blocks -> do
+ fStyle <- readStyleByName -< blocks
+ case fStyle of
+ Left _ -> reader -< blocks
+ Right style -> do
+ let modifier = getParaModifier style
+ blocks' <- reader -< blocks
+ arr modifier -<< blocks'
+
+
+
+type ListConstructor = [Blocks] -> Blocks
+
+getListConstructor :: ListLevelStyle -> ListConstructor
+getListConstructor ListLevelStyle{..} =
+ case listLevelType of
+ LltBullet -> bulletList
+ LltImage -> bulletList
+ LltNumbered -> let listNumberStyle = toListNumberStyle listItemFormat
+ listNumberDelim = toListNumberDelim listItemPrefix
+ listItemSuffix
+ in orderedListWith (1, listNumberStyle, listNumberDelim)
+ where
+ toListNumberStyle LinfNone = DefaultStyle
+ toListNumberStyle LinfNumber = Decimal
+ toListNumberStyle LinfRomanLC = LowerRoman
+ toListNumberStyle LinfRomanUC = UpperRoman
+ toListNumberStyle LinfAlphaLC = LowerAlpha
+ toListNumberStyle LinfAlphaUC = UpperAlpha
+ toListNumberStyle (LinfString _) = Example
+
+ toListNumberDelim Nothing (Just ".") = Period
+ toListNumberDelim (Just "" ) (Just ".") = Period
+ toListNumberDelim Nothing (Just ")") = OneParen
+ toListNumberDelim (Just "" ) (Just ")") = OneParen
+ toListNumberDelim (Just "(") (Just ")") = TwoParens
+ toListNumberDelim _ _ = DefaultDelim
+
+
+-- | Determines which style to use for a list, which level to use of that
+-- style, and which type of list to create as a result of this information.
+-- Then prepares the state for eventual child lists and constructs the list from
+-- the results.
+-- Two main cases are handled: The list may provide its own style or it may
+-- rely on a parent list's style. I the former case the current style in the
+-- state must be switched before and after the call to the child converter
+-- while in the latter the child converter can be called directly.
+-- If anything goes wrong, a default ordered-list-constructor is used.
+constructList :: OdtReaderSafe x [Blocks] -> OdtReaderSafe x Blocks
+constructList reader = proc x -> do
+ modifyExtraState (shiftListLevel 1) -< ()
+ listLevel <- getCurrentListLevel -< ()
+ fStyleName <- findAttr NsText "style-name" -< ()
+ case fStyleName of
+ Right styleName -> do
+ fListStyle <- lookupListStyle -< styleName
+ case fListStyle of
+ Right listStyle -> do
+ fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle)
+ case fLLS of
+ Just listLevelStyle -> do
+ oldListStyle <- switchCurrentListStyle -< Just listStyle
+ blocks <- constructListWith listLevelStyle -<< x
+ switchCurrentListStyle -< oldListStyle
+ returnA -< blocks
+ Nothing -> constructOrderedList -< x
+ Left _ -> constructOrderedList -< x
+ Left _ -> do
+ state <- getExtraState -< ()
+ mListStyle <- arr currentListStyle -< state
+ case mListStyle of
+ Just listStyle -> do
+ fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle)
+ case fLLS of
+ Just listLevelStyle -> constructListWith listLevelStyle -<< x
+ Nothing -> constructOrderedList -< x
+ Nothing -> constructOrderedList -< x
+ where
+ constructOrderedList =
+ reader
+ >>> modifyExtraState (shiftListLevel (-1))
+ >>^ orderedList
+ constructListWith listLevelStyle =
+ reader
+ >>> getListConstructor listLevelStyle
+ ^>> modifyExtraState (shiftListLevel (-1))
+
+--------------------------------------------------------------------------------
+-- Readers
+--------------------------------------------------------------------------------
+
+type ElementMatcher result = (Namespace, ElementName, OdtReader result result)
+
+type InlineMatcher = ElementMatcher Inlines
+
+type BlockMatcher = ElementMatcher Blocks
+
+
+--
+matchingElement :: (Monoid e)
+ => Namespace -> ElementName
+ -> OdtReaderSafe e e
+ -> ElementMatcher e
+matchingElement ns name reader = (ns, name, asResultAccumulator reader)
+ where
+ asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m)
+ asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>§ (<>)
+
+--
+matchChildContent' :: (Monoid result)
+ => [ElementMatcher result]
+ -> OdtReaderSafe _x result
+matchChildContent' ls = returnV mempty >>> matchContent' ls
+
+--
+matchChildContent :: (Monoid result)
+ => [ElementMatcher result]
+ -> OdtReaderSafe (result, XML.Content) result
+ -> OdtReaderSafe _x result
+matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback
+
+
+--------------------------------------------
+-- Matchers
+--------------------------------------------
+
+----------------------
+-- Basics
+----------------------
+
+--
+-- | Open Document allows several consecutive spaces if they are marked up
+read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines
+read_plain_text = fst ^&&& read_plain_text' >>§ recover
+ where
+ -- fallible version
+ read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines
+ read_plain_text' = ( second ( arr extractText )
+ >>^ spreadChoice >>?! second text
+ )
+ >>?§ (<>)
+ --
+ extractText :: XML.Content -> Fallible String
+ extractText (XML.Text cData) = succeedWith (XML.cdData cData)
+ extractText _ = failEmpty
+
+
+-- specifically. I honor that, although the current implementation of '(<>)'
+-- for 'Inlines' in "Text.Pandoc.Builder" will collaps them agein.
+-- The rational is to be prepared for future modifications.
+read_spaces :: InlineMatcher
+read_spaces = matchingElement NsText "s" (
+ readAttrWithDefault NsText "c" 1 -- how many spaces?
+ >>^ fromList.(`replicate` Space)
+ )
+--
+read_line_break :: InlineMatcher
+read_line_break = matchingElement NsText "line-break"
+ $ returnV linebreak
+
+--
+read_span :: InlineMatcher
+read_span = matchingElement NsText "span"
+ $ withNewStyle
+ $ matchChildContent [ read_span
+ , read_spaces
+ , read_line_break
+ , read_link
+ , read_note
+ , read_citation
+ , read_bookmark
+ , read_bookmark_start
+ , read_reference_start
+ , read_bookmark_ref
+ , read_reference_ref
+ ] read_plain_text
+
+--
+read_paragraph :: BlockMatcher
+read_paragraph = matchingElement NsText "p"
+ $ constructPara
+ $ liftA para
+ $ withNewStyle
+ $ matchChildContent [ read_span
+ , read_spaces
+ , read_line_break
+ , read_link
+ , read_note
+ , read_citation
+ , read_bookmark
+ , read_bookmark_start
+ , read_reference_start
+ , read_bookmark_ref
+ , read_reference_ref
+ ] read_plain_text
+
+
+----------------------
+-- Headers
+----------------------
+
+--
+read_header :: BlockMatcher
+read_header = matchingElement NsText "h"
+ $ proc blocks -> do
+ level <- ( readAttrWithDefault NsText "outline-level" 1
+ ) -< blocks
+ children <- ( matchChildContent [ read_span
+ , read_spaces
+ , read_line_break
+ , read_link
+ , read_note
+ , read_citation
+ , read_bookmark
+ , read_bookmark_start
+ , read_reference_start
+ , read_bookmark_ref
+ , read_reference_ref
+ ] read_plain_text
+ ) -< blocks
+ anchor <- getHeaderAnchor -< children
+ let idAttr = (anchor, [], []) -- no classes, no key-value pairs
+ arr (uncurry3 headerWith) -< (idAttr, level, children)
+
+----------------------
+-- Lists
+----------------------
+
+--
+read_list :: BlockMatcher
+read_list = matchingElement NsText "list"
+-- $ withIncreasedListLevel
+ $ constructList
+-- $ liftA bulletList
+ $ matchChildContent' [ read_list_item
+ ]
+--
+read_list_item :: ElementMatcher [Blocks]
+read_list_item = matchingElement NsText "list-item"
+ $ liftA (compactify'.(:[]))
+ ( matchChildContent' [ read_paragraph
+ , read_header
+ , read_list
+ ]
+ )
+
+
+----------------------
+-- Links
+----------------------
+
+read_link :: InlineMatcher
+read_link = matchingElement NsText "a"
+ $ liftA3 link
+ ( findAttrWithDefault NsXLink "href" "" )
+ ( findAttrWithDefault NsOffice "title" "" )
+ ( matchChildContent [ read_span
+ , read_note
+ , read_citation
+ , read_bookmark
+ , read_bookmark_start
+ , read_reference_start
+ , read_bookmark_ref
+ , read_reference_ref
+ ] read_plain_text )
+
+
+-------------------------
+-- Footnotes
+-------------------------
+
+read_note :: InlineMatcher
+read_note = matchingElement NsText "note"
+ $ liftA note
+ $ matchChildContent' [ read_note_body ]
+
+read_note_body :: BlockMatcher
+read_note_body = matchingElement NsText "note-body"
+ $ matchChildContent' [ read_paragraph ]
+
+-------------------------
+-- Citations
+-------------------------
+
+read_citation :: InlineMatcher
+read_citation = matchingElement NsText "bibliography-mark"
+ $ liftA2 cite
+ ( liftA2 makeCitation
+ ( findAttrWithDefault NsText "identifier" "" )
+ ( readAttrWithDefault NsText "number" 0 )
+ )
+ ( matchChildContent [] read_plain_text )
+ where
+ makeCitation :: String -> Int -> [Citation]
+ makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0]
+
+
+----------------------
+-- Tables
+----------------------
+
+--
+read_table :: BlockMatcher
+read_table = matchingElement NsTable "table"
+ $ liftA (simpleTable [])
+ $ matchChildContent' [ read_table_row
+ ]
+
+--
+read_table_row :: ElementMatcher [[Blocks]]
+read_table_row = matchingElement NsTable "table-row"
+ $ liftA (:[])
+ $ matchChildContent' [ read_table_cell
+ ]
+
+--
+read_table_cell :: ElementMatcher [Blocks]
+read_table_cell = matchingElement NsTable "table-cell"
+ $ liftA (compactify'.(:[]))
+ $ matchChildContent' [ read_paragraph
+ ]
+
+----------------------
+-- Internal links
+----------------------
+
+_ANCHOR_PREFIX_ :: String
+_ANCHOR_PREFIX_ = "anchor"
+
+--
+readAnchorAttr :: OdtReader _x Anchor
+readAnchorAttr = findAttr NsText "name"
+
+-- | Beware: may fail
+findAnchorName :: OdtReader AnchorPrefix Anchor
+findAnchorName = ( keepingTheValue readAnchorAttr
+ >>^ spreadChoice
+ ) >>?! getPrettyAnchor
+
+
+--
+maybeAddAnchorFrom :: OdtReader Inlines AnchorPrefix
+ -> OdtReaderSafe Inlines Inlines
+maybeAddAnchorFrom anchorReader =
+ keepingTheValue (anchorReader >>? findAnchorName >>?^ toAnchorElem)
+ >>>
+ proc (inlines, fAnchorElem) -> do
+ case fAnchorElem of
+ Right anchorElem ->
+ arr (anchorElem <>) -<< inlines
+ Left _ -> returnA -< inlines
+ where
+ toAnchorElem :: Anchor -> Inlines
+ toAnchorElem anchorID = spanWith (anchorID, [], []) mempty
+ -- no classes, no key-value pairs
+
+--
+read_bookmark :: InlineMatcher
+read_bookmark = matchingElement NsText "bookmark"
+ $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_)
+
+--
+read_bookmark_start :: InlineMatcher
+read_bookmark_start = matchingElement NsText "bookmark-start"
+ $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_)
+
+--
+read_reference_start :: InlineMatcher
+read_reference_start = matchingElement NsText "reference-mark-start"
+ $ maybeAddAnchorFrom readAnchorAttr
+
+-- | Beware: may fail
+findAnchorRef :: OdtReader _x Anchor
+findAnchorRef = ( findAttr NsText "ref-name"
+ >>?^ (_ANCHOR_PREFIX_,)
+ ) >>?! getPrettyAnchor
+
+
+--
+maybeInAnchorRef :: OdtReaderSafe Inlines Inlines
+maybeInAnchorRef = proc inlines -> do
+ fRef <- findAnchorRef -< ()
+ case fRef of
+ Right anchor ->
+ arr (toAnchorRef anchor) -<< inlines
+ Left _ -> returnA -< inlines
+ where
+ toAnchorRef :: Anchor -> Inlines -> Inlines
+ toAnchorRef anchor = link ('#':anchor) "" -- no title
+
+--
+read_bookmark_ref :: InlineMatcher
+read_bookmark_ref = matchingElement NsText "bookmark-ref"
+ $ maybeInAnchorRef
+ <<< matchChildContent [] read_plain_text
+
+--
+read_reference_ref :: InlineMatcher
+read_reference_ref = matchingElement NsText "reference-ref"
+ $ maybeInAnchorRef
+ <<< matchChildContent [] read_plain_text
+
+
+----------------------
+-- Entry point
+----------------------
+
+--read_plain_content :: OdtReaderSafe _x Inlines
+--read_plain_content = strContent >>^ text
+
+read_text :: OdtReaderSafe _x Pandoc
+read_text = matchChildContent' [ read_header
+ , read_paragraph
+ , read_list
+ , read_table
+ ]
+ >>^ doc
+
+read_body :: OdtReader _x Pandoc
+read_body = executeIn NsOffice "body"
+ $ executeIn NsOffice "text"
+ $ liftAsSuccess read_text
+
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
new file mode 100644
index 000000000..5922164c9
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
@@ -0,0 +1,260 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Generic.Fallible
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Data types and utilities representing failure. Most of it is based on the
+"Either" type in its usual configuration (left represents failure).
+
+In most cases, the failure type is implied or required to be a "Monoid".
+
+The choice of "Either" instead of a custom type makes it easier to write
+compatible instances of "ArrowChoice".
+-}
+
+-- We export everything
+module Text.Pandoc.Readers.Odt.Generic.Fallible where
+
+import Control.Applicative
+import Control.Monad
+
+import qualified Data.Foldable as F
+import Data.Monoid
+
+-- | Default for now. Will probably become a class at some point.
+type Failure = ()
+
+type Fallible a = Either Failure a
+
+
+-- | False -> Left (), True -> Right ()
+boolToEither :: Bool -> Fallible ()
+boolToEither False = Left ()
+boolToEither True = Right ()
+
+-- | False -> Left (), True -> Right ()
+boolToChoice :: Bool -> Fallible ()
+boolToChoice False = Left ()
+boolToChoice True = Right ()
+
+--
+maybeToEither :: Maybe a -> Fallible a
+maybeToEither (Just a) = Right a
+maybeToEither Nothing = Left ()
+
+--
+eitherToMaybe :: Either _l a -> Maybe a
+eitherToMaybe (Left _) = Nothing
+eitherToMaybe (Right a) = Just a
+
+-- | > untagEither === either id id
+untagEither :: Either a a -> a
+untagEither (Left a) = a
+untagEither (Right a) = a
+
+-- | > fromLeft f === either f id
+fromLeft :: (a -> b) -> Either a b -> b
+fromLeft f (Left a) = f a
+fromLeft _ (Right b) = b
+
+-- | > fromRight f === either id f
+fromRight :: (a -> b) -> Either b a -> b
+fromRight _ (Left b) = b
+fromRight f (Right a) = f a
+
+-- | > recover a === fromLeft (const a) === either (const a) id
+recover :: a -> Either _f a -> a
+recover a (Left _) = a
+recover _ (Right a) = a
+
+-- | I would love to use 'fail'. Alas, 'Monad.fail'...
+failWith :: failure -> Either failure _x
+failWith f = Left f
+
+--
+failEmpty :: (Monoid failure) => Either failure _x
+failEmpty = failWith mempty
+
+--
+succeedWith :: a -> Either _x a
+succeedWith = Right
+
+--
+collapseEither :: Either failure (Either failure x)
+ -> Either failure x
+collapseEither (Left f ) = Left f
+collapseEither (Right (Left f)) = Left f
+collapseEither (Right (Right x)) = Right x
+
+-- | If either of the values represents an error, the result is a
+-- (possibly combined) error. If both values represent a success,
+-- both are returned.
+chooseMin :: (Monoid a) => Either a b -> Either a b' -> Either a (b,b')
+chooseMin = chooseMinWith (,)
+
+-- | If either of the values represents an error, the result is a
+-- (possibly combined) error. If both values represent a success,
+-- a combination is returned.
+chooseMinWith :: (Monoid a) => (b -> b' -> c)
+ -> Either a b
+ -> Either a b'
+ -> Either a c
+chooseMinWith (><) (Right a) (Right b) = Right $ a >< b
+chooseMinWith _ (Left a) (Left b) = Left $ a <> b
+chooseMinWith _ (Left a) _ = Left a
+chooseMinWith _ _ (Left b) = Left b
+
+-- | If either of the values represents a non-error, the result is a
+-- (possibly combined) non-error. If both values represent an error, an error
+-- is returned.
+chooseMax :: (Monoid a, Monoid b) => Either a b -> Either a b -> Either a b
+chooseMax = chooseMaxWith (<>)
+
+-- | If either of the values represents a non-error, the result is a
+-- (possibly combined) non-error. If both values represent an error, an error
+-- is returned.
+chooseMaxWith :: (Monoid a) => (b -> b -> b)
+ -> Either a b
+ -> Either a b
+ -> Either a b
+chooseMaxWith (><) (Right a) (Right b) = Right $ a >< b
+chooseMaxWith _ (Left a) (Left b) = Left $ a <> b
+chooseMaxWith _ (Right a) _ = Right a
+chooseMaxWith _ _ (Right b) = Right b
+
+
+-- | Class of containers that can escalate contained 'Either's.
+-- The word "Vector" is meant in the sense of a disease transmitter.
+class ChoiceVector v where
+ spreadChoice :: v (Either f a) -> Either f (v a)
+
+-- Let's do a few examples first
+
+instance ChoiceVector Maybe where
+ spreadChoice (Just (Left f)) = Left f
+ spreadChoice (Just (Right x)) = Right (Just x)
+ spreadChoice Nothing = Right Nothing
+
+instance ChoiceVector (Either l) where
+ spreadChoice (Right (Left f)) = Left f
+ spreadChoice (Right (Right x)) = Right (Right x)
+ spreadChoice (Left x ) = Right (Left x)
+
+instance ChoiceVector ((,) a) where
+ spreadChoice (_, Left f) = Left f
+ spreadChoice (x, Right y) = Right (x,y)
+ -- Wasn't there a newtype somewhere with the elements flipped?
+
+--
+-- More instances later, first some discussion.
+--
+-- I'll have to freshen up on type system details to see how (or if) to do
+-- something like
+--
+-- > instance (ChoiceVector a, ChoiceVector b) => ChoiceVector (a b) where
+-- > :
+--
+-- But maybe it would be even better to use something like
+--
+-- > class ChoiceVector v v' f | v -> v' f where
+-- > spreadChoice :: v -> Either f v'
+--
+-- That way, more places in @v@ could spread the cheer, e.g.:
+--
+-- As before:
+-- -- ( a , Either f b) (a , b) f
+-- > instance ChoiceVector ((,) a (Either f b)) ((,) a b) f where
+-- > spreadChoice (_, Left f) = Left f
+-- > spreadChoice (a, Right b) = Right (a,b)
+--
+-- But also:
+-- -- ( Either f a , b) (a , b) f
+-- > instance ChoiceVector ((,) (Either f a) b) ((,) a b) f where
+-- > spreadChoice (Right a,b) = Right (a,b)
+-- > spreadChoice (Left f,_) = Left f
+--
+-- And maybe even:
+-- -- ( Either f a , Either f b) (a , b) f
+-- > instance ChoiceVector ((,) (Either f a) (Either f b)) ((,) a b) f where
+-- > spreadChoice (Right a , Right b) = Right (a,b)
+-- > spreadChoice (Left f , _ ) = Left f
+-- > spreadChoice ( _ , Left f) = Left f
+--
+-- Of course that would lead to a lot of overlapping instances...
+-- But I can't think of a different way. A selector function might help,
+-- but not even a "Data.Traversable" is powerful enough for that.
+-- But maybe someone has already solved all this with a lens library.
+--
+-- Well, it's an interesting academic question. But for practical purposes,
+-- I have more than enough right now.
+
+instance ChoiceVector ((,,) a b) where
+ spreadChoice (_,_, Left f) = Left f
+ spreadChoice (a,b, Right x) = Right (a,b,x)
+
+instance ChoiceVector ((,,,) a b c) where
+ spreadChoice (_,_,_, Left f) = Left f
+ spreadChoice (a,b,c, Right x) = Right (a,b,c,x)
+
+instance ChoiceVector ((,,,,) a b c d) where
+ spreadChoice (_,_,_,_, Left f) = Left f
+ spreadChoice (a,b,c,d, Right x) = Right (a,b,c,d,x)
+
+instance ChoiceVector (Const a) where
+ spreadChoice (Const c) = Right (Const c) -- need to repackage because of implicit types
+
+-- | Fails on the first error
+instance ChoiceVector [] where
+ spreadChoice = sequence -- using the monad instance of Either.
+ -- Could be generalized to "Data.Traversable" - but why play
+ -- with UndecidableInstances unless this is really needed.
+
+-- | Wrapper for a list. While the normal list instance of 'ChoiceVector'
+-- fails whenever it can, this type will never fail.
+newtype SuccessList a = SuccessList { collectNonFailing :: [a] }
+ deriving ( Eq, Ord, Show )
+
+instance ChoiceVector SuccessList where
+ spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing
+ where unTagRight (Right x) = (x:)
+ unTagRight _ = id
+
+-- | Like 'catMaybes', but for 'Either'.
+collectRights :: [Either _l r] -> [r]
+collectRights = collectNonFailing . untag . spreadChoice . SuccessList
+ where untag = fromLeft (error "Unexpected Left")
+
+-- | A version of 'collectRights' generalized to other containers. The
+-- container must be both "reducible" and "buildable". Most general containers
+-- should fullfill these requirements, but there is no single typeclass
+-- (that I know of) for that.
+-- Therefore, they are split between 'Foldable' and 'MonadPlus'.
+-- (Note that 'Data.Traversable.Traversable' alone would not be enough, either.)
+collectRightsF :: (F.Foldable c, MonadPlus c) => c (Either _l r) -> c r
+collectRightsF = F.foldr unTagRight mzero
+ where unTagRight (Right x) = mplus $ return x
+ unTagRight _ = id
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
new file mode 100644
index 000000000..82ae3e20e
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -0,0 +1,62 @@
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Generic.Namespaces
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+A class containing a set of namespace identifiers. Used to convert between
+typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
+-}
+
+module Text.Pandoc.Readers.Odt.Generic.Namespaces where
+
+import qualified Data.Map as M
+
+--
+type NameSpaceIRI = String
+
+--
+type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI
+
+--
+class (Eq nsID, Ord nsID) => NameSpaceID nsID where
+
+ -- | Given a IRI, possibly update the map and return the id of the namespace.
+ -- May fail if the namespace is unknown and the application does not
+ -- allow unknown namespaces.
+ getNamespaceID :: NameSpaceIRI
+ -> NameSpaceIRIs nsID
+ -> Maybe (NameSpaceIRIs nsID, nsID)
+ -- | Given a namespace id, lookup its IRI. May be overriden for performance.
+ getIRI :: nsID
+ -> NameSpaceIRIs nsID
+ -> Maybe NameSpaceIRI
+ -- | The root element of an XML document has a namespace, too, and the
+ -- "XML.Light-parser" is eager to remove the corresponding namespace
+ -- attribute.
+ -- As a result, at least this root namespace must be provided.
+ getInitialIRImap :: NameSpaceIRIs nsID
+
+ getIRI = M.lookup
+ getInitialIRImap = M.empty
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs
new file mode 100644
index 000000000..afd7d616c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs
@@ -0,0 +1,48 @@
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Generic.SetMap
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+A map of values to sets of values.
+-}
+
+module Text.Pandoc.Readers.Odt.Generic.SetMap where
+
+import qualified Data.Map as M
+import qualified Data.Set as S
+
+type SetMap k v = M.Map k (S.Set v)
+
+empty :: SetMap k v
+empty = M.empty
+
+fromList :: (Ord k, Ord v) => [(k,v)] -> SetMap k v
+fromList = foldr (uncurry insert) empty
+
+insert :: (Ord k, Ord v) => k -> v -> SetMap k v -> SetMap k v
+insert key value setMap = M.insertWith S.union key (S.singleton value) setMap
+
+union3 :: (Ord k) => SetMap k v -> SetMap k v -> SetMap k v -> SetMap k v
+union3 sm1 sm2 sm3 = sm1 `M.union` sm2 `M.union` sm3
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
new file mode 100644
index 000000000..6c10ed61d
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -0,0 +1,171 @@
+{-# LANGUAGE TypeOperators #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ViewPatterns #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Reader.Odt.Generic.Utils
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+General utility functions for the odt reader.
+-}
+
+module Text.Pandoc.Readers.Odt.Generic.Utils
+( uncurry3
+, uncurry4
+, uncurry5
+, uncurry6
+, uncurry7
+, uncurry8
+, swap
+, reverseComposition
+, bool
+, tryToRead
+, Lookupable(..)
+, readLookupables
+, readLookupable
+, readPercent
+, findBy
+, swing
+, composition
+) where
+
+import Control.Category ( Category, (>>>), (<<<) )
+import qualified Control.Category as Cat ( id )
+import Control.Monad ( msum )
+
+import qualified Data.Foldable as F ( Foldable, foldr )
+import Data.Maybe
+
+
+-- | Aequivalent to
+-- > foldr (.) id
+-- where '(.)' are 'id' are the ones from "Control.Category"
+-- and 'foldr' is the one from "Data.Foldable".
+-- The noun-form was chosen to be consistend with 'sum', 'product' etc
+-- based on the discussion at
+-- <https://groups.google.com/forum/#!topic/haskell-cafe/VkOZM1zaHOI>
+-- (that I was not part of)
+composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a
+composition = F.foldr (<<<) Cat.id
+
+-- | Aequivalent to
+-- > foldr (flip (.)) id
+-- where '(.)' are 'id' are the ones from "Control.Category"
+-- and 'foldr' is the one from "Data.Foldable".
+-- A reversed version of 'composition'.
+reverseComposition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a
+reverseComposition = F.foldr (>>>) Cat.id
+
+-- | 'Either' has 'either', 'Maybe' has 'maybe'. 'Bool' should have 'bool'.
+-- Note that the first value is selected if the boolean value is 'False'.
+-- That makes 'bool' consistent with the other two. Also, 'bool' now takes its
+-- arguments in the exact opposite order compared to the normal if construct.
+bool :: a -> a -> Bool -> a
+bool x _ False = x
+bool _ x True = x
+
+-- | This function often makes it possible to switch values with the functions
+-- that are applied to them.
+--
+-- Examples:
+-- > swing map :: [a -> b] -> a -> [b]
+-- > swing any :: [a -> Bool] -> a -> Bool
+-- > swing foldr :: b -> a -> [a -> b -> b] -> b
+-- > swing scanr :: c -> a -> [a -> c -> c] -> c
+-- > swing zipWith :: [a -> b -> c] -> a -> [b] -> [c]
+-- > swing find :: [a -> Bool] -> a -> Maybe (a -> Bool)
+--
+-- Stolen from <https://wiki.haskell.org/Pointfree>
+swing :: (((a -> b) -> b) -> c -> d) -> c -> a -> d
+swing = flip.(.flip id)
+-- swing f c a = f ($ a) c
+
+
+-- | Alternative to 'read'/'reads'. The former of these throws errors
+-- (nobody wants that) while the latter returns "to much" for simple purposes.
+-- This function instead applies 'reads' and returns the first match (if any)
+-- in a 'Maybe'.
+tryToRead :: (Read r) => String -> Maybe r
+tryToRead = reads >>> listToMaybe >>> fmap fst
+
+-- | A version of 'reads' that requires a '%' sign after the number
+readPercent :: ReadS Int
+readPercent s = [ (i,s') | (i , r ) <- reads s
+ , ("%" , s') <- lex r
+ ]
+
+-- | Data that can be looked up.
+-- This is mostly a utility to read data with kind *.
+class Lookupable a where
+ lookupTable :: [(String, a)]
+
+-- | The idea is to use this function as if there was a declaration like
+--
+-- > instance (Lookupable a) => (Read a) where
+-- > readsPrec _ = readLookupables
+-- .
+-- But including this code in this form would need UndecideableInstances.
+-- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
+-- can be used directly in almost any case.
+readLookupables :: (Lookupable a) => String -> [(a,String)]
+readLookupables s = [ (a,rest) | (word,rest) <- lex s,
+ let result = lookup word lookupTable,
+ isJust result,
+ let Just a = result
+ ]
+
+-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
+readLookupable :: (Lookupable a) => String -> Maybe a
+readLookupable s = msum
+ $ map ((`lookup` lookupTable).fst)
+ $ lex s
+
+uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z
+uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z
+uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z
+uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z
+uncurry7 :: (a->b->c->d->e->f->g -> z) -> (a,b,c,d,e,f,g ) -> z
+uncurry8 :: (a->b->c->d->e->f->g->h -> z) -> (a,b,c,d,e,f,g,h) -> z
+
+uncurry3 fun (a,b,c ) = fun a b c
+uncurry4 fun (a,b,c,d ) = fun a b c d
+uncurry5 fun (a,b,c,d,e ) = fun a b c d e
+uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f
+uncurry7 fun (a,b,c,d,e,f,g ) = fun a b c d e f g
+uncurry8 fun (a,b,c,d,e,f,g,h) = fun a b c d e f g h
+
+swap :: (a,b) -> (b,a)
+swap (a,b) = (b,a)
+
+-- | A version of "Data.List.find" that uses a converter to a Maybe instance.
+-- The returned value is the first which the converter returns in a 'Just'
+-- wrapper.
+findBy :: (a -> Maybe b) -> [a] -> Maybe b
+findBy _ [] = Nothing
+findBy f ((f -> Just x):_ ) = Just x
+findBy f ( _:xs) = findBy f xs
+
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
new file mode 100644
index 000000000..ec7e0ea5e
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -0,0 +1,1064 @@
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE RecordWildCards #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Generic.XMLConverter
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+A generalized XML parser based on stateful arrows.
+It might be sufficient to define this reader as a comonad, but there is
+not a lot of use in trying.
+-}
+
+module Text.Pandoc.Readers.Odt.Generic.XMLConverter
+( ElementName
+, XMLConverterState
+, XMLConverter
+, FallibleXMLConverter
+, swapPosition
+, runConverter
+, runConverter''
+, runConverter'
+, runConverterF'
+, runConverterF
+, getCurrentElement
+, getExtraState
+, setExtraState
+, modifyExtraState
+, convertingExtraState
+, producingExtraState
+, lookupNSiri
+, lookupNSprefix
+, readNSattributes
+, elemName
+, elemNameIs
+, strContent
+, elContent
+, currentElem
+, currentElemIs
+, expectElement
+, elChildren
+, findChildren
+, filterChildren
+, filterChildrenName
+, findChild'
+, findChild
+, filterChild'
+, filterChild
+, filterChildName'
+, filterChildName
+, isSet
+, isSet'
+, isSetWithDefault
+, hasAttrValueOf'
+, failIfNotAttrValueOf
+, isThatTheAttrValue
+, searchAttrIn
+, searchAttrWith
+, searchAttr
+, lookupAttr
+, lookupAttr'
+, lookupAttrWithDefault
+, lookupDefaultingAttr
+, findAttr'
+, findAttr
+, findAttrWithDefault
+, readAttr
+, readAttr'
+, readAttrWithDefault
+, getAttr
+-- , (>/<)
+-- , (?>/<)
+, executeIn
+, collectEvery
+, withEveryL
+, withEvery
+, tryAll
+, tryAll'
+, IdXMLConverter
+, MaybeEConverter
+, ElementMatchConverter
+, MaybeCConverter
+, ContentMatchConverter
+, makeMatcherE
+, makeMatcherC
+, prepareMatchersE
+, prepareMatchersC
+, matchChildren
+, matchContent''
+, matchContent'
+, matchContent
+) where
+
+import Control.Applicative hiding ( liftA, liftA2 )
+import Control.Monad ( MonadPlus )
+import Control.Arrow
+
+import qualified Data.Map as M
+import qualified Data.Foldable as F
+import Data.Default
+import Data.Monoid ( Monoid )
+import Data.Maybe
+
+import qualified Text.XML.Light as XML
+
+import Text.Pandoc.Readers.Odt.Arrows.State
+import Text.Pandoc.Readers.Odt.Arrows.Utils
+
+import Text.Pandoc.Readers.Odt.Generic.Namespaces
+import Text.Pandoc.Readers.Odt.Generic.Utils
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+
+--------------------------------------------------------------------------------
+-- Basis types for readability
+--------------------------------------------------------------------------------
+
+--
+type ElementName = String
+type AttributeName = String
+type AttributeValue = String
+
+--
+type NameSpacePrefix = String
+
+--
+type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
+
+--------------------------------------------------------------------------------
+-- Main converter state
+--------------------------------------------------------------------------------
+
+-- GADT so some of the NameSpaceID restrictions can be deduced
+data XMLConverterState nsID extraState where
+ XMLConverterState :: NameSpaceID nsID
+ => { -- | A stack of parent elements. The top element is the current one.
+ -- Arguably, a real Zipper would be better. But that is an
+ -- optimization that can be made at a later time, e.g. when
+ -- replacing Text.XML.Light.
+ parentElements :: [XML.Element]
+ -- | A map from internal namespace IDs to the namespace prefixes
+ -- used in XML elements
+ , namespacePrefixes :: NameSpacePrefixes nsID
+ -- | A map from internal namespace IDs to namespace IRIs
+ -- (Only necessary for matching namespace IDs and prefixes)
+ , namespaceIRIs :: NameSpaceIRIs nsID
+ -- | A place to put "something else". This feature is used heavily
+ -- to keep the main code cleaner. More specifically, the main reader
+ -- is divided into different stages. Each stage lifts something up
+ -- here, which the next stage can then use. This could of course be
+ -- generalized to a state-tree or used for the namespace IRIs. The
+ -- border between states and values is an imaginary one, after all.
+ -- But the separation as it is seems to be enough for now.
+ , moreState :: extraState
+ }
+ -> XMLConverterState nsID extraState
+
+--
+createStartState :: (NameSpaceID nsID)
+ => XML.Element
+ -> extraState
+ -> XMLConverterState nsID extraState
+createStartState element extraState =
+ XMLConverterState
+ { parentElements = [element]
+ , namespacePrefixes = M.empty
+ , namespaceIRIs = getInitialIRImap
+ , moreState = extraState
+ }
+
+-- | Functor over extra state
+instance Functor (XMLConverterState nsID) where
+ fmap f ( XMLConverterState parents prefixes iRIs extraState )
+ = XMLConverterState parents prefixes iRIs (f extraState)
+
+--
+replaceExtraState :: extraState
+ -> XMLConverterState nsID _x
+ -> XMLConverterState nsID extraState
+replaceExtraState x s
+ = fmap (const x) s
+
+--
+currentElement :: XMLConverterState nsID extraState
+ -> XML.Element
+currentElement state = head (parentElements state)
+
+-- | Replace the current position by another, modifying the extra state
+-- in the process
+swapPosition :: (extraState -> extraState')
+ -> [XML.Element]
+ -> XMLConverterState nsID extraState
+ -> XMLConverterState nsID extraState'
+swapPosition f stack state
+ = state { parentElements = stack
+ , moreState = f (moreState state)
+ }
+
+-- | Replace the current position by another, modifying the extra state
+-- in the process
+swapStack' :: XMLConverterState nsID extraState
+ -> [XML.Element]
+ -> ( XMLConverterState nsID extraState , [XML.Element] )
+swapStack' state stack
+ = ( state { parentElements = stack }
+ , parentElements state
+ )
+
+--
+pushElement :: XML.Element
+ -> XMLConverterState nsID extraState
+ -> XMLConverterState nsID extraState
+pushElement e state = state { parentElements = e:(parentElements state) }
+
+-- | Pop the top element from the call stack, unless it is the last one.
+popElement :: XMLConverterState nsID extraState
+ -> Maybe (XMLConverterState nsID extraState)
+popElement state
+ | _:es@(_:_) <- parentElements state = Just $ state { parentElements = es }
+ | otherwise = Nothing
+
+--------------------------------------------------------------------------------
+-- Main type
+--------------------------------------------------------------------------------
+
+-- It might be a good idea to pack the converters in a GADT
+-- Downside: data instead of type
+-- Upside: 'Failure' could be made a parameter as well.
+
+--
+type XMLConverter nsID extraState input output
+ = ArrowState (XMLConverterState nsID extraState ) input output
+
+type FallibleXMLConverter nsID extraState input output
+ = XMLConverter nsID extraState input (Fallible output)
+
+--
+runConverter :: XMLConverter nsID extraState input output
+ -> XMLConverterState nsID extraState
+ -> input
+ -> output
+runConverter converter state input = snd $ runArrowState converter (state,input)
+
+--
+runConverter'' :: (NameSpaceID nsID)
+ => XMLConverter nsID extraState (Fallible ()) output
+ -> extraState
+ -> XML.Element
+ -> output
+runConverter'' converter extraState element = runConverter (readNSattributes >>> converter) (createStartState element extraState) ()
+
+runConverter' :: (NameSpaceID nsID)
+ => FallibleXMLConverter nsID extraState () success
+ -> extraState
+ -> XML.Element
+ -> Fallible success
+runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) ()
+
+--
+runConverterF' :: FallibleXMLConverter nsID extraState x y
+ -> XMLConverterState nsID extraState
+ -> Fallible x -> Fallible y
+runConverterF' a s e = runConverter (returnV e >>? a) s e
+
+--
+runConverterF :: (NameSpaceID nsID)
+ => FallibleXMLConverter nsID extraState XML.Element x
+ -> extraState
+ -> Fallible XML.Element -> Fallible x
+runConverterF a s = either failWith
+ (\e -> runConverter a (createStartState e s) e)
+
+--
+getCurrentElement :: XMLConverter nsID extraState x XML.Element
+getCurrentElement = extractFromState currentElement
+
+--
+getExtraState :: XMLConverter nsID extraState x extraState
+getExtraState = extractFromState moreState
+
+--
+setExtraState :: XMLConverter nsID extraState extraState extraState
+setExtraState = withState $ \state extra
+ -> (replaceExtraState extra state , extra)
+
+
+-- | Lifts a function to the extra state.
+modifyExtraState :: (extraState -> extraState)
+ -> XMLConverter nsID extraState x x
+modifyExtraState = modifyState.fmap
+
+
+-- | First sets the extra state to the new value. Then modifies the original
+-- extra state with a converter that uses the new state. Finally, the
+-- intermediate state is dropped and the extra state is lifted into the
+-- state as it was at the beginning of the function.
+-- As a result, exactly the extra state and nothing else is changed.
+-- The resulting converter even behaves like an identity converter on the
+-- value level.
+--
+-- (The -ing form is meant to be mnemonic in a sequence of arrows as in
+-- convertingExtraState () converter >>> doOtherStuff)
+--
+convertingExtraState :: extraState'
+ -> FallibleXMLConverter nsID extraState' extraState extraState
+ -> FallibleXMLConverter nsID extraState x x
+convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA
+ where
+ setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v
+ modifyWithA = keepingTheValue (moreState ^>> a)
+ >>^ spreadChoice >>?§ flip replaceExtraState
+
+-- | First sets the extra state to the new value. Then produces a new
+-- extra state with a converter that uses the new state. Finally, the
+-- intermediate state is dropped and the extra state is lifted into the
+-- state as it was at the beginning of the function.
+-- As a result, exactly the extra state and nothing else is changed.
+-- The resulting converter even behaves like an identity converter on the
+-- value level.
+--
+-- Aequivalent to
+--
+-- > \v x a -> convertingExtraState v (returnV x >>> a)
+--
+-- (The -ing form is meant to be mnemonic in a sequence of arrows as in
+-- producingExtraState () () producer >>> doOtherStuff)
+--
+producingExtraState :: extraState'
+ -> a
+ -> FallibleXMLConverter nsID extraState' a extraState
+ -> FallibleXMLConverter nsID extraState x x
+producingExtraState v x a = convertingExtraState v (returnV x >>> a)
+
+
+--------------------------------------------------------------------------------
+-- Work in namespaces
+--------------------------------------------------------------------------------
+
+-- | Arrow version of 'getIRI'
+lookupNSiri :: (NameSpaceID nsID)
+ => nsID
+ -> XMLConverter nsID extraState x (Maybe NameSpaceIRI)
+lookupNSiri nsID = extractFromState
+ $ \state -> getIRI nsID $ namespaceIRIs state
+
+--
+lookupNSprefix :: (NameSpaceID nsID)
+ => nsID
+ -> XMLConverter nsID extraState x (Maybe NameSpacePrefix)
+lookupNSprefix nsID = extractFromState
+ $ \state -> M.lookup nsID $ namespacePrefixes state
+
+-- | Extracts namespace attributes from the current element and tries to
+-- update the current mapping accordingly
+readNSattributes :: (NameSpaceID nsID)
+ => FallibleXMLConverter nsID extraState x ()
+readNSattributes = fromState $ \state -> maybe (state, failEmpty )
+ ( , succeedWith ())
+ (extractNSAttrs state )
+ where
+ extractNSAttrs :: (NameSpaceID nsID)
+ => XMLConverterState nsID extraState
+ -> Maybe (XMLConverterState nsID extraState)
+ extractNSAttrs startState
+ = foldl (\state d -> state >>= addNS d)
+ (Just startState)
+ nsAttribs
+ where nsAttribs = mapMaybe readNSattr (XML.elAttribs element)
+ element = currentElement startState
+ readNSattr (XML.Attr (XML.QName name _ (Just "xmlns")) iri)
+ = Just (name, iri)
+ readNSattr _ = Nothing
+ addNS (prefix, iri) state = fmap updateState
+ $ getNamespaceID iri
+ $ namespaceIRIs state
+ where updateState (iris,nsID)
+ = state { namespaceIRIs = iris
+ , namespacePrefixes = M.insert nsID prefix
+ $ namespacePrefixes state
+ }
+
+--------------------------------------------------------------------------------
+-- Common namespace accessors
+--------------------------------------------------------------------------------
+
+-- | Given a namespace id and an element name, creates a 'XML.QName' for
+-- internal use
+elemName :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> XMLConverter nsID extraState x XML.QName
+elemName nsID name = lookupNSiri nsID
+ &&& lookupNSprefix nsID
+ >>§ XML.QName name
+
+-- | Checks if a given element matches both a specified namespace id
+-- and a specified element name
+elemNameIs :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> XMLConverter nsID extraState XML.Element Bool
+elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>§ hasThatName
+ where hasThatName e iri = let elName = XML.elName e
+ in XML.qName elName == name
+ && XML.qURI elName == iri
+
+--------------------------------------------------------------------------------
+-- General content
+--------------------------------------------------------------------------------
+
+--
+strContent :: XMLConverter nsID extraState x String
+strContent = getCurrentElement
+ >>^ XML.strContent
+
+--
+elContent :: XMLConverter nsID extraState x [XML.Content]
+elContent = getCurrentElement
+ >>^ XML.elContent
+
+--------------------------------------------------------------------------------
+-- Current element
+--------------------------------------------------------------------------------
+
+--
+currentElem :: XMLConverter nsID extraState x (XML.QName)
+currentElem = getCurrentElement
+ >>^ XML.elName
+
+currentElemIs :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> XMLConverter nsID extraState x Bool
+currentElemIs nsID name = getCurrentElement
+ >>> elemNameIs nsID name
+
+
+
+{-
+currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>>
+ (XML.qName >>^ (&&).(== name) )
+ ^&&&^
+ (XML.qIRI >>^ (==) )
+ ) >>§ (.)
+ ) &&& lookupNSiri nsID >>§ ($)
+-}
+
+--
+expectElement :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState x ()
+expectElement nsID name = currentElemIs nsID name
+ >>^ boolToChoice
+
+--------------------------------------------------------------------------------
+-- Chilren
+--------------------------------------------------------------------------------
+
+--
+elChildren :: XMLConverter nsID extraState x [XML.Element]
+elChildren = getCurrentElement
+ >>^ XML.elChildren
+
+--
+findChildren :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> XMLConverter nsID extraState x [XML.Element]
+findChildren nsID name = elemName nsID name
+ &&& getCurrentElement
+ >>§ XML.findChildren
+
+--
+filterChildren :: (XML.Element -> Bool)
+ -> XMLConverter nsID extraState x [XML.Element]
+filterChildren p = getCurrentElement
+ >>^ XML.filterChildren p
+
+--
+filterChildrenName :: (XML.QName -> Bool)
+ -> XMLConverter nsID extraState x [XML.Element]
+filterChildrenName p = getCurrentElement
+ >>^ XML.filterChildrenName p
+
+--
+findChild' :: (NameSpaceID nsID)
+ => nsID
+ -> ElementName
+ -> XMLConverter nsID extraState x (Maybe XML.Element)
+findChild' nsID name = elemName nsID name
+ &&& getCurrentElement
+ >>§ XML.findChild
+
+--
+findChild :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState x XML.Element
+findChild nsID name = findChild' nsID name
+ >>> maybeToChoice
+
+--
+filterChild' :: (XML.Element -> Bool)
+ -> XMLConverter nsID extraState x (Maybe XML.Element)
+filterChild' p = getCurrentElement
+ >>^ XML.filterChild p
+
+--
+filterChild :: (XML.Element -> Bool)
+ -> FallibleXMLConverter nsID extraState x XML.Element
+filterChild p = filterChild' p
+ >>> maybeToChoice
+
+--
+filterChildName' :: (XML.QName -> Bool)
+ -> XMLConverter nsID extraState x (Maybe XML.Element)
+filterChildName' p = getCurrentElement
+ >>^ XML.filterChildName p
+
+--
+filterChildName :: (XML.QName -> Bool)
+ -> FallibleXMLConverter nsID extraState x XML.Element
+filterChildName p = filterChildName' p
+ >>> maybeToChoice
+
+
+--------------------------------------------------------------------------------
+-- Attributes
+--------------------------------------------------------------------------------
+
+--
+isSet :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> (Either Failure Bool)
+ -> FallibleXMLConverter nsID extraState x Bool
+isSet nsID attrName deflt
+ = findAttr' nsID attrName
+ >>^ maybe deflt stringToBool
+
+--
+isSet' :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe Bool)
+isSet' nsID attrName = findAttr' nsID attrName
+ >>^ (>>= stringToBool')
+
+isSetWithDefault :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> Bool
+ -> XMLConverter nsID extraState x Bool
+isSetWithDefault nsID attrName def'
+ = isSet' nsID attrName
+ >>^ fromMaybe def'
+
+--
+hasAttrValueOf' :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> AttributeValue
+ -> XMLConverter nsID extraState x Bool
+hasAttrValueOf' nsID attrName attrValue
+ = findAttr nsID attrName
+ >>> ( const False ^|||^ (==attrValue))
+
+--
+failIfNotAttrValueOf :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> AttributeValue
+ -> FallibleXMLConverter nsID extraState x ()
+failIfNotAttrValueOf nsID attrName attrValue
+ = hasAttrValueOf' nsID attrName attrValue
+ >>^ boolToChoice
+
+-- | Is the value that is currently transported in the arrow the value of
+-- the specified attribute?
+isThatTheAttrValue :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState AttributeValue Bool
+isThatTheAttrValue nsID attrName
+ = keepingTheValue
+ (findAttr nsID attrName)
+ >>§ right.(==)
+
+-- | Lookup value in a dictionary, fail if no attribute found or value
+-- not in dictionary
+searchAttrIn :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> [(AttributeValue,a)]
+ -> FallibleXMLConverter nsID extraState x a
+searchAttrIn nsID attrName dict
+ = findAttr nsID attrName
+ >>?^? maybeToChoice.(`lookup` dict )
+
+
+-- | Lookup value in a dictionary. Fail if no attribute found. If value not in
+-- dictionary, return default value
+searchAttrWith :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> a
+ -> [(AttributeValue,a)]
+ -> FallibleXMLConverter nsID extraState x a
+searchAttrWith nsID attrName defV dict
+ = findAttr nsID attrName
+ >>?^ (fromMaybe defV).(`lookup` dict )
+
+-- | Lookup value in a dictionary. If attribute or value not found,
+-- return default value
+searchAttr :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> a
+ -> [(AttributeValue,a)]
+ -> XMLConverter nsID extraState x a
+searchAttr nsID attrName defV dict
+ = searchAttrIn nsID attrName dict
+ >>> const defV ^|||^ id
+
+-- | Read a 'Lookupable' attribute. Fail if no match.
+lookupAttr :: (NameSpaceID nsID, Lookupable a)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState x a
+lookupAttr nsID attrName = lookupAttr' nsID attrName
+ >>^ maybeToChoice
+
+
+-- | Read a 'Lookupable' attribute. Return the result as a 'Maybe'.
+lookupAttr' :: (NameSpaceID nsID, Lookupable a)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe a)
+lookupAttr' nsID attrName
+ = findAttr' nsID attrName
+ >>^ (>>= readLookupable)
+
+-- | Read a 'Lookupable' attribute with explicit default
+lookupAttrWithDefault :: (NameSpaceID nsID, Lookupable a)
+ => nsID -> AttributeName
+ -> a
+ -> XMLConverter nsID extraState x a
+lookupAttrWithDefault nsID attrName deflt
+ = lookupAttr' nsID attrName
+ >>^ fromMaybe deflt
+
+-- | Read a 'Lookupable' attribute with implicit default
+lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x a
+lookupDefaultingAttr nsID attrName
+ = lookupAttrWithDefault nsID attrName def
+
+-- | Return value as a (Maybe String)
+findAttr' :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe AttributeValue)
+findAttr' nsID attrName = elemName nsID attrName
+ &&& getCurrentElement
+ >>§ XML.findAttr
+
+-- | Return value as string or fail
+findAttr :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState x AttributeValue
+findAttr nsID attrName = findAttr' nsID attrName
+ >>> maybeToChoice
+
+-- | Return value as string or return provided default value
+findAttrWithDefault :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> AttributeValue
+ -> XMLConverter nsID extraState x AttributeValue
+findAttrWithDefault nsID attrName deflt
+ = findAttr' nsID attrName
+ >>^ fromMaybe deflt
+
+-- | Read and return value or fail
+readAttr :: (NameSpaceID nsID, Read attrValue)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState x attrValue
+readAttr nsID attrName = readAttr' nsID attrName
+ >>> maybeToChoice
+
+-- | Read and return value or return Nothing
+readAttr' :: (NameSpaceID nsID, Read attrValue)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe attrValue)
+readAttr' nsID attrName = findAttr' nsID attrName
+ >>^ (>>= tryToRead)
+
+-- | Read and return value or return provided default value
+readAttrWithDefault :: (NameSpaceID nsID, Read attrValue)
+ => nsID -> AttributeName
+ -> attrValue
+ -> XMLConverter nsID extraState x attrValue
+readAttrWithDefault nsID attrName deflt
+ = findAttr' nsID attrName
+ >>^ (>>= tryToRead)
+ >>^ fromMaybe deflt
+
+-- | Read and return value or return default value from 'Default' instance
+getAttr :: (NameSpaceID nsID, Read attrValue, Default attrValue)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x attrValue
+getAttr nsID attrName = readAttrWithDefault nsID attrName def
+
+--------------------------------------------------------------------------------
+-- Movements
+--------------------------------------------------------------------------------
+
+--
+jumpThere :: XMLConverter nsID extraState XML.Element XML.Element
+jumpThere = withState (\state element
+ -> ( pushElement element state , element )
+ )
+
+--
+swapStack :: XMLConverter nsID extraState [XML.Element] [XML.Element]
+swapStack = withState swapStack'
+
+--
+jumpBack :: FallibleXMLConverter nsID extraState _x _x
+jumpBack = tryModifyState (popElement >>> maybeToChoice)
+
+-- | Support function for "procedural" converters: jump to an element, execute
+-- a converter, jump back.
+-- This version is safer than 'executeThere', because it does not rely on the
+-- internal stack. As a result, the converter can not move around in arbitrary
+-- ways. The downside is of course that some of the environment is not
+-- accessible to the converter.
+switchingTheStack :: XMLConverter nsID moreState a b
+ -> XMLConverter nsID moreState (a, XML.Element) b
+switchingTheStack a = second ( (:[]) ^>> swapStack )
+ >>> first a
+ >>> second swapStack
+ >>^ fst
+
+-- | Support function for "procedural" converters: jumps to an element, executes
+-- a converter, jumps back.
+-- Make sure that the converter is well-behaved; that is it should
+-- return to the exact position it started from in /every possible path/ of
+-- execution, even if it "fails". If it does not, you may encounter
+-- strange bugs. If you are not sure about the behaviour or want to use
+-- shortcuts, you can often use 'switchingTheStack' instead.
+executeThere :: FallibleXMLConverter nsID moreState a b
+ -> FallibleXMLConverter nsID moreState (a, XML.Element) b
+executeThere a = second jumpThere
+ >>> fst
+ ^>> a
+ >>> jumpBack -- >>? jumpBack would not ensure the jump.
+ >>^ collapseEither
+
+-- | Do something in a sub-element, tnen come back
+executeIn :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState f s
+ -> FallibleXMLConverter nsID extraState f s
+executeIn nsID name a = keepingTheValue
+ (findChild nsID name)
+ >>> ignoringState liftFailure
+ >>? switchingTheStack a
+ where liftFailure (_, (Left f)) = Left f
+ liftFailure (x, (Right e)) = Right (x, e)
+
+--------------------------------------------------------------------------------
+-- Iterating over children
+--------------------------------------------------------------------------------
+
+-- Helper converter to prepare different types of iterations.
+-- It lifts the children (of a certain type) of the current element
+-- into the value level and pairs each one with the current input value.
+prepareIteration :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> XMLConverter nsID extraState b [(b, XML.Element)]
+prepareIteration nsID name = keepingTheValue
+ (findChildren nsID name)
+ >>§ distributeValue
+
+-- | Applies a converter to every child element of a specific type.
+-- Collects results in a 'Monoid'.
+-- Fails completely if any conversion fails.
+collectEvery :: (NameSpaceID nsID, Monoid m)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState a m
+ -> FallibleXMLConverter nsID extraState a m
+collectEvery nsID name a = prepareIteration nsID name
+ >>> foldS' (switchingTheStack a)
+
+--
+withEveryL :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState a b
+ -> FallibleXMLConverter nsID extraState a [b]
+withEveryL = withEvery
+
+-- | Applies a converter to every child element of a specific type.
+-- Collects results in a 'MonadPlus'.
+-- Fails completely if any conversion fails.
+withEvery :: (NameSpaceID nsID, MonadPlus m)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState a b
+ -> FallibleXMLConverter nsID extraState a (m b)
+withEvery nsID name a = prepareIteration nsID name
+ >>> iterateS' (switchingTheStack a)
+
+-- | Applies a converter to every child element of a specific type.
+-- Collects all successful results in a list.
+tryAll :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState b a
+ -> XMLConverter nsID extraState b [a]
+tryAll nsID name a = prepareIteration nsID name
+ >>> iterateS (switchingTheStack a)
+ >>^ collectRights
+
+-- | Applies a converter to every child element of a specific type.
+-- Collects all successful results.
+tryAll' :: (NameSpaceID nsID, F.Foldable c, MonadPlus c)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState b a
+ -> XMLConverter nsID extraState b (c a)
+tryAll' nsID name a = prepareIteration nsID name
+ >>> iterateS (switchingTheStack a)
+ >>^ collectRightsF
+
+--------------------------------------------------------------------------------
+-- Matching children
+--------------------------------------------------------------------------------
+
+type IdXMLConverter nsID moreState x
+ = XMLConverter nsID moreState x x
+
+type MaybeEConverter nsID moreState x
+ = Maybe (IdXMLConverter nsID moreState (x, XML.Element))
+
+-- Chainable converter that helps deciding which converter to actually use.
+type ElementMatchConverter nsID extraState x
+ = IdXMLConverter nsID
+ extraState
+ (MaybeEConverter nsID extraState x, XML.Element)
+
+type MaybeCConverter nsID moreState x
+ = Maybe (IdXMLConverter nsID moreState (x, XML.Content))
+
+-- Chainable converter that helps deciding which converter to actually use.
+type ContentMatchConverter nsID extraState x
+ = IdXMLConverter nsID
+ extraState
+ (MaybeCConverter nsID extraState x, XML.Content)
+
+-- Helper function: The @c@ is actually a converter that is to be selected by
+-- matching XML elements to the first two parameters.
+-- The fold used to match elements however is very simple, so to use it,
+-- this function wraps the converter in another converter that unifies
+-- the accumulator. Think of a lot of converters with the resulting type
+-- chained together. The accumulator not only transports the element
+-- unchanged to the next matcher, it also does the actual selecting by
+-- combining the intermediate results with '(<|>)'.
+makeMatcherE :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState a a
+ -> ElementMatchConverter nsID extraState a
+makeMatcherE nsID name c = ( second (
+ elemNameIs nsID name
+ >>^ bool Nothing (Just tryC)
+ )
+ >>§ (<|>)
+ ) &&&^ snd
+ where tryC = (fst ^&&& executeThere c >>§ recover) &&&^ snd
+
+-- Helper function: The @c@ is actually a converter that is to be selected by
+-- matching XML content to the first two parameters.
+-- The fold used to match elements however is very simple, so to use it,
+-- this function wraps the converter in another converter that unifies
+-- the accumulator. Think of a lot of converters with the resulting type
+-- chained together. The accumulator not only transports the element
+-- unchanged to the next matcher, it also does the actual selecting by
+-- combining the intermediate results with '(<|>)'.
+makeMatcherC :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState a a
+ -> ContentMatchConverter nsID extraState a
+makeMatcherC nsID name c = ( second ( contentToElem
+ >>> returnV Nothing
+ ||| ( elemNameIs nsID name
+ >>^ bool Nothing (Just cWithJump)
+ )
+ )
+ >>§ (<|>)
+ ) &&&^ snd
+ where cWithJump = ( fst
+ ^&&& ( second contentToElem
+ >>> spreadChoice
+ ^>>? executeThere c
+ )
+ >>§ recover)
+ &&&^ snd
+ contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element
+ contentToElem = arr $ \e -> case e of
+ XML.Elem e' -> succeedWith e'
+ _ -> failEmpty
+
+-- Creates and chains a bunch of matchers
+prepareMatchersE :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)]
+ -> ElementMatchConverter nsID extraState x
+--prepareMatchersE = foldSs . (map $ uncurry3 makeMatcherE)
+prepareMatchersE = reverseComposition . (map $ uncurry3 makeMatcherE)
+
+-- Creates and chains a bunch of matchers
+prepareMatchersC :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)]
+ -> ContentMatchConverter nsID extraState x
+--prepareMatchersC = foldSs . (map $ uncurry3 makeMatcherC)
+prepareMatchersC = reverseComposition . (map $ uncurry3 makeMatcherC)
+
+-- | Takes a list of element-data - converter groups and
+-- * Finds all children of the current element
+-- * Matches each group to each child in order (at most one group per child)
+-- * Filters non-matched children
+-- * Chains all found converters in child-order
+-- * Applies the chain to the input element
+matchChildren :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
+ -> XMLConverter nsID extraState a a
+matchChildren lookups = let matcher = prepareMatchersE lookups
+ in keepingTheValue (
+ elChildren
+ >>> map (Nothing,)
+ ^>> iterateSL matcher
+ >>^ catMaybes.map (\(m,e) -> fmap (swallowElem e) m)
+ -- >>> foldSs
+ >>> reverseComposition
+ )
+ >>> swap
+ ^>> app
+ where
+ -- let the converter swallow the element and drop the element
+ -- in the return value
+ swallowElem element converter = (,element) ^>> converter >>^ fst
+
+--
+matchContent'' :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
+ -> XMLConverter nsID extraState a a
+matchContent'' lookups = let matcher = prepareMatchersC lookups
+ in keepingTheValue (
+ elContent
+ >>> map (Nothing,)
+ ^>> iterateSL matcher
+ >>^ catMaybes.map (\(m,c) -> fmap (swallowContent c) m)
+ -- >>> foldSs
+ >>> reverseComposition
+ )
+ >>> swap
+ ^>> app
+ where
+ -- let the converter swallow the content and drop the content
+ -- in the return value
+ swallowContent content converter = (,content) ^>> converter >>^ fst
+
+
+-- | Takes a list of element-data - converter groups and
+-- * Finds all content of the current element
+-- * Matches each group to each piece of content in order
+-- (at most one group per piece of content)
+-- * Filters non-matched content
+-- * Chains all found converters in content-order
+-- * Applies the chain to the input element
+matchContent' :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
+ -> XMLConverter nsID extraState a a
+matchContent' lookups = matchContent lookups (arr fst)
+
+-- | Takes a list of element-data - converter groups and
+-- * Finds all content of the current element
+-- * Matches each group to each piece of content in order
+-- (at most one group per piece of content)
+-- * Adds a default converter for all non-matched content
+-- * Chains all found converters in content-order
+-- * Applies the chain to the input element
+matchContent :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
+ -> XMLConverter nsID extraState (a,XML.Content) a
+ -> XMLConverter nsID extraState a a
+matchContent lookups fallback
+ = let matcher = prepareMatchersC lookups
+ in keepingTheValue (
+ elContent
+ >>> map (Nothing,)
+ ^>> iterateSL matcher
+ >>^ map swallowOrFallback
+ -- >>> foldSs
+ >>> reverseComposition
+ )
+ >>> swap
+ ^>> app
+ where
+ -- let the converter swallow the content and drop the content
+ -- in the return value
+ swallowOrFallback (Just converter,content) = (,content) ^>> converter >>^ fst
+ swallowOrFallback (Nothing ,content) = (,content) ^>> fallback
+
+--------------------------------------------------------------------------------
+-- Internals
+--------------------------------------------------------------------------------
+
+stringToBool :: (Monoid failure) => String -> Either failure Bool
+stringToBool val -- stringToBool' val >>> maybeToChoice
+ | val `elem` trueValues = succeedWith True
+ | val `elem` falseValues = succeedWith False
+ | otherwise = failEmpty
+ where trueValues = ["true" ,"on" ,"1"]
+ falseValues = ["false","off","0"]
+
+stringToBool' :: String -> Maybe Bool
+stringToBool' val | val `elem` trueValues = Just True
+ | val `elem` falseValues = Just False
+ | otherwise = Nothing
+ where trueValues = ["true" ,"on" ,"1"]
+ falseValues = ["false","off","0"]
+
+
+distributeValue :: a -> [b] -> [(a,b)]
+distributeValue = map.(,)
+
+--------------------------------------------------------------------------------
+
+{-
+NOTES
+It might be a good idea to refactor the namespace stuff.
+E.g.: if a namespace constructor took a string as a parameter, things like
+> a ?>/< (NsText,"body")
+would be nicer.
+Together with a rename and some trickery, something like
+> |< NsText "body" >< NsText "p" ?> a </> </>|
+might even be possible.
+
+Some day, XML.Light should be replaced by something better.
+While doing that, it might be useful to replace String as the type of element
+names with something else, too. (Of course with OverloadedStrings).
+While doing that, maybe the types can be created in a way that something like
+> NsText:"body"
+could be used. Overloading (:) does not sounds like the best idea, but if the
+element name type was a list, this might be possible.
+Of course that would be a bit hackish, so the "right" way would probably be
+something like
+> InNS NsText "body"
+but isn't that a bit boring? ;)
+-}
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
new file mode 100644
index 000000000..e28056814
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -0,0 +1,110 @@
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Reader.Odt.Namespaces
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Namespaces used in odt files.
+-}
+
+module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
+ ) where
+
+import Data.List ( isPrefixOf )
+import Data.Maybe ( fromMaybe, listToMaybe )
+import qualified Data.Map as M ( empty, insert )
+
+import Text.Pandoc.Readers.Odt.Generic.Namespaces
+
+
+instance NameSpaceID Namespace where
+
+ getInitialIRImap = nsIDmap
+
+ getNamespaceID "" m = Just(m, NsXML)
+ getNamespaceID iri m = asPair $ fromMaybe (NsOther iri) (findID iri)
+ where asPair nsID = Just (M.insert nsID iri m, nsID)
+
+
+findID :: NameSpaceIRI -> Maybe Namespace
+findID iri = listToMaybe [nsID | (iri',~nsID) <- nsIDs, iri' `isPrefixOf` iri]
+
+nsIDmap :: NameSpaceIRIs Namespace
+nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
+
+data Namespace = -- Open Document core
+ NsOffice | NsStyle | NsText | NsTable | NsForm
+ | NsDraw | Ns3D | NsAnim | NsChart | NsConfig
+ | NsDB | NsMeta | NsNumber | NsScript | NsManifest
+ | NsPresentation
+ -- Metadata
+ | NsODF
+ -- Compatible elements
+ | NsXSL_FO | NsSVG | NsSmil
+ -- External standards
+ | NsMathML | NsXForms | NsXLink | NsXHtml | NsGRDDL
+ | NsDublinCore
+ -- Metadata manifest
+ | NsPKG
+ -- Others
+ | NsOpenFormula
+ -- Core XML (basically only for the 'id'-attribute)
+ | NsXML
+ -- Fallback
+ | NsOther String
+ deriving ( Eq, Ord, Show )
+
+-- | Not the actual iri's, but large prefixes of them - this way there are
+-- less versioning problems and the like.
+nsIDs :: [(String,Namespace)]
+nsIDs = [
+ ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ),
+ ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ),
+ ("urn:oasis:names:tc:opendocument:xmlns:config" , NsConfig ),
+ ("urn:oasis:names:tc:opendocument:xmlns:database" , NsDB ),
+ ("urn:oasis:names:tc:opendocument:xmlns:dr3d" , Ns3D ),
+ ("urn:oasis:names:tc:opendocument:xmlns:drawing" , NsDraw ),
+ ("urn:oasis:names:tc:opendocument:xmlns:form" , NsForm ),
+ ("urn:oasis:names:tc:opendocument:xmlns:manifest" , NsManifest ),
+ ("urn:oasis:names:tc:opendocument:xmlns:meta" , NsMeta ),
+ ("urn:oasis:names:tc:opendocument:xmlns:datastyle" , NsNumber ),
+ ("urn:oasis:names:tc:opendocument:xmlns:of" , NsOpenFormula ),
+ ("urn:oasis:names:tc:opendocument:xmlns:office:1.0" , NsOffice ),
+ ("urn:oasis:names:tc:opendocument:xmlns:presentation" , NsPresentation ),
+ ("urn:oasis:names:tc:opendocument:xmlns:script" , NsScript ),
+ ("urn:oasis:names:tc:opendocument:xmlns:style" , NsStyle ),
+ ("urn:oasis:names:tc:opendocument:xmlns:table" , NsTable ),
+ ("urn:oasis:names:tc:opendocument:xmlns:text" , NsText ),
+ ("urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible", NsXSL_FO ),
+ ("urn:oasis:names:tc:opendocument:xmlns:smil-compatible" , NsSmil ),
+ ("urn:oasis:names:tc:opendocument:xmlns:svg-compatible" , NsSVG ),
+ ("http://docs.oasis-open.org/ns/office/1.2/meta/odf" , NsODF ),
+ ("http://docs.oasis-open.org/ns/office/1.2/meta/pkg" , NsPKG ),
+ ("http://purl.org/dc/elements" , NsDublinCore ),
+ ("http://www.w3.org/2003/g/data-view" , NsGRDDL ),
+ ("http://www.w3.org/1998/Math/MathML" , NsMathML ),
+ ("http://www.w3.org/1999/xhtml" , NsXHtml ),
+ ("http://www.w3.org/2002/xforms" , NsXForms ),
+ ("http://www.w3.org/1999/xlink" , NsXLink )
+ ] \ No newline at end of file
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
new file mode 100644
index 000000000..1cf87cc59
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -0,0 +1,737 @@
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE Arrows #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.StyleReader
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Reader for the style information in an odt document.
+-}
+
+module Text.Pandoc.Readers.Odt.StyleReader
+( Style (..)
+, StyleName
+, StyleFamily (..)
+, Styles (..)
+, StyleProperties (..)
+, TextProperties (..)
+, ParaProperties (..)
+, VerticalTextPosition (..)
+, ListItemNumberFormat (..)
+, ListLevel
+, ListStyle (..)
+, ListLevelStyle (..)
+, ListLevelType (..)
+, LengthOrPercent (..)
+, lookupStyle
+, getTextProperty
+, getTextProperty'
+, getParaProperty
+, getListStyle
+, getListLevelStyle
+, getStyleFamily
+, lookupDefaultStyle
+, lookupDefaultStyle'
+, lookupListStyleByName
+, getPropertyChain
+, textPropertyChain
+, stylePropertyChain
+, stylePropertyChain'
+, getStylePropertyChain
+, extendedStylePropertyChain
+, extendedStylePropertyChain'
+, liftStyles
+, readStylesAt
+) where
+
+import Control.Arrow
+import Control.Applicative hiding ( liftA, liftA2, liftA3 )
+
+import qualified Data.Foldable as F
+import qualified Data.Map as M
+import qualified Data.Set as S
+import Data.List ( unfoldr )
+import Data.Default
+import Data.Monoid
+import Data.Maybe
+
+import qualified Text.XML.Light as XML
+
+import Text.Pandoc.Readers.Odt.Arrows.State
+import Text.Pandoc.Readers.Odt.Arrows.Utils
+
+import Text.Pandoc.Readers.Odt.Generic.Utils
+import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+
+import Text.Pandoc.Readers.Odt.Namespaces
+import Text.Pandoc.Readers.Odt.Base
+
+
+readStylesAt :: XML.Element -> Fallible Styles
+readStylesAt e = runConverter' readAllStyles mempty e
+
+--------------------------------------------------------------------------------
+-- Reader for font declarations and font pitches
+--------------------------------------------------------------------------------
+
+-- Pandoc has no support for different font pitches. Yet knowing them can be
+-- very helpful in cases where Pandoc has more semantics than OpenDocument.
+-- In these cases, the pitch can help deciding as what to define a block of
+-- text. So let's start with a type for font pitches:
+
+data FontPitch = PitchVariable | PitchFixed
+ deriving ( Eq, Show )
+
+instance Lookupable FontPitch where
+ lookupTable = [ ("variable" , PitchVariable)
+ , ("fixed" , PitchFixed )
+ ]
+
+instance Default FontPitch where
+ def = PitchVariable
+
+-- The font pitch can be specifed in a style directly. Normally, however,
+-- it is defined in the font. That is also the specs' recommendation.
+--
+-- Thus, we want
+
+type FontFaceName = String
+
+type FontPitches = M.Map FontFaceName FontPitch
+
+-- To get there, the fonts have to be read and the pitches extracted.
+-- But the resulting map are only needed at one later place, so it should not be
+-- transported on the value level, especially as we already use a state arrow.
+-- So instead, the resulting map is lifted into the state of the reader.
+-- (An alternative might be ImplicitParams, but again, we already have a state.)
+--
+-- So the main style readers will have the types
+type StyleReader a b = XMLReader FontPitches a b
+-- and
+type StyleReaderSafe a b = XMLReaderSafe FontPitches a b
+-- respectively.
+--
+-- But before we can work with these, we need to define the reader that reads
+-- the fonts:
+
+-- | A reader for font pitches
+fontPitchReader :: XMLReader _s _x FontPitches
+fontPitchReader = executeIn NsOffice "font-face-decls" (
+ ( withEveryL NsStyle "font-face" $ liftAsSuccess (
+ findAttr' NsStyle "name"
+ &&&
+ lookupDefaultingAttr NsStyle "font-pitch"
+ )
+ )
+ >>?^ ( M.fromList . (foldl accumLegalPitches []) )
+ )
+ where accumLegalPitches ls (Nothing,_) = ls
+ accumLegalPitches ls (Just n,p) = (n,p):ls
+
+
+-- | A wrapper around the font pitch reader that lifts the result into the
+-- state.
+readFontPitches :: StyleReader x x
+readFontPitches = producingExtraState () () fontPitchReader
+
+
+-- | Looking up a pitch in the state of the arrow.
+--
+-- The function does the following:
+-- * Look for the font pitch in an attribute.
+-- * If that fails, look for the font name, look up the font in the state
+-- and use the pitch from there.
+-- * Return the result in a Maybe
+--
+findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch)
+findPitch = ( lookupAttr NsStyle "font-pitch"
+ `ifFailedDo` findAttr NsStyle "font-name"
+ >>? ( keepingTheValue getExtraState
+ >>§ M.lookup
+ >>^ maybeToChoice
+ )
+ )
+ >>> choiceToMaybe
+
+--------------------------------------------------------------------------------
+-- Definitions of main data
+--------------------------------------------------------------------------------
+
+type StyleName = String
+
+-- | There are two types of styles: named styles with a style family and an
+-- optional style parent, and default styles for each style family,
+-- defining default style properties
+data Styles = Styles
+ { stylesByName :: M.Map StyleName Style
+ , listStylesByName :: M.Map StyleName ListStyle
+ , defaultStyleMap :: M.Map StyleFamily StyleProperties
+ }
+ deriving ( Show )
+
+-- Styles from a monoid under union
+instance Monoid Styles where
+ mempty = Styles M.empty M.empty M.empty
+ mappend (Styles sBn1 dSm1 lsBn1)
+ (Styles sBn2 dSm2 lsBn2)
+ = Styles (M.union sBn1 sBn2)
+ (M.union dSm1 dSm2)
+ (M.union lsBn1 lsBn2)
+
+-- Not all families from the specifications are implemented, only those we need.
+-- But there are none that are not mentioned here.
+data StyleFamily = FaText | FaParagraph
+-- | FaTable | FaTableCell | FaTableColumn | FaTableRow
+-- | FaGraphic | FaDrawing | FaChart
+-- | FaPresentation
+-- | FaRuby
+ deriving ( Eq, Ord, Show )
+
+instance Lookupable StyleFamily where
+ lookupTable = [ ( "text" , FaText )
+ , ( "paragraph" , FaParagraph )
+-- , ( "table" , FaTable )
+-- , ( "table-cell" , FaTableCell )
+-- , ( "table-column" , FaTableColumn )
+-- , ( "table-row" , FaTableRow )
+-- , ( "graphic" , FaGraphic )
+-- , ( "drawing-page" , FaDrawing )
+-- , ( "chart" , FaChart )
+-- , ( "presentation" , FaPresentation )
+-- , ( "ruby" , FaRuby )
+ ]
+
+-- | A named style
+data Style = Style { styleFamily :: Maybe StyleFamily
+ , styleParentName :: Maybe StyleName
+ , listStyle :: Maybe StyleName
+ , styleProperties :: StyleProperties
+ }
+ deriving ( Eq, Show )
+
+data StyleProperties = SProps { textProperties :: Maybe TextProperties
+ , paraProperties :: Maybe ParaProperties
+-- , tableColProperties :: Maybe TColProperties
+-- , tableRowProperties :: Maybe TRowProperties
+-- , tableCellProperties :: Maybe TCellProperties
+-- , tableProperties :: Maybe TableProperties
+-- , graphicProperties :: Maybe GraphProperties
+ }
+ deriving ( Eq, Show )
+
+instance Default StyleProperties where
+ def = SProps { textProperties = Just def
+ , paraProperties = Just def
+ }
+
+data TextProperties = PropT { isEmphasised :: Bool
+ , isStrong :: Bool
+ , pitch :: Maybe FontPitch
+ , verticalPosition :: VerticalTextPosition
+ , underline :: Maybe UnderlineMode
+ , strikethrough :: Maybe UnderlineMode
+ }
+ deriving ( Eq, Show )
+
+instance Default TextProperties where
+ def = PropT { isEmphasised = False
+ , isStrong = False
+ , pitch = Just def
+ , verticalPosition = def
+ , underline = Nothing
+ , strikethrough = Nothing
+ }
+
+data ParaProperties = PropP { paraNumbering :: ParaNumbering
+ , indentation :: LengthOrPercent
+ , margin_left :: LengthOrPercent
+ }
+ deriving ( Eq, Show )
+
+instance Default ParaProperties where
+ def = PropP { paraNumbering = NumberingNone
+ , indentation = def
+ , margin_left = def
+ }
+
+----
+-- All the little data types that make up the properties
+----
+
+data VerticalTextPosition = VPosNormal | VPosSuper | VPosSub
+ deriving ( Eq, Show )
+
+instance Default VerticalTextPosition where
+ def = VPosNormal
+
+instance Read VerticalTextPosition where
+ readsPrec _ s = [ (VPosSub , s') | ("sub" , s') <- lexS ]
+ ++ [ (VPosSuper , s') | ("super" , s') <- lexS ]
+ ++ [ (signumToVPos n , s') | ( n , s') <- readPercent s ]
+ where
+ lexS = lex s
+ signumToVPos n | n < 0 = VPosSub
+ | n > 0 = VPosSuper
+ | otherwise = VPosNormal
+
+data UnderlineMode = UnderlineModeNormal | UnderlineModeSkipWhitespace
+ deriving ( Eq, Show )
+
+instance Lookupable UnderlineMode where
+ lookupTable = [ ( "continuous" , UnderlineModeNormal )
+ , ( "skip-white-space" , UnderlineModeSkipWhitespace )
+ ]
+
+
+data ParaNumbering = NumberingNone | NumberingKeep | NumberingRestart Int
+ deriving ( Eq, Show )
+
+data LengthOrPercent = LengthValueMM Int | PercentValue Int
+ deriving ( Eq, Show )
+
+instance Default LengthOrPercent where
+ def = LengthValueMM 0
+
+instance Read LengthOrPercent where
+ readsPrec _ s =
+ [ (PercentValue percent , s' ) | (percent , s' ) <- readPercent s]
+ ++ [ (LengthValueMM lengthMM , s'') | (length' , s' ) <- reads s
+ , (unit , s'') <- reads s'
+ , let lengthMM = estimateInMillimeter
+ length' unit
+ ]
+
+data XslUnit = XslUnitMM | XslUnitCM
+ | XslUnitInch
+ | XslUnitPoints | XslUnitPica
+ | XslUnitPixel
+ | XslUnitEM
+
+instance Show XslUnit where
+ show XslUnitMM = "mm"
+ show XslUnitCM = "cm"
+ show XslUnitInch = "in"
+ show XslUnitPoints = "pt"
+ show XslUnitPica = "pc"
+ show XslUnitPixel = "px"
+ show XslUnitEM = "em"
+
+instance Read XslUnit where
+ readsPrec _ "mm" = [(XslUnitMM , "")]
+ readsPrec _ "cm" = [(XslUnitCM , "")]
+ readsPrec _ "in" = [(XslUnitInch , "")]
+ readsPrec _ "pt" = [(XslUnitPoints , "")]
+ readsPrec _ "pc" = [(XslUnitPica , "")]
+ readsPrec _ "px" = [(XslUnitPixel , "")]
+ readsPrec _ "em" = [(XslUnitEM , "")]
+ readsPrec _ _ = []
+
+-- | Rough conversion of measures into millimeters.
+-- Pixels and em's are actually implemetation dependant/relative measures,
+-- so I could not really easily calculate anything exact here even if I wanted.
+-- But I do not care about exactness right now, as I only use measures
+-- to determine if a paragraph is "indented" or not.
+estimateInMillimeter :: Int -> XslUnit -> Int
+estimateInMillimeter n XslUnitMM = n
+estimateInMillimeter n XslUnitCM = n * 10
+estimateInMillimeter n XslUnitInch = n * 25 -- * 25.4
+estimateInMillimeter n XslUnitPoints = n `div` 3 -- * 1/72 * 25.4
+estimateInMillimeter n XslUnitPica = n * 4 -- * 12 * 1/72 * 25.4
+estimateInMillimeter n XslUnitPixel = n `div`3 -- * 1/72 * 25.4
+estimateInMillimeter n XslUnitEM = n * 7 -- * 16 * 1/72 * 25.4
+
+
+----
+-- List styles
+----
+
+type ListLevel = Int
+
+newtype ListStyle = ListStyle { levelStyles :: M.Map ListLevel ListLevelStyle
+ }
+ deriving ( Eq, Show )
+
+--
+getListLevelStyle :: ListLevel -> ListStyle -> Maybe ListLevelStyle
+getListLevelStyle level ListStyle{..} =
+ let (lower , exactHit , _) = M.splitLookup level levelStyles
+ in exactHit <|> fmap fst (M.maxView lower)
+ -- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1]
+ -- ^ simpler, but in general less efficient
+
+data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType
+ , listItemPrefix :: Maybe String
+ , listItemSuffix :: Maybe String
+ , listItemFormat :: ListItemNumberFormat
+ }
+ deriving ( Eq, Ord )
+
+instance Show ListLevelStyle where
+ show ListLevelStyle{..} = "<LLS|"
+ ++ (show listLevelType)
+ ++ "|"
+ ++ (maybeToString listItemPrefix)
+ ++ (show listItemFormat)
+ ++ (maybeToString listItemSuffix)
+ ++ ">"
+ where maybeToString = fromMaybe ""
+
+data ListLevelType = LltBullet | LltImage | LltNumbered
+ deriving ( Eq, Ord, Show )
+
+data ListItemNumberFormat = LinfNone
+ | LinfNumber
+ | LinfRomanLC | LinfRomanUC
+ | LinfAlphaLC | LinfAlphaUC
+ | LinfString String
+ deriving ( Eq, Ord )
+
+instance Show ListItemNumberFormat where
+ show LinfNone = ""
+ show LinfNumber = "1"
+ show LinfRomanLC = "i"
+ show LinfRomanUC = "I"
+ show LinfAlphaLC = "a"
+ show LinfAlphaUC = "A"
+ show (LinfString s) = s
+
+instance Default ListItemNumberFormat where
+ def = LinfNone
+
+instance Read ListItemNumberFormat where
+ readsPrec _ "" = [(LinfNone , "")]
+ readsPrec _ "1" = [(LinfNumber , "")]
+ readsPrec _ "i" = [(LinfRomanLC , "")]
+ readsPrec _ "I" = [(LinfRomanUC , "")]
+ readsPrec _ "a" = [(LinfAlphaLC , "")]
+ readsPrec _ "A" = [(LinfAlphaUC , "")]
+ readsPrec _ s = [(LinfString s , "")]
+
+--------------------------------------------------------------------------------
+-- Readers
+--
+-- ...it seems like a whole lot of this should be automatically deriveable
+-- or at least moveable into a class. Most of this is data concealed in
+-- code.
+--------------------------------------------------------------------------------
+
+--
+readAllStyles :: StyleReader _x Styles
+readAllStyles = ( readFontPitches
+ >>?! ( readAutomaticStyles
+ &&& readStyles ))
+ >>?§? chooseMax
+ -- all top elements are always on the same hierarchy level
+
+--
+readStyles :: StyleReader _x Styles
+readStyles = executeIn NsOffice "styles" $ liftAsSuccess
+ $ liftA3 Styles
+ ( tryAll NsStyle "style" readStyle >>^ M.fromList )
+ ( tryAll NsText "list-style" readListStyle >>^ M.fromList )
+ ( tryAll NsStyle "default-style" readDefaultStyle >>^ M.fromList )
+
+--
+readAutomaticStyles :: StyleReader _x Styles
+readAutomaticStyles = executeIn NsOffice "automatic-styles" $ liftAsSuccess
+ $ liftA3 Styles
+ ( tryAll NsStyle "style" readStyle >>^ M.fromList )
+ ( tryAll NsText "list-style" readListStyle >>^ M.fromList )
+ ( returnV M.empty )
+
+--
+readDefaultStyle :: StyleReader _x (StyleFamily, StyleProperties)
+readDefaultStyle = lookupAttr NsStyle "family"
+ >>?! keepingTheValue readStyleProperties
+
+--
+readStyle :: StyleReader _x (StyleName,Style)
+readStyle = findAttr NsStyle "name"
+ >>?! keepingTheValue
+ ( liftA4 Style
+ ( lookupAttr' NsStyle "family" )
+ ( findAttr' NsStyle "parent-style-name" )
+ ( findAttr' NsStyle "list-style-name" )
+ readStyleProperties
+ )
+
+--
+readStyleProperties :: StyleReaderSafe _x StyleProperties
+readStyleProperties = liftA2 SProps
+ ( readTextProperties >>> choiceToMaybe )
+ ( readParaProperties >>> choiceToMaybe )
+
+--
+readTextProperties :: StyleReader _x TextProperties
+readTextProperties =
+ executeIn NsStyle "text-properties" $ liftAsSuccess
+ ( liftA6 PropT
+ ( searchAttr NsXSL_FO "font-style" False isFontEmphasised )
+ ( searchAttr NsXSL_FO "font-weight" False isFontBold )
+ ( findPitch )
+ ( getAttr NsStyle "text-position" )
+ ( readUnderlineMode )
+ ( readStrikeThroughMode )
+ )
+ where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
+ isFontBold = ("normal",False):("bold",True)
+ :(map ((,True).show) ([100,200..900]::[Int]))
+
+readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode)
+readUnderlineMode = readLineMode "text-underline-mode"
+ "text-underline-style"
+
+readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
+readStrikeThroughMode = readLineMode "text-line-through-mode"
+ "text-line-through-style"
+
+readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
+readLineMode modeAttr styleAttr = proc x -> do
+ isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x
+ mode <- lookupAttr' NsStyle modeAttr -< x
+ if isUL
+ then case mode of
+ Just m -> returnA -< Just m
+ Nothing -> returnA -< Just UnderlineModeNormal
+ else returnA -< Nothing
+ where
+ isLinePresent = [("none",False)] ++ map (,True)
+ [ "dash" , "dot-dash" , "dot-dot-dash" , "dotted"
+ , "long-dash" , "solid" , "wave"
+ ]
+
+--
+readParaProperties :: StyleReader _x ParaProperties
+readParaProperties =
+ executeIn NsStyle "paragraph-properties" $ liftAsSuccess
+ ( liftA3 PropP
+ ( liftA2 readNumbering
+ ( isSet' NsText "number-lines" )
+ ( readAttr' NsText "line-number" )
+ )
+ ( liftA2 readIndentation
+ ( isSetWithDefault NsStyle "auto-text-indent" False )
+ ( getAttr NsXSL_FO "text-indent" )
+ )
+ ( getAttr NsXSL_FO "margin-left" )
+ )
+ where readNumbering (Just True) (Just n) = NumberingRestart n
+ readNumbering (Just True) _ = NumberingKeep
+ readNumbering _ _ = NumberingNone
+
+ readIndentation False indent = indent
+ readIndentation True _ = def
+
+----
+-- List styles
+----
+
+--
+readListStyle :: StyleReader _x (StyleName, ListStyle)
+readListStyle =
+ findAttr NsStyle "name"
+ >>?! keepingTheValue
+ ( liftA ListStyle
+ $ ( liftA3 SM.union3
+ ( readListLevelStyles NsText "list-level-style-number" LltNumbered )
+ ( readListLevelStyles NsText "list-level-style-bullet" LltBullet )
+ ( readListLevelStyles NsText "list-level-style-image" LltImage )
+ ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle
+ )
+--
+readListLevelStyles :: Namespace -> ElementName
+ -> ListLevelType
+ -> StyleReaderSafe _x (SM.SetMap Int ListLevelStyle)
+readListLevelStyles namespace elementName levelType =
+ ( tryAll namespace elementName (readListLevelStyle levelType)
+ >>^ SM.fromList
+ )
+
+--
+readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle)
+readListLevelStyle levelType = readAttr NsText "level"
+ >>?! keepingTheValue
+ ( liftA4 toListLevelStyle
+ ( returnV levelType )
+ ( findAttr' NsStyle "num-prefix" )
+ ( findAttr' NsStyle "num-suffix" )
+ ( getAttr NsStyle "num-format" )
+ )
+ where
+ toListLevelStyle _ p s LinfNone = ListLevelStyle LltBullet p s LinfNone
+ toListLevelStyle _ p s f@(LinfString _) = ListLevelStyle LltBullet p s f
+ toListLevelStyle t p s f = ListLevelStyle t p s f
+
+--
+chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle
+chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing
+ | otherwise = Just ( F.foldr1 select ls )
+ where
+ select ( ListLevelStyle t1 p1 s1 f1 )
+ ( ListLevelStyle t2 p2 s2 f2 )
+ = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2)
+ select' LltNumbered _ = LltNumbered
+ select' _ LltNumbered = LltNumbered
+ select' _ _ = LltBullet
+ selectLinf LinfNone f2 = f2
+ selectLinf f1 LinfNone = f1
+ selectLinf (LinfString _) f2 = f2
+ selectLinf f1 (LinfString _) = f1
+ selectLinf f1 _ = f1
+
+
+--------------------------------------------------------------------------------
+-- Tools to access style data
+--------------------------------------------------------------------------------
+
+--
+lookupStyle :: StyleName -> Styles -> Maybe Style
+lookupStyle name Styles{..} = M.lookup name stylesByName
+
+--
+lookupDefaultStyle :: StyleFamily -> Styles -> StyleProperties
+lookupDefaultStyle family Styles{..} = fromMaybe def
+ (M.lookup family defaultStyleMap)
+
+--
+lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties
+lookupDefaultStyle' Styles{..} family = fromMaybe def
+ (M.lookup family defaultStyleMap)
+
+--
+getListStyle :: Style -> Styles -> Maybe ListStyle
+getListStyle Style{..} styles = listStyle >>= (`lookupListStyleByName` styles)
+
+--
+lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle
+lookupListStyleByName name Styles{..} = M.lookup name listStylesByName
+
+
+-- | Returns a chain of parent of the current style. The direct parent will
+-- be the first element of the list, followed by its parent and so on.
+-- The current style is not in the list.
+parents :: Style -> Styles -> [Style]
+parents style styles = unfoldr findNextParent style -- Ha!
+ where findNextParent Style{..}
+ = fmap duplicate $ (`lookupStyle` styles) =<< styleParentName
+
+-- | Looks up the style family of the current style. Normally, every style
+-- should have one. But if not, all parents are searched.
+getStyleFamily :: Style -> Styles -> Maybe StyleFamily
+getStyleFamily style@Style{..} styles
+ = styleFamily
+ <|> (F.asum $ map (`getStyleFamily` styles) $ parents style styles)
+
+-- | Each 'Style' has certain 'StyleProperties'. But sometimes not all property
+-- values are specified. Instead, a value might be inherited from a
+-- parent style. This function makes this chain of inheritance
+-- concrete and easily accessible by encapsulating the necessary lookups.
+-- The resulting list contains the direct properties of the style as the first
+-- element, the ones of the direct parent element as the next one, and so on.
+--
+-- Note: There should also be default properties for each style family. These
+-- are @not@ contained in this list because properties inherited from
+-- parent elements take precedence over default styles.
+--
+-- This function is primarily meant to be used through convenience wrappers.
+--
+stylePropertyChain :: Style -> Styles -> [StyleProperties]
+stylePropertyChain style styles
+ = map styleProperties (style : parents style styles)
+
+--
+extendedStylePropertyChain :: [Style] -> Styles -> [StyleProperties]
+extendedStylePropertyChain [] _ = []
+extendedStylePropertyChain [style] styles = (stylePropertyChain style styles)
+ ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)))
+extendedStylePropertyChain (style:trace) styles = (stylePropertyChain style styles)
+ ++ (extendedStylePropertyChain trace styles)
+-- Optimizable with Data.Sequence
+
+--
+extendedStylePropertyChain' :: [Style] -> Styles -> Maybe [StyleProperties]
+extendedStylePropertyChain' [] _ = Nothing
+extendedStylePropertyChain' [style] styles = Just (
+ (stylePropertyChain style styles)
+ ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)))
+ )
+extendedStylePropertyChain' (style:trace) styles = fmap ((stylePropertyChain style styles) ++)
+ (extendedStylePropertyChain' trace styles)
+
+--
+stylePropertyChain' :: Styles -> Style -> [StyleProperties]
+stylePropertyChain' = flip stylePropertyChain
+
+--
+getStylePropertyChain :: StyleName -> Styles -> [StyleProperties]
+getStylePropertyChain name styles = maybe []
+ (`stylePropertyChain` styles)
+ (lookupStyle name styles)
+
+--
+getPropertyChain :: (StyleProperties -> Maybe a) -> Style -> Styles -> [a]
+getPropertyChain extract style styles = catMaybes
+ $ map extract
+ $ stylePropertyChain style styles
+
+--
+textPropertyChain :: Style -> Styles -> [TextProperties]
+textPropertyChain = getPropertyChain textProperties
+
+--
+paraPropertyChain :: Style -> Styles -> [ParaProperties]
+paraPropertyChain = getPropertyChain paraProperties
+
+--
+getTextProperty :: (TextProperties -> a) -> Style -> Styles -> Maybe a
+getTextProperty extract style styles = fmap extract
+ $ listToMaybe
+ $ textPropertyChain style styles
+
+--
+getTextProperty' :: (TextProperties -> Maybe a) -> Style -> Styles -> Maybe a
+getTextProperty' extract style styles = F.asum
+ $ map extract
+ $ textPropertyChain style styles
+
+--
+getParaProperty :: (ParaProperties -> a) -> Style -> Styles -> Maybe a
+getParaProperty extract style styles = fmap extract
+ $ listToMaybe
+ $ paraPropertyChain style styles
+
+-- | Lifts the reader into another readers' state.
+liftStyles :: (OdtConverterState s -> OdtConverterState Styles)
+ -> (OdtConverterState Styles -> OdtConverterState s )
+ -> XMLReader s x x
+liftStyles extract inject = switchState extract inject
+ $ convertingExtraState M.empty readAllStyles
+
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5c00a1b27..980f63504 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,7 +1,8 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
{-
-Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
+Copyright (C) 2014-2015 Albert Krewinkel <tarleb@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -42,12 +43,13 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.Pandoc.Shared (compactify', compactify'DL)
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
+import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Control.Applicative ( Applicative, pure
, (<$>), (<$), (<*>), (<*), (*>) )
import Control.Arrow (first)
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
-import Control.Monad.Reader (Reader, runReader, ask, asks)
+import Control.Monad.Reader (Reader, runReader, ask, asks, local)
import Data.Char (isAlphaNum, toLower)
import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
@@ -56,20 +58,57 @@ import Data.Maybe (fromMaybe, isJust)
import Data.Monoid (Monoid, mconcat, mempty, mappend)
import Network.HTTP (urlEncode)
+import Text.Pandoc.Error
+
-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
-readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
+ -> Either PandocError Pandoc
+readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
+
+data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
-type OrgParser = Parser [Char] OrgParserState
+type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
parseOrg :: OrgParser Pandoc
parseOrg = do
blocks' <- parseBlocks
st <- getState
let meta = runF (orgStateMeta' st) st
- return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st)
+ let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
+ return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st)
+
+-- | Drop COMMENT headers and the document tree below those headers.
+dropCommentTrees :: [Block] -> [Block]
+dropCommentTrees [] = []
+dropCommentTrees blks@(b:bs) =
+ maybe blks (flip dropUntilHeaderAboveLevel bs) $ commentHeaderLevel b
+
+-- | Return the level of a header starting a comment or :noexport: tree and
+-- Nothing otherwise.
+commentHeaderLevel :: Block -> Maybe Int
+commentHeaderLevel blk =
+ case blk of
+ (Header level _ ((Str "COMMENT"):_)) -> Just level
+ (Header level _ title) | hasNoExportTag title -> Just level
+ _ -> Nothing
+ where
+ hasNoExportTag :: [Inline] -> Bool
+ hasNoExportTag = any isNoExportTag
+
+ isNoExportTag :: Inline -> Bool
+ isNoExportTag (Span ("", ["tag"], [("data-tag-name", "noexport")]) []) = True
+ isNoExportTag _ = False
+
+-- | Drop blocks until a header on or above the given level is seen
+dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block]
+dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n)
+
+isHeaderLevelLowerEq :: Int -> Block -> Bool
+isHeaderLevelLowerEq n blk =
+ case blk of
+ (Header level _ _) -> n >= level
+ _ -> False
--
-- Parser State for Org
@@ -98,6 +137,9 @@ data OrgParserState = OrgParserState
, orgStateNotes' :: OrgNoteTable
}
+instance Default OrgParserLocal where
+ def = OrgParserLocal NoQuote
+
instance HasReaderOptions OrgParserState where
extractReaderOptions = orgStateOptions
@@ -111,6 +153,10 @@ instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
+instance HasQuoteContext st (Reader OrgParserLocal) where
+ getQuoteContext = asks orgLocalQuoteContext
+ withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
+
instance Default OrgParserState where
def = defaultOrgParserState
@@ -134,19 +180,6 @@ recordAnchorId :: String -> OrgParser ()
recordAnchorId i = updateState $ \s ->
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
-addBlockAttribute :: String -> String -> OrgParser ()
-addBlockAttribute key val = updateState $ \s ->
- let attrs = orgStateBlockAttributes s
- in s{ orgStateBlockAttributes = M.insert key val attrs }
-
-lookupBlockAttribute :: String -> OrgParser (Maybe String)
-lookupBlockAttribute key =
- M.lookup key . orgStateBlockAttributes <$> getState
-
-resetBlockAttributes :: OrgParser ()
-resetBlockAttributes = updateState $ \s ->
- s{ orgStateBlockAttributes = orgStateBlockAttributes def }
-
updateLastForbiddenCharPos :: OrgParser ()
updateLastForbiddenCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
@@ -274,9 +307,18 @@ block = choice [ mempty <$ blanklines
, paraOrPlain
] <?> "block"
+--
+-- Block Attributes
+--
+
+-- | Parse optional block attributes (like #+TITLE or #+NAME)
optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks)
optionalAttributes parser = try $
resetBlockAttributes *> parseBlockAttributes *> parser
+ where
+ resetBlockAttributes :: OrgParser ()
+ resetBlockAttributes = updateState $ \s ->
+ s{ orgStateBlockAttributes = orgStateBlockAttributes def }
parseBlockAttributes :: OrgParser ()
parseBlockAttributes = do
@@ -301,6 +343,15 @@ lookupInlinesAttr attr = try $ do
(fmap Just . parseFromString parseInlines)
val
+addBlockAttribute :: String -> String -> OrgParser ()
+addBlockAttribute key val = updateState $ \s ->
+ let attrs = orgStateBlockAttributes s
+ in s{ orgStateBlockAttributes = M.insert key val attrs }
+
+lookupBlockAttribute :: String -> OrgParser (Maybe String)
+lookupBlockAttribute key =
+ M.lookup key . orgStateBlockAttributes <$> getState
+
--
-- Org Blocks (#+BEGIN_... / #+END_...)
@@ -356,11 +407,11 @@ exportsResults :: [(String, String)] -> Bool
exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
|| ("rundoc-exports", "both") `elem` attrs
-followingResultsBlock :: OrgParser (Maybe String)
+followingResultsBlock :: OrgParser (Maybe (F Blocks))
followingResultsBlock =
optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:"
*> blankline
- *> (unlines <$> many1 exampleLine))
+ *> block)
codeBlock :: BlockProperties -> OrgParser (F Blocks)
codeBlock blkProp = do
@@ -375,7 +426,7 @@ codeBlock blkProp = do
labelledBlck <- maybe (pure codeBlck)
(labelDiv codeBlck)
<$> lookupInlinesAttr "caption"
- let resultBlck = pure $ maybe mempty (exampleCode) resultsContent
+ let resultBlck = fromMaybe mempty resultsContent
return $ (if includeCode then labelledBlck else mempty)
<> (if includeResults then resultBlck else mempty)
where
@@ -614,8 +665,25 @@ parseFormat = try $ do
header :: OrgParser (F Blocks)
header = try $ do
level <- headerStart
- title <- inlinesTillNewline
- return $ B.header level <$> title
+ title <- manyTill inline (lookAhead headerEnd)
+ tags <- headerEnd
+ let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags
+ return $ B.header level <$> inlns
+ where
+ tagToInlineF :: String -> F Inlines
+ tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
+
+headerEnd :: OrgParser [String]
+headerEnd = option [] headerTags <* newline
+
+headerTags :: OrgParser [String]
+headerTags = try $
+ skipSpaces
+ *> char ':'
+ *> many1 tag
+ <* skipSpaces
+ where tag = many1 (alphaNum <|> oneOf "@%#_")
+ <* char ':'
headerStart :: OrgParser Int
headerStart = try $
@@ -828,12 +896,14 @@ list :: OrgParser (F Blocks)
list = choice [ definitionList, bulletList, orderedList ] <?> "list"
definitionList :: OrgParser (F Blocks)
-definitionList = fmap B.definitionList . fmap compactify'DL . sequence
- <$> many1 (definitionListItem bulletListStart)
+definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ fmap B.definitionList . fmap compactify'DL . sequence
+ <$> many1 (definitionListItem $ bulletListStart' (Just n))
bulletList :: OrgParser (F Blocks)
-bulletList = fmap B.bulletList . fmap compactify' . sequence
- <$> many1 (listItem bulletListStart)
+bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ fmap B.bulletList . fmap compactify' . sequence
+ <$> many1 (listItem (bulletListStart' $ Just n))
orderedList :: OrgParser (F Blocks)
orderedList = fmap B.orderedList . fmap compactify' . sequence
@@ -845,10 +915,27 @@ genericListStart listMarker = try $
(+) <$> (length <$> many spaceChar)
<*> (length <$> listMarker <* many1 spaceChar)
--- parses bullet list start and returns its length (excl. following whitespace)
+-- parses bullet list marker. maybe we know the indent level
bulletListStart :: OrgParser Int
-bulletListStart = genericListStart bulletListMarker
- where bulletListMarker = pure <$> oneOf "*-+"
+bulletListStart = bulletListStart' Nothing
+
+bulletListStart' :: Maybe Int -> OrgParser Int
+-- returns length of bulletList prefix, inclusive of marker
+bulletListStart' Nothing = do ind <- length <$> many spaceChar
+ when (ind == 0) $ notFollowedBy (char '*')
+ oneOf bullets
+ many1 spaceChar
+ return (ind + 1)
+ -- Unindented lists are legal, but they can't use '*' bullets
+ -- We return n to maintain compatibility with the generic listItem
+bulletListStart' (Just n) = do count (n-1) spaceChar
+ when (n == 1) $ notFollowedBy (char '*')
+ oneOf bullets
+ many1 spaceChar
+ return n
+
+bullets :: String
+bullets = "*+-"
orderedListStart :: OrgParser Int
orderedListStart = genericListStart orderedListMarker
@@ -918,6 +1005,7 @@ inline =
, subscript
, superscript
, inlineLaTeX
+ , smart
, symbol
] <* (guard =<< newlinesCountWithinLimits)
<?> "inline"
@@ -927,7 +1015,7 @@ parseInlines = trimInlinesF . mconcat <$> many1 inline
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
+specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
whitespace :: OrgParser (F Inlines)
@@ -1054,13 +1142,13 @@ linkOrImage = explicitOrImageLink
explicitOrImageLink :: OrgParser (F Inlines)
explicitOrImageLink = try $ do
char '['
- srcF <- applyCustomLinkFormat =<< linkTarget
+ srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
title <- enclosedRaw (char '[') (char ']')
title' <- parseFromString (mconcat <$> many inline) title
char ']'
return $ do
src <- srcF
- if isImageFilename src && isImageFilename title
+ if isImageFilename title
then pure $ B.link src "" $ B.image title mempty mempty
else linkToInlinesF src =<< title'
@@ -1087,6 +1175,9 @@ selfTarget = try $ char '[' *> linkTarget <* char ']'
linkTarget :: OrgParser String
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
+possiblyEmptyLinkTarget :: OrgParser String
+possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
+
applyCustomLinkFormat :: String -> OrgParser (F String)
applyCustomLinkFormat link = do
let (linkType, rest) = break (== ':') link
@@ -1094,27 +1185,38 @@ applyCustomLinkFormat link = do
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
return $ maybe link ($ drop 1 rest) formatter
-
+-- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind
+-- of parsing.
linkToInlinesF :: String -> Inlines -> F Inlines
-linkToInlinesF s@('#':_) = pure . B.link s ""
-linkToInlinesF s
- | isImageFilename s = const . pure $ B.image s "" ""
- | isUri s = pure . B.link s ""
- | isRelativeUrl s = pure . B.link s ""
-linkToInlinesF s = \title -> do
- anchorB <- (s `elem`) <$> asksF orgStateAnchorIds
- if anchorB
- then pure $ B.link ('#':s) "" title
- else pure $ B.emph title
-
-isRelativeUrl :: String -> Bool
-isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s)
+linkToInlinesF s =
+ case s of
+ "" -> pure . B.link "" ""
+ ('#':_) -> pure . B.link s ""
+ _ | isImageFilename s -> const . pure $ B.image s "" ""
+ _ | isFileLink s -> pure . B.link (dropLinkType s) ""
+ _ | isUri s -> pure . B.link s ""
+ _ | isAbsoluteFilePath s -> pure . B.link ("file://" ++ s) ""
+ _ | isRelativeFilePath s -> pure . B.link s ""
+ _ -> internalLink s
+
+isFileLink :: String -> Bool
+isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s)
+
+dropLinkType :: String -> String
+dropLinkType = tail . snd . break (== ':')
+
+isRelativeFilePath :: String -> Bool
+isRelativeFilePath s = (("./" `isPrefixOf` s) || ("../" `isPrefixOf` s)) &&
+ (':' `notElem` s)
isUri :: String -> Bool
isUri s = let (scheme, path) = break (== ':') s
- in all (\c -> isAlphaNum c || c `elem` ".-") scheme
+ in all (\c -> isAlphaNum c || c `elem` (".-" :: String)) scheme
&& not (null path)
+isAbsoluteFilePath :: String -> Bool
+isAbsoluteFilePath = ('/' ==) . head
+
isImageFilename :: String -> Bool
isImageFilename filename =
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
@@ -1124,6 +1226,13 @@ isImageFilename filename =
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
protocols = [ "file", "http", "https" ]
+internalLink :: String -> Inlines -> F Inlines
+internalLink link title = do
+ anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
+ if anchorB
+ then return $ B.link ('#':link) "" title
+ else return $ B.emph title
+
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
-- @anchor-id@ set as id. Legal anchors in org-mode are defined through
-- @org-target-regexp@, which is fairly liberal. Since no link is created if
@@ -1148,7 +1257,7 @@ solidify :: String -> String
solidify = map replaceSpecialChar
where replaceSpecialChar c
| isAlphaNum c = c
- | c `elem` "_.-:" = c
+ | c `elem` ("_.-:" :: String) = c
| otherwise = '-'
-- | Parses an inline code block and marks it as an babel block.
@@ -1203,12 +1312,16 @@ displayMath :: OrgParser (F Inlines)
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
, rawMathBetween "$$" "$$"
]
+
+updatePositions :: Char
+ -> OrgParser (Char)
+updatePositions c = do
+ when (c `elem` emphasisPreChars) updateLastPreCharPos
+ when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
+ return c
+
symbol :: OrgParser (F Inlines)
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
- where updatePositions c
- | c `elem` emphasisPreChars = c <$ updateLastPreCharPos
- | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
- | otherwise = return c
emphasisBetween :: Char
-> OrgParser (F Inlines)
@@ -1387,7 +1500,8 @@ simpleSubOrSuperString = try $
inlineLaTeX :: OrgParser (F Inlines)
inlineLaTeX = try $ do
cmd <- inlineLaTeXCommand
- maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd
+ maybe mzero returnF $
+ parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
where
parseAsMath :: String -> Maybe Inlines
parseAsMath cs = B.fromList <$> texMathToPandoc cs
@@ -1395,6 +1509,11 @@ inlineLaTeX = try $ do
parseAsInlineLaTeX :: String -> Maybe Inlines
parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
+ parseAsMathMLSym :: String -> Maybe Inlines
+ parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
+ -- dropWhileEnd would be nice here, but it's not available before base 4.5
+ where clean = reverse . dropWhile (`elem` ("{}" :: String)) . reverse . drop 1
+
state :: ParserState
state = def{ stateOptions = def{ readerParseRaw = True }}
@@ -1413,3 +1532,31 @@ inlineLaTeXCommand = try $ do
count len anyChar
return cs
_ -> mzero
+
+smart :: OrgParser (F Inlines)
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice (map (return <$>) [orgApostrophe, dash, ellipses])
+ where orgApostrophe =
+ (char '\'' <|> char '\8217') <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+ *> return (B.str "\x2019")
+
+singleQuoted :: OrgParser (F Inlines)
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ fmap B.singleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline singleQuoteEnd
+
+-- doubleQuoted will handle regular double-quoted sections, as well
+-- as dialogues with an open double-quote without a close double-quote
+-- in the same paragraph.
+doubleQuoted :: OrgParser (F Inlines)
+doubleQuoted = try $ do
+ doubleQuoteStart
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
+ (fmap B.doubleQuoted . trimInlinesF $ contents))
+ <|> (return $ return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index e5eccb116..678eecc52 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
{-
-Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.RST
- Copyright : Copyright (C) 2006-2014 John MacFarlane
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,32 +30,38 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from reStructuredText to 'Pandoc' document.
-}
module Text.Pandoc.Readers.RST (
- readRST
+ readRST,
+ readRSTWithWarnings
) where
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, fromList)
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Options
-import Control.Monad ( when, liftM, guard, mzero, mplus )
+import Control.Monad ( when, liftM, guard, mzero )
import Data.List ( findIndex, intersperse, intercalate,
- transpose, sort, deleteFirstsBy, isSuffixOf )
+ transpose, sort, deleteFirstsBy, isSuffixOf , nub, union)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import Text.Printf ( printf )
-import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>))
+import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>), pure)
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import qualified Text.Pandoc.Builder as B
import Data.Monoid (mconcat, mempty)
import Data.Sequence (viewr, ViewR(..))
-import Data.Char (toLower, isHexDigit)
+import Data.Char (toLower, isHexDigit, isSpace)
+
+import Text.Pandoc.Error
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String])
+readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+
type RSTParser = Parser [Char] ParserState
--
@@ -202,7 +209,7 @@ rawFieldListItem minIndent = try $ do
fieldListItem :: Int -> RSTParser (Inlines, [Blocks])
fieldListItem minIndent = try $ do
(name, raw) <- rawFieldListItem minIndent
- let term = B.str name
+ term <- parseInlineFromString name
contents <- parseFromString parseBlocks raw
optional blanklines
return (term, [contents])
@@ -222,8 +229,7 @@ fieldList = try $ do
lineBlock :: RSTParser Blocks
lineBlock = try $ do
lines' <- lineBlockLines
- lines'' <- mapM (parseFromString
- (trimInlines . mconcat <$> many inline)) lines'
+ lines'' <- mapM parseInlineFromString lines'
return $ B.para (mconcat $ intersperse B.linebreak lines'')
--
@@ -335,6 +341,13 @@ indentedBlock = try $ do
optional blanklines
return $ unlines lns
+quotedBlock :: Parser [Char] st [Char]
+quotedBlock = try $ do
+ quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
+ lns <- many1 $ lookAhead (char quote) >> anyLine
+ optional blanklines
+ return $ unlines lns
+
codeBlockStart :: Parser [Char] st Char
codeBlockStart = string "::" >> blankline >> blankline
@@ -342,7 +355,8 @@ codeBlock :: Parser [Char] st Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
codeBlockBody :: Parser [Char] st Blocks
-codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> indentedBlock
+codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
+ (indentedBlock <|> quotedBlock)
lhsCodeBlock :: RSTParser Blocks
lhsCodeBlock = try $ do
@@ -513,7 +527,6 @@ directive = try $ do
-- TODO: line-block, parsed-literal, table, csv-table, list-table
-- date
-- include
--- class
-- title
directive' :: RSTParser Blocks
directive' = do
@@ -535,39 +548,33 @@ directive' = do
"role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
"container" -> parseFromString parseBlocks body'
"replace" -> B.para <$> -- consumed by substKey
- parseFromString (trimInlines . mconcat <$> many inline)
- (trim top)
+ parseInlineFromString (trim top)
"unicode" -> B.para <$> -- consumed by substKey
- parseFromString (trimInlines . mconcat <$> many inline)
- (trim $ unicodeTransform top)
+ parseInlineFromString (trim $ unicodeTransform top)
"compound" -> parseFromString parseBlocks body'
"pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body'
"epigraph" -> B.blockQuote <$> parseFromString parseBlocks body'
"highlights" -> B.blockQuote <$> parseFromString parseBlocks body'
- "rubric" -> B.para . B.strong <$> parseFromString
- (trimInlines . mconcat <$> many inline) top
+ "rubric" -> B.para . B.strong <$> parseInlineFromString top
_ | label `elem` ["attention","caution","danger","error","hint",
"important","note","tip","warning"] ->
do let tit = B.para $ B.strong $ B.str label
bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
return $ B.blockQuote $ tit <> bod
"admonition" ->
- do tit <- B.para . B.strong <$> parseFromString
- (trimInlines . mconcat <$> many inline) top
+ do tit <- B.para . B.strong <$> parseInlineFromString top
bod <- parseFromString parseBlocks body'
return $ B.blockQuote $ tit <> bod
"sidebar" ->
do let subtit = maybe "" trim $ lookup "subtitle" fields
- tit <- B.para . B.strong <$> parseFromString
- (trimInlines . mconcat <$> many inline)
+ tit <- B.para . B.strong <$> parseInlineFromString
(trim top ++ if null subtit
then ""
else (": " ++ subtit))
bod <- parseFromString parseBlocks body'
return $ B.blockQuote $ tit <> bod
"topic" ->
- do tit <- B.para . B.strong <$> parseFromString
- (trimInlines . mconcat <$> many inline) top
+ do tit <- B.para . B.strong <$> parseInlineFromString top
bod <- parseFromString parseBlocks body'
return $ tit <> bod
"default-role" -> mempty <$ updateState (\s ->
@@ -594,38 +601,69 @@ directive' = do
Just t -> B.link (escapeURI $ trim t) ""
$ B.image src "" alt
Nothing -> B.image src "" alt
- _ -> return mempty
+ "class" -> do
+ let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields)
+ -- directive content or the first immediately following element
+ children <- case body of
+ "" -> block
+ _ -> parseFromString parseBlocks body'
+ return $ B.divWith attrs children
+ other -> do
+ pos <- getPosition
+ addWarning (Just pos) $ "ignoring unknown directive: " ++ other
+ return mempty
-- TODO:
-- - Silently ignores illegal fields
--- - Silently drops classes
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: String -> [(String, String)] -> RSTParser Blocks
addNewRole roleString fields = do
(role, parentRole) <- parseFromString inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
- baseRole <- case M.lookup parentRole customRoles of
- Just (base, _, _) -> return base
- Nothing -> return parentRole
-
- let fmt = if baseRole == "raw" then lookup "format" fields else Nothing
- annotate = maybe id addLanguage $
- if baseRole == "code"
+ let (baseRole, baseFmt, baseAttr) =
+ maybe (parentRole, Nothing, nullAttr) id $
+ M.lookup parentRole customRoles
+ fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
+ annotate :: [String] -> [String]
+ annotate = maybe id (:) $
+ if parentRole == "code"
then lookup "language" fields
else Nothing
+ attr = let (ident, classes, keyValues) = baseAttr
+ -- nub in case role name & language class are the same
+ in (ident, nub . (role :) . annotate $ classes, keyValues)
+
+ -- warn about syntax we ignore
+ flip mapM_ fields $ \(key, _) -> case key of
+ "language" -> when (parentRole /= "code") $ addWarning Nothing $
+ "ignoring :language: field because the parent of role :" ++
+ role ++ ": is :" ++ parentRole ++ ": not :code:"
+ "format" -> when (parentRole /= "raw") $ addWarning Nothing $
+ "ignoring :format: field because the parent of role :" ++
+ role ++ ": is :" ++ parentRole ++ ": not :raw:"
+ _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
+ ": in definition of role :" ++ role ++ ": in"
+ when (parentRole == "raw" && countKeys "format" > 1) $
+ addWarning Nothing $
+ "ignoring :format: fields after the first in the definition of role :"
+ ++ role ++": in"
+ when (parentRole == "code" && countKeys "language" > 1) $
+ addWarning Nothing $
+ "ignoring :language: fields after the first in the definition of role :"
+ ++ role ++": in"
updateState $ \s -> s {
stateRstCustomRoles =
- M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles
+ M.insert role (baseRole, fmt, attr) customRoles
}
return $ B.singleton Null
where
- addLanguage lang (ident, classes, keyValues) =
- (ident, "sourceCode" : lang : classes, keyValues)
+ countKeys k = length . filter (== k) . map fst $ fields
inheritedRole =
- (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')')
+ (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span")
+
-- Can contain character codes as decimal numbers or
-- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
@@ -666,7 +704,7 @@ extractCaption = do
toChunks :: String -> [String]
toChunks = dropWhile null
. map (trim . unlines)
- . splitBy (all (`elem` " \t")) . lines
+ . splitBy (all (`elem` (" \t" :: String))) . lines
codeblock :: Maybe String -> String -> String -> RSTParser Blocks
codeblock numberLines lang body =
@@ -734,7 +772,7 @@ simpleReferenceName' :: Parser [Char] st String
simpleReferenceName' = do
x <- alphaNum
xs <- many $ alphaNum
- <|> (try $ oneOf "-_:+." >> lookAhead alphaNum)
+ <|> (try $ oneOf "-_:+." <* lookAhead alphaNum)
return (x:xs)
simpleReferenceName :: Parser [Char] st Inlines
@@ -917,6 +955,9 @@ inline = choice [ whitespace
, escapedChar
, symbol ] <?> "inline"
+parseInlineFromString :: String -> RSTParser Inlines
+parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline)
+
hyphens :: RSTParser Inlines
hyphens = do
result <- many1 (char '-')
@@ -985,21 +1026,23 @@ renderRole contents fmt role attr = case role of
"RFC" -> return $ rfcLink contents
"pep-reference" -> return $ pepLink contents
"PEP" -> return $ pepLink contents
- "literal" -> return $ B.str contents
+ "literal" -> return $ B.codeWith attr contents
"math" -> return $ B.math contents
"title-reference" -> titleRef contents
"title" -> titleRef contents
"t" -> titleRef contents
- "code" -> return $ B.codeWith attr contents
+ "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
+ "span" -> return $ B.spanWith attr $ B.str contents
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
custom -> do
- customRole <- stateRstCustomRoles <$> getState
- case M.lookup custom customRole of
- Just (_, newFmt, inherit) -> let
- fmtStr = fmt `mplus` newFmt
- (newRole, newAttr) = inherit attr
- in renderRole contents fmtStr newRole newAttr
- Nothing -> return $ B.str contents -- Undefined role
+ customRoles <- stateRstCustomRoles <$> getState
+ case M.lookup custom customRoles of
+ Just (newRole, newFmt, newAttr) ->
+ renderRole contents newFmt newRole newAttr
+ Nothing -> do
+ pos <- getPosition
+ addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in"
+ return $ B.str contents -- Undefined role
where
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
@@ -1008,11 +1051,14 @@ renderRole contents fmt role attr = case role of
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
-roleNameEndingIn :: RSTParser Char -> RSTParser String
-roleNameEndingIn end = many1Till (letter <|> char '-') end
+addClass :: String -> Attr -> Attr
+addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues)
+
+roleName :: RSTParser String
+roleName = many1 (letter <|> char '-')
roleMarker :: RSTParser String
-roleMarker = char ':' *> roleNameEndingIn (char ':')
+roleMarker = char ':' *> roleName <* char ':'
roleBefore :: RSTParser (String,String)
roleBefore = try $ do
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
new file mode 100644
index 000000000..07b414431
--- /dev/null
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -0,0 +1,527 @@
+{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-}
+-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
+{-
+ Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.TWiki
+ Copyright : Copyright (C) 2014 Alexander Sulfrian
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
+ Stability : alpha
+ Portability : portable
+
+Conversion of twiki text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.TWiki ( readTWiki
+ , readTWikiWithWarnings
+ ) where
+
+import Text.Pandoc.Definition
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
+import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
+import Data.Monoid (Monoid, mconcat, mempty)
+import Control.Applicative ((<$>), (<*), (*>), (<$))
+import Control.Monad
+import Text.Printf (printf)
+import Debug.Trace (trace)
+import Text.Pandoc.XML (fromEntities)
+import Data.Maybe (fromMaybe)
+import Text.HTML.TagSoup
+import Data.Char (isAlphaNum)
+import qualified Data.Foldable as F
+import Text.Pandoc.Error
+
+-- | Read twiki from an input string and return a Pandoc document.
+readTWiki :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Either PandocError Pandoc
+readTWiki opts s =
+ (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n")
+
+readTWikiWithWarnings :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Either PandocError (Pandoc, [String])
+readTWikiWithWarnings opts s =
+ (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n")
+ where parseTWikiWithWarnings = do
+ doc <- parseTWiki
+ warnings <- stateWarnings <$> getState
+ return (doc, warnings)
+
+type TWParser = Parser [Char] ParserState
+
+--
+-- utility functions
+--
+
+tryMsg :: String -> TWParser a -> TWParser a
+tryMsg msg p = try p <?> msg
+
+skip :: TWParser a -> TWParser ()
+skip parser = parser >> return ()
+
+nested :: TWParser a -> TWParser a
+nested p = do
+ nestlevel <- stateMaxNestingLevel <$> getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
+htmlElement :: String -> TWParser (Attr, String)
+htmlElement tag = tryMsg tag $ do
+ (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+ content <- manyTill anyChar (endtag <|> endofinput)
+ return (htmlAttrToPandoc attr, trim content)
+ where
+ endtag = skip $ htmlTag (~== TagClose tag)
+ endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
+ trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
+
+htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc attrs = (ident, classes, keyvals)
+ where
+ ident = fromMaybe "" $ lookup "id" attrs
+ classes = maybe [] words $ lookup "class" attrs
+ keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+
+parseHtmlContentWithAttrs :: String -> TWParser a -> TWParser (Attr, [a])
+parseHtmlContentWithAttrs tag parser = do
+ (attr, content) <- htmlElement tag
+ parsedContent <- try $ parseContent content
+ return (attr, parsedContent)
+ where
+ parseContent = parseFromString $ nested $ manyTill parser endOfContent
+ endOfContent = try $ skipMany blankline >> skipSpaces >> eof
+
+parseHtmlContent :: String -> TWParser a -> TWParser [a]
+parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd
+
+--
+-- main parser
+--
+
+parseTWiki :: TWParser Pandoc
+parseTWiki = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ return $ B.doc bs
+
+
+--
+-- block parsers
+--
+
+block :: TWParser B.Blocks
+block = do
+ tr <- getOption readerTrace
+ pos <- getPosition
+ res <- mempty <$ skipMany1 blankline
+ <|> blockElements
+ <|> para
+ skipMany blankline
+ when tr $
+ trace (printf "line %d: %s" (sourceLine pos)
+ (take 60 $ show $ B.toList res)) (return ())
+ return res
+
+blockElements :: TWParser B.Blocks
+blockElements = choice [ separator
+ , header
+ , verbatim
+ , literal
+ , list ""
+ , table
+ , blockQuote
+ , noautolink
+ ]
+
+separator :: TWParser B.Blocks
+separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule
+
+header :: TWParser B.Blocks
+header = tryMsg "header" $ do
+ string "---"
+ level <- many1 (char '+') >>= return . length
+ guard $ level <= 6
+ classes <- option [] $ string "!!" >> return ["unnumbered"]
+ skipSpaces
+ content <- B.trimInlines . mconcat <$> manyTill inline newline
+ attr <- registerHeader ("", classes, []) content
+ return $ B.headerWith attr level $ content
+
+verbatim :: TWParser B.Blocks
+verbatim = (htmlElement "verbatim" <|> htmlElement "pre")
+ >>= return . (uncurry B.codeBlockWith)
+
+literal :: TWParser B.Blocks
+literal = htmlElement "literal" >>= return . rawBlock
+ where
+ format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
+ rawBlock (attrs, content) = B.rawBlock (format attrs) content
+
+list :: String -> TWParser B.Blocks
+list prefix = choice [ bulletList prefix
+ , orderedList prefix
+ , definitionList prefix]
+
+definitionList :: String -> TWParser B.Blocks
+definitionList prefix = tryMsg "definitionList" $ do
+ indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ "
+ elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
+ return $ B.definitionList elements
+ where
+ parseDefinitionListItem :: String -> TWParser (B.Inlines, [B.Blocks])
+ parseDefinitionListItem indent = do
+ string (indent ++ "$ ") >> skipSpaces
+ term <- many1Till inline $ string ": "
+ line <- listItemLine indent $ string "$ "
+ return $ (mconcat term, [line])
+
+bulletList :: String -> TWParser B.Blocks
+bulletList prefix = tryMsg "bulletList" $
+ parseList prefix (char '*') (char ' ')
+
+orderedList :: String -> TWParser B.Blocks
+orderedList prefix = tryMsg "orderedList" $
+ parseList prefix (oneOf "1iIaA") (string ". ")
+
+parseList :: Show a => String -> TWParser Char -> TWParser a -> TWParser B.Blocks
+parseList prefix marker delim = do
+ (indent, style) <- lookAhead $ string prefix *> listStyle <* delim
+ blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim)
+ return $ case style of
+ '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks
+ 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks
+ 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks
+ 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks
+ 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks
+ _ -> B.bulletList blocks
+ where
+ listStyle = do
+ indent <- many1 $ string " "
+ style <- marker
+ return (concat indent, style)
+
+parseListItem :: Show a => String -> TWParser a -> TWParser B.Blocks
+parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker
+
+listItemLine :: Show a => String -> TWParser a -> TWParser B.Blocks
+listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
+ where
+ lineContent = do
+ content <- anyLine
+ continuation <- optionMaybe listContinuation
+ return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation)
+ filterSpaces = reverse . dropWhile (== ' ') . reverse
+ listContinuation = notFollowedBy (string prefix >> marker) >>
+ string " " >> lineContent
+ parseContent = parseFromString $ many1 $ nestedList <|> parseInline
+ parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>=
+ return . B.plain . mconcat
+ nestedList = list prefix
+ lastNewline = try $ char '\n' <* eof
+ newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList
+
+table :: TWParser B.Blocks
+table = try $ do
+ tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip
+ rows <- many1 tableParseRow
+ return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead
+ where
+ buildTable caption rows (aligns, heads)
+ = B.table caption aligns heads rows
+ align rows = replicate (columCount rows) (AlignDefault, 0)
+ columns rows = replicate (columCount rows) mempty
+ columCount rows = length $ head rows
+
+tableParseHeader :: TWParser ((Alignment, Double), B.Blocks)
+tableParseHeader = try $ do
+ char '|'
+ leftSpaces <- many spaceChar >>= return . length
+ char '*'
+ content <- tableColumnContent (char '*' >> skipSpaces >> char '|')
+ char '*'
+ rightSpaces <- many spaceChar >>= return . length
+ optional tableEndOfRow
+ return (tableAlign leftSpaces rightSpaces, content)
+ where
+ tableAlign left right
+ | left >= 2 && left == right = (AlignCenter, 0)
+ | left > right = (AlignRight, 0)
+ | otherwise = (AlignLeft, 0)
+
+tableParseRow :: TWParser [B.Blocks]
+tableParseRow = many1Till tableParseColumn newline
+
+tableParseColumn :: TWParser B.Blocks
+tableParseColumn = char '|' *> skipSpaces *>
+ tableColumnContent (skipSpaces >> char '|')
+ <* skipSpaces <* optional tableEndOfRow
+
+tableEndOfRow :: TWParser Char
+tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|'
+
+tableColumnContent :: Show a => TWParser a -> TWParser B.Blocks
+tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat
+ where
+ content = continuation <|> inline
+ continuation = try $ char '\\' >> newline >> return mempty
+
+blockQuote :: TWParser B.Blocks
+blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat
+
+noautolink :: TWParser B.Blocks
+noautolink = do
+ (_, content) <- htmlElement "noautolink"
+ st <- getState
+ setState $ st{ stateAllowLinks = False }
+ blocks <- try $ parseContent content
+ setState $ st{ stateAllowLinks = True }
+ return $ mconcat blocks
+ where
+ parseContent = parseFromString $ many $ block
+
+para :: TWParser B.Blocks
+para = many1Till inline endOfParaElement >>= return . result . mconcat
+ where
+ endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
+ endOfInput = try $ skipMany blankline >> skipSpaces >> eof
+ endOfPara = try $ blankline >> skipMany1 blankline
+ newBlockElement = try $ blankline >> skip blockElements
+ result content = if F.all (==Space) content
+ then mempty
+ else B.para $ B.trimInlines content
+
+
+--
+-- inline parsers
+--
+
+inline :: TWParser B.Inlines
+inline = choice [ whitespace
+ , br
+ , macro
+ , strong
+ , strongHtml
+ , strongAndEmph
+ , emph
+ , emphHtml
+ , boldCode
+ , smart
+ , link
+ , htmlComment
+ , code
+ , codeHtml
+ , nop
+ , autoLink
+ , str
+ , symbol
+ ] <?> "inline"
+
+whitespace :: TWParser B.Inlines
+whitespace = (lb <|> regsp) >>= return
+ where lb = try $ skipMany spaceChar >> linebreak >> return B.space
+ regsp = try $ skipMany1 spaceChar >> return B.space
+
+br :: TWParser B.Inlines
+br = try $ string "%BR%" >> return B.linebreak
+
+linebreak :: TWParser B.Inlines
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+ where lastNewline = eof >> return mempty
+ innerNewline = return B.space
+
+between :: (Show b, Monoid c) => TWParser a -> TWParser b -> (TWParser b -> TWParser c) -> TWParser c
+between start end p =
+ mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
+
+enclosed :: (Show a, Monoid b) => TWParser a -> (TWParser a -> TWParser b) -> TWParser b
+enclosed sep p = between sep (try $ sep <* endMarker) p
+ where
+ endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof
+ endSpace = (spaceChar <|> newline) >> return B.space
+
+macro :: TWParser B.Inlines
+macro = macroWithParameters <|> withoutParameters
+ where
+ withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan
+ emptySpan name = buildSpan name [] mempty
+
+macroWithParameters :: TWParser B.Inlines
+macroWithParameters = try $ do
+ char '%'
+ name <- macroName
+ (content, kvs) <- attributes
+ char '%'
+ return $ buildSpan name kvs $ B.str content
+
+buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines
+buildSpan className kvs = B.spanWith attrs
+ where
+ attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses)
+ additionalClasses = maybe [] words $ lookup "class" kvs
+ kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"]
+
+macroName :: TWParser String
+macroName = do
+ first <- letter
+ rest <- many $ alphaNum <|> char '_'
+ return (first:rest)
+
+attributes :: TWParser (String, [(String, String)])
+attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>=
+ return . foldr (either mkContent mkKvs) ([], [])
+ where
+ spnl = skipMany (spaceChar <|> newline)
+ mkContent c ([], kvs) = (c, kvs)
+ mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
+ mkKvs kv (cont, rest) = (cont, (kv : rest))
+
+attribute :: TWParser (Either String (String, String))
+attribute = withKey <|> withoutKey
+ where
+ withKey = try $ do
+ key <- macroName
+ char '='
+ parseValue False >>= return . (curry Right key)
+ withoutKey = try $ parseValue True >>= return . Left
+ parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities
+ withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
+ withoutQuotes allowSpaces
+ | allowSpaces == True = many1 $ noneOf "}"
+ | otherwise = many1 $ noneOf " }"
+
+nestedInlines :: Show a => TWParser a -> TWParser B.Inlines
+nestedInlines end = innerSpace <|> nestedInline
+ where
+ innerSpace = try $ whitespace <* (notFollowedBy end)
+ nestedInline = notFollowedBy whitespace >> nested inline
+
+strong :: TWParser B.Inlines
+strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong
+
+strongHtml :: TWParser B.Inlines
+strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
+ >>= return . B.strong . mconcat
+
+strongAndEmph :: TWParser B.Inlines
+strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong
+
+emph :: TWParser B.Inlines
+emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph
+
+emphHtml :: TWParser B.Inlines
+emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
+ >>= return . B.emph . mconcat
+
+nestedString :: Show a => TWParser a -> TWParser String
+nestedString end = innerSpace <|> (count 1 nonspaceChar)
+ where
+ innerSpace = try $ many1 spaceChar <* notFollowedBy end
+
+boldCode :: TWParser B.Inlines
+boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities
+
+htmlComment :: TWParser B.Inlines
+htmlComment = htmlTag isCommentTag >> return mempty
+
+code :: TWParser B.Inlines
+code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities
+
+codeHtml :: TWParser B.Inlines
+codeHtml = do
+ (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
+ return $ B.codeWith attrs $ fromEntities content
+
+autoLink :: TWParser B.Inlines
+autoLink = try $ do
+ state <- getState
+ guard $ stateAllowLinks state
+ (text, url) <- parseLink
+ guard $ checkLink (head $ reverse url)
+ return $ makeLink (text, url)
+ where
+ parseLink = notFollowedBy nop >> (uri <|> emailAddress)
+ makeLink (text, url) = B.link url "" $ B.str text
+ checkLink c
+ | c == '/' = True
+ | otherwise = isAlphaNum c
+
+str :: TWParser B.Inlines
+str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str
+
+nop :: TWParser B.Inlines
+nop = try $ (skip exclamation <|> skip nopTag) >> followContent
+ where
+ exclamation = char '!'
+ nopTag = stringAnyCase "<nop>"
+ followContent = many1 nonspaceChar >>= return . B.str . fromEntities
+
+symbol :: TWParser B.Inlines
+symbol = count 1 nonspaceChar >>= return . B.str
+
+smart :: TWParser B.Inlines
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice [ apostrophe
+ , dash
+ , ellipses
+ ]
+
+singleQuoted :: TWParser B.Inlines
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ many1Till inline singleQuoteEnd >>=
+ (return . B.singleQuoted . B.trimInlines . mconcat)
+
+doubleQuoted :: TWParser B.Inlines
+doubleQuoted = try $ do
+ doubleQuoteStart
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ doubleQuoteEnd >>
+ return (B.doubleQuoted $ B.trimInlines contents))
+ <|> (return $ (B.str "\8220") B.<> contents)
+
+link :: TWParser B.Inlines
+link = try $ do
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (url, title, content) <- linkText
+ setState $ st{ stateAllowLinks = True }
+ return $ B.link url title content
+
+linkText :: TWParser (String, String, B.Inlines)
+linkText = do
+ string "[["
+ url <- many1Till anyChar (char ']')
+ content <- option [B.str url] linkContent
+ char ']'
+ return (url, "", mconcat content)
+ where
+ linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent
+ parseLinkContent = parseFromString $ many1 inline
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 3fee3051e..e5778b123 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2007-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.TeXMath
- Copyright : Copyright (C) 2007-2014 John MacFarlane
+ Copyright : Copyright (C) 2007-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index ee64e8f2a..ec1da896d 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2010-2014 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
+Copyright (C) 2010-2015 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
and John MacFarlane
This program is free software; you can redistribute it and/or modify
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Textile
- Copyright : Copyright (C) 2010-2014 Paul Rivier and John MacFarlane
+ Copyright : Copyright (C) 2010-2015 Paul Rivier and John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : Paul Rivier <paul*rivier#demotera*com>
@@ -57,6 +57,7 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag )
+import Text.Pandoc.Shared (trim)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
import Text.HTML.TagSoup.Match
@@ -67,11 +68,12 @@ import Text.Printf
import Control.Applicative ((<$>), (*>), (<*), (<$))
import Data.Monoid
import Debug.Trace (trace)
+import Text.Pandoc.Error
-- | Parse a Textile text and return a Pandoc document.
readTextile :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readTextile opts s =
(readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n")
@@ -325,33 +327,30 @@ para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
-- | A table cell spans until a pipe |
-tableCell :: Parser [Char] ParserState Blocks
-tableCell = do
- c <- many1 (noneOf "|\n")
- content <- trimInlines . mconcat <$> parseFromString (many1 inline) c
+tableCell :: Bool -> Parser [Char] ParserState Blocks
+tableCell headerCell = try $ do
+ char '|'
+ when headerCell $ () <$ string "_."
+ notFollowedBy blankline
+ raw <- trim <$>
+ many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline))
+ content <- mconcat <$> parseFromString (many inline) raw
return $ B.plain content
-- | A table row is made of many table cells
tableRow :: Parser [Char] ParserState [Blocks]
-tableRow = try $ ( char '|' *>
- (endBy1 tableCell (optional blankline *> char '|')) <* newline)
-
--- | Many table rows
-tableRows :: Parser [Char] ParserState [[Blocks]]
-tableRows = many1 tableRow
+tableRow = many1 (tableCell False) <* char '|' <* newline
--- | Table headers are made of cells separated by a tag "|_."
-tableHeaders :: Parser [Char] ParserState [Blocks]
-tableHeaders = let separator = (try $ string "|_.") in
- try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline )
+tableHeader :: Parser [Char] ParserState [Blocks]
+tableHeader = many1 (tableCell True) <* char '|' <* newline
-- | A table with an optional header. Current implementation can
-- handle tables with and without header, but will parse cells
-- alignment attributes as content.
table :: Parser [Char] ParserState Blocks
table = try $ do
- headers <- option mempty tableHeaders
- rows <- tableRows
+ headers <- option mempty $ tableHeader
+ rows <- many1 tableRow
blanklines
let nbOfCols = max (length headers) (length $ head rows)
return $ B.table mempty
@@ -607,8 +606,8 @@ langAttr = do
-- | Parses material surrounded by a parser.
surrounded :: Parser [Char] st t -- ^ surrounding parser
- -> Parser [Char] st a -- ^ content parser (to be used repeatedly)
- -> Parser [Char] st [a]
+ -> Parser [Char] st a -- ^ content parser (to be used repeatedly)
+ -> Parser [Char] st [a]
surrounded border =
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 6f8c19ac7..304d6d4c5 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -48,11 +48,12 @@ import Data.Monoid (Monoid, mconcat, mempty, mappend)
import Control.Monad (void, guard, when)
import Data.Default
import Control.Monad.Reader (Reader, runReader, asks)
+import Text.Pandoc.Error
import Data.Time.LocalTime (getZonedTime)
import Text.Pandoc.Compat.Directory(getModificationTime)
import Data.Time.Format (formatTime)
-import System.Locale (defaultTimeLocale)
+import Text.Pandoc.Compat.Locale (defaultTimeLocale)
import System.IO.Error (catchIOError)
type T2T = ParserT String ParserState (Reader T2TMeta)
@@ -83,12 +84,12 @@ getT2TMeta inps out = do
return $ T2TMeta curDate curMtime (intercalate ", " inps) out
-- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc
+readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc
readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
-- | Read Txt2Tags (ignoring all macros) from an input string returning
-- a Pandoc document
-readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc
+readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc
readTxt2TagsNoMacros = readTxt2Tags def
parseT2T :: T2T Pandoc
@@ -576,4 +577,3 @@ atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
ignoreSpacesCap :: T2T String -> T2T String
ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)
-