diff options
author | ickc <ickc@users.noreply.github.com> | 2016-05-09 19:24:40 -0700 |
---|---|---|
committer | ickc <ickc@users.noreply.github.com> | 2016-05-09 19:24:40 -0700 |
commit | 0ae60a91535a8f55f3061f36fdb3a54f6ddda85d (patch) | |
tree | f0c74ae411967ed5f67bd8b0bdf175ee5ca964ab /src | |
parent | 846fa8704618e7e544313f5b3b627ccb6e65b550 (diff) | |
parent | f7601297f0ff184a59efdc3ea279137fc6012eef (diff) | |
download | pandoc-0ae60a91535a8f55f3061f36fdb3a54f6ddda85d.tar.gz |
Merge pull request #1 from jgm/master
Merge from jgm's master
Diffstat (limited to 'src')
34 files changed, 418 insertions, 271 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 4b2397eb9..0330c46e2 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -223,6 +223,14 @@ mkStringReaderWithWarnings r = StringReader $ \o s -> mkBSReader :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)) -> Reader mkBSReader r = ByteStringReader (\o s -> return $ r o s) +mkBSReaderWithWarnings :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag, [String])) -> Reader +mkBSReaderWithWarnings r = ByteStringReader $ \o s -> + case r o s of + Left err -> return $ Left err + Right (doc, mediaBag, warnings) -> do + mapM_ warn warnings + return $ Right (doc, mediaBag) + -- | Association list of formats and readers. readers :: [(String, Reader)] readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) @@ -243,7 +251,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("latex" , mkStringReader readLaTeX) ,("haddock" , mkStringReader readHaddock) ,("twiki" , mkStringReader readTWiki) - ,("docx" , mkBSReader readDocx) + ,("docx" , mkBSReaderWithWarnings readDocxWithWarnings) ,("odt" , mkBSReader readOdt) ,("t2t" , mkStringReader readTxt2TagsNoMacros) ,("epub" , mkBSReader readEPUB) @@ -283,6 +291,8 @@ writers = [ writeHtmlString o{ writerSlideVariant = RevealJsSlides , writerHtml5 = True }) ,("docbook" , PureStringWriter writeDocbook) + ,("docbook5" , PureStringWriter $ \o -> + writeDocbook o{ writerDocbook5 = True }) ,("opml" , PureStringWriter writeOPML) ,("opendocument" , PureStringWriter writeOpenDocument) ,("latex" , PureStringWriter writeLaTeX) diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs index c183458e4..8eb1ba663 100644 --- a/src/Text/Pandoc/Asciify.hs +++ b/src/Text/Pandoc/Asciify.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Asciify - Copyright : Copyright (C) 2013-2015 John MacFarlane + Copyright : Copyright (C) 2013-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 0a4e08175..792098b35 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -1,6 +1,6 @@ {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Error - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index ecfef1832..1b9e92ae2 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Highlighting - Copyright : Copyright (C) 2008-2015 John MacFarlane + Copyright : Copyright (C) 2008-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 571fdd665..90dfbb5fb 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-} {-# OPTIONS_GHC -fno-warn-type-defaults #-} {- - Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> + Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ {- | Module : Text.Pandoc.ImageSize -Copyright : Copyright (C) 2011-2015 John MacFarlane +Copyright : Copyright (C) 2011-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 6fd9ac373..1164e04b3 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.MIME - Copyright : Copyright (C) 2011-2015 John MacFarlane + Copyright : Copyright (C) 2011-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 333f499fb..701cd8bd1 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -1,6 +1,6 @@ {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} {- -Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Options - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -264,6 +264,7 @@ data ReaderOptions = ReaderOptions{ , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrace :: Bool -- ^ Print debugging info , readerTrackChanges :: TrackChanges + , readerFileScope :: Bool -- ^ Parse before combining } deriving (Show, Read, Data, Typeable, Generic) instance Default ReaderOptions @@ -280,6 +281,7 @@ instance Default ReaderOptions , readerDefaultImageExtension = "" , readerTrace = False , readerTrackChanges = AcceptChanges + , readerFileScope = False } -- @@ -355,6 +357,7 @@ data WriterOptions = WriterOptions , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory , writerCiteMethod :: CiteMethod -- ^ How to print cites + , writerDocbook5 :: Bool -- ^ Produce DocBook5 , writerHtml5 :: Bool -- ^ Produce HTML5 , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show @@ -401,6 +404,7 @@ instance Default WriterOptions where , writerSourceURL = Nothing , writerUserDataDir = Nothing , writerCiteMethod = Citeproc + , writerDocbook5 = False , writerHtml5 = False , writerHtmlQTags = False , writerBeamer = False diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index da4ee4e33..4dbe1f000 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, CPP, ScopedTypeVariables #-} {- -Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.PDF - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 325231846..7bf827019 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -5,7 +5,7 @@ , MultiParamTypeClasses , FlexibleInstances #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Parsing - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 88b7dd09e..f3ef0ef10 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,6 +1,6 @@ {-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-} {- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA {- | Module : Text.Pandoc.Pretty - Copyright : Copyright (C) 2010-2015 John MacFarlane + Copyright : Copyright (C) 2010-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs index e5245638d..bc71f1392 100644 --- a/src/Text/Pandoc/Process.hs +++ b/src/Text/Pandoc/Process.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Process - Copyright : Copyright (C) 2013-2015 John MacFarlane + Copyright : Copyright (C) 2013-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index eb71d8dd8..9c7c3b264 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -50,8 +50,7 @@ implemented, [-] means partially implemented): * Inlines - [X] Str - - [X] Emph (From italics. `underline` currently read as span. In - future, it might optionally be emph as well) + - [X] Emph (italics and underline both read as Emph) - [X] Strong - [X] Strikeout - [X] Superscript @@ -62,16 +61,16 @@ implemented, [-] means partially implemented): - [X] Code (styled with `VerbatimChar`) - [X] Space - [X] LineBreak (these are invisible in Word: entered with Shift-Return) - - [ ] Math + - [X] Math - [X] Link (links to an arbitrary bookmark create a span with the target as id and "anchor" class) - - [-] Image (Links to path in archive. Future option for - data-encoded URI likely.) + - [X] Image - [X] Note (Footnotes and Endnotes are silently combined.) -} module Text.Pandoc.Readers.Docx - ( readDocx + ( readDocxWithWarnings + , readDocx ) where import Codec.Archive.Zip @@ -98,14 +97,23 @@ import qualified Data.Sequence as Seq (null) import Text.Pandoc.Error import Text.Pandoc.Compat.Except +readDocxWithWarnings :: ReaderOptions + -> B.ByteString + -> Either PandocError (Pandoc, MediaBag, [String]) +readDocxWithWarnings opts bytes + | Right archive <- toArchiveOrFail bytes + , Right (docx, warnings) <- archiveToDocxWithWarnings archive = do + (meta, blks, mediaBag) <- docxToOutput opts docx + return (Pandoc meta blks, mediaBag, warnings) +readDocxWithWarnings _ _ = + Left (ParseFailure "couldn't parse docx file") + readDocx :: ReaderOptions -> B.ByteString -> Either PandocError (Pandoc, MediaBag) -readDocx opts bytes = - case archiveToDocx (toArchive bytes) of - Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag)) - <$> (docxToOutput opts docx) - Left _ -> Left (ParseFailure "couldn't parse docx file") +readDocx opts bytes = do + (pandoc, mediaBag, _) <- readDocxWithWarnings opts bytes + return (pandoc, mediaBag) data DState = DState { docxAnchorMap :: M.Map String String , docxMediaBag :: MediaBag diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index eec8b12c9..7265ef8dd 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -50,6 +50,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , Row(..) , Cell(..) , archiveToDocx + , archiveToDocxWithWarnings ) where import Codec.Archive.Zip import Text.XML.Light @@ -60,6 +61,7 @@ import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Reader +import Control.Monad.State import Control.Applicative ((<|>)) import qualified Data.Map as M import Text.Pandoc.Compat.Except @@ -81,16 +83,20 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show +data ReaderState = ReaderState { stateWarnings :: [String] } + deriving Show + + data DocxError = DocxError | WrongElem deriving Show instance Error DocxError where noMsg = WrongElem -type D = ExceptT DocxError (Reader ReaderEnv) +type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState)) -runD :: D a -> ReaderEnv -> Either DocxError a -runD dx re = runReader (runExceptT dx) re +runD :: D a -> ReaderEnv -> ReaderState -> (Either DocxError a, ReaderState) +runD dx re rs = runState (runReaderT (runExceptT dx) re) rs maybeToD :: Maybe a -> D a maybeToD (Just a) = return a @@ -257,7 +263,10 @@ type Author = String type ChangeDate = String archiveToDocx :: Archive -> Either DocxError Docx -archiveToDocx archive = do +archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive + +archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) +archiveToDocxWithWarnings archive = do let notes = archiveToNotes archive numbering = archiveToNumbering archive rels = archiveToRelationships archive @@ -265,8 +274,12 @@ archiveToDocx archive = do (styles, parstyles) = archiveToStyles archive rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles InDocument - doc <- runD (archiveToDocument archive) rEnv - return $ Docx doc + rState = ReaderState { stateWarnings = [] } + (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState + case eitherDoc of + Right doc -> Right (Docx doc, stateWarnings st) + Left e -> Left e + archiveToDocument :: Archive -> D Document @@ -576,12 +589,14 @@ elemToBodyPart ns element sty <- asks envParStyles let parstyle = elemToParagraphStyle ns element sty parparts <- mapD (elemToParPart ns) (elChildren element) - case pNumInfo parstyle of - Just (numId, lvl) -> do - num <- asks envNumbering - let levelInfo = lookupLevel numId lvl num - return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> return $ Paragraph parstyle parparts + -- Word uses list enumeration for numbered headings, so we only + -- want to infer a list from the styles if it is NOT a heading. + case pHeading parstyle of + Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do + num <- asks envNumbering + let levelInfo = lookupLevel numId lvl num + return $ ListItem parstyle numId lvl levelInfo parparts + _ -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChild (elemName ns "w" "tblPr") element @@ -646,14 +661,14 @@ elemToParPart ns element | isElem ns "w" "r" element = elemToRun ns element >>= (\r -> return $ PlainRun r) elemToParPart ns element - | isElem ns "w" "ins" element + | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element , Just cId <- findAttr (elemName ns "w" "id") element , Just cAuthor <- findAttr (elemName ns "w" "author") element , Just cDate <- findAttr (elemName ns "w" "date") element = do runs <- mapD (elemToRun ns) (elChildren element) return $ Insertion cId cAuthor cDate runs elemToParPart ns element - | isElem ns "w" "del" element + | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element , Just cId <- findAttr (elemName ns "w" "id") element , Just cAuthor <- findAttr (elemName ns "w" "author") element , Just cDate <- findAttr (elemName ns "w" "date") element = do @@ -702,36 +717,58 @@ elemToExtent drawingElem = getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem >>= findAttr (QName at Nothing Nothing) >>= safeRead -elemToRun :: NameSpaces -> Element -> D Run -elemToRun ns element - | isElem ns "w" "r" element - , Just drawingElem <- findChild (elemName ns "w" "drawing") element = + +childElemToRun :: NameSpaces -> Element -> D Run +childElemToRun ns element + | isElem ns "w" "drawing" element = let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" - drawing = findElement (QName "blip" (Just a_ns) (Just "a")) drawingElem + drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= - (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent drawingElem) + (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent element) Nothing -> throwError WrongElem -elemToRun ns element - | isElem ns "w" "r" element - , Just ref <- findChild (elemName ns "w" "footnoteReference") element - , Just fnId <- findAttr (elemName ns "w" "id") ref = do +childElemToRun ns element + | isElem ns "w" "footnoteReference" element + , Just fnId <- findAttr (elemName ns "w" "id") element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Footnote bps Nothing -> return $ Footnote [] -elemToRun ns element - | isElem ns "w" "r" element - , Just ref <- findChild (elemName ns "w" "endnoteReference") element - , Just enId <- findAttr (elemName ns "w" "id") ref = do +childElemToRun ns element + | isElem ns "w" "endnoteReference" element + , Just enId <- findAttr (elemName ns "w" "id") element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Endnote bps Nothing -> return $ Endnote [] +childElemToRun _ _ = throwError WrongElem + +elemToRun :: NameSpaces -> Element -> D Run +elemToRun ns element + | isElem ns "w" "r" element + , Just altCont <- findChild (elemName ns "mc" "AlternateContent") element = + do let choices = findChildren (elemName ns "mc" "Choice") altCont + choiceChildren = map head $ filter (not . null) $ map elChildren choices + outputs <- mapD (childElemToRun ns) choiceChildren + case outputs of + r : _ -> return r + [] -> throwError WrongElem +elemToRun ns element + | isElem ns "w" "r" element + , Just drawingElem <- findChild (elemName ns "w" "drawing") element = + childElemToRun ns drawingElem +elemToRun ns element + | isElem ns "w" "r" element + , Just ref <- findChild (elemName ns "w" "footnoteReference") element = + childElemToRun ns ref +elemToRun ns element + | isElem ns "w" "r" element + , Just ref <- findChild (elemName ns "w" "endnoteReference") element = + childElemToRun ns ref elemToRun ns element | isElem ns "w" "r" element = do runElems <- elemToRunElems ns element @@ -940,3 +977,4 @@ elemToRunElems _ _ = throwError WrongElem setFont :: Maybe Font -> ReaderEnv -> ReaderEnv setFont f s = s{envFont = f} + diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 07d282708..144ba9ca2 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -19,7 +19,7 @@ import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except) import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.MIME (MimeType) import qualified Text.Pandoc.Builder as B -import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry +import Codec.Archive.Zip ( Archive (..), toArchiveOrFail, fromEntry , findEntryByPath, Entry) import qualified Data.ByteString.Lazy as BL (ByteString) import System.FilePath ( takeFileName, (</>), dropFileName, normalise @@ -39,7 +39,9 @@ import Text.Pandoc.Error type Items = M.Map String (FilePath, MimeType) readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag) -readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes) +readEPUB opts bytes = case toArchiveOrFail bytes of + Right archive -> runEPUB $ archiveToEPUB opts $ archive + Left _ -> Left $ ParseFailure "Couldn't extract ePub file" runEPUB :: Except PandocError a -> Either PandocError a runEPUB = runExcept diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 69df13aac..8ee5da543 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -63,7 +63,7 @@ import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) -import Network.URI (isURI) +import Network.URI (URI, parseURIReference, nonStrictRelativeTo) import Text.Pandoc.Error import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Pandoc.Compat.Monoid ((<>)) @@ -103,7 +103,7 @@ data HTMLState = HTMLState { parserState :: ParserState, noteTable :: [(String, Blocks)], - baseHref :: Maybe String, + baseHref :: Maybe URI, identifiers :: Set.Set String, headerMap :: M.Map Inlines String } @@ -145,15 +145,9 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag return mempty pBaseTag = do bt <- pSatisfy (~== TagOpen "base" []) - let baseH = fromAttrib "href" bt - if null baseH - then return mempty - else do - let baseH' = case reverse baseH of - '/':_ -> baseH - _ -> baseH ++ "/" - updateState $ \st -> st{ baseHref = Just baseH' } - return mempty + updateState $ \st -> st{ baseHref = + parseURIReference $ fromAttrib "href" bt } + return mempty block :: TagParser Blocks block = do @@ -610,9 +604,9 @@ pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) mbBaseHref <- baseHref <$> getState let url' = fromAttrib "href" tag - let url = case (isURI url', mbBaseHref) of - (False, Just h) -> h ++ url' - _ -> url' + let url = case (parseURIReference url', mbBaseHref) of + (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag let cls = words $ fromAttrib "class" tag @@ -624,9 +618,9 @@ pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") mbBaseHref <- baseHref <$> getState let url' = fromAttrib "src" tag - let url = case (isURI url', mbBaseHref) of - (False, Just h) -> h ++ url' - _ -> url' + let url = case (parseURIReference url', mbBaseHref) of + (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag let uid = fromAttrib "id" tag @@ -945,7 +939,7 @@ htmlInBalanced f = try $ do (TagClose _ : TagPosition er ec : _) -> do let ls = er - sr let cs = ec - sc - lscontents <- concat <$> count ls anyLine + lscontents <- unlines <$> count ls anyLine cscontents <- count cs anyChar (_,closetag) <- htmlTag (~== TagClose tn) return (lscontents ++ cscontents ++ closetag) @@ -977,11 +971,20 @@ htmlTag :: Monad m htmlTag f = try $ do lookAhead (char '<') inp <- getInput - let (next : rest) = canonicalizeTags $ parseTagsOptions - parseOptions{ optTagWarning = True } inp + let (next : _) = canonicalizeTags $ parseTagsOptions + parseOptions{ optTagWarning = False } inp guard $ f next + let handleTag tagname = do + -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66> + -- should NOT be parsed as an HTML tag, see #2277 + guard $ not ('.' `elem` tagname) + -- <https://example.org> should NOT be a tag either. + -- tagsoup will parse it as TagOpen "https:" [("example.org","")] + guard $ not (null tagname) + guard $ last tagname /= ':' + rendered <- manyTill anyChar (char '>') + return (next, rendered ++ ">") case next of - TagWarning _ -> fail "encountered TagWarning" TagComment s | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar @@ -989,13 +992,9 @@ htmlTag f = try $ do char '>' return (next, "<!--" ++ s ++ "-->") | otherwise -> fail "bogus comment mode, HTML5 parse error" - _ -> do - -- we get a TagWarning on things like - -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66> - -- which should NOT be parsed as an HTML tag, see #2277 - guard $ not $ hasTagWarning rest - rendered <- manyTill anyChar (char '>') - return (next, rendered ++ ">") + TagOpen tagname _attr -> handleTag tagname + TagClose tagname -> handleTag tagname + _ -> mzero mkAttr :: [(String, String)] -> Attr mkAttr attr = (attribsId, attribsClasses, attribsKV) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 587726084..e43714526 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -122,9 +122,6 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -isNull :: F Inlines -> Bool -isNull ils = B.isNull $ runF ils def - spnl :: Parser [Char] st () spnl = try $ do skipSpaces @@ -188,31 +185,38 @@ charsInBalancedBrackets openBrackets = -- document structure -- -titleLine :: MarkdownParser (F Inlines) -titleLine = try $ do +rawTitleBlockLine :: MarkdownParser String +rawTitleBlockLine = do char '%' skipSpaces - res <- many $ (notFollowedBy newline >> inline) - <|> try (endline >> whitespace) - newline + first <- anyLine + rest <- many $ try $ do spaceChar + notFollowedBy blankline + skipSpaces + anyLine + return $ trim $ unlines (first:rest) + +titleLine :: MarkdownParser (F Inlines) +titleLine = try $ do + raw <- rawTitleBlockLine + res <- parseFromString (many inline) raw return $ trimInlinesF $ mconcat res authorsLine :: MarkdownParser (F [Inlines]) authorsLine = try $ do - char '%' - skipSpaces - authors <- sepEndBy (many (notFollowedBy (satisfy $ \c -> - c == ';' || c == '\n') >> inline)) - (char ';' <|> - try (newline >> notFollowedBy blankline >> spaceChar)) - newline - return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors + raw <- rawTitleBlockLine + let sep = (char ';' <* spaces) <|> newline + let pAuthors = sepEndBy + (trimInlinesF . mconcat <$> many + (try $ notFollowedBy sep >> inline)) + sep + sequence <$> parseFromString pAuthors raw dateLine :: MarkdownParser (F Inlines) dateLine = try $ do - char '%' - skipSpaces - trimInlinesF . mconcat <$> manyTill inline newline + raw <- rawTitleBlockLine + res <- parseFromString (many inline) raw + return $ trimInlinesF $ mconcat res titleBlock :: MarkdownParser () titleBlock = pandocTitleBlock <|> mmdTitleBlock @@ -1354,16 +1358,18 @@ pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak + let heads' = take (length aligns) <$> heads lines' <- many pipeTableRow + let lines'' = map (take (length aligns) <$>) lines' let maxlength = maximum $ - map (\x -> length . stringify $ runF x def) (heads : lines') + map (\x -> length . stringify $ runF x def) (heads' : lines'') numColumns <- getOption readerColumns let widths = if maxlength > numColumns then map (\len -> fromIntegral (len + 1) / fromIntegral numColumns) seplengths else replicate (length aligns) 0.0 - return $ (aligns, widths, heads, sequence lines') + return $ (aligns, widths, heads', sequence lines'') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1372,25 +1378,27 @@ sepPipe = try $ do -- parse a row, also returning probable alignments for org-table cells pipeTableRow :: MarkdownParser (F [Blocks]) -pipeTableRow = do +pipeTableRow = try $ do + scanForPipe skipMany spaceChar openPipe <- (True <$ char '|') <|> return False - let cell = mconcat <$> - many (notFollowedBy (blankline <|> char '|') >> inline) - first <- cell - rest <- many $ sepPipe *> cell + -- split into cells + let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') + <|> void (noneOf "|\n\r") + let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>= + parseFromString pipeTableCell + cells <- cellContents `sepEndBy1` (char '|') -- surrounding pipes needed for a one-column table: - guard $ not (null rest && not openPipe) - optional (char '|') + guard $ not (length cells == 1 && not openPipe) blankline - let cells = sequence (first:rest) - return $ do - cells' <- cells - return $ map - (\ils -> - case trimInlines ils of - ils' | B.isNull ils' -> mempty - | otherwise -> B.plain $ ils') cells' + return $ sequence cells + +pipeTableCell :: MarkdownParser (F Blocks) +pipeTableCell = do + result <- many inline + if null result + then return mempty + else return $ B.plain . mconcat <$> sequence result pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) pipeTableHeaderPart = try $ do diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 950497992..d3cee08e2 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -225,7 +225,7 @@ table = do Nothing -> 1.0 caption <- option mempty tableCaption optional rowsep - hasheader <- option False $ True <$ (lookAhead (char '!')) + hasheader <- option False $ True <$ (lookAhead (skipSpaces *> char '!')) (cellspecs',hdr) <- unzip <$> tableRow let widths = map ((tableWidth *) . snd) cellspecs' let restwidth = tableWidth - sum widths diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index a925c1d84..68e89263c 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -59,7 +59,9 @@ readOdt _ bytes = case bytesToOdt bytes of -- bytesToOdt :: B.ByteString -> Either PandocError Pandoc -bytesToOdt bytes = archiveToOdt $ toArchive bytes +bytesToOdt bytes = case toArchiveOrFail bytes of + Right archive -> archiveToOdt archive + Left _ -> Left $ ParseFailure "Couldn't parse odt file." -- archiveToOdt :: Archive -> Either PandocError Pandoc diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 7dd611be3..5a50a8f34 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -35,6 +35,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), trimInlines ) import Text.Pandoc.Definition import Text.Pandoc.Compat.Monoid ((<>)) +import Text.Pandoc.Error import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF @@ -49,7 +50,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Control.Arrow (first) import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks, local) -import Data.Char (isAlphaNum, toLower) +import Data.Char (isAlphaNum, isSpace, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M @@ -57,8 +58,6 @@ import qualified Data.Set as Set import Data.Maybe (fromMaybe, isJust) import Network.HTTP (urlEncode) -import Text.Pandoc.Error - -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) @@ -391,6 +390,9 @@ lookupBlockAttribute key = type BlockProperties = (Int, String) -- (Indentation, Block-Type) +updateIndent :: BlockProperties -> Int -> BlockProperties +updateIndent (_, blkType) indent = (indent, blkType) + orgBlock :: OrgParser (F Blocks) orgBlock = try $ do blockProp@(_, blkType) <- blockHeaderStart @@ -407,11 +409,23 @@ orgBlock = try $ do _ -> withParsed (fmap $ divWithClass blkType) blockHeaderStart :: OrgParser (Int, String) -blockHeaderStart = try $ (,) <$> indent <*> blockType +blockHeaderStart = try $ (,) <$> indentation <*> blockType where - indent = length <$> many spaceChar blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) +indentation :: OrgParser Int +indentation = try $ do + tabStop <- getOption readerTabStop + s <- many spaceChar + return $ spaceLength tabStop s + +spaceLength :: Int -> String -> Int +spaceLength tabStop s = (sum . map charLen) s + where + charLen ' ' = 1 + charLen '\t' = tabStop + charLen _ = 0 + withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) @@ -450,7 +464,8 @@ codeBlock blkProp = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) id' <- fromMaybe "" <$> lookupBlockAttribute "name" - content <- rawBlockContent blkProp + leadingIndent <- lookAhead indentation + content <- rawBlockContent (updateIndent blkProp leadingIndent) resultsContent <- followingResultsBlock let includeCode = exportsCode kv let includeResults = exportsResults kv @@ -472,7 +487,7 @@ rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) - blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType) parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) parsedBlockContent blkProps = try $ do @@ -758,9 +773,13 @@ data OrgTableRow = OrgContentRow (F [Blocks]) | OrgAlignRow [Alignment] | OrgHlineRow +-- OrgTable is strongly related to the pandoc table ADT. Using the same +-- (i.e. pandoc-global) ADT would mean that the reader would break if the +-- global structure was to be changed, which would be bad. The final table +-- should be generated using a builder function. Column widths aren't +-- implemented yet, so they are not tracked here. data OrgTable = OrgTable - { orgTableColumns :: Int - , orgTableAlignments :: [Alignment] + { orgTableAlignments :: [Alignment] , orgTableHeader :: [Blocks] , orgTableRows :: [[Blocks]] } @@ -776,7 +795,7 @@ table = try $ do orgToPandocTable :: OrgTable -> Inlines -> Blocks -orgToPandocTable (OrgTable _ aligns heads lns) caption = +orgToPandocTable (OrgTable aligns heads lns) caption = B.table caption (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char @@ -787,18 +806,19 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) tableContentCell :: OrgParser (F Blocks) tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell - -endOfCell :: OrgParser Char -endOfCell = try $ char '|' <|> lookAhead newline + fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell tableAlignRow :: OrgParser OrgTableRow -tableAlignRow = try $ - OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline) +tableAlignRow = try $ do + tableStart + cells <- many1Till tableAlignCell newline + -- Empty rows are regular (i.e. content) rows, not alignment rows. + guard $ any (/= AlignDefault) cells + return $ OrgAlignRow cells tableAlignCell :: OrgParser Alignment tableAlignCell = @@ -813,65 +833,61 @@ tableAlignCell = where emptyCell = try $ skipSpaces *> endOfCell tableAlignFromChar :: OrgParser Alignment -tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft - , char 'c' *> return AlignCenter - , char 'r' *> return AlignRight - ] +tableAlignFromChar = try $ + choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] tableHline :: OrgParser OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) +endOfCell :: OrgParser Char +endOfCell = try $ char '|' <|> lookAhead newline + rowsToTable :: [OrgTableRow] -> F OrgTable -rowsToTable = foldM (flip rowToContent) zeroTable - where zeroTable = OrgTable 0 mempty mempty mempty - -normalizeTable :: OrgTable - -> OrgTable -normalizeTable (OrgTable cols aligns heads lns) = - let aligns' = fillColumns aligns AlignDefault - heads' = if heads == mempty - then mempty - else fillColumns heads (B.plain mempty) - lns' = map (`fillColumns` B.plain mempty) lns - fillColumns base padding = take cols $ base ++ repeat padding - in OrgTable cols aligns' heads' lns' +rowsToTable = foldM rowToContent emptyTable + where emptyTable = OrgTable mempty mempty mempty +normalizeTable :: OrgTable -> OrgTable +normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows + where + refRow = if heads /= mempty + then heads + else if rows == mempty then mempty else head rows + cols = length refRow + fillColumns base padding = take cols $ base ++ repeat padding + aligns' = fillColumns aligns AlignDefault -- One or more horizontal rules after the first content line mark the previous -- line as a header. All other horizontal lines are discarded. -rowToContent :: OrgTableRow - -> OrgTable - -> F OrgTable -rowToContent OrgHlineRow t = maybeBodyToHeader t -rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t -rowToContent (OrgContentRow rf) t = do - rs <- rf - setLongestRow rs =<< appendToBody rs t - -setLongestRow :: [a] - -> OrgTable - -> F OrgTable -setLongestRow rs t = - return t{ orgTableColumns = max (length rs) (orgTableColumns t) } - -maybeBodyToHeader :: OrgTable - -> F OrgTable -maybeBodyToHeader t = case t of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - return t{ orgTableHeader = b , orgTableRows = [] } - _ -> return t - -appendToBody :: [Blocks] - -> OrgTable +rowToContent :: OrgTable + -> OrgTableRow -> F OrgTable -appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] } +rowToContent orgTable row = + case row of + OrgHlineRow -> return singleRowPromotedToHeader + OrgAlignRow as -> return . setAligns $ as + OrgContentRow cs -> appendToBody cs + where + singleRowPromotedToHeader :: OrgTable + singleRowPromotedToHeader = case orgTable of + OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> + orgTable{ orgTableHeader = b , orgTableRows = [] } + _ -> orgTable -setAligns :: [Alignment] - -> OrgTable - -> F OrgTable -setAligns aligns t = return $ t{ orgTableAlignments = aligns } + setAligns :: [Alignment] -> OrgTable + setAligns aligns = orgTable{ orgTableAlignments = aligns } + + appendToBody :: F [Blocks] -> F OrgTable + appendToBody frow = do + newRow <- frow + let oldRows = orgTableRows orgTable + -- NOTE: This is an inefficient O(n) operation. This should be changed + -- if performance ever becomes a problem. + return orgTable{ orgTableRows = oldRows ++ [newRow] } -- @@ -1565,14 +1581,14 @@ inlineLaTeX = try $ do parseAsMathMLSym :: String -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) - -- dropWhileEnd would be nice here, but it's not available before base 4.5 - where clean = reverse . dropWhile (`elem` ("{}" :: String)) . reverse . drop 1 + -- drop initial backslash and any trailing "{}" + where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 state :: ParserState state = def{ stateOptions = def{ readerParseRaw = True }} - texMathToPandoc inp = (maybeRight $ readTeX inp) >>= - writePandoc DisplayInline + texMathToPandoc :: String -> Maybe [Inline] + texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just @@ -1582,11 +1598,18 @@ inlineLaTeXCommand = try $ do rest <- getInput case runParser rawLaTeXInline def "source" rest of Right (RawInline _ cs) -> do - let len = length cs + -- drop any trailing whitespace, those are not be part of the command as + -- far as org mode is concerned. + let cmdNoSpc = dropWhileEnd isSpace cs + let len = length cmdNoSpc count len anyChar - return cs + return cmdNoSpc _ -> mzero +-- Taken from Data.OldList. +dropWhileEnd :: (a -> Bool) -> [a] -> [a] +dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] + smart :: OrgParser (F Inlines) smart = do getOption readerSmart >>= guard diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 390a7a21a..d08d636df 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2011-2015 John MacFarlane + Copyright : Copyright (C) 2011-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 075d76847..d6b088338 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -2,7 +2,7 @@ FlexibleContexts, ScopedTypeVariables, PatternGuards, ViewPatterns #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index 1a27ab5ac..e19dba3e2 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Slides - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index a010433fa..925925872 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -1,7 +1,7 @@ {-# LANGUAGE TypeSynonymInstances, FlexibleInstances, CPP, OverloadedStrings, GeneralizedNewtypeDeriving #-} {- -Copyright (C) 2009-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2009-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Templates - Copyright : Copyright (C) 2009-2015 John MacFarlane + Copyright : Copyright (C) 2009-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index de3314a0d..87ed5312b 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,6 +1,6 @@ {-# LANGUAGE CPP #-} {- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UTF8 - Copyright : Copyright (C) 2010-2015 John MacFarlane + Copyright : Copyright (C) 2010-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs index 463be044c..5d05fa303 100644 --- a/src/Text/Pandoc/UUID.hs +++ b/src/Text/Pandoc/UUID.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UUID - Copyright : Copyright (C) 2010-2015 John MacFarlane + Copyright : Copyright (C) 2010-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 498e2d10f..8d54d62bd 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -279,7 +279,17 @@ blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst -- | Convert list of inline elements to ConTeXt. inlineListToConTeXt :: [Inline] -- ^ Inlines to convert -> State WriterState Doc -inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt lst +inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst + -- We add a \strut after a line break that precedes a space, + -- or the space gets swallowed + where addStruts (LineBreak : s : xs) | isSpacey s = + LineBreak : RawInline (Format "context") "\\strut " : s : + addStruts xs + addStruts (x:xs) = x : addStruts xs + addStruts [] = [] + isSpacey Space = True + isSpacey (Str ('\160':_)) = True + isSpacey _ = False -- | Convert inline element to ConTeXt inlineToConTeXt :: Inline -- ^ Inline to convert diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 2aaebf99f..9acfe289a 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -112,10 +112,15 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = else elements tag = case lvl of n | n == 0 -> "chapter" - | n >= 1 && n <= 5 -> "sect" ++ show n + | n >= 1 && n <= 5 -> if writerDocbook5 opts + then "section" + else "sect" ++ show n | otherwise -> "simplesect" - in inTags True tag [("id", writerIdentifierPrefix opts ++ id') | - not (null id')] $ + idAttr = [("id", writerIdentifierPrefix opts ++ id') | not (null id')] + nsAttr = if writerDocbook5 opts && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook")] + else [] + attribs = nsAttr ++ idAttr + in inTags True tag attribs $ inTagsSimple "title" (inlinesToDocbook opts title) $$ vcat (map (elementToDocbook opts (lvl + 1)) elements') @@ -227,9 +232,11 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = blockToDocbook opts (DefinitionList lst) = let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst -blockToDocbook _ (RawBlock f str) +blockToDocbook opts (RawBlock f str) | f == "docbook" = text str -- raw XML block - | f == "html" = text str -- allow html for backwards compatibility + | f == "html" = if writerDocbook5 opts + then empty -- No html in Docbook5 + else text str -- allow html for backwards compatibility | otherwise = empty blockToDocbook _ HorizontalRule = empty -- not semantic blockToDocbook opts (Table caption aligns widths headers rows) = @@ -344,7 +351,9 @@ inlineToDocbook opts (Link attr txt (src, _)) | otherwise = (if isPrefixOf "#" src then inTags False "link" $ ("linkend", drop 1 src) : idAndRole attr - else inTags False "ulink" $ ("url", src) : idAndRole attr ) $ + else if writerDocbook5 opts + then inTags False "link" $ ("xlink:href", src) : idAndRole attr + else inTags False "ulink" $ ("url", src) : idAndRole attr ) $ inlinesToDocbook opts txt inlineToDocbook opts (Image attr _ (src, tit)) = let titleDoc = if null tit diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 150e19043..a841e1b66 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1103,7 +1103,7 @@ inlineToOpenXML opts (Link _ txt (src,_)) = do M.insert src i extlinks } return i return [ mknode "w:hyperlink" [("r:id",id')] contents ] -inlineToOpenXML opts (Image attr alt (src, tit)) = do +inlineToOpenXML opts (Image attr alt (src, _)) = do -- first, check to see if we've already done this image pageWidth <- gets stPrintWidth imgs <- gets stImages @@ -1154,7 +1154,7 @@ inlineToOpenXML opts (Image attr alt (src, tit)) = do mknode "wp:inline" [] [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () - , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] () + , mknode "wp:docPr" [("descr",stringify alt),("id","1"),("name","Picture")] () , graphic ] let imgext = case mt >>= extensionFromMimeType of Just x -> '.':x diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index f1088b158..56e2b9027 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -452,8 +452,11 @@ inlineToDokuWiki _ (Code _ str) = inlineToDokuWiki _ (Str str) = return $ escapeString str -inlineToDokuWiki _ (Math _ str) = return $ "$" ++ str ++ "$" +inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped + where delim = case mathType of + DisplayMath -> "$$" + InlineMath -> "$" inlineToDokuWiki _ (RawInline f str) | f == Format "dokuwiki" = return str diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 804dbb926..90f502f6f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -667,7 +667,8 @@ writeEPUB opts doc@(Pandoc meta _) = do ] ] else [] - let navData = renderHtml $ writeHtml opts' + let navData = renderHtml $ writeHtml + opts'{ writerVariables = ("navpage","true"):vars } (Pandoc (setMeta "title" (walk removeNote $ fromList $ docTitle' meta) nullMeta) (navBlocks ++ landmarks)) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index c5b6a6db2..d8b8384e7 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -855,13 +855,12 @@ inlineToHtml opts inline = (Note contents) | writerIgnoreNotes opts -> return mempty | otherwise -> do - st <- get - let notes = stNotes st + notes <- gets stNotes let number = (length notes) + 1 let ref = show number htmlContents <- blockListToNote opts ref contents -- push contents onto front of notes - put $ st {stNotes = (htmlContents:notes)} + modify $ \st -> st {stNotes = (htmlContents:notes)} let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides] let link = H.a ! A.href (toValue $ "#" ++ diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 4e4279ec5..804e0febc 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -39,8 +39,10 @@ import Text.Pandoc.Templates import Text.Printf ( printf ) import Network.URI ( isURI, unEscapeString ) import Data.Aeson (object, (.=), FromJSON) -import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, nub, nubBy ) -import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) +import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, + nub, nubBy, foldl' ) +import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, + ord, isAlphaNum ) import Data.Maybe ( fromMaybe, isJust, catMaybes ) import qualified Data.Text as T import Control.Applicative ((<|>)) @@ -223,7 +225,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do ++ poly ++ "}{##2}}}\n" else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" ++ babel ++ "}{#2}}\n" ++ - "\\newenvironment{" ++ poly ++ "}[1]{\\begin{otherlanguage}{" + "\\newenvironment{" ++ poly ++ "}[2][]{\\begin{otherlanguage}{" ++ babel ++ "}}{\\end{otherlanguage}}\n" ) -- eliminate duplicates that have same polyglossia name @@ -403,25 +405,28 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure -blockToLaTeX (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do +blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do inNote <- gets stInNote modify $ \st -> st{ stInMinipage = True, stNotes = [] } capt <- inlineListToLaTeX txt notes <- gets stNotes modify $ \st -> st{ stInMinipage = False, stNotes = [] } + -- We can't have footnotes in the list of figures, so remove them: captForLof <- if null notes then return empty else brackets <$> inlineListToLaTeX (walk deNote txt) img <- inlineToLaTeX (Image attr txt (src,tit)) let footnotes = notesToLaTeX notes + lab <- labelFor ident + let caption = "\\caption" <> captForLof <> braces capt <> lab + figure <- hypertarget ident (cr <> + "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ + caption $$ "\\end{figure}" <> cr) return $ if inNote -- can't have figures in notes then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" - else "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ - ("\\caption" <> captForLof <> braces capt) $$ - "\\end{figure}" $$ - footnotes + else figure $$ footnotes -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do beamer <- writerBeamer `fmap` gets stOptions @@ -468,23 +473,27 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do st <- get let params = if writerListings (stOptions st) then (case getListingsLanguage classes of - Just l -> [ "language=" ++ l ] + Just l -> [ "language=" ++ mbBraced l ] Nothing -> []) ++ [ "numbers=left" | "numberLines" `elem` classes || "number" `elem` classes || "number-lines" `elem` classes ] ++ [ (if key == "startFrom" then "firstnumber" - else key) ++ "=" ++ attr | + else key) ++ "=" ++ mbBraced attr | (key,attr) <- keyvalAttr ] ++ (if identifier == "" then [] else [ "label=" ++ ref ]) else [] + mbBraced x = if not (all isAlphaNum x) + then "{" <> x <> "}" + else x printParams | null params = empty - | otherwise = brackets $ hcat (intersperse ", " (map text params)) + | otherwise = brackets $ hcat (intersperse ", " + (map text params)) return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ "\\end{lstlisting}") $$ cr let highlightedCodeBlock = @@ -505,7 +514,8 @@ blockToLaTeX (RawBlock f x) blockToLaTeX (BulletList []) = return empty -- otherwise latex error blockToLaTeX (BulletList lst) = do incremental <- gets stIncremental - let inc = if incremental then "[<+->]" else "" + beamer <- writerBeamer `fmap` gets stOptions + let inc = if beamer && incremental then "[<+->]" else "" items <- mapM listItemToLaTeX lst let spacing = if isTightList lst then text "\\tightlist" @@ -571,18 +581,21 @@ blockToLaTeX (Header level (id',classes,_) lst) = do blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty - else ($$ "\\midrule\n") `fmap` - (tableRowToLaTeX True aligns widths) heads + else do + contents <- (tableRowToLaTeX True aligns widths) heads + return ("\\toprule" $$ contents $$ "\\midrule") let endhead = if all null heads then empty else text "\\endhead" + let endfirsthead = if all null heads + then empty + else text "\\endfirsthead" captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "\\caption" <> braces captionText - <> "\\tabularnewline\n\\toprule\n" - <> headers - <> "\\endfirsthead" + else text "\\caption" <> braces captionText <> "\\tabularnewline" + $$ headers + $$ endfirsthead rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns modify $ \s -> s{ stTable = True } @@ -590,7 +603,7 @@ blockToLaTeX (Table caption aligns widths heads rows) = do braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end $$ capt - $$ "\\toprule" + $$ (if all null heads then "\\toprule" else empty) $$ headers $$ endhead $$ vcat rows' @@ -662,8 +675,7 @@ tableCellToLaTeX header (width, align, blocks) = do AlignDefault -> "\\raggedright" return $ ("\\begin{minipage}" <> valign <> braces (text (printf "%.2f\\columnwidth" width)) <> - (halign <> "\\strut" <> cr <> cellContents <> cr) <> - "\\strut\\end{minipage}") $$ + (halign <> cr <> cellContents <> cr) <> "\\end{minipage}") $$ notesToLaTeX notes notesToLaTeX :: [Doc] -> Doc @@ -712,10 +724,9 @@ sectionHeader :: Bool -- True for unnumbered -> Int -> [Inline] -> State WriterState Doc -sectionHeader unnumbered ref level lst = do +sectionHeader unnumbered ident level lst = do txt <- inlineListToLaTeX lst - lab <- text `fmap` toLabel ref - plain <- stringToLaTeX TextString $ foldl (++) "" $ map stringify lst + plain <- stringToLaTeX TextString $ concatMap stringify lst let noNote (Note _) = Str "" noNote x = x let lstNoNotes = walk noNote lst @@ -737,16 +748,6 @@ sectionHeader unnumbered ref level lst = do book <- gets stBook opts <- gets stOptions let level' = if book || writerChapters opts then level - 1 else level - internalLinks <- gets stInternalLinks - let refLabel x = (if ref `elem` internalLinks - then text "\\hypertarget" - <> braces lab - <> braces x - else x) - let headerWith x y = refLabel $ text x <> y <> - if null ref - then empty - else text "\\label" <> braces lab let sectionType = case level' of 0 | writerBeamer opts -> "part" | otherwise -> "chapter" @@ -762,16 +763,34 @@ sectionHeader unnumbered ref level lst = do -- needed for \paragraph, \subparagraph in quote environment -- see http://tex.stackexchange.com/questions/169830/ else empty + lab <- labelFor ident + stuffing' <- hypertarget ident $ text ('\\':sectionType) <> stuffing <> lab return $ if level' > 5 then txt - else prefix $$ - headerWith ('\\':sectionType) stuffing + else prefix $$ stuffing' $$ if unnumbered then "\\addcontentsline{toc}" <> braces (text sectionType) <> braces txtNoNotes else empty +hypertarget :: String -> Doc -> State WriterState Doc +hypertarget ident x = do + ref <- text `fmap` toLabel ident + internalLinks <- gets stInternalLinks + return $ + if ident `elem` internalLinks + then text "\\hypertarget" + <> braces ref + <> braces x + else x + +labelFor :: String -> State WriterState Doc +labelFor "" = return empty +labelFor ident = do + ref <- text `fmap` toLabel ident + return $ text "\\label" <> braces ref + -- | Convert list of inline elements to LaTeX. inlineListToLaTeX :: [Inline] -- ^ Inlines to convert -> State WriterState Doc @@ -1019,7 +1038,7 @@ citationsToNatbib (c:cs) | citationMode c == AuthorInText = do citationsToNatbib cits = do cits' <- mapM convertOne cits - return $ text "\\citetext{" <> foldl combineTwo empty cits' <> text "}" + return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}" where combineTwo a b | isEmpty a = b | otherwise = a <> text "; " <> b @@ -1068,7 +1087,7 @@ citationsToBiblatex (one:[]) citationsToBiblatex (c:cs) = do args <- mapM convertOne (c:cs) - return $ text cmd <> foldl (<>) empty args + return $ text cmd <> foldl' (<>) empty args where cmd = case citationMode c of AuthorInText -> "\\textcites" @@ -1112,7 +1131,7 @@ toPolyglossiaEnv l = -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Polyglossia (language, options) tuple --- http://mirrors.concertpass.com/tex-archive/macros/latex/contrib/polyglossia/polyglossia.pdf +-- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf toPolyglossia :: [String] -> (String, String) toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria") toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq") @@ -1140,17 +1159,21 @@ toPolyglossia ("en":"UK":_) = ("english", "variant=british") toPolyglossia ("en":"US":_) = ("english", "variant=american") toPolyglossia ("grc":_) = ("greek", "variant=ancient") toPolyglossia ("hsb":_) = ("usorbian", "") +toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic") toPolyglossia ("sl":_) = ("slovenian", "") toPolyglossia x = (commonFromBcp47 x, "") -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Babel language string. --- http://mirrors.concertpass.com/tex-archive/macros/latex/required/babel/base/babel.pdf --- Note that the PDF unfortunately does not contain a complete list of supported languages. +-- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf +-- List of supported languages (slightly outdated): +-- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf toBabel :: [String] -> String toBabel ("de":"1901":_) = "german" toBabel ("de":"AT":"1901":_) = "austrian" toBabel ("de":"AT":_) = "naustrian" +toBabel ("de":"CH":"1901":_) = "swissgerman" +toBabel ("de":"CH":_) = "nswissgerman" toBabel ("de":_) = "ngerman" toBabel ("dsb":_) = "lowersorbian" toBabel ("el":"polyton":_) = "polutonikogreek" @@ -1164,6 +1187,7 @@ toBabel ("fr":"CA":_) = "canadien" toBabel ("fra":"aca":_) = "acadian" toBabel ("grc":_) = "polutonikogreek" toBabel ("hsb":_) = "uppersorbian" +toBabel ("la":"x":"classic":_) = "classiclatin" toBabel ("sl":_) = "slovene" toBabel x = commonFromBcp47 x @@ -1172,12 +1196,15 @@ toBabel x = commonFromBcp47 x -- https://tools.ietf.org/html/bcp47#section-2.1 commonFromBcp47 :: [String] -> String commonFromBcp47 [] = "" -commonFromBcp47 ("pt":"BR":_) = "brazilian" +commonFromBcp47 ("pt":"BR":_) = "brazilian" +commonFromBcp47 ("sr":"Cyrl":_) = "serbianc" +commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin" commonFromBcp47 x = fromIso $ head x where fromIso "af" = "afrikaans" fromIso "am" = "amharic" fromIso "ar" = "arabic" + fromIso "as" = "assamese" fromIso "ast" = "asturian" fromIso "bg" = "bulgarian" fromIso "bn" = "bengali" @@ -1201,12 +1228,13 @@ commonFromBcp47 x = fromIso $ head x fromIso "fur" = "friulan" fromIso "ga" = "irish" fromIso "gd" = "scottish" + fromIso "gez" = "ethiopic" fromIso "gl" = "galician" fromIso "he" = "hebrew" fromIso "hi" = "hindi" fromIso "hr" = "croatian" - fromIso "hy" = "armenian" fromIso "hu" = "magyar" + fromIso "hy" = "armenian" fromIso "ia" = "interlingua" fromIso "id" = "indonesian" fromIso "ie" = "interlingua" @@ -1214,6 +1242,7 @@ commonFromBcp47 x = fromIso $ head x fromIso "it" = "italian" fromIso "jp" = "japanese" fromIso "km" = "khmer" + fromIso "kmr" = "kurmanji" fromIso "kn" = "kannada" fromIso "ko" = "korean" fromIso "la" = "latin" @@ -1229,6 +1258,7 @@ commonFromBcp47 x = fromIso $ head x fromIso "no" = "norsk" fromIso "nqo" = "nko" fromIso "oc" = "occitan" + fromIso "pa" = "panjabi" fromIso "pl" = "polish" fromIso "pms" = "piedmontese" fromIso "pt" = "portuguese" @@ -1245,6 +1275,7 @@ commonFromBcp47 x = fromIso $ head x fromIso "ta" = "tamil" fromIso "te" = "telugu" fromIso "th" = "thai" + fromIso "ti" = "ethiopic" fromIso "tk" = "turkmen" fromIso "tr" = "turkish" fromIso "uk" = "ukrainian" diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 20086ed19..e57a6fc11 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -170,7 +170,7 @@ blockToOrg (Table caption' _ _ headers rows) = do map ((+2) . numChars) $ transpose (headers' : rawRows) -- FIXME: Org doesn't allow blocks with height more than 1. let hpipeBlocks blocks = hcat [beg, middle, end] - where h = maximum (map height blocks) + where h = maximum (1 : map height blocks) sep' = lblock 3 $ vcat (map text $ replicate h " | ") beg = lblock 2 $ vcat (map text $ replicate h "| ") end = lblock 2 $ vcat (map text $ replicate h " |") diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index 70d8efba6..4cc2141b4 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> |