From 9849ba7fd744f529f063e0802a18fa18c8433eeb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Jun 2017 23:29:37 +0200 Subject: Use Control.Monad.State.Strict throughout. This gives 20-30% speedup and reduction of memory usage in most of the writers. --- src/Text/Pandoc/Class.hs | 2 +- src/Text/Pandoc/Compat/Time.hs | 2 +- src/Text/Pandoc/Pretty.hs | 2 +- src/Text/Pandoc/Readers/DocBook.hs | 2 +- src/Text/Pandoc/Readers/Docx.hs | 2 +- src/Text/Pandoc/Readers/Docx/Parse.hs | 2 +- src/Text/Pandoc/Readers/Docx/StyleMap.hs | 2 +- src/Text/Pandoc/Readers/OPML.hs | 2 +- src/Text/Pandoc/Shared.hs | 2 +- src/Text/Pandoc/Writers/AsciiDoc.hs | 2 +- src/Text/Pandoc/Writers/CommonMark.hs | 2 +- src/Text/Pandoc/Writers/ConTeXt.hs | 2 +- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/DokuWiki.hs | 2 +- src/Text/Pandoc/Writers/EPUB.hs | 2 +- src/Text/Pandoc/Writers/FB2.hs | 4 ++-- src/Text/Pandoc/Writers/HTML.hs | 2 +- src/Text/Pandoc/Writers/Haddock.hs | 2 +- src/Text/Pandoc/Writers/ICML.hs | 2 +- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- src/Text/Pandoc/Writers/Man.hs | 2 +- src/Text/Pandoc/Writers/Markdown.hs | 2 +- src/Text/Pandoc/Writers/MediaWiki.hs | 2 +- src/Text/Pandoc/Writers/Ms.hs | 2 +- src/Text/Pandoc/Writers/Muse.hs | 2 +- src/Text/Pandoc/Writers/ODT.hs | 2 +- src/Text/Pandoc/Writers/OpenDocument.hs | 2 +- src/Text/Pandoc/Writers/Org.hs | 2 +- src/Text/Pandoc/Writers/RST.hs | 2 +- src/Text/Pandoc/Writers/Texinfo.hs | 2 +- src/Text/Pandoc/Writers/Textile.hs | 2 +- src/Text/Pandoc/Writers/ZimWiki.hs | 2 +- 32 files changed, 33 insertions(+), 33 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 14a0b8044..8db2e214e 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -107,7 +107,7 @@ import qualified System.FilePath.Glob as IO (glob) import qualified System.Directory as IO (getModificationTime) import Control.Monad as M (fail) import Control.Monad.Reader (ReaderT) -import Control.Monad.State +import Control.Monad.State.Strict import Control.Monad.Except import Control.Monad.Writer (WriterT) import Control.Monad.RWS (RWST) diff --git a/src/Text/Pandoc/Compat/Time.hs b/src/Text/Pandoc/Compat/Time.hs index 1de197801..b1cde82a4 100644 --- a/src/Text/Pandoc/Compat/Time.hs +++ b/src/Text/Pandoc/Compat/Time.hs @@ -27,4 +27,4 @@ where import Data.Time import System.Locale ( defaultTimeLocale ) -#endif \ No newline at end of file +#endif diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index d78a2f1d9..1b3c647a1 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -77,7 +77,7 @@ module Text.Pandoc.Pretty ( ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (isSpace) import Data.Foldable (toList) import Data.List (intersperse) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index bd3c7c356..6108aae7f 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -9,7 +9,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity) import Data.Either (rights) import Data.Generics import Data.Char (isSpace) -import Control.Monad.State +import Control.Monad.State.Strict import Data.List (intersperse) import Data.Maybe (fromMaybe) import Text.TeXMath (readMathML, writeTeX) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 2757314ab..21aa358f2 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -76,7 +76,7 @@ module Text.Pandoc.Readers.Docx import Codec.Archive.Zip import Control.Monad.Reader -import Control.Monad.State +import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.Default (Default) import Data.List (delete, intersect) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index e6736100f..24615ba94 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -58,7 +58,7 @@ import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except import Control.Monad.Reader -import Control.Monad.State +import Control.Monad.State.Strict import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import Data.Char (chr, isDigit, ord, readLitChar) diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs index 38f976fd8..b32a73770 100644 --- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -7,7 +7,7 @@ module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) , hasStyleName ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (toLower) import qualified Data.Map as M import Text.Pandoc.Readers.Docx.Util diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 591d7590e..e9f876525 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,6 +1,6 @@ {-# LANGUAGE FlexibleContexts #-} module Text.Pandoc.Readers.OPML ( readOPML ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (toUpper) import Data.Text (Text, unpack, pack) import Data.Default diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 745e809d0..7b299c56b 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -117,7 +117,7 @@ import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Error (PandocError(..)) import System.FilePath ( () ) import Data.Generics (Typeable, Data) -import qualified Control.Monad.State as S +import qualified Control.Monad.State.Strict as S import qualified Control.Exception as E import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 46dbe6eaf..ee977f90b 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -37,7 +37,7 @@ that it has omitted the construct. AsciiDoc: -} module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Aeson (Result (..), Value (String), fromJSON, toJSON) import Data.Char (isPunctuation, isSpace) import Data.List (intercalate, intersperse, stripPrefix) diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index ed316ced9..93cc0b53a 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -32,7 +32,7 @@ CommonMark: module Text.Pandoc.Writers.CommonMark (writeCommonMark) where import CMark -import Control.Monad.State (State, get, modify, runState) +import Control.Monad.State.Strict (State, get, modify, runState) import Data.Foldable (foldrM) import Data.Text (Text) import qualified Data.Text as T diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 2da6a7f9a..571c55b19 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into ConTeXt. -} module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (ord) import Data.List (intercalate, intersperse) import Data.Maybe (catMaybes) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 63bb8a5ae..b488f2479 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -37,7 +37,7 @@ import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except (catchError) import Control.Monad.Reader -import Control.Monad.State +import Control.Monad.State.Strict import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 551a1b0b5..dc227cfa9 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -41,7 +41,7 @@ DokuWiki: module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where import Control.Monad (zipWithM) import Control.Monad.Reader (ReaderT, ask, local, runReaderT) -import Control.Monad.State (StateT, evalStateT, gets, modify) +import Control.Monad.State.Strict (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) import Data.List (intercalate, intersect, isPrefixOf, transpose) import Data.Text (Text, pack) diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index d68283007..bd9a4c800 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -36,7 +36,7 @@ import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive, fromArchive, fromEntry, toEntry) import Control.Monad (mplus, when, zipWithM) import Control.Monad.Except (catchError, throwError) -import Control.Monad.State (State, StateT, evalState, evalStateT, get, gets, +import Control.Monad.State.Strict (State, StateT, evalState, evalStateT, get, gets, lift, modify, put) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 213756330..20f94c185 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -38,8 +38,8 @@ FictionBook is an XML-based e-book format. For more information see: module Text.Pandoc.Writers.FB2 (writeFB2) where import Control.Monad.Except (catchError, throwError) -import Control.Monad.State (StateT, evalStateT, get, lift, modify) -import Control.Monad.State (liftM) +import Control.Monad.State.Strict (StateT, evalStateT, get, lift, modify) +import Control.Monad.State.Strict (liftM) import Data.ByteString.Base64 (encode) import qualified Data.ByteString.Char8 as B8 import Data.Char (isAscii, isControl, isSpace, toLower) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 5ee8ab4ce..7de38f49a 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -43,7 +43,7 @@ module Text.Pandoc.Writers.HTML ( writeDZSlides, writeRevealJs ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (ord, toLower) import Data.Text (Text) import qualified Data.Text.Lazy as TL diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 1ad9acd40..7965ebfae 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -33,7 +33,7 @@ Conversion of 'Pandoc' documents to haddock markup. Haddock: -} module Text.Pandoc.Writers.Haddock (writeHaddock) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Default import Data.Text (Text) import Data.List (intersperse, transpose) diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 2884bc532..e564f94fe 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -17,7 +17,7 @@ into InDesign with File -> Place. -} module Text.Pandoc.Writers.ICML (writeICML) where import Control.Monad.Except (catchError) -import Control.Monad.State +import Control.Monad.State.Strict import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix) import qualified Data.Set as Set import Data.Text as Text (breakOnAll, pack) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 80606d510..e0ea9acfe 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -35,7 +35,7 @@ module Text.Pandoc.Writers.LaTeX ( , writeBeamer ) where import Control.Applicative ((<|>)) -import Control.Monad.State +import Control.Monad.State.Strict import Data.Aeson (FromJSON, object, (.=)) import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord, toLower) diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 0fc6afbdc..d96342fb5 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to groff man page format. -} module Text.Pandoc.Writers.Man ( writeMan) where import Control.Monad.Except (throwError) -import Control.Monad.State +import Control.Monad.State.Strict import Data.List (intercalate, intersperse, stripPrefix, sort) import qualified Data.Map as Map import Data.Maybe (fromMaybe) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 3ac677943..4449bb5ce 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -35,7 +35,7 @@ Markdown: -} module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Control.Monad.Reader -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (chr, isPunctuation, isSpace, ord) import Data.Default import qualified Data.HashMap.Strict as H diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index c70e5b786..3825a4e73 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -31,7 +31,7 @@ MediaWiki: -} module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where import Control.Monad.Reader -import Control.Monad.State +import Control.Monad.State.Strict import Data.List (intercalate) import qualified Data.Set as Set import Data.Text (Text, pack) diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index c5c3d9f5b..0999d13db 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -52,7 +52,7 @@ import Text.Pandoc.Pretty import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.ImageSize import Text.Pandoc.Logging -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char ( isLower, isUpper, toUpper ) import Text.TeXMath (writeEqn) import System.FilePath (takeExtension) diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index 85e0b5467..286bd1431 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -42,7 +42,7 @@ However, @\@ tag is used for HTML raw blocks even though it is supported only in Emacs Muse. -} module Text.Pandoc.Writers.Muse (writeMuse) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Text (Text) import Data.List (intersperse, transpose, isInfixOf) import System.FilePath (takeExtension) diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 1da051380..c9a7de642 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to ODT. module Text.Pandoc.Writers.ODT ( writeODT ) where import Codec.Archive.Zip import Control.Monad.Except (catchError) -import Control.Monad.State +import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.List (isPrefixOf) import Data.Maybe (fromMaybe) diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 58295684e..fd9a13f3e 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -33,7 +33,7 @@ Conversion of 'Pandoc' documents to OpenDocument XML. -} module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where import Control.Arrow ((***), (>>>)) -import Control.Monad.State hiding (when) +import Control.Monad.State.Strict hiding (when) import Data.Char (chr) import Data.List (sortBy) import Data.Text (Text) diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index e8f48da00..8524c441d 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -35,7 +35,7 @@ Conversion of 'Pandoc' documents to Emacs Org-Mode. Org-Mode: -} module Text.Pandoc.Writers.Org (writeOrg) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (isAlphaNum, toLower) import Data.Text (Text) import Data.List (intersect, intersperse, isPrefixOf, partition, transpose) diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 59f6553e2..9c0693b0f 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to reStructuredText. reStructuredText: -} module Text.Pandoc.Writers.RST ( writeRST ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (isSpace, toLower) import Data.List (isPrefixOf, stripPrefix) import Data.Maybe (fromMaybe) diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 387e55290..fd489786d 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -32,7 +32,7 @@ Conversion of 'Pandoc' format into Texinfo. -} module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Control.Monad.Except (throwError) -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (chr, ord) import Data.List (maximumBy, transpose) import Data.Ord (comparing) diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 091a5baca..432c055b8 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to Textile markup. Textile: -} module Text.Pandoc.Writers.Textile ( writeTextile ) where -import Control.Monad.State +import Control.Monad.State.Strict import Data.Char (isSpace) import Data.List (intercalate) import Data.Text (Text, pack) diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index 5ee239e59..ba51acfce 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -33,7 +33,7 @@ http://zim-wiki.org/manual/Help/Wiki_Syntax.html module Text.Pandoc.Writers.ZimWiki ( writeZimWiki ) where import Control.Monad (zipWithM) -import Control.Monad.State (StateT, evalStateT, gets, modify) +import Control.Monad.State.Strict (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) import Data.List (intercalate, isInfixOf, isPrefixOf, transpose) import qualified Data.Map as Map -- cgit v1.2.3 From abd2e94f5a8c1238eebeef9b6edb91b8031507e7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Jun 2017 11:17:00 +0200 Subject: In producing PDFs, warn if the font is missing some characters. * Added `MissingCharacter` to `LogMessage` in Text.Pandoc.Logging. * Parse the (xe)latex log for missing character warnings and issue the warning. Closes #3742. --- src/Text/Pandoc/Logging.hs | 6 ++++++ src/Text/Pandoc/PDF.hs | 47 +++++++++++++++++++++++++++++++++------------- 2 files changed, 40 insertions(+), 13 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index da8c775f6..b31c33d4e 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -93,6 +93,7 @@ data LogMessage = | NoTitleElement String | NoLangSpecified | CouldNotHighlight String + | MissingCharacter String deriving (Show, Eq, Data, Ord, Typeable, Generic) instance ToJSON LogMessage where @@ -186,6 +187,8 @@ instance ToJSON LogMessage where NoLangSpecified -> [] CouldNotHighlight msg -> ["message" .= Text.pack msg] + MissingCharacter msg -> + ["message" .= Text.pack msg] showPos :: SourcePos -> String showPos pos = sn ++ "line " ++ @@ -262,6 +265,8 @@ showLogMessage msg = "It is recommended that lang be specified for this format." CouldNotHighlight m -> "Could not highlight code block:\n" ++ m + MissingCharacter m -> + "Missing character: " ++ m messageVerbosity:: LogMessage -> Verbosity messageVerbosity msg = @@ -291,3 +296,4 @@ messageVerbosity msg = NoTitleElement{} -> WARNING NoLangSpecified -> INFO CouldNotHighlight{} -> WARNING + MissingCharacter{} -> WARNING diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index cd75d869d..25a94972a 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -40,7 +40,6 @@ import qualified Data.Text as T import Data.Text (Text) import qualified Data.ByteString as BS import Data.ByteString.Lazy (ByteString) -import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BC import Data.Maybe (fromMaybe) @@ -197,7 +196,22 @@ tex2pdf' verbosity args tmpDir program source = do _ -> "" return $ Left $ logmsg <> extramsg (ExitSuccess, Nothing) -> return $ Left "" - (ExitSuccess, Just pdf) -> return $ Right pdf + (ExitSuccess, Just pdf) -> do + missingCharacterWarnings verbosity log' + return $ Right pdf + +missingCharacterWarnings :: Verbosity -> ByteString -> IO () +missingCharacterWarnings verbosity log' = do + let ls = BC.lines log' + let isMissingCharacterWarning = BC.isPrefixOf "Missing character: " + let warnings = [ UTF8.toStringLazy (BC.drop 19 l) + | l <- ls + , isMissingCharacterWarning l + ] + runIO $ do + setVerbosity verbosity + mapM_ (report . MissingCharacter) warnings + return () -- parsing output @@ -255,12 +269,12 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do mapM_ print env'' putStr "\n" putStrLn $ "[makePDF] Contents of " ++ file' ++ ":" - B.readFile file' >>= B.putStr + BL.readFile file' >>= BL.putStr putStr "\n" (exit, out) <- pipeProcess (Just env'') program programArgs BL.empty when (verbosity >= INFO) $ do putStrLn $ "[makePDF] Run #" ++ show runNumber - B.hPutStr stdout out + BL.hPutStr stdout out putStr "\n" if runNumber <= numRuns then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source @@ -271,9 +285,16 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do -- We read PDF as a strict bytestring to make sure that the -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. - then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing - return (exit, out, pdf) + -- Note that some things like Missing character warnings + -- appear in the log but not on stderr, so we prefer the log: + let logFile = replaceExtension file ".log" + logExists <- doesFileExist logFile + log' <- if logExists + then BL.readFile logFile + else return out + return (exit, log', pdf) ms2pdf :: Verbosity -> [String] @@ -294,7 +315,7 @@ ms2pdf verbosity args source = do (exit, out) <- pipeProcess (Just env') "pdfroff" args (BL.fromStrict $ UTF8.fromText source) when (verbosity >= INFO) $ do - B.hPutStr stdout out + BL.hPutStr stdout out putStr "\n" return $ case exit of ExitFailure _ -> Left out @@ -318,12 +339,12 @@ html2pdf verbosity args source = do mapM_ print env' putStr "\n" putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - B.readFile file >>= B.putStr + BL.readFile file >>= BL.putStr putStr "\n" (exit, out) <- pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty removeFile file when (verbosity >= INFO) $ do - B.hPutStr stdout out + BL.hPutStr stdout out putStr "\n" pdfExists <- doesFileExist pdfFile mbPdf <- if pdfExists @@ -331,7 +352,7 @@ html2pdf verbosity args source = do -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. then do - res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + res <- (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile removeFile pdfFile return res else return Nothing @@ -365,11 +386,11 @@ context2pdf verbosity tmpDir source = inDirectory tmpDir $ do mapM_ print env' putStr "\n" putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - B.readFile file >>= B.putStr + BL.readFile file >>= BL.putStr putStr "\n" (exit, out) <- pipeProcess (Just env') "context" programArgs BL.empty when (verbosity >= INFO) $ do - B.hPutStr stdout out + BL.hPutStr stdout out putStr "\n" let pdfFile = replaceExtension file ".pdf" pdfExists <- doesFileExist pdfFile @@ -377,7 +398,7 @@ context2pdf verbosity tmpDir source = inDirectory tmpDir $ do -- We read PDF as a strict bytestring to make sure that the -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. - then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing case (exit, mbPdf) of (ExitFailure _, _) -> do -- cgit v1.2.3 From ec3992b2f0aef0eefb85bdb693adfd0969126f7d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Jun 2017 11:41:40 +0200 Subject: Use revealjs's math plugin for mathjax. This is a thin wrapper around mathjax that makes math look better on revealjs. See https://github.com/hakimel/reveal.js/#mathjax We do this by setting the 'mathjax' boolean variable and using it in the revealjs template. Also, for revealjs and mathjax, we don't assign the usual thing to the 'math' variable, since it's handled by mathjax config. Closes #3743. --- data/templates/default.revealjs | 13 +++++++++++-- src/Text/Pandoc/Writers/HTML.hs | 8 +++++++- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/data/templates/default.revealjs b/data/templates/default.revealjs index ac8d6c444..a3f39885e 100644 --- a/data/templates/default.revealjs +++ b/data/templates/default.revealjs @@ -229,15 +229,24 @@ $endif$ $if(maxScale)$ maxScale: $maxScale$, $endif$ +$if(mathjax)$ + math: { + mathjax: 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js', + config: 'TeX-AMS_HTML-full', + }, +$endif$ // Optional reveal.js plugins dependencies: [ { src: '$revealjs-url$/lib/js/classList.js', condition: function() { return !document.body.classList; } }, { src: '$revealjs-url$/plugin/zoom-js/zoom.js', async: true }, - $if(notes-server)$ +$if(notes-server)$ { src: '$revealjs-url$/socket.io/socker.io.js', async: true }, { src: '$revealjs-url$/plugin/notes-server/client.js', async: true }, - $endif$ +$endif$ +$if(mathjax)$ + { src: '$revealjs-url$/plugin/math/math.js', async: true }, +$endif$ { src: '$revealjs-url$/plugin/notes/notes.js', async: true } ] }); diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 7de38f49a..43c098866 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -253,7 +253,9 @@ pandocToHtml opts (Pandoc meta blocks) = do H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ mempty - MathJax url -> + MathJax url + | slideVariant /= RevealJsSlides -> + -- mathjax is handled via a special plugin in revealjs H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ case slideVariant of @@ -285,6 +287,10 @@ pandocToHtml opts (Pandoc meta blocks) = do (if stMath st then defField "math" (renderHtml' math) else id) $ + defField "mathjax" + (case writerHTMLMathMethod opts of + MathJax _ -> True + _ -> False) $ defField "quotes" (stQuotes st) $ maybe id (defField "toc" . renderHtml') toc $ defField "author-meta" authsMeta $ -- cgit v1.2.3 From a91b9b2a1d768cd8a4dfff3c7e72a3cc96153d83 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Mon, 19 Jun 2017 11:46:02 +0300 Subject: Add Muse reader (#3620) --- pandoc.cabal | 2 + src/Text/Pandoc/Readers.hs | 3 + src/Text/Pandoc/Readers/Muse.hs | 577 ++++++++++++++++++++++++++++++++++++++++ test/Tests/Readers/Muse.hs | 264 ++++++++++++++++++ test/test-pandoc.hs | 2 + trypandoc/index.html | 1 + 6 files changed, 849 insertions(+) create mode 100644 src/Text/Pandoc/Readers/Muse.hs create mode 100644 test/Tests/Readers/Muse.hs (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index c1d76785c..a9e561fa6 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -380,6 +380,7 @@ Library Text.Pandoc.Readers.Docx, Text.Pandoc.Readers.Odt, Text.Pandoc.Readers.EPUB, + Text.Pandoc.Readers.Muse, Text.Pandoc.Writers, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, @@ -559,6 +560,7 @@ Test-Suite test-pandoc Tests.Readers.Odt Tests.Readers.Txt2Tags Tests.Readers.EPUB + Tests.Readers.Muse Tests.Writers.Native Tests.Writers.ConTeXt Tests.Writers.Docbook diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 004fefe25..4c95d5d28 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -59,6 +59,7 @@ module Text.Pandoc.Readers , readTWiki , readTxt2Tags , readEPUB + , readMuse -- * Miscellaneous , getReader , getDefaultExtensions @@ -81,6 +82,7 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki +import Text.Pandoc.Readers.Muse import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Odt import Text.Pandoc.Readers.OPML @@ -125,6 +127,7 @@ readers = [ ("native" , TextReader readNative) ,("odt" , ByteStringReader readOdt) ,("t2t" , TextReader readTxt2Tags) ,("epub" , ByteStringReader readEPUB) + ,("muse" , TextReader readMuse) ] -- | Retrieve reader based on formatSpec (format+extensions). diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs new file mode 100644 index 000000000..bc9da26cb --- /dev/null +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -0,0 +1,577 @@ +{- + Copyright (C) 2017 Alexander Krotov + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Muse + Copyright : Copyright (C) 2017 Alexander Krotov + License : GNU GPL, version 2 or above + + Maintainer : Alexander Krotov + Stability : alpha + Portability : portable + +Conversion of Muse text to 'Pandoc' document. +-} +{- +TODO: +- {{{ }}} syntax for +- Page breaks (five "*") +- Headings with anchors (make it round trip with Muse writer) +- and ">" +- Definition lists +- Org tables +- table.el tables +- Images with attributes (floating and width) +- Anchors +- Citations and +- environment +- tag +-} +module Text.Pandoc.Readers.Muse (readMuse) where + +import Control.Monad +import Control.Monad.Except (throwError) +import qualified Data.Map as M +import Data.Text (Text, unpack) +import Data.List (stripPrefix) +import Data.Maybe (fromMaybe) +import Text.HTML.TagSoup +import Text.Pandoc.Builder (Blocks, Inlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (macro, nested) +import Text.Pandoc.Readers.HTML (htmlTag) +import Text.Pandoc.XML (fromEntities) +import System.FilePath (takeExtension) + +-- | Read Muse from an input string and return a Pandoc document. +readMuse :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readMuse opts s = do + res <- readWithM parseMuse def{ stateOptions = opts } (unpack s) + case res of + Left e -> throwError e + Right d -> return d + +type MuseParser = ParserT String ParserState + +-- +-- main parser +-- + +parseMuse :: PandocMonad m => MuseParser m Pandoc +parseMuse = do + many directive + blocks <- parseBlocks + st <- getState + let doc = runF (do Pandoc _ bs <- B.doc <$> blocks + meta <- stateMeta' st + return $ Pandoc meta bs) st + reportLogMessages + return doc + +parseBlocks :: PandocMonad m => MuseParser m (F Blocks) +parseBlocks = do + res <- mconcat <$> many block + spaces + eof + return res + +-- +-- utility functions +-- + +nested :: PandocMonad m => MuseParser m a -> MuseParser m a +nested p = do + nestlevel <- stateMaxNestingLevel <$> getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + +htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String) +htmlElement tag = try $ do + (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + content <- manyTill anyChar (endtag <|> endofinput) + return (htmlAttrToPandoc attr, trim content) + where + endtag = void $ htmlTag (~== TagClose tag) + endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof + trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse + +htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc attrs = (ident, classes, keyvals) + where + ident = fromMaybe "" $ lookup "id" attrs + classes = maybe [] words $ lookup "class" attrs + keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + +parseHtmlContentWithAttrs :: PandocMonad m + => String -> MuseParser m a -> MuseParser m (Attr, [a]) +parseHtmlContentWithAttrs tag parser = do + (attr, content) <- htmlElement tag + parsedContent <- try $ parseContent content + return (attr, parsedContent) + where + parseContent = parseFromString $ nested $ manyTill parser endOfContent + endOfContent = try $ skipMany blankline >> skipSpaces >> eof + +parseHtmlContent :: PandocMonad m => String -> MuseParser m a -> MuseParser m [a] +parseHtmlContent tag p = liftM snd (parseHtmlContentWithAttrs tag p) + +-- +-- directive parsers +-- + +parseDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseDirective = do + char '#' + key <- many letter + space + spaces + raw <- many $ noneOf "\n" + newline + value <- parseFromString (trimInlinesF . mconcat <$> many inline) raw + return (key, value) + +directive :: PandocMonad m => MuseParser m () +directive = do + (key, value) <- parseDirective + updateState $ \st -> st { stateMeta' = B.setMeta key <$> value <*> stateMeta' st } + +-- +-- block parsers +-- + +block :: PandocMonad m => MuseParser m (F Blocks) +block = do + pos <- getPosition + res <- mempty <$ skipMany1 blankline + <|> blockElements + <|> para + skipMany blankline + report $ ParsingTrace (take 60 $ show $ B.toList $ runF res defaultParserState) pos + return res + +blockElements :: PandocMonad m => MuseParser m (F Blocks) +blockElements = choice [ comment + , separator + , header + , exampleTag + , literal + , centerTag + , rightTag + , quoteTag + , bulletList + , orderedList + , table + , commentTag + , noteBlock + ] + +comment :: PandocMonad m => MuseParser m (F Blocks) +comment = try $ do + char ';' + space + many $ noneOf "\n" + void newline <|> eof + return mempty + +separator :: PandocMonad m => MuseParser m (F Blocks) +separator = try $ do + string "---" + newline + return $ return B.horizontalRule + +header :: PandocMonad m => MuseParser m (F Blocks) +header = try $ do + level <- liftM length $ many1 $ char '*' + guard $ level <= 5 + skipSpaces + content <- trimInlinesF . mconcat <$> manyTill inline newline + attr <- registerHeader ("", [], []) (runF content defaultParserState) + return $ B.headerWith attr level <$> content + +exampleTag :: PandocMonad m => MuseParser m (F Blocks) +exampleTag = liftM (return . uncurry B.codeBlockWith) $ htmlElement "example" + +literal :: PandocMonad m => MuseParser m (F Blocks) +literal = liftM (return . rawBlock) $ htmlElement "literal" + where + format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs + rawBlock (attrs, content) = B.rawBlock (format attrs) content + +blockTag :: PandocMonad m + => (Blocks -> Blocks) + -> String + -> MuseParser m (F Blocks) +blockTag f s = do + res <- parseHtmlContent s block + return $ f <$> mconcat res + +--
tag is ignored +centerTag :: PandocMonad m => MuseParser m (F Blocks) +centerTag = blockTag id "center" + +-- tag is ignored +rightTag :: PandocMonad m => MuseParser m (F Blocks) +rightTag = blockTag id "right" + +quoteTag :: PandocMonad m => MuseParser m (F Blocks) +quoteTag = blockTag B.blockQuote "quote" + +commentTag :: PandocMonad m => MuseParser m (F Blocks) +commentTag = parseHtmlContent "comment" block >> return mempty + +para :: PandocMonad m => MuseParser m (F Blocks) +para = do + res <- trimInlinesF . mconcat <$> many1Till inline endOfParaElement + return $ B.para <$> res + where + endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement + endOfInput = try $ skipMany blankline >> skipSpaces >> eof + endOfPara = try $ blankline >> skipMany1 blankline + newBlockElement = try $ blankline >> void blockElements + +noteMarker :: PandocMonad m => MuseParser m String +noteMarker = try $ do + char '[' + many1Till digit $ char ']' + +noteBlock :: PandocMonad m => MuseParser m (F Blocks) +noteBlock = try $ do + pos <- getPosition + ref <- noteMarker <* skipSpaces + content <- mconcat <$> blocksTillNote + oldnotes <- stateNotes' <$> getState + case M.lookup ref oldnotes of + Just _ -> logMessage $ DuplicateNoteReference ref pos + Nothing -> return () + updateState $ \s -> s{ stateNotes' = M.insert ref (pos, content) oldnotes } + return mempty + where + blocksTillNote = + many1Till block (eof <|> () <$ lookAhead noteMarker) + +-- +-- lists +-- + +listLine :: PandocMonad m => Int -> MuseParser m String +listLine markerLength = try $ do + notFollowedBy blankline + indentWith markerLength + anyLineNewline + +withListContext :: PandocMonad m => MuseParser m a -> MuseParser m a +withListContext p = do + state <- getState + let oldContext = stateParserContext state + setState $ state { stateParserContext = ListItemState } + parsed <- p + updateState (\st -> st {stateParserContext = oldContext}) + return parsed + +listContinuation :: PandocMonad m => Int -> MuseParser m String +listContinuation markerLength = try $ do + result <- many1 $ listLine markerLength + blanks <- many1 blankline + return $ concat result ++ blanks + +listStart :: PandocMonad m => MuseParser m Int -> MuseParser m Int +listStart marker = try $ do + preWhitespace <- length <$> many spaceChar + st <- stateParserContext <$> getState + getPosition >>= \pos -> guard (st == ListItemState || sourceColumn pos /= 1) + markerLength <- marker + postWhitespace <- length <$> many1 spaceChar + return $ preWhitespace + markerLength + postWhitespace + +listItem :: PandocMonad m => MuseParser m Int -> MuseParser m (F Blocks) +listItem start = try $ do + markerLength <- start + firstLine <- anyLineNewline + blank <- option "" ("\n" <$ blankline) + restLines <- many $ listLine markerLength + let first = firstLine ++ blank ++ concat restLines + rest <- many $ listContinuation markerLength + parseFromString (withListContext parseBlocks) $ concat (first:rest) ++ "\n" + +bulletListItems :: PandocMonad m => MuseParser m (F [Blocks]) +bulletListItems = sequence <$> many1 (listItem bulletListStart) + +bulletListStart :: PandocMonad m => MuseParser m Int +bulletListStart = listStart (char '-' >> return 1) + +bulletList :: PandocMonad m => MuseParser m (F Blocks) +bulletList = do + listItems <- bulletListItems + return $ B.bulletList <$> listItems + +orderedListStart :: PandocMonad m + => ListNumberStyle + -> ListNumberDelim + -> MuseParser m Int +orderedListStart style delim = listStart (snd <$> withHorizDisplacement (orderedListMarker style delim)) + +orderedList :: PandocMonad m => MuseParser m (F Blocks) +orderedList = try $ do + p@(_, style, delim) <- lookAhead (many spaceChar *> anyOrderedListMarker <* spaceChar) + guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman] + guard $ delim == Period + items <- sequence <$> many1 (listItem $ orderedListStart style delim) + return $ B.orderedListWith p <$> items + +-- +-- tables +-- + +data MuseTable = MuseTable + { museTableCaption :: Inlines + , museTableHeaders :: [[Blocks]] + , museTableRows :: [[Blocks]] + , museTableFooters :: [[Blocks]] + } + +data MuseTableElement = MuseHeaderRow (F [Blocks]) + | MuseBodyRow (F [Blocks]) + | MuseFooterRow (F [Blocks]) + | MuseCaption (F Inlines) + +museToPandocTable :: MuseTable -> Blocks +museToPandocTable (MuseTable caption headers body footers) = + B.table caption attrs headRow rows + where ncol = maximum (0 : map length (headers ++ body ++ footers)) + attrs = replicate ncol (AlignDefault, 0.0) + headRow = if null headers then [] else head headers + rows = (if null headers then [] else tail headers) ++ body ++ footers + +museAppendElement :: MuseTable + -> MuseTableElement + -> F MuseTable +museAppendElement tbl element = + case element of + MuseHeaderRow row -> do + row' <- row + return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] } + MuseBodyRow row -> do + row' <- row + return tbl{ museTableRows = museTableRows tbl ++ [row'] } + MuseFooterRow row-> do + row' <- row + return tbl{ museTableFooters = museTableFooters tbl ++ [row'] } + MuseCaption inlines -> do + inlines' <- inlines + return tbl{ museTableCaption = inlines' } + +tableCell :: PandocMonad m => MuseParser m (F Blocks) +tableCell = try $ do + content <- trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd) + return $ B.plain <$> content + where cellEnd = try $ void (many1 spaceChar >> char '|') <|> void newline <|> eof + +tableElements :: PandocMonad m => MuseParser m [MuseTableElement] +tableElements = tableParseElement `sepEndBy1` (void newline <|> eof) + +elementsToTable :: [MuseTableElement] -> F MuseTable +elementsToTable = foldM museAppendElement emptyTable + where emptyTable = MuseTable mempty mempty mempty mempty + +table :: PandocMonad m => MuseParser m (F Blocks) +table = try $ do + rows <- tableElements + let tbl = elementsToTable rows + let pandocTbl = museToPandocTable <$> tbl :: F Blocks + return pandocTbl + +tableParseElement :: PandocMonad m => MuseParser m MuseTableElement +tableParseElement = tableParseHeader + <|> tableParseBody + <|> tableParseFooter + <|> tableParseCaption + +tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks]) +tableParseRow n = try $ do + fields <- tableCell `sepBy2` fieldSep + return $ sequence fields + where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p) + fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline)) + +tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement +tableParseHeader = MuseHeaderRow <$> tableParseRow 2 + +tableParseBody :: PandocMonad m => MuseParser m MuseTableElement +tableParseBody = MuseBodyRow <$> tableParseRow 1 + +tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement +tableParseFooter = MuseFooterRow <$> tableParseRow 3 + +tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement +tableParseCaption = try $ do + many spaceChar + string "|+" + contents <- trimInlinesF . mconcat <$> many1Till inline (lookAhead $ string "+|") + string "+|" + return $ MuseCaption contents + +-- +-- inline parsers +-- + +inline :: PandocMonad m => MuseParser m (F Inlines) +inline = choice [ whitespace + , br + , footnote + , strong + , strongTag + , emph + , emphTag + , superscriptTag + , subscriptTag + , strikeoutTag + , link + , code + , codeTag + , str + , symbol + ] "inline" + +footnote :: PandocMonad m => MuseParser m (F Inlines) +footnote = try $ do + ref <- noteMarker + return $ do + notes <- asksF stateNotes' + case M.lookup ref notes of + Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Just (_pos, contents) -> do + st <- askF + let contents' = runF contents st { stateNotes' = M.empty } + return $ B.note contents' + +whitespace :: PandocMonad m => MuseParser m (F Inlines) +whitespace = liftM return (lb <|> regsp) + where lb = try $ skipMany spaceChar >> linebreak >> return B.space + regsp = try $ skipMany1 spaceChar >> return B.space + +br :: PandocMonad m => MuseParser m (F Inlines) +br = try $ do + string "
" + return $ return B.linebreak + +linebreak :: PandocMonad m => MuseParser m (F Inlines) +linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) + where lastNewline = do + eof + return $ return mempty + innerNewline = return $ return B.space + +emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines) +emphasisBetween c = try $ enclosedInlines c c + +enclosedInlines :: (PandocMonad m, Show a, Show b) + => MuseParser m a + -> MuseParser m b + -> MuseParser m (F Inlines) +enclosedInlines start end = try $ + trimInlinesF . mconcat <$> enclosed start end inline + +verbatimBetween :: PandocMonad m + => Char + -> MuseParser m String +verbatimBetween c = try $ do + char c + many1Till anyChar $ char c + +inlineTag :: PandocMonad m + => (Inlines -> Inlines) + -> String + -> MuseParser m (F Inlines) +inlineTag f s = do + res <- parseHtmlContent s inline + return $ f <$> mconcat res + +strongTag :: PandocMonad m => MuseParser m (F Inlines) +strongTag = inlineTag B.strong "strong" + +strong :: PandocMonad m => MuseParser m (F Inlines) +strong = fmap B.strong <$> emphasisBetween (string "**") + +emph :: PandocMonad m => MuseParser m (F Inlines) +emph = fmap B.emph <$> emphasisBetween (char '*') + +emphTag :: PandocMonad m => MuseParser m (F Inlines) +emphTag = inlineTag B.emph "em" + +superscriptTag :: PandocMonad m => MuseParser m (F Inlines) +superscriptTag = inlineTag B.superscript "sup" + +subscriptTag :: PandocMonad m => MuseParser m (F Inlines) +subscriptTag = inlineTag B.subscript "sub" + +strikeoutTag :: PandocMonad m => MuseParser m (F Inlines) +strikeoutTag = inlineTag B.strikeout "del" + +code :: PandocMonad m => MuseParser m (F Inlines) +code = return . B.code <$> verbatimBetween '=' + +codeTag :: PandocMonad m => MuseParser m (F Inlines) +codeTag = do + (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar + return $ return $ B.codeWith attrs $ fromEntities content + +str :: PandocMonad m => MuseParser m (F Inlines) +str = liftM (return . B.str) (many1 alphaNum <|> count 1 characterReference) + +symbol :: PandocMonad m => MuseParser m (F Inlines) +symbol = liftM (return . B.str) $ count 1 nonspaceChar + +link :: PandocMonad m => MuseParser m (F Inlines) +link = try $ do + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (url, title, content) <- linkText + setState $ st{ stateAllowLinks = True } + return $ case stripPrefix "URL:" url of + Nothing -> if isImageUrl url + then B.image url title <$> fromMaybe (return mempty) content + else B.link url title <$> fromMaybe (return $ B.str url) content + Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content + where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el + imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] + isImageUrl = (`elem` imageExtensions) . takeExtension + +linkContent :: PandocMonad m => MuseParser m (F Inlines) +linkContent = do + char '[' + res <- many1Till anyChar $ char ']' + parseFromString (mconcat <$> many1 inline) res + +linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines)) +linkText = do + string "[[" + url <- many1Till anyChar $ char ']' + content <- optionMaybe linkContent + char ']' + return (url, "", content) diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs new file mode 100644 index 000000000..5a896da55 --- /dev/null +++ b/test/Tests/Readers/Muse.hs @@ -0,0 +1,264 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Readers.Muse (tests) where + +import Data.List (intersperse) +import Data.Text (Text) +import qualified Data.Text as T +import Test.Tasty +import Tests.Helpers +import Text.Pandoc +import Text.Pandoc.Arbitrary () +import Text.Pandoc.Builder +import Text.Pandoc.Class + +muse :: Text -> Pandoc +muse = purely $ \s -> do + putCommonState + def { stInputFiles = Just ["in"] + , stOutputFile = Just "out" + } + readMuse def s + +infix 4 =: +(=:) :: ToString c + => String -> (Text, c) -> TestTree +(=:) = test muse + +spcSep :: [Inlines] -> Inlines +spcSep = mconcat . intersperse space + +tests :: [TestTree] +tests = + [ testGroup "Inlines" + [ "Plain String" =: + "Hello, World" =?> + para (spcSep [ "Hello,", "World" ]) + + , "Emphasis" =: "*Foo bar*" =?> para (emph . spcSep $ ["Foo", "bar"]) + + , "Emphasis tag" =: "Foo bar" =?> para (emph . spcSep $ ["Foo", "bar"]) + + , "Strong" =: + "**Cider**" =?> + para (strong "Cider") + + , "Strong tag" =: "Strong" =?> para (strong "Strong") + + , "Strong Emphasis" =: + "***strength***" =?> + para (strong . emph $ "strength") + + , "Superscript tag" =: "Superscript" =?> para (superscript "Superscript") + + , "Subscript tag" =: "Subscript" =?> para (subscript "Subscript") + + , "Strikeout tag" =: "Strikeout" =?> para (strikeout "Strikeout") + + , "Linebreak" =: "Line
break" =?> para ("Line" <> linebreak <> "break") + + , "Code" =: "=foo(bar)=" =?> para (code "foo(bar)") + + , "Code tag" =: "foo(bar)" =?> para (code "foo(bar)") + + , testGroup "Links" + [ "Link without description" =: + "[[https://amusewiki.org/]]" =?> + para (link "https://amusewiki.org/" "" (str "https://amusewiki.org/")) + , "Link with description" =: + "[[https://amusewiki.org/][A Muse Wiki]]" =?> + para (link "https://amusewiki.org/" "" (text "A Muse Wiki")) + , "Image" =: + "[[image.jpg]]" =?> + para (image "image.jpg" "" mempty) + , "Image with description" =: + "[[image.jpg][Image]]" =?> + para (image "image.jpg" "" (text "Image")) + , "Image link" =: + "[[URL:image.jpg]]" =?> + para (link "image.jpg" "" (str "image.jpg")) + , "Image link with description" =: + "[[URL:image.jpg][Image]]" =?> + para (link "image.jpg" "" (text "Image")) + ] + ] + + , testGroup "Blocks" + [ "Quote" =: "Hello, world" =?> blockQuote (para $ text "Hello, world") + , "Center" =: "
Hello, world
" =?> para (text "Hello, world") + , "Right" =: "Hello, world" =?> para (text "Hello, world") + , testGroup "Comments" + [ "Comment tag" =: "\nThis is a comment\n" =?> (mempty::Blocks) + , "Line comment" =: "; Comment" =?> (mempty::Blocks) + , "Not a comment (does not start with a semicolon)" =: " ; Not a comment" =?> para (text "; Not a comment") + , "Not a comment (has no space after semicolon)" =: ";Not a comment" =?> para (text ";Not a comment") + ] + , testGroup "Headers" + [ "Part" =: + "* First level\n" =?> + header 1 "First level" + , "Chapter" =: + "** Second level\n" =?> + header 2 "Second level" + , "Section" =: + "*** Third level\n" =?> + header 3 "Third level" + , "Subsection" =: + "**** Fourth level\n" =?> + header 4 "Fourth level" + , "Subsubsection" =: + "***** Fifth level\n" =?> + header 5 "Fifth level" + ] + , testGroup "Footnotes" + [ "Simple footnote" =: + T.unlines [ "Here is a footnote[1]." + , "" + , "[1] Footnote contents" + ] =?> + para (text "Here is a footnote" <> + note (para "Footnote contents") <> + str ".") + , "Recursive footnote" =: + T.unlines [ "Start recursion here[1]" + , "" + , "[1] Recursion continues here[1]" + ] =?> + para (text "Start recursion here" <> + note (para "Recursion continues here[1]")) + ] + ] + , testGroup "Tables" + [ "Two cell table" =: + "One | Two" =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)] + [] + [[plain "One", plain "Two"]] + , "Table with multiple words" =: + "One two | three four" =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)] + [] + [[plain "One two", plain "three four"]] + , "Not a table" =: + "One| Two" =?> + para (text "One| Two") + , "Not a table again" =: + "One |Two" =?> + para (text "One |Two") + , "Two line table" =: + T.unlines + [ "One | Two" + , "Three | Four" + ] =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)] + [] + [[plain "One", plain "Two"], + [plain "Three", plain "Four"]] + , "Table with one header" =: + T.unlines + [ "First || Second" + , "Third | Fourth" + ] =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)] + [plain "First", plain "Second"] + [[plain "Third", plain "Fourth"]] + , "Table with two headers" =: + T.unlines + [ "First || header" + , "Second || header" + , "Foo | bar" + ] =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)] + [plain "First", plain "header"] + [[plain "Second", plain "header"], + [plain "Foo", plain "bar"]] + , "Header and footer reordering" =: + T.unlines + [ "Foo ||| bar" + , "Baz || foo" + , "Bar | baz" + ] =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)] + [plain "Baz", plain "foo"] + [[plain "Bar", plain "baz"], + [plain "Foo", plain "bar"]] + , "Table with caption" =: + T.unlines + [ "Foo || bar || baz" + , "First | row | here" + , "Second | row | there" + , "|+ Table caption +|" + ] =?> + table (text "Table caption") (replicate 3 (AlignDefault, 0.0)) + [plain "Foo", plain "bar", plain "baz"] + [[plain "First", plain "row", plain "here"], + [plain "Second", plain "row", plain "there"]] + , "Caption without table" =: + "|+ Foo bar baz +|" =?> + table (text "Foo bar baz") [] [] [] + , "Table indented with space" =: + T.unlines + [ " Foo | bar" + , " Baz | foo" + , " Bar | baz" + ] =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)] + [] + [[plain "Foo", plain "bar"], + [plain "Baz", plain "foo"], + [plain "Bar", plain "baz"]] + , "Empty cells" =: + T.unlines + [ " | Foo" + , " |" + , " bar |" + , " || baz" + ] =?> + table mempty [(AlignDefault, 0.0), (AlignDefault, 0.0)] + [plain "", plain "baz"] + [[plain "", plain "Foo"], + [plain "", plain ""], + [plain "bar", plain ""]] + ] + , testGroup "Lists" + [ "Bullet list" =: + T.unlines + [ " - Item1" + , "" + , " - Item2" + ] =?> + bulletList [ para "Item1" + , para "Item2" + ] + , "Ordered list" =: + T.unlines + [ " 1. Item1" + , "" + , " 2. Item2" + ] =?> + orderedListWith (1, Decimal, Period) [ para "Item1" + , para "Item2" + ] + , "Nested list" =: + T.unlines + [ " - Item1" + , " - Item2" + , " - Item3" + , " - Item4" + , " 1. Nested" + , " 2. Ordered" + , " 3. List" + ] =?> + bulletList [ mconcat [ para "Item1" + , bulletList [ para "Item2" + , para "Item3" + ] + ] + , mconcat [ para "Item4" + , orderedListWith (1, Decimal, Period) [ para "Nested" + , para "Ordered" + , para "List" + ] + ] + ] + ] + ] diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 97ad3183f..caa2b7c65 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -16,6 +16,7 @@ import qualified Tests.Readers.Odt import qualified Tests.Readers.Org import qualified Tests.Readers.RST import qualified Tests.Readers.Txt2Tags +import qualified Tests.Readers.Muse import qualified Tests.Shared import qualified Tests.Writers.AsciiDoc import qualified Tests.Writers.ConTeXt @@ -61,6 +62,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests , testGroup "Odt" Tests.Readers.Odt.tests , testGroup "Txt2Tags" Tests.Readers.Txt2Tags.tests , testGroup "EPUB" Tests.Readers.EPUB.tests + , testGroup "Muse" Tests.Readers.Muse.tests ] , testGroup "Lua filters" Tests.Lua.tests ] diff --git a/trypandoc/index.html b/trypandoc/index.html index 26a373112..9b84e14b7 100644 --- a/trypandoc/index.html +++ b/trypandoc/index.html @@ -88,6 +88,7 @@ $(document).ready(function() { + -- cgit v1.2.3 From b3041de2fc05b26421c5be4df374ec84aafa11ee Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Jun 2017 11:10:29 +0200 Subject: Text.Pandoc.Writers.Math: export defaultMathJaxURL, defaultKaTeXURL. This will ensure that we only need to update these in one place. (Currently, for example, the mathjax URL is used in both App and trypandoc.) Closes #3685. --- src/Text/Pandoc/App.hs | 10 ++++++---- src/Text/Pandoc/Writers/Math.hs | 7 +++++++ trypandoc/trypandoc.hs | 3 ++- 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 19066e8b7..033614752 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -79,6 +79,7 @@ import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getLog, setResourcePath, withMediaBag) import Text.Pandoc.Highlighting (highlightingStyles) import Text.Pandoc.Lua (runLuaFilter) +import Text.Pandoc.Writers.Math (defaultMathJaxURL, defaultKaTeXURL) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) @@ -133,11 +134,11 @@ convertWithOpts opts = do Nothing -> return Nothing Just fp -> Just <$> UTF8.readFile fp - let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css" let mathMethod = case (optKaTeXJS opts, optKaTeXStylesheet opts) of (Nothing, _) -> optHTMLMathMethod opts - (Just js, ss) -> KaTeX js (fromMaybe csscdn ss) + (Just js, ss) -> KaTeX js (fromMaybe + (defaultKaTeXURL ++ "katex.min.css") ss) -- --bibliography implies -F pandoc-citeproc for backwards compatibility: @@ -1355,7 +1356,8 @@ options = , Option "" ["mathjax"] (OptArg (\arg opt -> do - let url' = fromMaybe "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML-full" arg + let url' = fromMaybe (defaultMathJaxURL ++ + "MathJax.js?config=TeX-AMS_CHTML-full") arg return opt { optHTMLMathMethod = MathJax url'}) "URL") "" -- "Use MathJax for HTML math" @@ -1364,7 +1366,7 @@ options = (\arg opt -> return opt { optKaTeXJS = - arg <|> Just "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.js"}) + arg <|> Just (defaultKaTeXURL ++ "katex.min.js")}) "URL") "" -- Use KaTeX for HTML Math diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs index 104d3c20b..58252d60f 100644 --- a/src/Text/Pandoc/Writers/Math.hs +++ b/src/Text/Pandoc/Writers/Math.hs @@ -1,6 +1,8 @@ module Text.Pandoc.Writers.Math ( texMathToInlines , convertMath + , defaultMathJaxURL + , defaultKaTeXURL ) where @@ -47,3 +49,8 @@ convertMath writer mt str = do DisplayMath -> DisplayBlock InlineMath -> DisplayInline +defaultMathJaxURL :: String +defaultMathJaxURL = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/" + +defaultKaTeXURL :: String +defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/" diff --git a/trypandoc/trypandoc.hs b/trypandoc/trypandoc.hs index d8652079a..b8b821883 100644 --- a/trypandoc/trypandoc.hs +++ b/trypandoc/trypandoc.hs @@ -56,7 +56,8 @@ checkLength t = writerOpts :: WriterOptions writerOpts = def { writerReferenceLinks = True, writerEmailObfuscation = NoObfuscation, - writerHTMLMathMethod = MathJax "http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML", + writerHTMLMathMethod = MathJax (defaultMathJaxURL ++ + "MathJax.js?config=TeX-AMS_CHTML-full"), writerHighlightStyle = Just pygments } readerOpts :: ReaderOptions -- cgit v1.2.3 From 564c77964ddbbdc5541086726b9109091119e140 Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Mon, 19 Jun 2017 16:15:12 -0400 Subject: Added Vimwiki reader (#3705). * New module Text.Pandoc.Readers.Vimwiki, exporting readVimwiki [API change]. * New input format `vimwiki`. * New data file, `data/vimwiki.css`, for displaying the HTML produced by this reader and pandoc's HTML writer in the style of vimwiki's own HTML export. --- MANUAL.txt | 4 +- data/vimwiki.css | 82 +++++ pandoc.cabal | 2 + src/Text/Pandoc/Readers.hs | 3 + src/Text/Pandoc/Readers/Vimwiki.hs | 655 +++++++++++++++++++++++++++++++++++++ test/Tests/Old.hs | 4 + test/vimwiki-reader.native | 305 +++++++++++++++++ test/vimwiki-reader.wiki | 414 +++++++++++++++++++++++ 8 files changed, 1467 insertions(+), 2 deletions(-) create mode 100644 data/vimwiki.css create mode 100644 src/Text/Pandoc/Readers/Vimwiki.hs create mode 100644 test/vimwiki-reader.native create mode 100644 test/vimwiki-reader.wiki (limited to 'src/Text') diff --git a/MANUAL.txt b/MANUAL.txt index 772871bd9..275b47c72 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -16,8 +16,8 @@ another, and a command-line tool that uses this library. It can read Markdown], [MultiMarkdown], and (subsets of) [Textile], [reStructuredText], [HTML], [LaTeX], [MediaWiki markup], [TWiki markup], [Haddock markup], [OPML], [Emacs Org mode], [DocBook], [Muse], [txt2tags], -[EPUB], [ODT] and [Word docx]; and it can write plain text, [Markdown], -[CommonMark], [PHP Markdown Extra], [GitHub-Flavored Markdown], +[EPUB], [ODT], [Vimwiki] and [Word docx]; and it can write plain text, +[Markdown], [CommonMark], [PHP Markdown Extra], [GitHub-Flavored Markdown], [MultiMarkdown], [reStructuredText], [XHTML], [HTML5], [LaTeX] \(including [`beamer`] slide shows\), [ConTeXt], [RTF], [OPML], [DocBook], [OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki diff --git a/data/vimwiki.css b/data/vimwiki.css new file mode 100644 index 000000000..0a8841a32 --- /dev/null +++ b/data/vimwiki.css @@ -0,0 +1,82 @@ +pre{ + font-size: 1.5em +} + +img {vertical-align: middle} + +body {font-family: Tahoma, Geneva, sans-serif; margin: 1em 2em 1em 2em; font-size: 120%; line-height: 130%;} +h1, h2, h3, h4, h5, h6 {font-family: Trebuchet MS, Helvetica, sans-serif; font-weight: bold; line-height:100%; margin-top: 1.5em; margin-bottom: 0.5em;} +h1 {font-size: 2.6em; color: #000000;} +h2 {font-size: 2.2em; color: #404040;} +h3 {font-size: 1.8em; color: #707070;} +h4 {font-size: 1.4em; color: #909090;} +h5 {font-size: 1.3em; color: #989898;} +h6 {font-size: 1.2em; color: #9c9c9c;} +p, pre, blockquote, table, ul, ol, dl {margin-top: 1em; margin-bottom: 1em;} +ul ul, ul ol, ol ol, ol ul {margin-top: 0.5em; margin-bottom: 0.5em;} +li {margin: 0.3em auto;} +ul {margin-left: 2em; padding-left: 0.5em;} +dt {font-weight: bold;} +img {border: none;} +pre {border-left: 1px solid #ccc; margin-left: 2em; padding-left: 0.5em;} +blockquote {padding: 0.4em; background-color: #f6f5eb;} +th, td {border: 1px solid #ccc; padding: 0.3em;} +th {background-color: #f0f0f0;} +hr {border: none; border-top: 1px solid #ccc; width: 100%;} +del {text-decoration: line-through; color: #777777;} +.toc li {list-style-type: none;} +.todo {font-weight: bold; background-color: #f0ece8; color: #a03020;} +.justleft {text-align: left;} +.justright {text-align: right;} +.justcenter {text-align: center;} +.center {margin-left: auto; margin-right: auto;} +div.center > table {margin-left: auto; margin-right: auto;} +.tag {background-color: #eeeeee; font-family: monospace; padding: 2px;} + +/* classes for items of todo lists */ +.done0 { + /* list-style: none; */ + background-image: url(); + background-repeat: no-repeat; + background-position: 0 .2em; + padding-left: 1.5em; +} +.done1 { + background-image: url(); + background-repeat: no-repeat; + background-position: 0 .15em; + padding-left: 1.5em; +} +.done2 { + background-image: url(); + background-repeat: no-repeat; + background-position: 0 .15em; + padding-left: 1.5em; +} +.done3 { + background-image: url(); + background-repeat: no-repeat; + background-position: 0 .15em; + padding-left: 1.5em; +} +.done4 { + background-image: url(); + background-repeat: no-repeat; + background-position: 0 .15em; + padding-left: 1.5em; +} + +*:not(pre) > code { + font-family: Monaco,"Courier New","DejaVu Sans Mono","Bitstream Vera Sans Mono",monospace; + -webkit-border-radius: 1px; + -moz-border-radius: 1px; + border-radius: 1px; + -moz-background-clip: padding; + -webkit-background-clip: padding-box; + background-clip: padding-box; + padding: 0px 3px; + display: inline-block; + color: #52595d; + border: 1px solid #ccc; + background-color: #f9f9f9; +} diff --git a/pandoc.cabal b/pandoc.cabal index 9dbd52a07..ff61c6a2c 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -162,6 +162,7 @@ Extra-Source-Files: test/markdown-citations.txt test/textile-reader.textile test/mediawiki-reader.wiki + test/vimwiki-reader.wiki test/rst-reader.rst test/s5-basic.html test/s5-fancy.html @@ -368,6 +369,7 @@ Library Text.Pandoc.Readers.Markdown, Text.Pandoc.Readers.CommonMark, Text.Pandoc.Readers.MediaWiki, + Text.Pandoc.Readers.Vimwiki, Text.Pandoc.Readers.RST, Text.Pandoc.Readers.Org, Text.Pandoc.Readers.DocBook, diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 4c95d5d28..20e503a7e 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -46,6 +46,7 @@ module Text.Pandoc.Readers , readMarkdown , readCommonMark , readMediaWiki + , readVimwiki , readRST , readOrg , readLaTeX @@ -82,6 +83,7 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki +import Text.Pandoc.Readers.Vimwiki import Text.Pandoc.Readers.Muse import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Odt @@ -115,6 +117,7 @@ readers = [ ("native" , TextReader readNative) ,("commonmark" , TextReader readCommonMark) ,("rst" , TextReader readRST) ,("mediawiki" , TextReader readMediaWiki) + ,("vimwiki" , TextReader readVimwiki) ,("docbook" , TextReader readDocBook) ,("opml" , TextReader readOPML) ,("org" , TextReader readOrg) diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs new file mode 100644 index 000000000..07e23fa1e --- /dev/null +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -0,0 +1,655 @@ +{- + Copyright (C) 2017 Yuchen Pei + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Vimwiki + Copyright : Copyright (C) 2017 Yuchen Pei + License : GNU GPL, version 2 or above + + Maintainer : Yuchen Pei + Stability : alpha + Portability : portable + +Conversion of vimwiki text to 'Pandoc' document. +-} +{-- + progress: +* block parsers: + * [X] header + * [X] hrule + * [X] comment + * [X] blockquote + * [X] preformatted + * [X] displaymath + * [X] bulletlist / orderedlist + * [X] orderedlist with 1., i., a) etc identification. + * [X] todo lists -- not list builder with attributes? using span. + * [X] table + * [X] centered table -- using div + * [O] colspan and rowspan -- pandoc limitation, see issue #1024 + * [X] paragraph + * [X] definition list +* inline parsers: + * [X] bareURL + * [X] strong + * [X] emph + * [X] strikeout + * [X] code + * [X] link + * [X] image + * [X] inline math + * [X] tag + * [X] sub- and super-scripts +* misc: + * [X] `TODO:` mark + * [X] metadata placeholders: %title and %date + * [O] control placeholders: %template and %nohtml -- %template added to + meta, %nohtml ignored +--} + +module Text.Pandoc.Readers.Vimwiki ( readVimwiki + ) where +import Control.Monad.Except (throwError) +import Control.Monad (guard) +import Data.Default +import Data.Maybe +import Data.Monoid ((<>)) +import Data.List (isInfixOf, isPrefixOf) +import Data.Text (Text, unpack) +import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, fromList, toList) +import qualified Text.Pandoc.Builder + as B (headerWith, str, space, strong, emph, strikeout, code, link, image, + spanWith, para, horizontalRule, blockQuote, bulletList, plain, + orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, + setMeta, definitionList, superscript, subscript) +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Definition (Pandoc(..), Inline(Space), + Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), + ListNumberDelim(..)) +import Text.Pandoc.Logging (LogMessage(ParsingTrace)) +import Text.Pandoc.Options (ReaderOptions) +import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState, + stateMeta', blanklines, registerHeader, spaceChar, emailAddress, uri, F, runF, + orderedListMarker, many1Till) +import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify) +import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf, + alphaNum) +import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1, + notFollowedBy, option) +import Text.Parsec.Prim (many, getPosition, try, updateState, getState) +import Text.Parsec.Char (oneOf, space) +import Text.Parsec.Combinator (lookAhead, between) +import Text.Parsec.Prim ((<|>)) + +readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readVimwiki opts s = do + res <- readWithM parseVimwiki def{ stateOptions = opts } (unpack s) + case res of + Left e -> throwError e + Right result -> return result + +type VwParser = ParserT [Char] ParserState + + +-- constants + +specialChars :: [Char] +specialChars = "=*-#[]_~{}`$|:%^," + +spaceChars :: [Char] +spaceChars = " \t\n" + +-- main parser + +parseVimwiki :: PandocMonad m => VwParser m Pandoc +parseVimwiki = do + bs <- mconcat <$> many block + spaces + eof + st <- getState + let meta = runF (stateMeta' st) st + return $ Pandoc meta (toList bs) + +-- block parser + +block :: PandocMonad m => VwParser m Blocks +block = do + pos <- getPosition + res <- choice [ mempty <$ blanklines + , header + , hrule + , mempty <$ comment + , mixedList + , preformatted + , displayMath + , table + , mempty <$ placeholder + , blockQuote + , definitionList + , para + ] + report $ ParsingTrace (take 60 $ show $ toList res) pos + return res + +blockML :: PandocMonad m => VwParser m Blocks +blockML = choice [preformatted, displayMath, table] + +header :: PandocMonad m => VwParser m Blocks +header = try $ do + sp <- many spaceChar + eqs <- many1 (char '=') + spaceChar + let lev = length eqs + guard $ lev <= 6 + contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar + >> (string eqs) >> many spaceChar >> newline) + attr <- registerHeader (makeId contents, + (if sp == "" then [] else ["justcenter"]), []) contents + return $ B.headerWith attr lev contents + +para :: PandocMonad m => VwParser m Blocks +para = try $ do + contents <- trimInlines . mconcat <$> many1 inline + if all (==Space) (toList contents) + then return mempty + else return $ B.para contents + +hrule :: PandocMonad m => VwParser m Blocks +hrule = try $ B.horizontalRule <$ (string "----" >> many (char '-') >> newline) + +comment :: PandocMonad m => VwParser m () +comment = try $ do + many spaceChar >> string "%%" >> many (noneOf "\n") + return () + +blockQuote :: PandocMonad m => VwParser m Blocks +blockQuote = try $ do + string " " + contents <- trimInlines . mconcat <$> many1 inlineBQ + if all (==Space) (toList contents) + then return mempty + else return $ B.blockQuote $ B.plain contents + +definitionList :: PandocMonad m => VwParser m Blocks +definitionList = try $ + B.definitionList <$> (many1 (dlItemWithDT <|> dlItemWithoutDT)) + +dlItemWithDT :: PandocMonad m => VwParser m (Inlines, [Blocks]) +dlItemWithDT = do + dt <- definitionTerm + dds <- many definitionDef + return $ (dt, dds) + +dlItemWithoutDT :: PandocMonad m => VwParser m (Inlines, [Blocks]) +dlItemWithoutDT = do + dds <- many1 definitionDef + return $ (mempty, dds) + +definitionDef :: PandocMonad m => VwParser m Blocks +definitionDef = try $ + (notFollowedBy definitionTerm) >> many spaceChar + >> (definitionDef1 <|> definitionDef2) + +definitionDef1 :: PandocMonad m => VwParser m Blocks +definitionDef1 = try $ mempty <$ defMarkerE + +definitionDef2 :: PandocMonad m => VwParser m Blocks +definitionDef2 = try $ B.plain <$> + (defMarkerM >> (trimInlines . mconcat <$> many inline') <* newline) + + +definitionTerm :: PandocMonad m => VwParser m Inlines +definitionTerm = try $ do + x <- definitionTerm1 <|> definitionTerm2 + guard $ (stringify x /= "") + return x + +definitionTerm1 :: PandocMonad m => VwParser m Inlines +definitionTerm1 = try $ + trimInlines . mconcat <$> manyTill inline' (try $ defMarkerE) + +definitionTerm2 :: PandocMonad m => VwParser m Inlines +definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline' + (try $ lookAhead $ (defMarkerM >> notFollowedBy hasDefMarkerM)) + +defMarkerM :: PandocMonad m => VwParser m Char +defMarkerM = string "::" >> spaceChar + +defMarkerE :: PandocMonad m => VwParser m Char +defMarkerE = string "::" >> newline + +hasDefMarkerM :: PandocMonad m => VwParser m String +hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM) + +preformatted :: PandocMonad m => VwParser m Blocks +preformatted = try $ do + many spaceChar >> string "{{{" + attrText <- many (noneOf "\n") + lookAhead newline + contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}" + >> many spaceChar >> newline)) + if (not $ contents == "") && (head contents == '\n') + then return $ B.codeBlockWith (makeAttr attrText) (tail contents) + else return $ B.codeBlockWith (makeAttr attrText) contents + +makeAttr :: String -> Attr +makeAttr s = + let xs = splitBy (`elem` " \t") s in + ("", [], catMaybes $ map nameValue xs) + +nameValue :: String -> Maybe (String, String) +nameValue s = + let t = splitBy (== '=') s in + if length t /= 2 + then Nothing + else let (a, b) = (head t, last t) in + if ((length b) < 2) || ((head b, last b) /= ('"', '"')) + then Nothing + else Just (a, stripFirstAndLast b) + + +displayMath :: PandocMonad m => VwParser m Blocks +displayMath = try $ do + many spaceChar >> string "{{$" + mathTag <- option "" mathTagParser + contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$" + >> many spaceChar >> newline)) + let contentsWithTags + | mathTag == "" = "\\[" ++ contents ++ "\n\\]" + | otherwise = "\\begin{" ++ mathTag ++ "}" ++ contents + ++ "\n\\end{" ++ mathTag ++ "}" + return $ B.plain $ B.str contentsWithTags + +mixedList :: PandocMonad m => VwParser m Blocks +mixedList = try $ do + (bl, _) <- mixedList' (-1) + return $ head bl + +mixedList' :: PandocMonad m => Int -> VwParser m ([Blocks], Int) +mixedList' prevInd = do + (curInd, builder) <- option (-1, "na") (lookAhead listStart) + if curInd < prevInd + then return ([], curInd) + else do + listStart + curLine <- listItemContent + let listBuilder = + if builder == "ul" then B.bulletList else B.orderedList + (subList, lowInd) <- (mixedList' curInd) + if lowInd >= curInd + then do + (sameIndList, endInd) <- (mixedList' lowInd) + let curList = (combineList curLine subList) ++ sameIndList + if curInd > prevInd + then return ([listBuilder curList], endInd) + else return (curList, endInd) + else do + let (curList, endInd) = ((combineList curLine subList), + lowInd) + if curInd > prevInd + then return ([listBuilder curList], endInd) + else return (curList, endInd) + +plainInlineML' :: PandocMonad m => Inlines -> VwParser m Blocks +plainInlineML' w = do + xs <- many inlineML + newline + return $ B.plain $ trimInlines $ mconcat $ w:xs + +plainInlineML :: PandocMonad m => VwParser m Blocks +plainInlineML = (notFollowedBy listStart) >> spaceChar >> plainInlineML' mempty + + +listItemContent :: PandocMonad m => VwParser m Blocks +listItemContent = try $ do + w <- option mempty listTodoMarker + x <- plainInlineML' w + y <- many blocksThenInline + z <- many blockML + return $ mconcat $ x:y ++ z + +blocksThenInline :: PandocMonad m => VwParser m Blocks +blocksThenInline = try $ do + y <- many1 blockML + x <- plainInlineML + return $ mconcat $ y ++ [x] + +listTodoMarker :: PandocMonad m => VwParser m Inlines +listTodoMarker = try $ do + x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) + (oneOf " .oOX") + return $ makeListMarkerSpan x + +makeListMarkerSpan :: Char -> Inlines +makeListMarkerSpan x = + let cl = case x of + ' ' -> "done0" + '.' -> "done1" + 'o' -> "done2" + 'O' -> "done3" + 'X' -> "done4" + _ -> "" + in + B.spanWith ("", [cl], []) mempty + +combineList :: Blocks -> [Blocks] -> [Blocks] +combineList x [y] = case toList y of + [BulletList z] -> [fromList $ (toList x) + ++ [BulletList z]] + [OrderedList attr z] -> [fromList $ (toList x) + ++ [OrderedList attr z]] + _ -> x:[y] +combineList x xs = x:xs + +listStart :: PandocMonad m => VwParser m (Int, String) +listStart = try $ do + s <- many spaceChar + listType <- bulletListMarkers <|> orderedListMarkers + spaceChar + return (length s, listType) + +bulletListMarkers :: PandocMonad m => VwParser m String +bulletListMarkers = "ul" <$ (char '*' <|> char '-') + +orderedListMarkers :: PandocMonad m => VwParser m String +orderedListMarkers = + ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen) + <$> orderedListMarker + <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha]))) + <|> ("ol" <$ char '#') + +--many need trimInlines +table :: PandocMonad m => VwParser m Blocks +table = try $ do + indent <- lookAhead (many spaceChar) + (th, trs) <- table1 <|> table2 + let tab = B.simpleTable th trs + if indent == "" + then return tab + else return $ B.divWith ("", ["center"], []) tab + +-- table with header +table1 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]]) +table1 = try $ do + th <- tableRow + many1 tableHeaderSeparator + trs <- many tableRow + return (th, trs) + +-- headerless table +table2 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]]) +table2 = try $ do + trs <- many1 tableRow + return (take (length $ head trs) $ repeat mempty, trs) + +tableHeaderSeparator :: PandocMonad m => VwParser m () +tableHeaderSeparator = try $ do + many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|') + >> many spaceChar >> newline + return () + +tableRow :: PandocMonad m => VwParser m [Blocks] +tableRow = try $ do + many spaceChar >> char '|' + s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar + >> newline)) + guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|") + tr <- many tableCell + many spaceChar >> char '\n' + return tr + +tableCell :: PandocMonad m => VwParser m Blocks +tableCell = try $ + B.plain <$> trimInlines . mconcat <$> (manyTill inline' (char '|')) + +placeholder :: PandocMonad m => VwParser m () +placeholder = try $ + (choice (ph <$> ["title", "date"])) <|> noHtmlPh <|> templatePh + +ph :: PandocMonad m => String -> VwParser m () +ph s = try $ do + many spaceChar >> (string $ '%':s) >> spaceChar + contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline))) + --use lookAhead because of placeholder in the whitespace parser + let meta' = return $ B.setMeta s contents nullMeta :: F Meta + updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' } + +noHtmlPh :: PandocMonad m => VwParser m () +noHtmlPh = try $ + () <$ (many spaceChar >> string "%nohtml" >> many spaceChar + >> (lookAhead newline)) + +templatePh :: PandocMonad m => VwParser m () +templatePh = try $ + () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n") + >> (lookAhead newline)) + +-- inline parser + +inline :: PandocMonad m => VwParser m Inlines +inline = choice $ (whitespace endlineP):inlineList + +inlineList :: PandocMonad m => [VwParser m Inlines] +inlineList = [ bareURL + , todoMark + , str + , strong + , emph + , strikeout + , code + , link + , image + , inlineMath + , tag + , superscript + , subscript + , special + ] + +-- inline parser without softbreaks or comment breaks +inline' :: PandocMonad m => VwParser m Inlines +inline' = choice $ whitespace':inlineList + +-- inline parser for blockquotes +inlineBQ :: PandocMonad m => VwParser m Inlines +inlineBQ = choice $ (whitespace endlineBQ):inlineList + +-- inline parser for mixedlists +inlineML :: PandocMonad m => VwParser m Inlines +inlineML = choice $ (whitespace endlineML):inlineList + +str :: PandocMonad m => VwParser m Inlines +str = B.str <$> (many1 $ noneOf $ spaceChars ++ specialChars) + +whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines +whitespace endline = B.space <$ (skipMany1 spaceChar <|> + (try (newline >> (comment <|> placeholder)))) + <|> B.softbreak <$ endline + +whitespace' :: PandocMonad m => VwParser m Inlines +whitespace' = B.space <$ skipMany1 spaceChar + +special :: PandocMonad m => VwParser m Inlines +special = B.str <$> count 1 (oneOf specialChars) + +bareURL :: PandocMonad m => VwParser m Inlines +bareURL = try $ do + (orig, src) <- uri <|> emailAddress + return $ B.link src "" (B.str orig) + +strong :: PandocMonad m => VwParser m Inlines +strong = try $ do + s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*") + guard $ (not $ (head s) `elem` spaceChars) + && (not $ (last s) `elem` spaceChars) + char '*' + contents <- mconcat <$> (manyTill inline' $ char '*' + >> notFollowedBy alphaNum) + return $ (B.spanWith ((makeId contents), [], []) mempty) + <> (B.strong contents) + +makeId :: Inlines -> String +makeId i = concat (stringify <$> (toList i)) + +emph :: PandocMonad m => VwParser m Inlines +emph = try $ do + s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_") + guard $ (not $ (head s) `elem` spaceChars) + && (not $ (last s) `elem` spaceChars) + char '_' + contents <- mconcat <$> (manyTill inline' $ char '_' + >> notFollowedBy alphaNum) + return $ B.emph contents + +strikeout :: PandocMonad m => VwParser m Inlines +strikeout = try $ do + string "~~" + contents <- mconcat <$> (many1Till inline' $ string $ "~~") + return $ B.strikeout contents + +code :: PandocMonad m => VwParser m Inlines +code = try $ do + char '`' + contents <- many1Till (noneOf "\n") (char '`') + return $ B.code contents + +superscript :: PandocMonad m => VwParser m Inlines +superscript = try $ + B.superscript <$> mconcat <$> (char '^' >> many1Till inline' (char '^')) + +subscript :: PandocMonad m => VwParser m Inlines +subscript = try $ + B.subscript <$> mconcat <$> (string ",," + >> many1Till inline' (try $ string ",,")) + +link :: PandocMonad m => VwParser m Inlines +link = try $ do + string "[[" + contents <- lookAhead $ manyTill anyChar (string "]]") + case '|' `elem` contents of + False -> do + manyTill anyChar (string "]]") +-- not using try here because [[hell]o]] is not rendered as a link in vimwiki + return $ B.link (procLink contents) "" (B.str contents) + True -> do + url <- manyTill anyChar $ char '|' + lab <- mconcat <$> (manyTill inline $ string "]]") + return $ B.link (procLink url) "" lab + +image :: PandocMonad m => VwParser m Inlines +image = try $ do + string "{{" + contentText <- lookAhead $ manyTill (noneOf "\n") (try $ string "}}") + images $ length $ filter (== '|') contentText + +images :: PandocMonad m => Int -> VwParser m Inlines +images k + | k == 0 = do + imgurl <- manyTill anyChar (try $ string "}}") + return $ B.image (procImgurl imgurl) "" (B.str "") + | k == 1 = do + imgurl <- manyTill anyChar (char '|') + alt <- mconcat <$> (manyTill inline $ (try $ string "}}")) + return $ B.image (procImgurl imgurl) "" alt + | k == 2 = do + imgurl <- manyTill anyChar (char '|') + alt <- mconcat <$> (manyTill inline $ char '|') + attrText <- manyTill anyChar (try $ string "}}") + return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt + | otherwise = do + imgurl <- manyTill anyChar (char '|') + alt <- mconcat <$> (manyTill inline $ char '|') + attrText <- manyTill anyChar (char '|') + manyTill anyChar (try $ string "}}") + return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt + +procLink' :: String -> String +procLink' s + | ((take 6 s) == "local:") = "file" ++ (drop 5 s) + | ((take 6 s) == "diary:") = "diary/" ++ (drop 6 s) ++ ".html" + | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", + "news:", "telnet:" ]) + = s + | s == "" = "" + | (last s) == '/' = s + | otherwise = s ++ ".html" + +procLink :: String -> String +procLink s = procLink' x ++ y + where (x, y) = break (=='#') s + +procImgurl :: String -> String +procImgurl s = if ((take 6 s) == "local:") then "file" ++ (drop 5 s) else s + +inlineMath :: PandocMonad m => VwParser m Inlines +inlineMath = try $ do + char '$' + contents <- many1Till (noneOf "\n") (char '$') + return $ B.str $ "\\(" ++ contents ++ "\\)" + +tag :: PandocMonad m => VwParser m Inlines +tag = try $ do + char ':' + s <- manyTill (noneOf spaceChars) (try (char ':' >> (lookAhead space))) + guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":") + let ss = splitBy (==':') s + return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss)) + +todoMark :: PandocMonad m => VwParser m Inlines +todoMark = try $ do + string "TODO:" + return $ B.spanWith ("", ["todo"], []) (B.str "TODO:") + +-- helper functions and parsers +endlineP :: PandocMonad m => VwParser m () +endlineP = () <$ try (newline <* nFBTTBSB <* notFollowedBy blockQuote) + +endlineBQ :: PandocMonad m => VwParser m () +endlineBQ = () <$ try (newline <* nFBTTBSB <* string " ") + +endlineML :: PandocMonad m => VwParser m () +endlineML = () <$ try (newline <* nFBTTBSB <* many1 spaceChar) + +--- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks +nFBTTBSB :: PandocMonad m => VwParser m () +nFBTTBSB = + notFollowedBy newline <* + notFollowedBy hrule <* + notFollowedBy tableRow <* + notFollowedBy header <* + notFollowedBy listStart <* + notFollowedBy preformatted <* + notFollowedBy displayMath <* + notFollowedBy hasDefMarker + +hasDefMarker :: PandocMonad m => VwParser m () +hasDefMarker = () <$ (manyTill (noneOf "\n") (string "::" >> oneOf spaceChars)) + +makeTagSpan' :: String -> Inlines +makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> + B.spanWith (s, ["tag"], []) (B.str s) + +makeTagSpan :: String -> Inlines +makeTagSpan s = (B.space) <> (makeTagSpan' s) + +mathTagParser :: PandocMonad m => VwParser m String +mathTagParser = do + s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) + (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space))) + char '%' >> string s >> char '%' + return s diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index fceb776f7..b807719bc 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -99,6 +99,10 @@ tests = [ testGroup "markdown" , test "reader" ["-r", "mediawiki", "-w", "native", "-s"] "mediawiki-reader.wiki" "mediawiki-reader.native" ] + , testGroup "vimwiki" + [ test "reader" ["-r", "vimwiki", "-w", "native", "-s"] + "vimwiki-reader.wiki" "vimwiki-reader.native" + ] , testGroup "dokuwiki" [ testGroup "writer" $ writerTests "dokuwiki" , test "inline_formatting" ["-r", "native", "-w", "dokuwiki", "-s"] diff --git a/test/vimwiki-reader.native b/test/vimwiki-reader.native new file mode 100644 index 000000000..26388b71a --- /dev/null +++ b/test/vimwiki-reader.native @@ -0,0 +1,305 @@ +Pandoc (Meta {unMeta = fromList [("date",MetaInlines [Str "2017-05-01"]),("title",MetaInlines [Str "title"])]}) +[Header 1 ("implemented",[],[]) [Emph [Span ("implemented",[],[]) [],Strong [Str "implemented"]]] +,Header 1 ("header",[],[]) [Str "header"] +,Header 2 ("header level two",[],[]) [Str "header",Space,Str "level",Space,Str "two"] +,Header 3 ("header level 3",[],[]) [Str "header",Space,Code ("",[],[]) "level",Space,Str "3"] +,Header 4 ("header level four",[],[]) [Str "header",Space,Strikeout [Str "level"],Space,Str "four"] +,Header 5 ("header level 5",[],[]) [Str "header",Space,Emph [Span ("level",[],[]) [],Strong [Str "level"],Space,Str "5"]] +,Header 6 ("header level 6",[],[]) [Str "header",Space,Str "level",Space,Str "6"] +,Para [Str "=======",Space,Str "not",Space,Str "a",Space,Str "header",Space,Str "========"] +,Para [Str "hi==",Space,Str "not",Space,Str "a",Space,Str "header",Space,Str "=="] +,Para [Str "===",Space,Str "not",Space,Str "a",Space,Str "header",Space,Str "=="] +,Para [Str "===",Space,Str "not",Space,Str "a",Space,Str "header",Space,Str "===-"] +,Para [Str "not",Space,Str "a",Space,Str "header:"] +,Para [Str "=n="] +,Para [Str "===",Space,Str "not",Space,Str "a",Space,Str "header",Space,Str "===="] +,Header 2 ("centred header",["justcenter"],[]) [Str "centred",Space,Str "header"] +,Header 2 ("header with some == in between",[],[]) [Str "header",Space,Str "with",Space,Str "some",Space,Code ("",[],[]) "==",Space,Str "in",Space,Str "between"] +,Header 2 ("header with some == in between",[],[]) [Str "header",Space,Str "with",Space,Str "some",Space,Str "==",Space,Str "in",Space,Str "between"] +,Header 2 ("header with some ==in between",[],[]) [Str "header",Space,Str "with",Space,Str "some",Space,Str "==in",Space,Str "between"] +,Header 2 ("emph strong and strikeout",[],[]) [Str "emph",Space,Str "strong",Space,Str "and",Space,Str "strikeout"] +,Para [Emph [Str "emph"],Space,Span ("strong",[],[]) [],Strong [Str "strong"]] +,Para [Span ("strong and emph",[],[]) [],Strong [Emph [Str "strong",Space,Str "and",Space,Str "emph"]]] +,Para [Emph [Span ("emph and strong",[],[]) [],Strong [Str "emph",Space,Str "and",Space,Str "strong"]]] +,Para [Span ("emph inside strong",[],[]) [],Strong [Emph [Str "emph",Space,Str "inside"],Space,Str "strong"]] +,Para [Span ("strong with emph",[],[]) [],Strong [Str "strong",Space,Str "with",Space,Emph [Str "emph"]]] +,Para [Emph [Span ("strong inside",[],[]) [],Strong [Str "strong",Space,Str "inside"],Space,Str "emph"]] +,Para [Emph [Strikeout [Str "strikeout"],Space,Str "inside",Space,Str "emph"]] +,Para [Strikeout [Str "This",Space,Str "is",Space,Emph [Str "struck",Space,Str "out"],Space,Str "with",Space,Str "emph"]] +,Para [Str "*not",SoftBreak,Str "strong*"] +,Para [Str "just",Space,Str "two",Space,Str "stars:",Space,Str "**"] +,Para [Str "just",Space,Str "two",Space,Str "underscores:",Space,Str "__"] +,Para [Str "just",Space,Str "four",Space,Str "~s:",Space,Str "~~~~"] +,Para [Str "_not",SoftBreak,Str "emph_"] +,Para [Str "~~not",SoftBreak,Str "strikeout~~"] +,Header 2 ("horizontal rule",[],[]) [Str "horizontal",Space,Str "rule"] +,Para [Str "top"] +,HorizontalRule +,Para [Str "middle"] +,HorizontalRule +,Para [Str "not",Space,Str "a",Space,Str "rule-----"] +,Para [Str "not",Space,Str "a",Space,Str "rule",Space,Str "(trailing",Space,Str "spaces):",SoftBreak,Str "-----"] +,Para [Str "not",Space,Str "a",Space,Str "rule",Space,Str "(leading",Space,Str "spaces):",SoftBreak,Str "----"] +,Header 2 ("comments",[],[]) [Str "comments"] +,Para [Str "this",SoftBreak,Str "is",Space,Str "%%",Space,Str "not",Space,Str "secret"] +,Header 2 ("inline code",[],[]) [Str "inline",Space,Str "code"] +,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Code ("",[],[]) "inline code",Str "."] +,Para [Str "Just",Space,Str "two",Space,Str "backticks:",Space,Str "``"] +,Header 2 ("preformatted text",[],[]) [Str "preformatted",Space,Str "text"] +,CodeBlock ("",[],[]) " Tyger! Tyger! burning bright\n In the forests of the night,\n What immortal hand or eye\n Could frame thy fearful symmetry?\n In what distant deeps or skies\n Burnt the fire of thine eyes?\n On what wings dare he aspire?\n What the hand dare sieze the fire?" +,Header 3 ("preformatted text with attributes",[],[]) [Str "preformatted",Space,Str "text",Space,Str "with",Space,Str "attributes"] +,CodeBlock ("",[],[("class","python"),("style","color:blue")]) " for i in range(1, 5):\n print(i)" +,Header 2 ("block quotes",[],[]) [Str "block",Space,Str "quotes"] +,BlockQuote + [Plain [Str "(indentation",Space,Str "4",Space,Str "spaces)",Space,Str "This",Space,Str "would",Space,Str "be",Space,Str "a",Space,Str "blockquote",Space,Str "in",Space,Str "Vimwiki.",Space,Str "It",Space,Str "is",Space,Str "not",Space,Span ("highlighted",[],[]) [],Strong [Str "highlighted"],Space,Str "in",Space,Str "Vim",Space,Str "but",SoftBreak,Str "(indentation",Space,Str "1",Space,Str "space",Space,Str "followed",Space,Str "by",Space,Str "1",Space,Str "tab",Space,Str "of",Space,Str "width",Space,Str "4)",Space,Str "could",Space,Str "be",Space,Str "styled",Space,Str "by",Space,Str "CSS",Space,Str "in",Space,Str "HTML.",Space,Str "Blockquotes",Space,Str "are",Space,Str "usually",Space,Str "used",Space,Str "to",Space,Str "quote",Space,Str "a",SoftBreak,Str "(indentation",Space,Str "1",Space,Str "tab",Space,Str "of",Space,Str "width",Space,Str "4)",Space,Str "long",Space,Str "piece",Space,Str "of",Space,Str "text",Space,Str "from",Space,Str "another",Space,Str "source.",Space,Strikeout [Str "blah",Space,Str "blah"],Space,Span ("-blockquote",[],[]) [Str ""],Span ("blockquote",["tag"],[]) [Str "blockquote"]]] +,Header 2 ("external links",[],[]) [Str "external",Space,Str "links"] +,Para [Link ("",[],[]) [Emph [Str "Google"],Space,Str "search",Space,Str "engine"] ("http://google.com","")] +,Para [Link ("",[],[]) [Str "http://pandoc.org"] ("http://pandoc.org","")] +,Para [Link ("",[],[]) [Str "ftp://vim.org"] ("ftp://vim.org","")] +,Para [Link ("",[],[]) [Str "http://google.com"] ("http://google.com","")] +,Para [Link ("",[],[]) [Str "email",Space,Str "me"] ("mailto:info@example.org","")] +,Para [Link ("",[],[]) [Str "mailto:hello@bye.com"] ("mailto:hello@bye.com","")] +,Header 2 ("internal links",[],[]) [Str "internal",Space,Str "links"] +,Para [Link ("",[],[]) [Str "This is a link"] ("This is a link.html","")] +,Para [Link ("",[],[]) [Str "Description",Space,Str "of",Space,Str "the",Space,Str "link"] ("This is a link source.html","")] +,Para [Link ("",[],[]) [Str "projects/Important Project 1"] ("projects/Important Project 1.html",""),SoftBreak,Link ("",[],[]) [Str "../index"] ("../index.html",""),SoftBreak,Link ("",[],[]) [Str "Other",Space,Str "files"] ("a subdirectory/","")] +,Para [Link ("",[],[]) [Str "try",Space,Str "me",Space,Str "to",Space,Str "test",Space,Str "tag",Space,Str "anchors"] ("#tag-one","")] +,Para [Link ("",[],[]) [Str "try",Space,Str "me",Space,Str "to",Space,Str "test",Space,Str "header",Space,Str "anchors"] ("#block quotes","")] +,Para [Link ("",[],[]) [Str "try",Space,Str "me",Space,Str "to",Space,Str "test",Space,Str "strong",Space,Str "anchors"] ("#strong","")] +,Para [Link ("",[],[]) [Str "Tasks",Space,Str "for",Space,Str "tomorrow"] ("Todo List.html#Tomorrow","")] +,Para [Link ("",[],[]) [Str "diary:2017-05-01"] ("diary/2017-05-01.html","")] +,Para [Link ("",[],[]) [Str "Important",Space,Str "Data"] ("file:../assets/data.csv","")] +,Header 3 ("links with thumbnails",[],[]) [Str "links",Space,Str "with",Space,Str "thumbnails"] +,Para [Link ("",[],[]) [Image ("",[],[]) [Str ""] ("./movie.jpg","")] ("http://www.google.com","")] +,Header 2 ("images",[],[]) [Str "images"] +,Para [Image ("",[],[]) [Str ""] ("file:./lalune.jpg","")] +,Para [Image ("",[],[]) [Str "Vimwiki"] ("http://vimwiki.googlecode.com/hg/images/vimwiki_logo.png",""),SoftBreak,Image ("",[],[]) [Str ""] ("file:./movie.jpg","")] +,Header 3 ("image with attributes",[],[]) [Str "image",Space,Str "with",Space,Str "attributes"] +,Para [Image ("",[],[("style","width:150px;height:120px;")]) [Emph [Str "cool",Space,Str "stuff"]] ("lalune.jpg","")] +,Para [Image ("",[],[("style","font-color:red")]) [Span ("Non-existing",[],[]) [],Strong [Str "Non-existing"],Space,Str "image"] ("nonexist.jpg","")] +,Para [Image ("",[],[("style","width:150px;height:120px;")]) [Emph [Str "cool",Space,Str "stuff"]] ("lalune.jpg","")] +,Header 2 ("lists",[],[]) [Str "lists"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "ordered",Space,Str "list",Space,Str "item",Space,Str "1,",Space,Str "and",Space,Str "here",Space,Str "is",Space,Str "some",Space,Str "math",Space,Str "belonging",Space,Str "to",Space,Str "list",Space,Str "item",Space,Str "1"] + ,Plain [Str "\\[\n a^2 + b^2 = c^2\n\\]"] + ,Plain [Str "and",Space,Str "some",Space,Str "preformatted",Space,Str "and",Space,Str "tables",Space,Str "belonging",Space,Str "to",Space,Str "item",Space,Str "1",Space,Str "as",Space,Str "well"] + ,CodeBlock ("",[],[]) "I'm part of item 1." + ,Table [] [AlignDefault,AlignDefault] [0.0,0.0] + [[] + ,[]] + [[[Plain [Str "this",Space,Str "table"]] + ,[Plain [Str "is"]]] + ,[[Plain [Str "also",Space,Str "a",Space,Str "part"]] + ,[Plain [Str "of",Space,Str "item",Space,Str "1"]]]] + ,Plain [Str "and",Space,Str "some",Space,Str "more",Space,Str "text",Space,Str "belonging",Space,Str "to",Space,Str "item",Space,Str "1."]] + ,[Plain [Str "ordered",Space,Str "list",Space,Str "item",Space,Str "2"]]] +,BulletList + [[Plain [Str "Bulleted",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Bulleted",Space,Str "list",Space,Str "item",Space,Str "2"]]] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Bulleted",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "the",Space,Str "#",Space,Str "become",Space,Str "numbers",Space,Str "when",Space,Str "converted",Space,Str "to",Space,Str "HTML"]]] +,BulletList + [[Plain [Str "Bulleted",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Bulleted",Space,Str "list",Space,Str "item",Space,Str "2"]]] +,BulletList + [[Plain [Str "Item",Space,Str "1"]] + ,[Plain [Str "Item",Space,Str "2"] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Sub",Space,Str "item",Space,Str "1",Space,Str "(indentation",Space,Str "4",Space,Str "spaces)",SoftBreak,Str "Sub",Space,Str "item",Space,Str "1",Space,Str "continued",Space,Str "line.",SoftBreak,Str "Sub",Space,Str "item",Space,Str "1",Space,Str "next",Space,Str "continued",Space,Str "line."]] + ,[Plain [Str "Sub",Space,Str "item",Space,Str "2,",Space,Str "as",Space,Str "an",Space,Str "ordered",Space,Str "list",Space,Str "item",Space,Str "even",Space,Str "though",Space,Str "the",Space,Str "identifier",Space,Str "is",Space,Code ("",[],[]) "*",Space,Str "(indentation",Space,Str "2",Space,Str "spaces",Space,Str "followed",Space,Str "by",Space,Str "one",Space,Str "tab",Space,Str "of",Space,Str "width",Space,Str "4)"]] + ,[Plain [Str "etc.",SoftBreak,Str "Continuation",Space,Str "of",Space,Str "Item",Space,Str "2",SoftBreak,Str "Next",Space,Str "continuation",Space,Str "of",Space,Str "Item",Space,Str "2"]]]]] +,Para [Str "But",Space,Str "this",Space,Str "is",Space,Str "a",Space,Str "new",Space,Str "paragraph."] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "1"] + ,BulletList + [[Plain [Code ("",[],[]) "1.1"]]]] + ,[Plain [Str "2"] + ,BulletList + [[Plain [Str "2.1"]]]]] +,BulletList + [[Plain [Str "3"]]] +,Header 3 ("ordered lists with non-# identifiers",[],[]) [Str "ordered",Space,Str "lists",Space,Str "with",Space,Str "non-#",Space,Str "identifiers"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "2"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "3"]]] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "2"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "3"]]] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "2"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "3"]]] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "2"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "3"]]] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "2"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "3"]]] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "2"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "3"]]] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "2"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "item",Space,Str "3"]]] +,BulletList + [[Plain [Str "Bulleted",Space,Str "list",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Bulleted",Space,Str "list",Space,Str "item",Space,Str "2"] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Numbered",Space,Str "list",Space,Str "sub",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "more",Space,Str "..."] + ,BulletList + [[Plain [Str "and",Space,Str "more",Space,Str "..."]] + ,[Plain [Str "..."]]]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "sub",Space,Str "item",Space,Str "3"] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Str "Numbered",Space,Str "list",Space,Str "sub",Space,Str "sub",Space,Str "item",Space,Str "1"]] + ,[Plain [Str "Numbered",Space,Str "list",Space,Str "sub",Space,Str "sub",Space,Str "item",Space,Str "2"]]]] + ,[Plain [Str "etc."]]]] + ,[Plain [Str "Bulleted",Space,Str "list",Space,Str "item",Space,Str "3"]]] +,Header 2 ("todo lists",[],[]) [Str "todo",Space,Str "lists"] +,BulletList + [[Plain [Span ("",["done0"],[]) [],Str "task",Space,Str "1"] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Span ("",["done1"],[]) [],Str "5"]]]] + ,[Plain [Span ("",["done2"],[]) [],Str "3"]] + ,[Plain [Str "[]",Space,Str "not",Space,Str "a",Space,Str "todo",Space,Str "item"]] + ,[Plain [Str "[",Space,Str "]not",Space,Str "a",Space,Str "todo",Space,Str "item"]] + ,[Plain [Str "[r]",Space,Str "not",Space,Str "a",Space,Str "todo",Space,Str "item"]] + ,[Plain [Str "[",Space,Str "]",Space,Str "not",Space,Str "a",Space,Str "todo",Space,Str "item"]] + ,[Plain [Span ("",["done2"],[]) [],Str "a",Space,Str "tab",Space,Str "in",Space,Str "the",Space,Str "todo",Space,Str "list",Space,Str "marker",Space,Code ("",[],[]) "[ ]"] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Plain [Span ("",["done3"],[]) [],Str "4",SoftBreak,Str "5"]] + ,[Plain [Span ("",["done4"],[]) []] + ,Table [] [AlignDefault,AlignDefault] [0.0,0.0] + [[] + ,[]] + [[[Plain [Str "a"]] + ,[Plain [Str "b"]]]]]]] + ,[Plain [Span ("",["done4"],[]) [],Str "task",Space,Str "2"]]] +,Header 2 ("math",[],[]) [Str "math"] +,Para [Str "\\( \\sum_i a_i^2 = 1 \\)"] +,Plain [Str "\\[\n\\sum_i a_i^2\n=\n1\n\\]"] +,Plain [Str "\\begin{align}\n\\sum_i a_i^2 &= 1 + 1 \\\\\n&= 2.\n\\end{align}"] +,Para [Str "Just",Space,Str "two",Space,Str "dollar",Space,Str "signs:",Space,Str "$$"] +,Para [Str "[not",Space,Str "math]",Space,Str "You",Space,Str "have",Space,Str "$1",SoftBreak,Str "and",Space,Str "I",Space,Str "have",Space,Str "$1."] +,Header 2 ("tags",[],[]) [Str "tags"] +,Para [Span ("-tag-one",[],[]) [Str ""],Span ("tag-one",["tag"],[]) [Str "tag-one"],Space,Span ("-tag-two",[],[]) [Str ""],Span ("tag-two",["tag"],[]) [Str "tag-two"]] +,Header 2 ("tables",[],[]) [Str "tables"] +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [[Plain [Str "Year"]] + ,[Plain [Str "Temperature",Space,Str "(low)"]] + ,[Plain [Str "Temperature",Space,Str "(high)"]]] + [[[Plain [Str "1900"]] + ,[Plain [Str "-10"]] + ,[Plain [Str "25"]]] + ,[[Plain [Str "1910"]] + ,[Plain [Str "-15"]] + ,[Plain [Str "30"]]] + ,[[Plain [Str "1920"]] + ,[Plain [Str "-10"]] + ,[Plain [Str "32"]]] + ,[[Plain [Str "1930"]] + ,[Plain [Emph [Str "N/A"]]] + ,[Plain [Emph [Str "N/A"]]]] + ,[[Plain [Str "1940"]] + ,[Plain [Str "-2"]] + ,[Plain [Str "40"]]]] +,Header 3 ("centered headerless tables",[],[]) [Str "centered",Space,Str "headerless",Space,Str "tables"] +,Div ("",["center"],[]) + [Table [] [AlignDefault,AlignDefault] [0.0,0.0] + [[] + ,[]] + [[[Plain [Str "a"]] + ,[Plain [Str "b"]]] + ,[[Plain [Str "c"]] + ,[Plain [Str "d"]]]]] +,Header 2 ("paragraphs",[],[]) [Str "paragraphs"] +,Para [Str "This",Space,Str "is",Space,Str "first",Space,Str "paragraph",SoftBreak,Str "with",Space,Str "two",Space,Str "lines."] +,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "second",Space,Str "paragraph",Space,Str "with",SoftBreak,Str "two",Space,Str "lines",Space,Str "after",Space,Str "many",Space,Str "blank",Space,Str "lines."] +,Header 2 ("definition list",[],[]) [Str "definition",Space,Str "list"] +,DefinitionList + [([Str "Term",Space,Str "1"], + [[Plain [Str "Definition",Space,Str "1"]]]) + ,([Str "Term",Space,Str "2"], + [[Plain [Str "Definition",Space,Str "2"]] + ,[Plain [Str "Definition",Space,Str "3"]]]) + ,([Str "Term",Space,Str "::",Space,Span ("separated",[],[]) [],Strong [Str "separated"],Space,Str "by",Space,Str "::",Space,Emph [Str "double",Space,Str "colons"]], + [[Plain [Str "Def1"]] + ,[Plain [Str "Def2"]]]) + ,([Str "Term",Space,Str "with",Space,Str "lots",Space,Str "of",Space,Str "trailing",Space,Str "colons:::::::"], + [[Plain [Str "Definition"]]]) + ,([Str "::",Space,Str "This",Space,Str "is",Space,Str "::",Space,Str "A",Space,Str "term",Space,Str "(rather",Space,Str "than",Space,Str "a",Space,Str "definition)"], + [[Plain [Str "and",Space,Str "this",Space,Str "is",Space,Str "a",Space,Str "definition"]]]) + ,([Str "Term",Space,Str "Without",Space,Str "definitions"], + [[]]) + ,([Str "Part",Space,Str "::",Space,Str "of",Space,Str "::",Space,Str "dt"], + [[Plain [Str "part",Space,Str "of",Space,Str "::dd"]]])] +,DefinitionList + [([], + [[Plain [Str "Definition",Space,Str "1",Space,Str "without",Space,Str "a",Space,Str "term"]] + ,[Plain [Str "Definition",Space,Str "2",Space,Str "without",Space,Str "a",Space,Str "term"]]])] +,DefinitionList + [([Str "T1"], + [[Plain [Str "D1"]]])] +,Para [Str "new",Space,Str "paragraph"] +,DefinitionList + [([Str "T1"], + [[Plain [Str "D1"]]])] +,Para [Str "Not::Definition"] +,Para [Str "Not",Space,Str "::Definition"] +,Para [Str "::Not",Space,Str "definition"] +,BlockQuote + [Plain [Str "::",Space,Str "blockquote"]] +,BlockQuote + [Plain [Str "block",Space,Str "::",Space,Str "quote"]] +,Header 2 ("metadata placeholders",[],[]) [Str "metadata",Space,Str "placeholders"] +,Para [Str "%this",Space,Str "is",Space,Str "not",Space,Str "a",Space,Str "placeholder"] +,Para [Str "placeholders",SoftBreak,Str "serves",Space,Str "as",Space,Str "space",Space,Str "/",Space,Str "softbreak",Space,Str "in",Space,Str "paragraphs"] +,Header 2 ("sup, sub",[],[]) [Str "sup,",Space,Str "sub"] +,Para [Str "super",Superscript [Str "script"]] +,Para [Str "sub",Subscript [Str "script"]] +,Header 2 ("the todo mark",[],[]) [Str "the",Space,Str "todo",Space,Str "mark"] +,Para [Span ("",["todo"],[]) [Str "TODO:"]] +,Header 1 ("not implemented yet",[],[]) [Emph [Span ("not implemented yet",[],[]) [],Strong [Str "not",Space,Str "implemented",Space,Str "yet"]]] +,Header 2 ("tables with spans",[],[]) [Str "tables",Space,Str "with",Space,Str "spans"] +,Table [] [AlignDefault,AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0,0.0] + [[] + ,[] + ,[] + ,[]] + [[[Plain [Str "a"]] + ,[Plain [Str "b"]] + ,[Plain [Str "c"]] + ,[Plain [Str "d"]]] + ,[[Plain [Str "\\/"]] + ,[Plain [Str "e"]] + ,[Plain [Str ">"]] + ,[Plain [Str "f"]]] + ,[[Plain [Str "\\/"]] + ,[Plain [Str "\\/"]] + ,[Plain [Str ">"]] + ,[Plain [Str "g"]]] + ,[[Plain [Str "h"]] + ,[Plain [Str ">"]] + ,[Plain [Str ">"]] + ,[Plain [Str ">"]]]] +,Header 2 ("tables with multiple lines of headers",[],[]) [Str "tables",Space,Str "with",Space,Str "multiple",Space,Str "lines",Space,Str "of",Space,Str "headers"] +,Table [] [AlignDefault,AlignDefault] [0.0,0.0] + [[] + ,[]] + [[[Plain [Str "a"]] + ,[Plain [Str "b"]]] + ,[[Plain [Str "c"]] + ,[Plain [Str "d"]]] + ,[[Plain [Str "---"]] + ,[Plain [Str "---"]]]] +,Header 2 ("some other placeholders",[],[]) [Str "some",Space,Str "other",Space,Str "placeholders"] +,Para [Code ("",[],[]) "template",Space,Str "placeholder",Space,Str "is",Space,Str "ignored."] +,Para [Code ("",[],[]) "nohtml",Space,Str "placeholder",Space,Str "is",Space,Str "ignored."]] diff --git a/test/vimwiki-reader.wiki b/test/vimwiki-reader.wiki new file mode 100644 index 000000000..ad724e090 --- /dev/null +++ b/test/vimwiki-reader.wiki @@ -0,0 +1,414 @@ += _*implemented*_ = += header = + +== header level two == + +=== header `level` 3 === + +==== header ~~level~~ four ==== + +===== header _*level* 5_ ===== + +====== header level 6 ====== + +======= not a header ======== + +hi== not a header == + +=== not a header == + +=== not a header ===- + +not a header: + +=n= + +=== not a header ==== + + == centred header == + +== header with some `==` in between == +== header with some == in between == +== header with some ==in between == + +== emph strong and strikeout == + +_emph_ *strong* + +*_strong and emph_* + +_*emph and strong*_ + +*_emph inside_ strong* + +*strong with _emph_* + +_*strong inside* emph_ + +_~~strikeout~~ inside emph_ + +~~This is _struck out_ with emph~~ + +*not +strong* + +just two stars: ** + +just two underscores: __ + +just four ~s: ~~~~ + +_not +%%comment +emph_ + +~~not + %%comment + %%comment +strikeout~~ + +== horizontal rule == + +top +---- +middle + +------- + +not a rule----- + +not a rule (trailing spaces): +----- + +not a rule (leading spaces): + ---- + +== comments == + +%% you can't see me. + +this +%% secret +is %% not secret + +== inline code == + +Here is some `inline code`. + +Just two backticks: `` + +== preformatted text == + +{{{ + Tyger! Tyger! burning bright + In the forests of the night, + What immortal hand or eye + Could frame thy fearful symmetry? + In what distant deeps or skies + Burnt the fire of thine eyes? + On what wings dare he aspire? + What the hand dare sieze the fire? +}}} + +=== preformatted text with attributes === + + {{{class="python" style="color:blue" + for i in range(1, 5): + print(i) + }}} + +== block quotes == + + (indentation 4 spaces) This would be a blockquote in Vimwiki. It is not *highlighted* in Vim but + (indentation 1 space followed by 1 tab of width 4) could be styled by CSS in HTML. Blockquotes are usually used to quote a + (indentation 1 tab of width 4) long piece of text from another source. ~~blah blah~~ :blockquote: + +== external links == + +[[http://google.com|_Google_ search engine]] + +http://pandoc.org + +ftp://vim.org + +[[http://google.com]] + +[[mailto:info@example.org|email me]] + +mailto:hello@bye.com + +== internal links == + +[[This is a link]] + +[[This is a link source|Description of the link]] + +[[projects/Important Project 1]] + +[[../index]] + +[[a subdirectory/|Other files]] + +[[#tag-one|try me to test tag anchors]] + +[[#block quotes|try me to test header anchors]] + +[[#strong|try me to test strong anchors]] + +[[Todo List#Tomorrow|Tasks for tomorrow]] + +[[diary:2017-05-01]] + +[[file:../assets/data.csv|Important Data]] + +=== links with thumbnails === +[[http://www.google.com|{{./movie.jpg}}]] + +== images == + +{{file:./lalune.jpg}} + +{{http://vimwiki.googlecode.com/hg/images/vimwiki_logo.png|Vimwiki}} + +{{local:./movie.jpg}} + + +=== image with attributes === +{{lalune.jpg|_cool stuff_|style="width:150px;height:120px;"}} + +{{nonexist.jpg|*Non-existing* image|class="center flow blabla" style="font-color:red"}} + +{{lalune.jpg|_cool stuff_|style="width:150px;height:120px;"|anything in this segment is ignored}} + + +== lists == + + +# ordered list item 1, and here is some math belonging to list item 1 + {{$ + a^2 + b^2 = c^2 + }}$ + and some preformatted and tables belonging to item 1 as well +{{{ +I'm part of item 1. +}}} +| this table | is | +| also a part | of item 1 | + and some more text belonging to item 1. +# ordered list item 2 + + +* Bulleted list item 1 +* Bulleted list item 2 + + +# Bulleted list item 1 +# the # become numbers when converted to HTML + +- Bulleted list item 1 +- Bulleted list item 2 + +* Item 1 +* Item 2 + # Sub item 1 (indentation 4 spaces) + Sub item 1 continued line. +%%comments + Sub item 1 next continued line. + * Sub item 2, as an ordered list item even though the identifier is `*` (indentation 2 spaces followed by one tab of width 4) + * etc. + Continuation of Item 2 + Next continuation of Item 2 +But this is a new paragraph. + +# 1 + * `1.1` + * 2 + * 2.1 + * 3 + +=== ordered lists with non-# identifiers === +1. Numbered list item 1 +2. Numbered list item 2 +3. Numbered list item 3 + +4. Numbered list item 1 +5. Numbered list item 2 +6. Numbered list item 3 + +1) Numbered list item 1 +2) Numbered list item 2 +3) Numbered list item 3 + +a) Numbered list item 1 +b) Numbered list item 2 +c) Numbered list item 3 + +A) Numbered list item 1 +B) Numbered list item 2 +C) Numbered list item 3 + +i) Numbered list item 1 +ii) Numbered list item 2 +iii) Numbered list item 3 + +I) Numbered list item 1 +II) Numbered list item 2 +III) Numbered list item 3 + +- Bulleted list item 1 +- Bulleted list item 2 + a) Numbered list sub item 1 + b) more ... + * and more ... + * ... + c) Numbered list sub item 3 + 1. Numbered list sub sub item 1 + 2. Numbered list sub sub item 2 + d) etc. +- Bulleted list item 3 + +== todo lists == +* [ ] task 1 + 1. [.] 5 +* [o] 3 +* [] not a todo item +* [ ]not a todo item +* [r] not a todo item +* [ ] not a todo item +* [o] a tab in the todo list marker `[ ]` + III) [O] 4 + 5 + i) [X] +| a | b | +* [X] task 2 + +== math == + +$ \sum_i a_i^2 = 1 $ + +{{$ +\sum_i a_i^2 += +1 +}}$ + +{{$%align% +\sum_i a_i^2 &= 1 + 1 \\ +&= 2. +}}$ + +Just two dollar signs: $$ + +[not math] You have $1 +and I have $1. + +== tags == + +:tag-one:tag-two: + +== tables == + +| Year | Temperature (low) | Temperature (high) | +|------|-------------------|--------------------| +| 1900 | -10 | 25 | +| 1910 | -15 | 30 | +| 1920 | -10 | 32 | +| 1930 | _N/A_ | _N/A_ | +| 1940 | -2 | 40 | + + +=== centered headerless tables === + | a | b | + | c | d | + + +== paragraphs == + +This is first paragraph +with two lines. + + + + + + + + +This is a second paragraph with +two lines after many blank lines. + +== definition list == + +Term 1:: Definition 1 +Term 2:: +:: Definition 2 + :: Definition 3 +Term :: *separated* by :: _double colons_ :: Def1 +:: Def2 +Term with lots of trailing colons::::::::: Definition +:: This is :: A term (rather than a definition) :: and this is a definition +Term Without definitions :: +:: +Part :: of :: dt :: part of ::dd + +:: Definition 1 without a term +:: Definition 2 without a term + +T1 :: D1 +new paragraph +T1 :: D1 + +Not::Definition + +Not ::Definition + +::Not definition + + :: blockquote + + block :: quote + +== metadata placeholders == +%title title +%date 2017-05-01 + +%title second title is ignored +%date second date is ignored + +%this is not a placeholder + +placeholders +%title another title +%date 2017-04-23 +serves as space / softbreak in paragraphs + + +== sup, sub == + +super^script^ + +sub,,script,, + +== the todo mark == +TODO: + += _*not implemented yet*_ = +== tables with spans == +| a | b | c | d | +| \/ | e | > | f | +| \/ | \/ | > | g | +| h | > | > | > | + +== tables with multiple lines of headers == +| a | b | +| c | d | +|---|---| + +== some other placeholders == +`template` placeholder is ignored. +%template template + +`nohtml` placeholder is ignored. +%nohtml + + -- cgit v1.2.3 From 4929d027dc57151dc7f009347478b35b90d2373b Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Mon, 19 Jun 2017 23:16:21 +0300 Subject: Muse reader: fix list item continuation parsing (#3747) --- src/Text/Pandoc/Readers/Muse.hs | 4 ++-- test/Tests/Readers/Muse.hs | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index bc9da26cb..c1ea1354b 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -295,9 +295,9 @@ withListContext p = do listContinuation :: PandocMonad m => Int -> MuseParser m String listContinuation markerLength = try $ do - result <- many1 $ listLine markerLength blanks <- many1 blankline - return $ concat result ++ blanks + result <- many1 $ listLine markerLength + return $ blanks ++ concat result listStart :: PandocMonad m => MuseParser m Int -> MuseParser m Int listStart marker = try $ do diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index 5a896da55..85d6f5c48 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -260,5 +260,18 @@ tests = ] ] ] + , "List continuation" =: + T.unlines + [ " - a" + , "" + , " b" + , "" + , " c" + ] =?> + bulletList [ mconcat [ para "a" + , para "b" + , para "c" + ] + ] ] ] -- cgit v1.2.3 From 814ac51d3228eeb3bbcbf78a8a88a43cd11d23dd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Jun 2017 22:04:01 +0200 Subject: Separated tracing from logging. Formerly tracing was just log messages with a DEBUG log level. We now make these things independent. Tracing can be turned on or off in PandocMonad using `setTrace`; it is independent of logging. * Removed `DEBUG` from `Verbosity`. * Removed `ParserTrace` from `LogMessage`. * Added `trace`, `setTrace` to `PandocMonad`. --- src/Text/Pandoc/App.hs | 7 +++++-- src/Text/Pandoc/Class.hs | 27 ++++++++++++++++++++++----- src/Text/Pandoc/Logging.hs | 12 +----------- src/Text/Pandoc/Readers/HTML.hs | 5 ++--- src/Text/Pandoc/Readers/Markdown.hs | 6 ++---- src/Text/Pandoc/Readers/MediaWiki.hs | 5 ++--- src/Text/Pandoc/Readers/Muse.hs | 5 ++--- src/Text/Pandoc/Readers/TWiki.hs | 6 ++---- src/Text/Pandoc/Readers/Textile.hs | 6 ++---- 9 files changed, 40 insertions(+), 39 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 033614752..2c5e1de6b 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -76,7 +76,7 @@ import System.IO.Error (isDoesNotExistError) import Text.Pandoc import Text.Pandoc.Builder (setMeta) import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getLog, - setResourcePath, withMediaBag) + setResourcePath, withMediaBag, setTrace) import Text.Pandoc.Highlighting (highlightingStyles) import Text.Pandoc.Lua (runLuaFilter) import Text.Pandoc.Writers.Math (defaultMathJaxURL, defaultKaTeXURL) @@ -391,6 +391,7 @@ convertWithOpts opts = do let runIO' :: PandocIO a -> IO a runIO' f = do (res, reports) <- runIOorExplode $ do + setTrace (optTrace opts) setVerbosity verbosity x <- f rs <- getLog @@ -559,6 +560,7 @@ data Opt = Opt , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output + , optTrace :: Bool -- ^ Enable tracing , optLogFile :: Maybe FilePath -- ^ File to write JSON log output , optFailIfWarnings :: Bool -- ^ Fail on warnings , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst @@ -633,6 +635,7 @@ defaultOpts = Opt , optDumpArgs = False , optIgnoreArgs = False , optVerbosity = WARNING + , optTrace = False , optLogFile = Nothing , optFailIfWarnings = False , optReferenceLinks = False @@ -1390,7 +1393,7 @@ options = , Option "" ["trace"] (NoArg - (\opt -> return opt { optVerbosity = DEBUG })) + (\opt -> return opt { optTrace = True })) "" -- "Turn on diagnostic tracing in readers." , Option "" ["dump-args"] diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 8db2e214e..a7194f8d5 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -45,6 +45,7 @@ module Text.Pandoc.Class ( PandocMonad(..) , getZonedTime , readFileFromDirs , report + , setTrace , getLog , setVerbosity , getMediaBag @@ -78,7 +79,7 @@ import qualified Text.Pandoc.Shared as IO ( readDataFile import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Compat.Time (UTCTime) import Text.Pandoc.Logging -import Text.Parsec (ParsecT) +import Text.Parsec (ParsecT, getPosition) import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime) import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import Text.Pandoc.Definition @@ -117,6 +118,7 @@ import System.IO.Error import System.IO (stderr) import qualified Data.Map as M import Text.Pandoc.Error +import qualified Debug.Trace class (Functor m, Applicative m, Monad m, MonadError PandocError m) => PandocMonad m where @@ -140,6 +142,11 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m) modifyCommonState :: (CommonState -> CommonState) -> m () modifyCommonState f = getCommonState >>= putCommonState . f + trace :: String -> m () + trace msg = do + tracing <- getsCommonState stTrace + when tracing $ Debug.Trace.trace ("[trace] " ++ msg) (return ()) + logOutput :: LogMessage -> m () -- Functions defined for all PandocMonad instances @@ -155,10 +162,11 @@ report :: PandocMonad m => LogMessage -> m () report msg = do verbosity <- getsCommonState stVerbosity let level = messageVerbosity msg - when (level <= verbosity) $ - logOutput msg - unless (level == DEBUG) $ - modifyCommonState $ \st -> st{ stLog = msg : stLog st } + when (level <= verbosity) $ logOutput msg + modifyCommonState $ \st -> st{ stLog = msg : stLog st } + +setTrace :: PandocMonad m => Bool -> m () +setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing} setMediaBag :: PandocMonad m => MediaBag -> m () setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb} @@ -208,6 +216,7 @@ data CommonState = CommonState { stLog :: [LogMessage] , stOutputFile :: Maybe FilePath , stResourcePath :: [FilePath] , stVerbosity :: Verbosity + , stTrace :: Bool } instance Default CommonState where @@ -217,6 +226,7 @@ instance Default CommonState where , stOutputFile = Nothing , stResourcePath = ["."] , stVerbosity = WARNING + , stTrace = False } runIO :: PandocIO a -> IO (Either PandocError a) @@ -561,8 +571,15 @@ instance PandocMonad m => PandocMonad (ParsecT s st m) where getModificationTime = lift . getModificationTime getCommonState = lift getCommonState putCommonState = lift . putCommonState + trace msg = do + tracing <- getsCommonState stTrace + when tracing $ do + pos <- getPosition + Debug.Trace.trace + ("[trace] Parsed " ++ msg ++ " at " ++ show pos) (return ()) logOutput = lift . logOutput + instance PandocMonad m => PandocMonad (ReaderT r m) where lookupEnv = lift . lookupEnv getCurrentTime = lift getCurrentTime diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index b31c33d4e..4090243ea 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -52,7 +52,7 @@ import Text.Pandoc.Definition import Text.Parsec.Pos -- | Verbosity level. -data Verbosity = ERROR | WARNING | INFO | DEBUG +data Verbosity = ERROR | WARNING | INFO deriving (Show, Read, Eq, Data, Enum, Ord, Bounded, Typeable, Generic) instance ToJSON Verbosity where @@ -63,7 +63,6 @@ instance FromJSON Verbosity where "ERROR" -> return ERROR "WARNING" -> return WARNING "INFO" -> return INFO - "DEBUG" -> return DEBUG _ -> mzero parseJSON _ = mzero @@ -78,7 +77,6 @@ data LogMessage = | CircularReference String SourcePos | ParsingUnescaped String SourcePos | CouldNotLoadIncludeFile String SourcePos - | ParsingTrace String SourcePos | InlineNotRendered Inline | BlockNotRendered Block | DocxParserWarning String @@ -151,11 +149,6 @@ instance ToJSON LogMessage where "source" .= Text.pack (sourceName pos), "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] - ParsingTrace s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), - "line" .= sourceLine pos, - "column" .= sourceColumn pos] InlineNotRendered il -> ["contents" .= toJSON il] BlockNotRendered bl -> @@ -228,8 +221,6 @@ showLogMessage msg = "Parsing unescaped '" ++ s ++ "' at " ++ showPos pos CouldNotLoadIncludeFile fp pos -> "Could not load include file '" ++ fp ++ "' at " ++ showPos pos - ParsingTrace s pos -> - "Parsing trace at " ++ showPos pos ++ ": " ++ s InlineNotRendered il -> "Not rendering " ++ show il BlockNotRendered bl -> @@ -281,7 +272,6 @@ messageVerbosity msg = CircularReference{} -> WARNING CouldNotLoadIncludeFile{} -> WARNING ParsingUnescaped{} -> INFO - ParsingTrace{} -> DEBUG InlineNotRendered{} -> INFO BlockNotRendered{} -> INFO DocxParserWarning{} -> WARNING diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 94f933c4d..e203298b8 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -71,7 +71,7 @@ import Data.Monoid ((<>)) import Text.Parsec.Error import qualified Data.Set as Set import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Control.Monad.Except (throwError) -- | Convert HTML-formatted string to 'Pandoc' document. @@ -162,7 +162,6 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag block :: PandocMonad m => TagParser m Blocks block = do - pos <- getPosition res <- choice [ eSection , eSwitch B.para block @@ -182,7 +181,7 @@ block = do , pPlain , pRawHtmlBlock ] - report $ ParsingTrace (take 60 $ show $ B.toList res) pos + trace (take 60 $ show $ B.toList res) return res namespaces :: PandocMonad m => [(String, TagParser m Inlines)] diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index e1c481311..edb356b39 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -52,7 +52,7 @@ import System.FilePath (addExtension, takeExtension) import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..), report) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) import Text.Pandoc.Logging @@ -488,7 +488,6 @@ parseBlocks = mconcat <$> manyTill block eof block :: PandocMonad m => MarkdownParser m (F Blocks) block = do - pos <- getPosition res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock @@ -514,8 +513,7 @@ block = do , para , plain ] "block" - report $ ParsingTrace - (take 60 $ show $ B.toList $ runF res defaultParserState) pos + trace (take 60 $ show $ B.toList $ runF res defaultParserState) return res -- diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index a3ff60c14..e371ff152 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -52,7 +52,7 @@ import qualified Data.Set as Set import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options @@ -205,7 +205,6 @@ parseMediaWiki = do block :: PandocMonad m => MWParser m Blocks block = do - pos <- getPosition res <- mempty <$ skipMany1 blankline <|> table <|> header @@ -218,7 +217,7 @@ block = do <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para - report $ ParsingTrace (take 60 $ show $ B.toList res) pos + trace (take 60 $ show $ B.toList res) return res para :: PandocMonad m => MWParser m Blocks diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index c1ea1354b..ac19a2382 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -53,7 +53,7 @@ import Data.Maybe (fromMaybe) import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options @@ -166,12 +166,11 @@ directive = do block :: PandocMonad m => MuseParser m (F Blocks) block = do - pos <- getPosition res <- mempty <$ skipMany1 blankline <|> blockElements <|> para skipMany blankline - report $ ParsingTrace (take 60 $ show $ B.toList $ runF res defaultParserState) pos + trace (take 60 $ show $ B.toList $ runF res defaultParserState) return res blockElements :: PandocMonad m => MuseParser m (F Blocks) diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 9e544c4ac..91ee8d1f1 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -42,9 +42,8 @@ import qualified Data.Foldable as F import Data.Maybe (fromMaybe) import Text.HTML.TagSoup import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition -import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, macro, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) @@ -133,12 +132,11 @@ parseTWiki = do block :: PandocMonad m => TWParser m B.Blocks block = do - pos <- getPosition res <- mempty <$ skipMany1 blankline <|> blockElements <|> para skipMany blankline - report $ ParsingTrace (take 60 $ show $ B.toList res) pos + trace (take 60 $ show $ B.toList res) return res blockElements :: PandocMonad m => TWParser m B.Blocks diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 1669e3e51..96b51feef 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -61,10 +61,9 @@ import Text.HTML.TagSoup (Tag (..), fromAttrib) import Text.HTML.TagSoup.Match import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.CSS import Text.Pandoc.Definition -import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) @@ -143,8 +142,7 @@ blockParsers = [ codeBlock block :: PandocMonad m => ParserT [Char] ParserState m Blocks block = do res <- choice blockParsers "block" - pos <- getPosition - report $ ParsingTrace (take 60 $ show $ B.toList res) pos + trace (take 60 $ show $ B.toList res) return res commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks -- cgit v1.2.3 From b6a38ed1114eae604706694aaca920b86ed28385 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Jun 2017 22:29:01 +0200 Subject: Vimwiki reader: adjusted for changes in trace. --- src/Text/Pandoc/Readers/Vimwiki.hs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 07e23fa1e..0cfbec34d 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -77,11 +77,10 @@ import qualified Text.Pandoc.Builder spanWith, para, horizontalRule, blockQuote, bulletList, plain, orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, setMeta, definitionList, superscript, subscript) -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition (Pandoc(..), Inline(Space), Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), ListNumberDelim(..)) -import Text.Pandoc.Logging (LogMessage(ParsingTrace)) import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState, stateMeta', blanklines, registerHeader, spaceChar, emailAddress, uri, F, runF, @@ -91,7 +90,7 @@ import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf, alphaNum) import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1, notFollowedBy, option) -import Text.Parsec.Prim (many, getPosition, try, updateState, getState) +import Text.Parsec.Prim (many, try, updateState, getState) import Text.Parsec.Char (oneOf, space) import Text.Parsec.Combinator (lookAhead, between) import Text.Parsec.Prim ((<|>)) @@ -129,7 +128,6 @@ parseVimwiki = do block :: PandocMonad m => VwParser m Blocks block = do - pos <- getPosition res <- choice [ mempty <$ blanklines , header , hrule @@ -143,7 +141,7 @@ block = do , definitionList , para ] - report $ ParsingTrace (take 60 $ show $ toList res) pos + trace (take 60 $ show $ toList res) return res blockML :: PandocMonad m => VwParser m Blocks -- cgit v1.2.3 From 328655e8636bc524829ae56ffd5ef15ad21f3917 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Jun 2017 22:41:09 +0200 Subject: Tracing: give less misleading line information with parseWithString. Previously positions would be reported past the end of the chunk. We now reset the source position within the chunk and report positions "in chunk." --- src/Text/Pandoc/Class.hs | 9 +++++++-- src/Text/Pandoc/Parsing.hs | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index a7194f8d5..120ba8fee 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -79,7 +79,7 @@ import qualified Text.Pandoc.Shared as IO ( readDataFile import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Compat.Time (UTCTime) import Text.Pandoc.Logging -import Text.Parsec (ParsecT, getPosition) +import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName) import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime) import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import Text.Pandoc.Definition @@ -576,7 +576,12 @@ instance PandocMonad m => PandocMonad (ParsecT s st m) where when tracing $ do pos <- getPosition Debug.Trace.trace - ("[trace] Parsed " ++ msg ++ " at " ++ show pos) (return ()) + ("[trace] Parsed " ++ msg ++ " at line " ++ + show (sourceLine pos) ++ + if sourceName pos == "chunk" + then " of chunk" + else "") + (return ()) logOutput = lift . logOutput diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index cd51bff69..eb5b37f40 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -189,7 +189,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.XML (fromEntities) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.Parsec hiding (token) -import Text.Parsec.Pos (newPos) +import Text.Parsec.Pos (newPos, initialPos) import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isHexDigit, isSpace, isPunctuation ) import Data.List ( intercalate, transpose, isSuffixOf ) @@ -366,6 +366,7 @@ parseFromString :: Monad m -> ParserT String st m a parseFromString parser str = do oldPos <- getPosition + setPosition $ initialPos "chunk" oldInput <- getInput setInput str result <- parser -- cgit v1.2.3 From 6a077ac9c79ac16d6af5409976e48ad80f42fd01 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 11:21:32 +0200 Subject: Fixed footnotes in table captions. Note that if the table has a first page header and a continuation page header, the notes will appear only on the first occurrence of the header. Closes #2378. --- src/Text/Pandoc/Writers/LaTeX.hs | 34 ++++++++++++++++++---------------- test/command/2378.md | 27 +++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 16 deletions(-) create mode 100644 test/command/2378.md (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e0ea9acfe..88ff454ce 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -647,23 +647,25 @@ blockToLaTeX (Header level (id',classes,_) lst) = do modify $ \s -> s{stInHeading = False} return hdr blockToLaTeX (Table caption aligns widths heads rows) = do - headers <- if all null heads - then return empty - else do - contents <- (tableRowToLaTeX True aligns widths) heads - return ("\\toprule" $$ contents $$ "\\midrule") - let endhead = if all null heads - then empty - else text "\\endhead" - let endfirsthead = if all null heads - then empty - else text "\\endfirsthead" + let toHeaders hs = do contents <- (tableRowToLaTeX True aligns widths) hs + return ("\\toprule" $$ contents $$ "\\midrule") + let removeNote (Note _) = Span ("", [], []) [] + removeNote x = x captionText <- inlineListToLaTeX caption + firsthead <- if isEmpty captionText || all null heads + then return empty + else ($$ text "\\endfirsthead") <$> toHeaders heads + head' <- if all null heads + then return empty + -- avoid duplicate notes in head and firsthead: + else ($$ text "\\endhead") <$> + toHeaders (if isEmpty firsthead + then heads + else walk removeNote heads) let capt = if isEmpty captionText then empty - else text "\\caption" <> braces captionText <> "\\tabularnewline" - $$ headers - $$ endfirsthead + else text "\\caption" <> + braces captionText <> "\\tabularnewline" rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns modify $ \s -> s{ stTable = True } @@ -671,9 +673,9 @@ blockToLaTeX (Table caption aligns widths heads rows) = do braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end $$ capt + $$ firsthead $$ (if all null heads then "\\toprule" else empty) - $$ headers - $$ endhead + $$ head' $$ vcat rows' $$ "\\bottomrule" $$ "\\end{longtable}" diff --git a/test/command/2378.md b/test/command/2378.md new file mode 100644 index 000000000..801c168ad --- /dev/null +++ b/test/command/2378.md @@ -0,0 +1,27 @@ +Ensure that we don't get duplicated footnotes when +a note occurs in a header cell and `\endfirsthead` +is used. + +``` +% pandoc -t latex +| x | y[^fn] | +|-|-| +|1|2| +: a table + +[^fn]: a footnote +^D +\begin{longtable}[]{@{}ll@{}} +\caption{a table}\tabularnewline +\toprule +x & y\footnote{a footnote}\tabularnewline +\midrule +\endfirsthead +\toprule +x & y{}\tabularnewline +\midrule +\endhead +1 & 2\tabularnewline +\bottomrule +\end{longtable} +``` -- cgit v1.2.3 From b26d3c45225184a882baad5cfb287f030d966104 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 14:21:43 +0200 Subject: FB2 writer: don't fail with an error on interior headers (e.g. in list). Instead, omit them with an INFO message. Closes #3750. --- src/Text/Pandoc/Writers/FB2.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 20f94c185..f561133fb 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -37,7 +37,7 @@ FictionBook is an XML-based e-book format. For more information see: -} module Text.Pandoc.Writers.FB2 (writeFB2) where -import Control.Monad.Except (catchError, throwError) +import Control.Monad.Except (catchError) import Control.Monad.State.Strict (StateT, evalStateT, get, lift, modify) import Control.Monad.State.Strict (liftM) import Data.ByteString.Base64 (encode) @@ -371,8 +371,10 @@ blockToXml (DefinitionList defs) = needsBreak (Para _) = False needsBreak (Plain ins) = LineBreak `notElem` ins needsBreak _ = True -blockToXml (Header _ _ _) = -- should never happen, see renderSections - throwError $ PandocShouldNeverHappenError "unexpected header in section text" +blockToXml h@(Header _ _ _) = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return [] blockToXml HorizontalRule = return [ el "empty-line" () , el "p" (txt (replicate 10 '—')) -- cgit v1.2.3 From ea1724e35e5ac008ef1293cb7b2b49595392c38e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 14:22:19 +0200 Subject: Docbook, JATS, TEI writers: print INFO message when omitting interior header. This only applies to section headers inside list items, e.g., which were otherwise silently omitted. See #3750. --- src/Text/Pandoc/Writers/Docbook.hs | 6 ++++-- src/Text/Pandoc/Writers/JATS.hs | 6 ++++-- src/Text/Pandoc/Writers/TEI.hs | 10 ++++++---- 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 02ffbf831..a0e69ffb4 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -217,8 +217,10 @@ blockToDocbook opts (Div (ident,_,_) bs) = do (if null ident then mempty else selfClosingTag "anchor" [("id", ident)]) $$ contents -blockToDocbook _ (Header _ _ _) = - return empty -- should not occur after hierarchicalize +blockToDocbook _ h@(Header _ _ _) = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return empty blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 1a8d80747..11f3b0c22 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -203,8 +203,10 @@ blockToJATS opts (Div (ident,_,kvs) bs) = do [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] return $ inTags True "boxed-text" attr contents -blockToJATS _ (Header _ _ _) = - return empty -- should not occur after hierarchicalize +blockToJATS _ h@(Header _ _ _) = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return empty -- No Plain, everything needs to be in a block-level tag blockToJATS opts (Plain lst) = blockToJATS opts (Para lst) -- title beginning with fig: indicates that the image is a figure diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index 27d26c7d9..86a7415cf 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -159,11 +159,13 @@ blockToTEI opts (Div (ident,_,_) [Para lst]) = do let attribs = [("id", ident) | not (null ident)] inTags False "p" attribs <$> inlinesToTEI opts lst blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs -blockToTEI _ (Header _ _ _) = return empty --- should not occur after hierarchicalize +blockToTEI _ h@(Header _ _ _) = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return empty -- For TEI simple, text must be within containing block element, so --- we use plainToPara to ensure that Plain text ends up contained by --- something. +-- we use treat as Para to ensure that Plain text ends up contained by +-- something: blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst -- title beginning with fig: indicates that the image is a figure --blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = -- cgit v1.2.3 From 21925284244bb88f927c287c21b48df35234b260 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Tue, 20 Jun 2017 15:48:00 +0300 Subject: Muse reader: check that headers start at the first column (#3749) --- src/Text/Pandoc/Readers/Muse.hs | 2 ++ test/Tests/Readers/Muse.hs | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index ac19a2382..84121cabe 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -205,6 +205,8 @@ separator = try $ do header :: PandocMonad m => MuseParser m (F Blocks) header = try $ do + st <- stateParserContext <$> getState + getPosition >>= \pos -> guard (st == NullState && sourceColumn pos == 1) level <- liftM length $ many1 $ char '*' guard $ level <= 5 skipSpaces diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index 85d6f5c48..cac69dffa 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -108,6 +108,12 @@ tests = , "Subsubsection" =: "***** Fifth level\n" =?> header 5 "Fifth level" + , "No headers below top level" =: + T.unlines [ "Foo[1]" + , "[1] * Bar" + ] =?> + para (text "Foo" <> + note (para "* Bar")) ] , testGroup "Footnotes" [ "Simple footnote" =: @@ -273,5 +279,16 @@ tests = , para "c" ] ] + -- Headers in first column of list continuation are not allowed + , "No headers in list continuation" =: + T.unlines + [ " - Foo" + , "" + , " * Bar" + ] =?> + bulletList [ mconcat [ para "Foo" + , para "* Bar" + ] + ] ] ] -- cgit v1.2.3 From b78afbd9803c75fcf2db32b4ce4ded1b8fa0224a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 16:09:33 +0200 Subject: Text.Pandoc.Lua: throw LuaException instead of using 'error'. Text.Pandoc.App: trap LuaException and issue a PandocFilterError. --- src/Text/Pandoc/App.hs | 12 +++++++++--- src/Text/Pandoc/Lua.hs | 21 ++++++++++++++++----- 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 2c5e1de6b..9778911ba 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -78,7 +78,7 @@ import Text.Pandoc.Builder (setMeta) import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getLog, setResourcePath, withMediaBag, setTrace) import Text.Pandoc.Highlighting (highlightingStyles) -import Text.Pandoc.Lua (runLuaFilter) +import Text.Pandoc.Lua (runLuaFilter, LuaException(..)) import Text.Pandoc.Writers.Math (defaultMathJaxURL, defaultKaTeXURL) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) @@ -782,10 +782,16 @@ expandFilterPath mbDatadir fp = liftIO $ do _ -> return fp applyLuaFilters :: MonadIO m - => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc + => Maybe FilePath -> [FilePath] -> [String] -> Pandoc + -> m Pandoc applyLuaFilters mbDatadir filters args d = do expandedFilters <- mapM (expandFilterPath mbDatadir) filters - foldrM ($) d $ map (flip runLuaFilter args) expandedFilters + let go f d' = liftIO $ do + res <- E.try (runLuaFilter f args d') + case res of + Right x -> return x + Left (LuaException s) -> E.throw (PandocFilterError f s) + foldrM ($) d $ map go expandedFilters applyFilters :: MonadIO m => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index f74c0e425..e9184c7ce 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -28,11 +28,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Pandoc lua utils. -} -module Text.Pandoc.Lua ( runLuaFilter, pushPandocModule ) where +module Text.Pandoc.Lua ( LuaException(..), + runLuaFilter, + pushPandocModule ) where +import Control.Exception import Control.Monad (unless, when, (>=>)) import Control.Monad.Trans (MonadIO (..)) import Data.Map (Map) +import Data.Typeable (Typeable) import Scripting.Lua (LuaState, StackValue (..)) import Text.Pandoc.Definition import Text.Pandoc.Lua.PandocModule (pushPandocModule) @@ -42,6 +46,11 @@ import Text.Pandoc.Walk import qualified Data.Map as Map import qualified Scripting.Lua as Lua +data LuaException = LuaException String + deriving (Show, Typeable) + +instance Exception LuaException + runLuaFilter :: (MonadIO m) => FilePath -> [String] -> Pandoc -> m Pandoc runLuaFilter filterPath args pd = liftIO $ do @@ -59,7 +68,7 @@ runLuaFilter filterPath args pd = liftIO $ do if (status /= 0) then do Just luaErrMsg <- Lua.peek lua 1 - error luaErrMsg + throwIO (LuaException luaErrMsg) else do Lua.call lua 0 Lua.multret newtop <- Lua.gettop lua @@ -195,8 +204,9 @@ instance StackValue a => PushViaFilterFunction (IO a) where Lua.call lua num 1 mbres <- Lua.peek lua (-1) case mbres of - Nothing -> error $ "Error while trying to get a filter's return " - ++ "value from lua stack." + Nothing -> throwIO $ LuaException + ("Error while trying to get a filter's return " + ++ "value from lua stack.") Just res -> res <$ Lua.pop lua 1 instance (StackValue a, PushViaFilterFunction b) => @@ -225,7 +235,8 @@ instance StackValue LuaFilterFunction where push lua v = pushFilterFunction lua v peek lua i = do isFn <- Lua.isfunction lua i - unless isFn (error $ "Not a function at index " ++ (show i)) + unless isFn (throwIO $ LuaException $ + "Not a function at index " ++ (show i)) Lua.pushvalue lua i push lua ("PANDOC_FILTER_FUNCTIONS"::String) Lua.rawget lua Lua.registryindex -- cgit v1.2.3 From 32f86067ecea5c8e63952dfec4fb30999d2a1f77 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 16:16:32 +0200 Subject: App: issue proper errors instead of using 'error'. --- src/Text/Pandoc/App.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 9778911ba..12429b51d 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -520,7 +520,8 @@ externalFilter f args' d = liftIO $ do (exitcode, outbs) <- E.handle filterException $ pipeProcess env' f' args'' $ encode d case exitcode of - ExitSuccess -> return $ either error id $ eitherDecode' outbs + ExitSuccess -> either (E.throwIO . PandocFilterError f) + return $ eitherDecode' outbs ExitFailure ec -> E.throwIO $ PandocFilterError f ("Filter returned error status " ++ show ec) where filterException :: E.SomeException -> IO a @@ -978,7 +979,7 @@ options = templ <- getDefaultTemplate Nothing arg case templ of Right t -> UTF8.hPutStr stdout t - Left e -> error $ show e + Left e -> E.throwIO $ PandocAppError (show e) exitSuccess) "FORMAT") "" -- "Print default template for FORMAT" -- cgit v1.2.3 From 429c4620dfe6455bb2d40c7388da384692f85031 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 16:44:05 +0200 Subject: Removed redundant import. --- src/Text/Pandoc/Writers/FB2.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index f561133fb..4c764d987 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -54,7 +54,6 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Class (PandocMonad, report) import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition -import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isHeaderBlock, isURI, linesToPara, -- cgit v1.2.3 From bd5a7e525800b41752e422dc9fb6e47ed8bf4479 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 20 Jun 2017 19:20:50 +0200 Subject: Lua: apply hslint suggestions --- src/Text/Pandoc/Lua.hs | 65 +++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 33 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index e9184c7ce..7cdcfd3d3 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -46,7 +46,7 @@ import Text.Pandoc.Walk import qualified Data.Map as Map import qualified Scripting.Lua as Lua -data LuaException = LuaException String +newtype LuaException = LuaException String deriving (Show, Typeable) instance Exception LuaException @@ -65,7 +65,7 @@ runLuaFilter filterPath args pd = liftIO $ do Lua.setglobal lua "pandoc" top <- Lua.gettop lua status <- Lua.loadfile lua filterPath - if (status /= 0) + if status /= 0 then do Just luaErrMsg <- Lua.peek lua 1 throwIO (LuaException luaErrMsg) @@ -89,8 +89,7 @@ pushGlobalFilter lua = *> Lua.rawseti lua (-2) 1 runAll :: [LuaFilter] -> Pandoc -> IO Pandoc -runAll [] = return -runAll (x:xs) = walkMWithLuaFilter x >=> runAll xs +runAll = foldr ((>=>) . walkMWithLuaFilter) return walkMWithLuaFilter :: LuaFilter -> Pandoc -> IO Pandoc walkMWithLuaFilter (LuaFilter lua fnMap) = @@ -134,20 +133,20 @@ execBlockLuaFilter lua fnMap x = do Nothing -> return x Just fn -> runLuaFilterFunction lua fn x case x of - BlockQuote _ -> tryFilter "BlockQuote" - BulletList _ -> tryFilter "BulletList" - CodeBlock _ _ -> tryFilter "CodeBlock" - DefinitionList _ -> tryFilter "DefinitionList" - Div _ _ -> tryFilter "Div" - Header _ _ _ -> tryFilter "Header" + BlockQuote{} -> tryFilter "BlockQuote" + BulletList{} -> tryFilter "BulletList" + CodeBlock{} -> tryFilter "CodeBlock" + DefinitionList{} -> tryFilter "DefinitionList" + Div{} -> tryFilter "Div" + Header{} -> tryFilter "Header" HorizontalRule -> tryFilter "HorizontalRule" - LineBlock _ -> tryFilter "LineBlock" + LineBlock{} -> tryFilter "LineBlock" Null -> tryFilter "Null" - Para _ -> tryFilter "Para" - Plain _ -> tryFilter "Plain" - RawBlock _ _ -> tryFilter "RawBlock" - OrderedList _ _ -> tryFilter "OrderedList" - Table _ _ _ _ _ -> tryFilter "Table" + Para{} -> tryFilter "Para" + Plain{} -> tryFilter "Plain" + RawBlock{} -> tryFilter "RawBlock" + OrderedList{} -> tryFilter "OrderedList" + Table{} -> tryFilter "Table" execInlineLuaFilter :: LuaState -> FunctionMap @@ -165,27 +164,27 @@ execInlineLuaFilter lua fnMap x = do Nothing -> tryFilterAlternatives alternatives Just fn -> runLuaFilterFunction lua fn x case x of - Cite _ _ -> tryFilter "Cite" - Code _ _ -> tryFilter "Code" - Emph _ -> tryFilter "Emph" - Image _ _ _ -> tryFilter "Image" + Cite{} -> tryFilter "Cite" + Code{} -> tryFilter "Code" + Emph{} -> tryFilter "Emph" + Image{} -> tryFilter "Image" LineBreak -> tryFilter "LineBreak" - Link _ _ _ -> tryFilter "Link" + Link{} -> tryFilter "Link" Math DisplayMath _ -> tryFilterAlternatives ["DisplayMath", "Math"] Math InlineMath _ -> tryFilterAlternatives ["InlineMath", "Math"] - Note _ -> tryFilter "Note" + Note{} -> tryFilter "Note" Quoted DoubleQuote _ -> tryFilterAlternatives ["DoubleQuoted", "Quoted"] Quoted SingleQuote _ -> tryFilterAlternatives ["SingleQuoted", "Quoted"] - RawInline _ _ -> tryFilter "RawInline" - SmallCaps _ -> tryFilter "SmallCaps" + RawInline{} -> tryFilter "RawInline" + SmallCaps{} -> tryFilter "SmallCaps" SoftBreak -> tryFilter "SoftBreak" Space -> tryFilter "Space" - Span _ _ -> tryFilter "Span" - Str _ -> tryFilter "Str" - Strikeout _ -> tryFilter "Strikeout" - Strong _ -> tryFilter "Strong" - Subscript _ -> tryFilter "Subscript" - Superscript _ -> tryFilter "Superscript" + Span{} -> tryFilter "Span" + Str{} -> tryFilter "Str" + Strikeout{} -> tryFilter "Strikeout" + Strong{} -> tryFilter "Strong" + Subscript{} -> tryFilter "Subscript" + Superscript{} -> tryFilter "Superscript" instance StackValue LuaFilter where valuetype _ = Lua.TTABLE @@ -232,11 +231,11 @@ pushFilterFunction lua lf = do instance StackValue LuaFilterFunction where valuetype _ = Lua.TFUNCTION - push lua v = pushFilterFunction lua v + push = pushFilterFunction peek lua i = do isFn <- Lua.isfunction lua i - unless isFn (throwIO $ LuaException $ - "Not a function at index " ++ (show i)) + unless isFn . + throwIO . LuaException $ "Not a function at index " ++ show i Lua.pushvalue lua i push lua ("PANDOC_FILTER_FUNCTIONS"::String) Lua.rawget lua Lua.registryindex -- cgit v1.2.3 From f4c12606e170ffaf558d07c21514ef5dd44d1b40 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 20 Jun 2017 20:51:10 +0200 Subject: Lua: use registry to store function references Using the registry directly instead of a custom table is cleaner and more efficient. The performance improvement is especially noticable when filtering on frequent elements like Str. --- src/Text/Pandoc/Lua.hs | 48 +++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 29 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 7cdcfd3d3..f965bd95d 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -56,10 +56,6 @@ runLuaFilter :: (MonadIO m) runLuaFilter filterPath args pd = liftIO $ do lua <- Lua.newstate Lua.openlibs lua - -- create table in registry to store filter functions - Lua.push lua "PANDOC_FILTER_FUNCTIONS" - Lua.newtable lua - Lua.rawset lua Lua.registryindex -- store module in global "pandoc" pushPandocModule lua Lua.setglobal lua "pandoc" @@ -110,7 +106,7 @@ execDocLuaFilter lua fnMap x = do let docFnName = "Doc" case Map.lookup docFnName fnMap of Nothing -> return x - Just fn -> runLuaFilterFunction lua fn x + Just fn -> runFilterFunction lua fn x execMetaLuaFilter :: LuaState -> FunctionMap @@ -120,7 +116,7 @@ execMetaLuaFilter lua fnMap pd@(Pandoc meta blks) = do case Map.lookup metaFnName fnMap of Nothing -> return pd Just fn -> do - meta' <- runLuaFilterFunction lua fn meta + meta' <- runFilterFunction lua fn meta return $ Pandoc meta' blks execBlockLuaFilter :: LuaState @@ -131,7 +127,7 @@ execBlockLuaFilter lua fnMap x = do tryFilter filterFnName = case Map.lookup filterFnName fnMap of Nothing -> return x - Just fn -> runLuaFilterFunction lua fn x + Just fn -> runFilterFunction lua fn x case x of BlockQuote{} -> tryFilter "BlockQuote" BulletList{} -> tryFilter "BulletList" @@ -156,13 +152,13 @@ execInlineLuaFilter lua fnMap x = do tryFilter filterFnName = case Map.lookup filterFnName fnMap of Nothing -> return x - Just fn -> runLuaFilterFunction lua fn x + Just fn -> runFilterFunction lua fn x let tryFilterAlternatives :: [String] -> IO Inline tryFilterAlternatives [] = return x tryFilterAlternatives (fnName : alternatives) = case Map.lookup fnName fnMap of Nothing -> tryFilterAlternatives alternatives - Just fn -> runLuaFilterFunction lua fn x + Just fn -> runFilterFunction lua fn x case x of Cite{} -> tryFilter "Cite" Code{} -> tryFilter "Code" @@ -213,34 +209,28 @@ instance (StackValue a, PushViaFilterFunction b) => pushViaFilterFunction' lua lf pushArgs num x = pushViaFilterFunction' lua lf (pushArgs *> push lua x) (num + 1) --- | Push an value to the stack via a lua filter function. The function is +-- | Push a value to the stack via a lua filter function. The filter function is -- called with all arguments that are passed to this function and is expected to -- return a single value. -runLuaFilterFunction :: PushViaFilterFunction a +runFilterFunction :: PushViaFilterFunction a => LuaState -> LuaFilterFunction -> a -runLuaFilterFunction lua lf = pushViaFilterFunction' lua lf (return ()) 0 +runFilterFunction lua lf = pushViaFilterFunction' lua lf (return ()) 0 -- | Push the filter function to the top of the stack. pushFilterFunction :: Lua.LuaState -> LuaFilterFunction -> IO () -pushFilterFunction lua lf = do +pushFilterFunction lua lf = -- The function is stored in a lua registry table, retrieve it from there. - push lua ("PANDOC_FILTER_FUNCTIONS"::String) - Lua.rawget lua Lua.registryindex - Lua.rawgeti lua (-1) (functionIndex lf) - Lua.remove lua (-2) -- remove registry table from stack + Lua.rawgeti lua Lua.registryindex (functionIndex lf) + +registerFilterFunction :: LuaState -> Int -> IO LuaFilterFunction +registerFilterFunction lua idx = do + isFn <- Lua.isfunction lua idx + unless isFn . throwIO . LuaException $ "Not a function at index " ++ show idx + Lua.pushvalue lua idx + refIdx <- Lua.ref lua Lua.registryindex + return $ LuaFilterFunction refIdx instance StackValue LuaFilterFunction where valuetype _ = Lua.TFUNCTION push = pushFilterFunction - peek lua i = do - isFn <- Lua.isfunction lua i - unless isFn . - throwIO . LuaException $ "Not a function at index " ++ show i - Lua.pushvalue lua i - push lua ("PANDOC_FILTER_FUNCTIONS"::String) - Lua.rawget lua Lua.registryindex - len <- Lua.objlen lua (-1) - Lua.insert lua (-2) - Lua.rawseti lua (-2) (len + 1) - Lua.pop lua 1 - return . Just $ LuaFilterFunction (len + 1) + peek = fmap (fmap Just) . registerFilterFunction -- cgit v1.2.3 From 5ec84bfeb42e73acb4e309ccde34905b3254fb5c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 21:11:01 +0200 Subject: Text.Pandoc.Lua - added DeriveDataTypeable for ghc 7.8. --- src/Text/Pandoc/Lua.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index f965bd95d..90f72d685 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -1,3 +1,7 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE DeriveDataTypeable #-} {- Copyright © 2017 Albert Krewinkel @@ -15,9 +19,6 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} {- | Module : Text.Pandoc.Lua Copyright : Copyright © 2017 Albert Krewinkel -- cgit v1.2.3 From 4ba5ef46aeaf979bd74d8f4a5f6cea116527ddd3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 21:25:39 +0200 Subject: Updated code example. --- src/Text/Pandoc.hs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 8ee1adf13..b8dba860a 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -39,12 +39,18 @@ inline links: > module Main where > import Text.Pandoc +> import Data.Text (Text) +> import qualified Data.Text.IO as T > -> markdownToRST :: String -> Either PandocError String -> markdownToRST = -> writeRST def {writerReferenceLinks = True} . readMarkdown def +> mdToRST :: Text -> IO Text +> mdToRST txt = runIOorExplode $ +> readMarkdown def txt +> >>= writeRST def{ writerReferenceLinks = True } + > -> main = getContents >>= either error return markdownToRST >>= putStrLn +> main :: IO () +> main = do +> T.getContents >>= mdToRST >>= T.putStrLn Note: all of the readers assume that the input text has @'\n'@ line endings. So if you get your input text from a web form, -- cgit v1.2.3 From 2363e6a15bdde1c206d65461bd2e21f773dbc808 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 21:52:13 +0200 Subject: Move CR filtering from tabFilter to the readers. The readers previously assumed that CRs had been filtered from the input. Now we strip the CRs in the readers themselves, before parsing. (The point of this is just to simplify the parsers.) Shared now exports a new function `crFilter`. [API change] And `tabFilter` no longer filters CRs. --- src/Text/Pandoc.hs | 4 - src/Text/Pandoc/App.hs | 4 +- src/Text/Pandoc/Readers/DocBook.hs | 5 +- src/Text/Pandoc/Readers/HTML.hs | 4 +- src/Text/Pandoc/Readers/Haddock.hs | 4 +- src/Text/Pandoc/Readers/LaTeX.hs | 3 +- src/Text/Pandoc/Readers/Markdown.hs | 2 +- src/Text/Pandoc/Readers/MediaWiki.hs | 5 +- src/Text/Pandoc/Readers/Muse.hs | 3 +- src/Text/Pandoc/Readers/OPML.hs | 4 +- src/Text/Pandoc/Readers/Org.hs | 3 +- src/Text/Pandoc/Readers/RST.hs | 2 +- src/Text/Pandoc/Readers/TWiki.hs | 3 +- src/Text/Pandoc/Readers/Textile.hs | 4 +- src/Text/Pandoc/Readers/Txt2Tags.hs | 6 +- src/Text/Pandoc/Readers/Vimwiki.hs | 151 ++++++++++++++++++----------------- src/Text/Pandoc/Shared.hs | 12 ++- 17 files changed, 115 insertions(+), 104 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index b8dba860a..9fa5f098d 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -52,10 +52,6 @@ inline links: > main = do > T.getContents >>= mdToRST >>= T.putStrLn -Note: all of the readers assume that the input text has @'\n'@ -line endings. So if you get your input text from a web form, -you should remove @'\r'@ characters using @filter (/='\r')@. - -} module Text.Pandoc diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 12429b51d..7e9cfdd95 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -381,8 +381,8 @@ convertWithOpts opts = do | otherwise -> [] let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" - then 0 - else optTabStop opts) + then 0 + else optTabStop opts) readSources :: [FilePath] -> PandocIO Text readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$> diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 6108aae7f..c1e4d742c 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,6 +1,6 @@ module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Data.Char (toUpper) -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, crFilter) import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder @@ -526,7 +526,8 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ T.unpack inp + let tree = normalizeTree . parseXML . handleInstructions + $ T.unpack $ crFilter inp (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e203298b8..301afa207 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -45,7 +45,7 @@ import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, addMetaField - , escapeURI, safeRead ) + , escapeURI, safeRead, crFilter ) import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled, Extension (Ext_epub_html_exts, Ext_raw_html, Ext_native_divs, Ext_native_spans)) @@ -82,7 +82,7 @@ readHtml :: PandocMonad m readHtml opts inp = do let tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } - inp + (crFilter inp) parseDoc = do blocks <- (fixPlains False) . mconcat <$> manyTill block eof meta <- stateMeta . parserState <$> getState diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index b22b71b96..a09ed8be9 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -27,7 +27,7 @@ import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared (splitBy, trim) +import Text.Pandoc.Shared (splitBy, trim, crFilter) -- | Parse Haddock markup and return a 'Pandoc' document. @@ -35,7 +35,7 @@ readHaddock :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readHaddock opts s = case readHaddockEither opts (unpack s) of +readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 1ac872933..090dc5fdb 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -63,7 +63,8 @@ readLaTeX :: PandocMonad m -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readLaTeX opts ltx = do - parsed <- readWithM parseLaTeX def{ stateOptions = opts } (unpack ltx) + parsed <- readWithM parseLaTeX def{ stateOptions = opts } + (unpack (crFilter ltx)) case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index edb356b39..96885c9b1 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -74,7 +74,7 @@ readMarkdown :: PandocMonad m -> m Pandoc readMarkdown opts s = do parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index e371ff152..a7f073d50 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -58,7 +58,8 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) -import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim) +import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim, + crFilter) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) @@ -77,7 +78,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (unpack s ++ "\n") + (unpack (crFilter s) ++ "\n") case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 84121cabe..7eee064a7 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -57,6 +57,7 @@ import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Parsing hiding (macro, nested) import Text.Pandoc.Readers.HTML (htmlTag) import Text.Pandoc.XML (fromEntities) @@ -68,7 +69,7 @@ readMuse :: PandocMonad m -> Text -> m Pandoc readMuse opts s = do - res <- readWithM parseMuse def{ stateOptions = opts } (unpack s) + res <- readWithM parseMuse def{ stateOptions = opts } (unpack (crFilter s)) case res of Left e -> throwError e Right d -> return d diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index e9f876525..c25ace800 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -9,6 +9,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.XML.Light @@ -32,7 +33,8 @@ instance Default OPMLState where readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML _ inp = do (bs, st') <- flip runStateT def - (mapM parseBlock $ normalizeTree $ parseXML (unpack inp)) + (mapM parseBlock $ normalizeTree $ + parseXML (unpack (crFilter inp))) return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5e0d67d10..eaccc251c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -36,6 +36,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options import Text.Pandoc.Parsing (reportLogMessages) +import Text.Pandoc.Shared (crFilter) import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) @@ -51,7 +52,7 @@ readOrg :: PandocMonad m readOrg opts s = do parsed <- flip runReaderT def $ readWithM parseOrg (optionsToParserState opts) - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left _ -> throwError $ PandocParseError "problem parsing org" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index fb5f6f2d4..d13f697b7 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -68,7 +68,7 @@ readRST :: PandocMonad m -> m Pandoc readRST opts s = do parsed <- (readWithM parseRST) def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 91ee8d1f1..210d3e5aa 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -48,6 +48,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, macro, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) import Text.Pandoc.XML (fromEntities) +import Text.Pandoc.Shared (crFilter) import Data.Text (Text) import qualified Data.Text as T @@ -58,7 +59,7 @@ readTWiki :: PandocMonad m -> m Pandoc readTWiki opts s = do res <- readWithM parseTWiki def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case res of Left e -> throwError e Right d -> return d diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 96b51feef..a80d75340 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -68,7 +68,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) -import Text.Pandoc.Shared (trim) +import Text.Pandoc.Shared (trim, crFilter) import Data.Text (Text) import qualified Data.Text as T @@ -79,7 +79,7 @@ readTextile :: PandocMonad m -> m Pandoc readTextile opts s = do parsed <- readWithM parseTextile def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 260bb7fff..5708358f6 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -41,7 +41,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (macro, space, spaces, uri) -import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI) +import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI, crFilter) import Control.Monad (guard, void, when) import Control.Monad.Reader (Reader, asks, runReader) import Data.Default @@ -95,7 +95,9 @@ readTxt2Tags :: PandocMonad m -> m Pandoc readTxt2Tags opts s = do meta <- getT2TMeta - let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (T.unpack s ++ "\n\n") + let parsed = flip runReader meta $ + readWithM parseT2T (def {stateOptions = opts}) $ + T.unpack (crFilter s) ++ "\n\n" case parsed of Right result -> return $ result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 0cfbec34d..98f04eda9 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -33,10 +33,10 @@ Conversion of vimwiki text to 'Pandoc' document. * [X] header * [X] hrule * [X] comment - * [X] blockquote - * [X] preformatted - * [X] displaymath - * [X] bulletlist / orderedlist + * [X] blockquote + * [X] preformatted + * [X] displaymath + * [X] bulletlist / orderedlist * [X] orderedlist with 1., i., a) etc identification. * [X] todo lists -- not list builder with attributes? using span. * [X] table @@ -57,8 +57,8 @@ Conversion of vimwiki text to 'Pandoc' document. * [X] sub- and super-scripts * misc: * [X] `TODO:` mark - * [X] metadata placeholders: %title and %date - * [O] control placeholders: %template and %nohtml -- %template added to + * [X] metadata placeholders: %title and %date + * [O] control placeholders: %template and %nohtml -- %template added to meta, %nohtml ignored --} @@ -66,29 +66,29 @@ module Text.Pandoc.Readers.Vimwiki ( readVimwiki ) where import Control.Monad.Except (throwError) import Control.Monad (guard) -import Data.Default +import Data.Default import Data.Maybe import Data.Monoid ((<>)) import Data.List (isInfixOf, isPrefixOf) import Data.Text (Text, unpack) import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, fromList, toList) -import qualified Text.Pandoc.Builder - as B (headerWith, str, space, strong, emph, strikeout, code, link, image, - spanWith, para, horizontalRule, blockQuote, bulletList, plain, - orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, +import qualified Text.Pandoc.Builder + as B (headerWith, str, space, strong, emph, strikeout, code, link, image, + spanWith, para, horizontalRule, blockQuote, bulletList, plain, + orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, setMeta, definitionList, superscript, subscript) import Text.Pandoc.Class (PandocMonad(..)) -import Text.Pandoc.Definition (Pandoc(..), Inline(Space), - Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), +import Text.Pandoc.Definition (Pandoc(..), Inline(Space), + Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), ListNumberDelim(..)) import Text.Pandoc.Options (ReaderOptions) -import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState, +import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState, stateMeta', blanklines, registerHeader, spaceChar, emailAddress, uri, F, runF, orderedListMarker, many1Till) -import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify) -import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf, +import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify, crFilter) +import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf, alphaNum) -import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1, +import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1, notFollowedBy, option) import Text.Parsec.Prim (many, try, updateState, getState) import Text.Parsec.Char (oneOf, space) @@ -97,7 +97,8 @@ import Text.Parsec.Prim ((<|>)) readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readVimwiki opts s = do - res <- readWithM parseVimwiki def{ stateOptions = opts } (unpack s) + res <- readWithM parseVimwiki def{ stateOptions = opts } + (unpack (crFilter s)) case res of Left e -> throwError e Right result -> return result @@ -110,7 +111,7 @@ type VwParser = ParserT [Char] ParserState specialChars :: [Char] specialChars = "=*-#[]_~{}`$|:%^," -spaceChars :: [Char] +spaceChars :: [Char] spaceChars = " \t\n" -- main parser @@ -134,7 +135,7 @@ block = do , mempty <$ comment , mixedList , preformatted - , displayMath + , displayMath , table , mempty <$ placeholder , blockQuote @@ -149,14 +150,14 @@ blockML = choice [preformatted, displayMath, table] header :: PandocMonad m => VwParser m Blocks header = try $ do - sp <- many spaceChar + sp <- many spaceChar eqs <- many1 (char '=') spaceChar let lev = length eqs guard $ lev <= 6 - contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar - >> (string eqs) >> many spaceChar >> newline) - attr <- registerHeader (makeId contents, + contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar + >> (string eqs) >> many spaceChar >> newline) + attr <- registerHeader (makeId contents, (if sp == "" then [] else ["justcenter"]), []) contents return $ B.headerWith attr lev contents @@ -184,7 +185,7 @@ blockQuote = try $ do else return $ B.blockQuote $ B.plain contents definitionList :: PandocMonad m => VwParser m Blocks -definitionList = try $ +definitionList = try $ B.definitionList <$> (many1 (dlItemWithDT <|> dlItemWithoutDT)) dlItemWithDT :: PandocMonad m => VwParser m (Inlines, [Blocks]) @@ -199,15 +200,15 @@ dlItemWithoutDT = do return $ (mempty, dds) definitionDef :: PandocMonad m => VwParser m Blocks -definitionDef = try $ - (notFollowedBy definitionTerm) >> many spaceChar +definitionDef = try $ + (notFollowedBy definitionTerm) >> many spaceChar >> (definitionDef1 <|> definitionDef2) definitionDef1 :: PandocMonad m => VwParser m Blocks definitionDef1 = try $ mempty <$ defMarkerE definitionDef2 :: PandocMonad m => VwParser m Blocks -definitionDef2 = try $ B.plain <$> +definitionDef2 = try $ B.plain <$> (defMarkerM >> (trimInlines . mconcat <$> many inline') <* newline) @@ -218,11 +219,11 @@ definitionTerm = try $ do return x definitionTerm1 :: PandocMonad m => VwParser m Inlines -definitionTerm1 = try $ +definitionTerm1 = try $ trimInlines . mconcat <$> manyTill inline' (try $ defMarkerE) definitionTerm2 :: PandocMonad m => VwParser m Inlines -definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline' +definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline' (try $ lookAhead $ (defMarkerM >> notFollowedBy hasDefMarkerM)) defMarkerM :: PandocMonad m => VwParser m Char @@ -236,8 +237,8 @@ hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM) preformatted :: PandocMonad m => VwParser m Blocks preformatted = try $ do - many spaceChar >> string "{{{" - attrText <- many (noneOf "\n") + many spaceChar >> string "{{{" + attrText <- many (noneOf "\n") lookAhead newline contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}" >> many spaceChar >> newline)) @@ -246,14 +247,14 @@ preformatted = try $ do else return $ B.codeBlockWith (makeAttr attrText) contents makeAttr :: String -> Attr -makeAttr s = +makeAttr s = let xs = splitBy (`elem` " \t") s in ("", [], catMaybes $ map nameValue xs) nameValue :: String -> Maybe (String, String) -nameValue s = +nameValue s = let t = splitBy (== '=') s in - if length t /= 2 + if length t /= 2 then Nothing else let (a, b) = (head t, last t) in if ((length b) < 2) || ((head b, last b) /= ('"', '"')) @@ -269,7 +270,7 @@ displayMath = try $ do >> many spaceChar >> newline)) let contentsWithTags | mathTag == "" = "\\[" ++ contents ++ "\n\\]" - | otherwise = "\\begin{" ++ mathTag ++ "}" ++ contents + | otherwise = "\\begin{" ++ mathTag ++ "}" ++ contents ++ "\n\\end{" ++ mathTag ++ "}" return $ B.plain $ B.str contentsWithTags @@ -286,7 +287,7 @@ mixedList' prevInd = do else do listStart curLine <- listItemContent - let listBuilder = + let listBuilder = if builder == "ul" then B.bulletList else B.orderedList (subList, lowInd) <- (mixedList' curInd) if lowInd >= curInd @@ -297,7 +298,7 @@ mixedList' prevInd = do then return ([listBuilder curList], endInd) else return (curList, endInd) else do - let (curList, endInd) = ((combineList curLine subList), + let (curList, endInd) = ((combineList curLine subList), lowInd) if curInd > prevInd then return ([listBuilder curList], endInd) @@ -328,13 +329,13 @@ blocksThenInline = try $ do return $ mconcat $ y ++ [x] listTodoMarker :: PandocMonad m => VwParser m Inlines -listTodoMarker = try $ do - x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) +listTodoMarker = try $ do + x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) (oneOf " .oOX") return $ makeListMarkerSpan x makeListMarkerSpan :: Char -> Inlines -makeListMarkerSpan x = +makeListMarkerSpan x = let cl = case x of ' ' -> "done0" '.' -> "done1" @@ -347,9 +348,9 @@ makeListMarkerSpan x = combineList :: Blocks -> [Blocks] -> [Blocks] combineList x [y] = case toList y of - [BulletList z] -> [fromList $ (toList x) + [BulletList z] -> [fromList $ (toList x) ++ [BulletList z]] - [OrderedList attr z] -> [fromList $ (toList x) + [OrderedList attr z] -> [fromList $ (toList x) ++ [OrderedList attr z]] _ -> x:[y] combineList x xs = x:xs @@ -365,9 +366,9 @@ bulletListMarkers :: PandocMonad m => VwParser m String bulletListMarkers = "ul" <$ (char '*' <|> char '-') orderedListMarkers :: PandocMonad m => VwParser m String -orderedListMarkers = - ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen) - <$> orderedListMarker +orderedListMarkers = + ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen) + <$> orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha]))) <|> ("ol" <$ char '#') @@ -397,14 +398,14 @@ table2 = try $ do tableHeaderSeparator :: PandocMonad m => VwParser m () tableHeaderSeparator = try $ do - many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|') + many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|') >> many spaceChar >> newline return () - + tableRow :: PandocMonad m => VwParser m [Blocks] tableRow = try $ do many spaceChar >> char '|' - s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar + s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar >> newline)) guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|") tr <- many tableCell @@ -416,25 +417,25 @@ tableCell = try $ B.plain <$> trimInlines . mconcat <$> (manyTill inline' (char '|')) placeholder :: PandocMonad m => VwParser m () -placeholder = try $ +placeholder = try $ (choice (ph <$> ["title", "date"])) <|> noHtmlPh <|> templatePh ph :: PandocMonad m => String -> VwParser m () ph s = try $ do many spaceChar >> (string $ '%':s) >> spaceChar - contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline))) + contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline))) --use lookAhead because of placeholder in the whitespace parser let meta' = return $ B.setMeta s contents nullMeta :: F Meta updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' } noHtmlPh :: PandocMonad m => VwParser m () noHtmlPh = try $ - () <$ (many spaceChar >> string "%nohtml" >> many spaceChar + () <$ (many spaceChar >> string "%nohtml" >> many spaceChar >> (lookAhead newline)) templatePh :: PandocMonad m => VwParser m () templatePh = try $ - () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n") + () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n") >> (lookAhead newline)) -- inline parser @@ -475,7 +476,7 @@ str :: PandocMonad m => VwParser m Inlines str = B.str <$> (many1 $ noneOf $ spaceChars ++ specialChars) whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines -whitespace endline = B.space <$ (skipMany1 spaceChar <|> +whitespace endline = B.space <$ (skipMany1 spaceChar <|> (try (newline >> (comment <|> placeholder)))) <|> B.softbreak <$ endline @@ -493,24 +494,24 @@ bareURL = try $ do strong :: PandocMonad m => VwParser m Inlines strong = try $ do s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*") - guard $ (not $ (head s) `elem` spaceChars) + guard $ (not $ (head s) `elem` spaceChars) && (not $ (last s) `elem` spaceChars) char '*' - contents <- mconcat <$> (manyTill inline' $ char '*' + contents <- mconcat <$> (manyTill inline' $ char '*' >> notFollowedBy alphaNum) - return $ (B.spanWith ((makeId contents), [], []) mempty) + return $ (B.spanWith ((makeId contents), [], []) mempty) <> (B.strong contents) -makeId :: Inlines -> String +makeId :: Inlines -> String makeId i = concat (stringify <$> (toList i)) emph :: PandocMonad m => VwParser m Inlines emph = try $ do s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_") - guard $ (not $ (head s) `elem` spaceChars) + guard $ (not $ (head s) `elem` spaceChars) && (not $ (last s) `elem` spaceChars) char '_' - contents <- mconcat <$> (manyTill inline' $ char '_' + contents <- mconcat <$> (manyTill inline' $ char '_' >> notFollowedBy alphaNum) return $ B.emph contents @@ -532,32 +533,32 @@ superscript = try $ subscript :: PandocMonad m => VwParser m Inlines subscript = try $ - B.subscript <$> mconcat <$> (string ",," + B.subscript <$> mconcat <$> (string ",," >> many1Till inline' (try $ string ",,")) link :: PandocMonad m => VwParser m Inlines -link = try $ do +link = try $ do string "[[" contents <- lookAhead $ manyTill anyChar (string "]]") - case '|' `elem` contents of + case '|' `elem` contents of False -> do - manyTill anyChar (string "]]") + manyTill anyChar (string "]]") -- not using try here because [[hell]o]] is not rendered as a link in vimwiki return $ B.link (procLink contents) "" (B.str contents) - True -> do + True -> do url <- manyTill anyChar $ char '|' lab <- mconcat <$> (manyTill inline $ string "]]") return $ B.link (procLink url) "" lab image :: PandocMonad m => VwParser m Inlines -image = try $ do +image = try $ do string "{{" contentText <- lookAhead $ manyTill (noneOf "\n") (try $ string "}}") images $ length $ filter (== '|') contentText images :: PandocMonad m => Int -> VwParser m Inlines images k - | k == 0 = do + | k == 0 = do imgurl <- manyTill anyChar (try $ string "}}") return $ B.image (procImgurl imgurl) "" (B.str "") | k == 1 = do @@ -578,15 +579,15 @@ images k procLink' :: String -> String procLink' s - | ((take 6 s) == "local:") = "file" ++ (drop 5 s) + | ((take 6 s) == "local:") = "file" ++ (drop 5 s) | ((take 6 s) == "diary:") = "diary/" ++ (drop 6 s) ++ ".html" - | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", - "news:", "telnet:" ]) + | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", + "news:", "telnet:" ]) = s | s == "" = "" | (last s) == '/' = s | otherwise = s ++ ".html" - + procLink :: String -> String procLink s = procLink' x ++ y where (x, y) = break (=='#') s @@ -606,7 +607,7 @@ tag = try $ do s <- manyTill (noneOf spaceChars) (try (char ':' >> (lookAhead space))) guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":") let ss = splitBy (==':') s - return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss)) + return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss)) todoMark :: PandocMonad m => VwParser m Inlines todoMark = try $ do @@ -623,7 +624,7 @@ endlineBQ = () <$ try (newline <* nFBTTBSB <* string " ") endlineML :: PandocMonad m => VwParser m () endlineML = () <$ try (newline <* nFBTTBSB <* many1 spaceChar) ---- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks +--- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks nFBTTBSB :: PandocMonad m => VwParser m () nFBTTBSB = notFollowedBy newline <* @@ -639,7 +640,7 @@ hasDefMarker :: PandocMonad m => VwParser m () hasDefMarker = () <$ (manyTill (noneOf "\n") (string "::" >> oneOf spaceChars)) makeTagSpan' :: String -> Inlines -makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> +makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> B.spanWith (s, ["tag"], []) (B.str s) makeTagSpan :: String -> Inlines @@ -647,7 +648,7 @@ makeTagSpan s = (B.space) <> (makeTagSpan' s) mathTagParser :: PandocMonad m => VwParser m String mathTagParser = do - s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) + s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space))) char '%' >> string s >> char '%' return s diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 7b299c56b..53fd38ffd 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -49,6 +49,7 @@ module Text.Pandoc.Shared ( toRomanNumeral, escapeURI, tabFilter, + crFilter, -- * Date/time normalizeDate, -- * Pandoc block and inline list processing @@ -279,13 +280,12 @@ escapeURI = escapeURIString (not . needsEscaping) where needsEscaping c = isSpace c || c `elem` ['<','>','|','"','{','}','[',']','^', '`'] --- | Convert tabs to spaces and filter out DOS line endings. --- Tabs will be preserved if tab stop is set to 0. +-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0. tabFilter :: Int -- ^ Tab stop -> T.Text -- ^ Input -> T.Text -tabFilter tabStop = T.filter (/= '\r') . T.unlines . - (if tabStop == 0 then id else map go) . T.lines +tabFilter 0 = id +tabFilter tabStop = T.unlines . map go . T.lines where go s = let (s1, s2) = T.break (== '\t') s in if T.null s2 @@ -294,6 +294,10 @@ tabFilter tabStop = T.filter (/= '\r') . T.unlines . (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ") <> go (T.drop 1 s2) +-- | Strip out DOS line endings. +crFilter :: T.Text -> T.Text +crFilter = T.filter (/= '\r') + -- -- Date/time -- -- cgit v1.2.3 From 8f8f505fd4db9a4903dc616dc179901d2492c6dd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 22:41:34 +0200 Subject: Text.Pandoc.Error: added PandocTemplateError. --- src/Text/Pandoc/Error.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 3cf381168..0056a1591 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -61,6 +61,7 @@ data PandocError = PandocIOError String IOError | PandocFilterError String String | PandocCouldNotFindDataFileError String | PandocResourceNotFound String + | PandocTemplateError String | PandocAppError String deriving (Show, Typeable, Generic) @@ -101,6 +102,7 @@ handleError (Left e) = "Could not find data file " ++ fn PandocResourceNotFound fn -> err 99 $ "File " ++ fn ++ " not found in resource path" + PandocTemplateError s -> err 5 s PandocAppError s -> err 1 s err :: Int -> String -> IO a -- cgit v1.2.3 From c0a12860253c2ddf67d6e9bcb0d6b67f4be18c18 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 22:41:56 +0200 Subject: Text.Pandoc.Templates: change type of renderTemplate'. Now it runs in PandocMonad and raises a proper PandocTemplateError if there are problems, rather than failing with uncatchable 'error'. --- src/Text/Pandoc/Templates.hs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 9b635a97b..1a26b7168 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -33,20 +33,20 @@ A simple templating system with variable substitution and conditionals. -} -module Text.Pandoc.Templates ( renderTemplate +module Text.Pandoc.Templates ( module Text.DocTemplates , renderTemplate' - , TemplateTarget - , varListToJSON - , compileTemplate - , Template - , getDefaultTemplate ) where + , getDefaultTemplate + ) where import qualified Control.Exception as E (IOException, try) +import Control.Monad.Except (throwError) import Data.Aeson (ToJSON (..)) import qualified Data.Text as T import System.FilePath ((<.>), ()) import Text.DocTemplates (Template, TemplateTarget, applyTemplate, compileTemplate, renderTemplate, varListToJSON) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Error import Text.Pandoc.Shared (readDataFileUTF8) -- | Get default template for the specified writer. @@ -72,7 +72,11 @@ getDefaultTemplate user writer = do _ -> let fname = "templates" "default" <.> format in E.try $ readDataFileUTF8 user fname --- | Like 'applyTemplate', but raising an error if compilation fails. -renderTemplate' :: (ToJSON a, TemplateTarget b) => String -> a -> b -renderTemplate' template = either error id . applyTemplate (T.pack template) - +-- | Like 'applyTemplate', but runs in PandocMonad and +-- raises an error if compilation fails. +renderTemplate' :: (PandocMonad m, ToJSON a, TemplateTarget b) + => String -> a -> m b +renderTemplate' template context = do + case applyTemplate (T.pack template) context of + Left e -> throwError (PandocTemplateError e) + Right r -> return r -- cgit v1.2.3 From 21c4281b13bf2d72012139ecc5c16cf7cae21de1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 22:43:06 +0200 Subject: Odt reader: replaced collectRights with rights from Data.Either. --- src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs | 5 ----- src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs | 3 ++- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs index 4d6a67b8e..8c47cdaf5 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs @@ -124,8 +124,3 @@ instance ChoiceVector SuccessList where spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing where unTagRight (Right x) = (x:) unTagRight _ = id - --- | Like 'catMaybes', but for 'Either'. -collectRights :: [Either _l r] -> [r] -collectRights = collectNonFailing . untag . spreadChoice . SuccessList - where untag = fromLeft (error "Unexpected Left") diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 1c3e08a7f..428048427 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -71,6 +71,7 @@ import Control.Applicative hiding ( liftA, liftA2 ) import Control.Monad ( MonadPlus ) import Control.Arrow +import Data.Either ( rights ) import qualified Data.Map as M import Data.Default import Data.Maybe @@ -604,7 +605,7 @@ tryAll :: (NameSpaceID nsID) -> XMLConverter nsID extraState b [a] tryAll nsID name a = prepareIteration nsID name >>> iterateS (switchingTheStack a) - >>^ collectRights + >>^ rights -------------------------------------------------------------------------------- -- Matching children -- cgit v1.2.3 From c349f0b0baef5866041b6668fff5bbb16f0002f9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 22:43:48 +0200 Subject: Writers: adjusted for renderTemplate' changes. Now we raise a proper error on template failure. --- src/Text/Pandoc/Writers/AsciiDoc.hs | 2 +- src/Text/Pandoc/Writers/CommonMark.hs | 6 +++--- src/Text/Pandoc/Writers/ConTeXt.hs | 6 +++--- src/Text/Pandoc/Writers/Custom.hs | 7 +++++-- src/Text/Pandoc/Writers/Docbook.hs | 6 +++--- src/Text/Pandoc/Writers/DokuWiki.hs | 2 +- src/Text/Pandoc/Writers/HTML.hs | 2 +- src/Text/Pandoc/Writers/Haddock.hs | 2 +- src/Text/Pandoc/Writers/ICML.hs | 6 +++--- src/Text/Pandoc/Writers/JATS.hs | 6 +++--- src/Text/Pandoc/Writers/LaTeX.hs | 6 +++--- src/Text/Pandoc/Writers/Man.hs | 2 +- src/Text/Pandoc/Writers/Markdown.hs | 2 +- src/Text/Pandoc/Writers/MediaWiki.hs | 5 ++--- src/Text/Pandoc/Writers/Ms.hs | 5 +++-- src/Text/Pandoc/Writers/Muse.hs | 2 +- src/Text/Pandoc/Writers/OPML.hs | 6 +++--- src/Text/Pandoc/Writers/OpenDocument.hs | 6 +++--- src/Text/Pandoc/Writers/Org.hs | 2 +- src/Text/Pandoc/Writers/RST.hs | 2 +- src/Text/Pandoc/Writers/RTF.hs | 7 ++++--- src/Text/Pandoc/Writers/TEI.hs | 2 +- src/Text/Pandoc/Writers/Texinfo.hs | 2 +- src/Text/Pandoc/Writers/Textile.hs | 2 +- src/Text/Pandoc/Writers/ZimWiki.hs | 2 +- 25 files changed, 51 insertions(+), 47 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index ee977f90b..112f8b657 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -105,7 +105,7 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do $ metadata' case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape special characters for AsciiDoc. escapeString :: String -> String diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 93cc0b53a..63249a7ce 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -58,9 +58,9 @@ writeCommonMark opts (Pandoc meta blocks) = do (inlinesToCommonMark opts) meta let context = defField "body" main $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context processNotes :: Inline -> State [[Block]] Inline processNotes (Note bs) = do diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 571c55b19..5a81aa8a0 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -105,9 +105,9 @@ pandocToConTeXt options (Pandoc meta blocks) = do getField "lang" context) $ defField "context-dir" (toContextDir $ getField "dir" context) $ context - return $ case writerTemplate options of - Nothing -> main - Just tpl -> renderTemplate' tpl context' + case writerTemplate options of + Nothing -> return main + Just tpl -> renderTemplate' tpl context' toContextDir :: Maybe String -> String toContextDir (Just "rtl") = "r2l" diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 1314ef844..363bad99b 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -46,6 +46,7 @@ import Data.Typeable import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8) import Scripting.Lua (LuaState, StackValue, callfunc) import qualified Scripting.Lua as Lua +import Text.Pandoc.Error import Text.Pandoc.Lua.Compat ( loadstring ) import Text.Pandoc.Lua.Util ( addValue ) import Text.Pandoc.Lua.SharedInstances () @@ -141,8 +142,10 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do let body = rendered case writerTemplate opts of Nothing -> return $ pack body - Just tpl -> return $ pack $ - renderTemplate' tpl $ setField "body" body context + Just tpl -> + case applyTemplate (pack tpl) $ setField "body" body context of + Left e -> throw (PandocTemplateError e) + Right r -> return (pack r) docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String docToCustom lua opts (Pandoc (Meta metamap) blocks) = do diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index a0e69ffb4..9db9a0102 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -124,9 +124,9 @@ writeDocbook opts (Pandoc meta blocks) = do MathML -> True _ -> False) $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Convert an Element to Docbook. elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index dc227cfa9..ad8689e8c 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -103,7 +103,7 @@ pandocToDokuWiki opts (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape special characters for DokuWiki. escapeString :: String -> String diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 43c098866..3687ca53b 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -210,7 +210,7 @@ writeHtmlString' st opts d = do lookup "sourcefile" (writerVariables opts) report $ NoTitleElement fallback return $ resetField "pagetitle" fallback context - return $ renderTemplate' tpl $ + renderTemplate' tpl $ defField "body" (renderHtml' body) context' writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 7965ebfae..d1146ca73 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -80,7 +80,7 @@ pandocToHaddock opts (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return haddock representation of notes. notesToHaddock :: PandocMonad m diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index e564f94fe..37df58e65 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -147,9 +147,9 @@ writeICML opts (Pandoc meta blocks) = do $ defField "parStyles" (render' $ parStylesToDoc st) $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Auxilary functions for parStylesToDoc and charStylesToDoc. contains :: String -> (String, (String, String)) -> [(String, String)] diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 11f3b0c22..012ff8416 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -128,9 +128,9 @@ docToJATS opts (Pandoc meta blocks) = do MathML -> True _ -> False) $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Convert an Element to JATS. elementToJATS :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 88ff454ce..53a67a27a 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -285,9 +285,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do Just "rtl" -> True _ -> False) $ context - return $ case writerTemplate options of - Nothing -> main - Just tpl -> renderTemplate' tpl context' + case writerTemplate options of + Nothing -> return main + Just tpl -> renderTemplate' tpl context' -- | Convert Elements to LaTeX elementToLaTeX :: PandocMonad m => WriterOptions -> Element -> LW m Doc diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index d96342fb5..4e756c419 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -110,7 +110,7 @@ pandocToMan opts (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return man representation of notes. notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 4449bb5ce..8433f648f 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -228,7 +228,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do $ addVariablesToJSON opts metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return markdown representation of reference key table. refsToMarkdown :: PandocMonad m => WriterOptions -> Refs -> MD m Doc diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 3825a4e73..58d1b0707 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -82,9 +82,8 @@ pandocToMediaWiki (Pandoc meta blocks) = do let main = body ++ notes let context = defField "body" main $ defField "toc" (writerTableOfContents opts) metadata - return $ pack - $ case writerTemplate opts of - Nothing -> main + pack <$> case writerTemplate opts of + Nothing -> return main Just tpl -> renderTemplate' tpl context -- | Escape special characters for MediaWiki. diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 0999d13db..493da1545 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -125,7 +125,7 @@ pandocToMs opts (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Association list of characters to escape. msEscapes :: Map.Map Char String @@ -411,7 +411,8 @@ definitionListItemToMs opts (label, defs) = do let (first, rest) = case blocks of ((Para x):y) -> (Plain x,y) (x:y) -> (x,y) - [] -> error "blocks is null" + [] -> (Plain [], []) + -- should not happen rest' <- liftM vcat $ mapM (\item -> blockToMs opts item) rest first' <- blockToMs opts first diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index 286bd1431..3d9e232ae 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -101,7 +101,7 @@ pandocToMuse (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Convert list of Pandoc block elements to Muse. blockListToMuse :: PandocMonad m diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 4a0a317fa..52577ac17 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -60,9 +60,9 @@ writeOPML opts (Pandoc meta blocks) = do meta' main <- (render colwidth . vcat) <$> (mapM (elementToOPML opts) elements) let context = defField "body" main metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context writeHtmlInlines :: PandocMonad m => [Inline] -> m Text diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index fd9a13f3e..95a800c94 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -221,9 +221,9 @@ writeOpenDocument opts (Pandoc meta blocks) = do let context = defField "body" body $ defField "automatic-styles" (render' automaticStyles) $ metadata - return $ case writerTemplate opts of - Nothing -> body - Just tpl -> renderTemplate' tpl context + case writerTemplate opts of + Nothing -> return body + Just tpl -> renderTemplate' tpl context withParagraphStyle :: PandocMonad m => WriterOptions -> String -> [Block] -> OD m Doc diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 8524c441d..48f17c4fb 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -86,7 +86,7 @@ pandocToOrg (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return Org representation of notes. notesToOrg :: PandocMonad m => [[Block]] -> Org m Doc diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 9c0693b0f..019c8335d 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -108,7 +108,7 @@ pandocToRST (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context where normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 5c990f324..6666f6549 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -125,10 +125,11 @@ writeRTF options doc = do then defField "toc" toc else id) $ metadata - return $ T.pack - $ case writerTemplate options of + T.pack <$> + case writerTemplate options of Just tpl -> renderTemplate' tpl context - Nothing -> case reverse body of + Nothing -> return $ + case reverse body of ('\n':_) -> body _ -> body ++ "\n" diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index 86a7415cf..26070966e 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -85,7 +85,7 @@ writeTEI opts (Pandoc meta blocks) = do $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Convert an Element to TEI. elementToTEI :: PandocMonad m => WriterOptions -> Int -> Element -> m Doc diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index fd489786d..549d4f3d9 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -106,7 +106,7 @@ pandocToTexinfo options (Pandoc meta blocks) = do $ metadata case writerTemplate options of Nothing -> return body - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape things as needed for Texinfo. stringToTexinfo :: String -> String diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 432c055b8..acc9eaa0f 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -75,7 +75,7 @@ pandocToTextile opts (Pandoc meta blocks) = do let context = defField "body" main metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context withUseTags :: PandocMonad m => TW m a -> TW m a withUseTags action = do diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index ba51acfce..ced02d4be 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -78,7 +78,7 @@ pandocToZimWiki opts (Pandoc meta blocks) = do $ defField "toc" (writerTableOfContents opts) $ metadata case writerTemplate opts of - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context Nothing -> return main -- | Escape special characters for ZimWiki. -- cgit v1.2.3 From 6e6324badee219164bad271f3fcd037889962096 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 22:44:09 +0200 Subject: Removed an 'error' bomb. --- src/Text/Pandoc/Readers/Markdown.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 96885c9b1..793ee0996 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -55,6 +55,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad(..), report) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (tableWith) @@ -368,7 +369,9 @@ parseMarkdown = do -- lookup to get sourcepos case M.lookup n (stateNotes' st) of Just (pos, _) -> report (NoteDefinedButNotUsed n pos) - Nothing -> error "The impossible happened.") notesDefined + Nothing -> throwError $ + PandocShouldNeverHappenError "note not found") + notesDefined let doc = runF (do Pandoc _ bs <- B.doc <$> blocks meta <- stateMeta' st return $ Pandoc meta bs) st -- cgit v1.2.3 From 242e2a064f6a32b22e1599bbfe72e64d7b6203b8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 21 Jun 2017 23:54:16 +0200 Subject: Change default EPUB directory structure in OCF container. See #3720. We now put all EPUB related content in an EPUB/ subdirectory by default (later this will be configurable). mimetype META-INF/ com.apple.ibooks.display-options.xml container.xml EPUB/ <<--configurable-->> fonts/ <<--static-->> font.otf media/ <<--static-->> cover.jpg fig1.jpg styles/ <<--static-->> stylesheet.css content.opf toc.ncx text/ <<--static-->> ch001.xhtml --- src/Text/Pandoc/Writers/EPUB.hs | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index bd9a4c800..11ca7d168 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -80,6 +80,7 @@ data Chapter = Chapter (Maybe [Int]) [Block] data EPUBState = EPUBState { stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] + , stEPUBSubdir :: String } type E m = StateT EPUBState m @@ -362,6 +363,7 @@ writeEPUB :: PandocMonad m -> m B.ByteString writeEPUB epubVersion opts doc = let initState = EPUBState { stMediaPaths = [] + , stEPUBSubdir = "EPUB" } in evalStateT (pandocToEPUB epubVersion opts doc) @@ -373,6 +375,7 @@ pandocToEPUB :: PandocMonad m -> Pandoc -> E m B.ByteString pandocToEPUB version opts doc@(Pandoc meta _) = do + epubSubdir <- gets stEPUBSubdir let epub3 = version == EPUB3 let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) . writeHtmlStringForEPUB version o @@ -383,10 +386,11 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do -- stylesheet stylesheets <- case epubStylesheets metadata of [] -> (\x -> [B.fromChunks [x]]) <$> - P.readDataFile (writerUserDataDir opts) "epub.css" + P.readDataFile (writerUserDataDir opts) + "epub.css" fs -> mapM P.readFileLazy fs let stylesheetEntries = zipWith - (\bs n -> mkEntry ("stylesheet" ++ show n ++ ".css") bs) + (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs) stylesheets [(1 :: Int)..] let vars = ("epub3", if epub3 then "true" else "false") @@ -431,7 +435,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do when (null xs) $ report $ CouldNotFetchResource f "glob did not match any font files" return xs - let mkFontEntry f = mkEntry (takeFileName f) `fmap` (lift $ P.readFileLazy f) + let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) <$> + lift (P.readFileLazy f) fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') fontEntries <- mapM mkFontEntry fontFiles @@ -728,7 +733,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ - unode "rootfile" ! [("full-path","content.opf") + unode "rootfile" ! [("full-path", + epubSubdir ++ ['/' | not (null epubSubdir)] ++ "content.opf") ,("media-type","application/oebps-package+xml")] $ () let containerEntry = mkEntry "META-INF/container.xml" containerData @@ -739,10 +745,14 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do unode "option" ! [("name","specified-fonts")] $ "true" let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple + let addEpubSubdir :: Entry -> Entry + addEpubSubdir e = e{ eRelativePath = + epubSubdir ++ ['/' | not (null epubSubdir)] ++ eRelativePath e } -- construct archive - let archive = foldr addEntryToArchive emptyArchive - (mimetypeEntry : containerEntry : appleEntry : tpEntry : - contentsEntry : tocEntry : navEntry : + let archive = foldr addEntryToArchive emptyArchive $ + [mimetypeEntry, containerEntry, appleEntry] ++ + map addEpubSubdir + (tpEntry : contentsEntry : tocEntry : navEntry : (stylesheetEntries ++ picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) return $ fromArchive archive @@ -878,15 +888,16 @@ modifyMediaRef :: PandocMonad m modifyMediaRef _ "" = return "" modifyMediaRef opts oldsrc = do media <- gets stMediaPaths + epubSubdir <- gets stEPUBSubdir case lookup oldsrc media of Just (n,_) -> return n Nothing -> catchError (do (img, mbMime) <- P.fetchItem (writerSourceURL opts) oldsrc let new = "media/file" ++ show (length media) ++ - fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) - (('.':) <$> (mbMime >>= extensionFromMimeType)) + fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) + (('.':) <$> (mbMime >>= extensionFromMimeType)) epochtime <- floor `fmap` lift P.getPOSIXTime - let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img + let entry = toEntry new epochtime (B.fromChunks . (:[]) $ img) modify $ \st -> st{ stMediaPaths = (oldsrc, (new, Just entry)):media} return new) @@ -952,7 +963,7 @@ mediaTypeOf x = -- Returns filename for chapter number. showChapter :: Int -> String -showChapter = printf "ch%03d.xhtml" +showChapter = printf "text/ch%03d.xhtml" -- Add identifiers to any headers without them. addIdentifiers :: [Block] -> [Block] -- cgit v1.2.3 From 379b99f63abe534c7666c8e16e0bf2e980b63d1c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 22 Jun 2017 11:43:50 +0200 Subject: Added `writerEpubSubdirectory` to `WriterOptions`. [API change] The EPUB writer now takes its EPUB subdirectory from this option. Also added `PandocEpubSubdirectoryError` to `PandocError`. This is raised if the EPUB subdirectory is not all ASCII alphanumerics. See #3720. --- src/Text/Pandoc/Error.hs | 3 +++ src/Text/Pandoc/Options.hs | 2 ++ src/Text/Pandoc/Writers/EPUB.hs | 16 ++++++++-------- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 0056a1591..60bc699ab 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -63,6 +63,7 @@ data PandocError = PandocIOError String IOError | PandocResourceNotFound String | PandocTemplateError String | PandocAppError String + | PandocEpubSubdirectoryError String deriving (Show, Typeable, Generic) instance Exception PandocError @@ -104,6 +105,8 @@ handleError (Left e) = "File " ++ fn ++ " not found in resource path" PandocTemplateError s -> err 5 s PandocAppError s -> err 1 s + PandocEpubSubdirectoryError s -> err 31 $ + "EPUB subdirectory name '" ++ s ++ "' contains illegal characters" err :: Int -> String -> IO a err exitCode msg = do diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index c7211c86e..6519f807c 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -213,6 +213,7 @@ data WriterOptions = WriterOptions , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting -- (Nothing = no highlighting) , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown + , writerEpubSubdirectory :: String -- ^ Subdir for epub in OCF , writerEpubMetadata :: Maybe String -- ^ Metadata to include in EPUB , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) @@ -249,6 +250,7 @@ instance Default WriterOptions where , writerListings = False , writerHighlightStyle = Just pygments , writerSetextHeaders = True + , writerEpubSubdirectory = "EPUB" , writerEpubMetadata = Nothing , writerEpubFonts = [] , writerEpubChapterLevel = 1 diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 11ca7d168..96c8847df 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -34,14 +34,14 @@ Conversion of 'Pandoc' documents to EPUB. module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive, fromArchive, fromEntry, toEntry) -import Control.Monad (mplus, when, zipWithM) +import Control.Monad (mplus, when, unless, zipWithM) import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict (State, StateT, evalState, evalStateT, get, gets, lift, modify, put) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Data.Text.Lazy as TL -import Data.Char (isAlphaNum, isDigit, toLower) +import Data.Char (isAlphaNum, isDigit, toLower, isAscii) import Data.List (intercalate, isInfixOf, isPrefixOf) import qualified Data.Map as M import Data.Maybe (catMaybes, fromMaybe) @@ -80,7 +80,6 @@ data Chapter = Chapter (Maybe [Int]) [Block] data EPUBState = EPUBState { stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] - , stEPUBSubdir :: String } type E m = StateT EPUBState m @@ -362,9 +361,7 @@ writeEPUB :: PandocMonad m -> Pandoc -- ^ Document to convert -> m B.ByteString writeEPUB epubVersion opts doc = - let initState = EPUBState { stMediaPaths = [] - , stEPUBSubdir = "EPUB" - } + let initState = EPUBState { stMediaPaths = [] } in evalStateT (pandocToEPUB epubVersion opts doc) initState @@ -375,7 +372,10 @@ pandocToEPUB :: PandocMonad m -> Pandoc -> E m B.ByteString pandocToEPUB version opts doc@(Pandoc meta _) = do - epubSubdir <- gets stEPUBSubdir + let epubSubdir = writerEpubSubdirectory opts + -- sanity check on epubSubdir + unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + throwError $ PandocEpubSubdirectoryError epubSubdir let epub3 = version == EPUB3 let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) . writeHtmlStringForEPUB version o @@ -888,7 +888,7 @@ modifyMediaRef :: PandocMonad m modifyMediaRef _ "" = return "" modifyMediaRef opts oldsrc = do media <- gets stMediaPaths - epubSubdir <- gets stEPUBSubdir + let epubSubdir = writerEpubSubdirectory opts case lookup oldsrc media of Just (n,_) -> return n Nothing -> catchError -- cgit v1.2.3 From 24d215acf584a52ad3ea3a9a3f97c751d26e08a4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 22 Jun 2017 12:01:33 +0200 Subject: Added `--epub-subdirectory` option. This specifies the subdirectory in the OCF container that holds the EPUB specific content. Closes #3720. --- MANUAL.txt | 6 ++++++ src/Text/Pandoc/App.hs | 10 ++++++++++ 2 files changed, 16 insertions(+) (limited to 'src/Text') diff --git a/MANUAL.txt b/MANUAL.txt index d200c2f0c..a75c6fd2a 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -977,6 +977,12 @@ Options affecting specific writers documents with few level 1 headers, one might want to use a chapter level of 2 or 3. +`--epub-subdirectory=`*DIRNAME* + +: Specify the subdirectory in the OCF container that is to hold + the EPUB-specific contents. The default is `EPUB`. To put + the EPUB contents in the top level, use an empty string. + `--latex-engine=pdflatex`|`lualatex`|`xelatex` : Use the specified LaTeX engine when producing PDF output. diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 7e9cfdd95..3c259fce7 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -355,6 +355,7 @@ convertWithOpts opts = do writerSlideLevel = optSlideLevel opts, writerHighlightStyle = highlightStyle, writerSetextHeaders = optSetextHeaders opts, + writerEpubSubdirectory = optEpubSubdirectory opts, writerEpubMetadata = epubMetadata, writerEpubFonts = optEpubFonts opts, writerEpubChapterLevel = optEpubChapterLevel opts, @@ -553,6 +554,7 @@ data Opt = Opt , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math , optAbbreviations :: Maybe FilePath -- ^ Path to abbrevs file , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc + , optEpubSubdirectory :: String -- ^ EPUB subdir in OCF container , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters @@ -628,6 +630,7 @@ defaultOpts = Opt , optHTMLMathMethod = PlainMath , optAbbreviations = Nothing , optReferenceDoc = Nothing + , optEpubSubdirectory = "EPUB" , optEpubMetadata = Nothing , optEpubFonts = [] , optEpubChapterLevel = 1 @@ -1243,6 +1246,13 @@ options = "FILE") "" -- "Path of custom reference doc" + , Option "" ["epub-subdirectory"] + (ReqArg + (\arg opt -> + return opt { optEpubSubdirectory = arg }) + "DIRNAME") + "" -- "Name of subdirectory for epub content in OCF container" + , Option "" ["epub-cover-image"] (ReqArg (\arg opt -> -- cgit v1.2.3 From 2b3e8cb718d527640c237486d84cefa741221035 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 22 Jun 2017 12:38:08 +0200 Subject: EPUB writer: Fixed various things with new EPUB structure. --- src/Text/Pandoc/Writers/EPUB.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 96c8847df..ab9f873c8 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -394,7 +394,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do stylesheets [(1 :: Int)..] let vars = ("epub3", if epub3 then "true" else "false") - : map (\e -> ("css", eRelativePath e)) stylesheetEntries + : map (\e -> ("css", "../" ++ eRelativePath e)) stylesheetEntries ++ [(x,y) | (x,y) <- writerVariables opts, x /= "css"] let opts' = opts{ writerEmailObfuscation = NoObfuscation , writerSectionDivs = True @@ -521,7 +521,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do chapters' let chapToEntry num (Chapter mbnum bs) = - mkEntry (showChapter num) <$> + mkEntry ("text/" ++ showChapter num) <$> (writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum } $ case bs of (Header _ _ xs : _) -> @@ -888,7 +888,6 @@ modifyMediaRef :: PandocMonad m modifyMediaRef _ "" = return "" modifyMediaRef opts oldsrc = do media <- gets stMediaPaths - let epubSubdir = writerEpubSubdirectory opts case lookup oldsrc media of Just (n,_) -> return n Nothing -> catchError @@ -924,12 +923,13 @@ transformInline :: PandocMonad m -> E m Inline transformInline opts (Image attr lab (src,tit)) = do newsrc <- modifyMediaRef opts src - return $ Image attr lab (newsrc, tit) + return $ Image attr lab ("../" ++ newsrc, tit) transformInline opts (x@(Math t m)) | WebTeX url <- writerHTMLMathMethod opts = do newsrc <- modifyMediaRef opts (url ++ urlEncode m) let mathclass = if t == DisplayMath then "display" else "inline" - return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")] + return $ Span ("",["math",mathclass],[]) + [Image nullAttr [x] ("../" ++ newsrc, "")] transformInline opts (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw @@ -963,7 +963,7 @@ mediaTypeOf x = -- Returns filename for chapter number. showChapter :: Int -> String -showChapter = printf "text/ch%03d.xhtml" +showChapter = printf "ch%03d.xhtml" -- Add identifiers to any headers without them. addIdentifiers :: [Block] -> [Block] -- cgit v1.2.3 From 4a6868885d961b0df782c19f70dd725148446633 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 22 Jun 2017 12:42:21 +0200 Subject: EPUB writer: put title_page.xhtml in text/. --- src/Text/Pandoc/Writers/EPUB.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index ab9f873c8..d20eb8a2f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -422,7 +422,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do tpContent <- lift $ writeHtml opts'{ writerVariables = ("titlepage","true"):vars } (Pandoc meta []) - let tpEntry = mkEntry "title_page.xhtml" tpContent + let tpEntry = mkEntry "text/title_page.xhtml" tpContent -- handle pictures -- mediaRef <- P.newIORef [] @@ -658,7 +658,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) - , unode "content" ! [("src","title_page.xhtml")] $ () ] + , unode "content" ! [("src","text/title_page.xhtml")] $ () ] navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1 let tocData = UTF8.fromStringLazy $ ppTopElement $ -- cgit v1.2.3 From 2b34337a9cf8b025914e8219498b4c0258772be0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 22 Jun 2017 23:38:42 +0200 Subject: Text.Pandoc.Extensions: Added `Ext_raw_attribute`. Documented in MANUAL.txt. This is enabled by default in pandoc markdown and multimarkdown. --- MANUAL.txt | 27 +++++++++++++++++++++--- src/Text/Pandoc/Extensions.hs | 4 ++++ src/Text/Pandoc/Readers/Markdown.hs | 42 +++++++++++++++++++++++++++++-------- test/command/3537.md | 28 +++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 12 deletions(-) create mode 100644 test/command/3537.md (limited to 'src/Text') diff --git a/MANUAL.txt b/MANUAL.txt index a75c6fd2a..a4bc7a410 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3033,9 +3033,6 @@ For the most part this should give the same output as `raw_html`, but it makes it easier to write pandoc filters to manipulate groups of inlines. -Raw TeX -------- - #### Extension: `raw_tex` #### In addition to raw HTML, pandoc allows raw LaTeX, TeX, and ConTeXt to be @@ -3060,6 +3057,30 @@ LaTeX, not as Markdown. Inline LaTeX is ignored in output formats other than Markdown, LaTeX, Emacs Org mode, and ConTeXt. +### Generic raw attribute ### + +#### Extension: `raw_attribute` #### + +Inline spans and fenced code blocks with a special +kind of attribute will be parsed as raw content with the +designated format. For example, the following produces a raw +groff `ms` block: + + ```{=ms} + .MYMACRO + blah blah + ``` +And the following produces a raw `html` inline element: + + This is `html`{=html} + +This extension presupposes that the relevant kind of +inline code or fenced code block is enabled. Thus, for +example, to use a raw attribute with a backtick code block, +`backtick_code_blocks` must be enabled. + +The raw attribute cannot be combined with regular attributes. + LaTeX macros ------------ diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 58e8c414d..398944d47 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -94,6 +94,7 @@ data Extension = | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks | Ext_inline_code_attributes -- ^ Allow attributes on inline code + | Ext_raw_attribute -- ^ Allow explicit raw blocks/inlines | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks | Ext_native_divs -- ^ Use Div blocks for contents of
tags | Ext_native_spans -- ^ Use Span inlines for contents of @@ -162,6 +163,7 @@ pandocExtensions = extensionsFromList , Ext_fenced_code_attributes , Ext_backtick_code_blocks , Ext_inline_code_attributes + , Ext_raw_attribute , Ext_markdown_in_html_blocks , Ext_native_divs , Ext_native_spans @@ -275,6 +277,8 @@ multimarkdownExtensions = extensionsFromList , Ext_subscript , Ext_backtick_code_blocks , Ext_spaced_reference_links + -- So far only in dev version of mmd: + , Ext_raw_attribute ] -- | Language extensions to be used with strict markdown. diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 793ee0996..b91efcd8c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -681,19 +681,36 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) +rawAttribute :: PandocMonad m => MarkdownParser m String +rawAttribute = do + char '{' + skipMany spaceChar + char '=' + format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_") + skipMany spaceChar + char '}' + return format + codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing skipMany spaceChar - attr <- option ([],[],[]) $ - try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar) + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_fenced_code_attributes >> attributes) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar))) blankline - contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) + contents <- intercalate "\n" <$> + manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines - return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + return $ return $ + case rawattr of + Left syn -> B.rawBlock syn contents + Right attr -> B.codeBlockWith attr contents -- correctly handle github language identifiers toLanguageId :: String -> String @@ -1516,13 +1533,20 @@ code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces - result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + result <- (trim . concat) <$> + many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " ")) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) - attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes - >> attributes) - return $ return $ B.codeWith attr $ trim $ concat result + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_inline_code_attributes >> attributes))) + return $ return $ + case rawattr of + Left syn -> B.rawInline syn result + Right attr -> B.codeWith attr result math :: PandocMonad m => MarkdownParser m (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) diff --git a/test/command/3537.md b/test/command/3537.md new file mode 100644 index 000000000..df4eeba7d --- /dev/null +++ b/test/command/3537.md @@ -0,0 +1,28 @@ +Generalized raw attributes. + +```` +% pandoc -t native +```{=ms} +.MACRO +foo bar +``` +^D +[RawBlock (Format "ms") ".MACRO\nfoo bar"] +```` + +```` +% pandoc -t native +Hi `there`{=ms}. +^D +[Para [Str "Hi",Space,RawInline (Format "ms") "there",Str "."]] +```` + +```` +% pandoc -t native +~~~ {=ms} +.MACRO +foo bar +~~~ +^D +[RawBlock (Format "ms") ".MACRO\nfoo bar"] +```` -- cgit v1.2.3 From da7d9ef295de3d51db97c4ff57104ae7d6e57e86 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 23 Jun 2017 11:51:26 +0200 Subject: HTML writer: make sure html4, html5 formats work for raw blocks/inlines. --- src/Text/Pandoc/Writers/HTML.hs | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 3687ca53b..45c878781 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -603,7 +603,8 @@ blockToHtml opts (Para lst) contents <- inlineListToHtml opts lst return $ H.p contents where - isEmptyRaw [RawInline f _] = f /= (Format "html") + isEmptyRaw [RawInline f _] = f `notElem` [Format "html", + Format "html4", Format "html5"] isEmptyRaw _ = False blockToHtml opts (LineBlock lns) = if writerWrapText opts == WrapNone @@ -632,14 +633,17 @@ blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty else addAttrs opts (ident, classes', kvs) $ divtag $ contents' -blockToHtml opts (RawBlock f str) - | f == Format "html" = return $ preEscapedString str - | (f == Format "latex" || f == Format "tex") && - allowsMathEnvironments (writerHTMLMathMethod opts) && - isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str] - | otherwise = do - report $ BlockNotRendered (RawBlock f str) - return mempty +blockToHtml opts (RawBlock f str) = do + ishtml <- isRawHtml f + if ishtml + then return $ preEscapedString str + else if (f == Format "latex" || f == Format "tex") && + allowsMathEnvironments (writerHTMLMathMethod opts) && + isMathEnvironment str + then blockToHtml opts $ Plain [Math DisplayMath str] + else do + report $ BlockNotRendered (RawBlock f str) + return mempty blockToHtml _ (HorizontalRule) = do html5 <- gets stHtml5 return $ if html5 then H5.hr else H.hr @@ -977,11 +981,13 @@ inlineToHtml opts inline = do return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag - (RawInline f str) - | f == Format "html" -> return $ preEscapedString str - | otherwise -> do - report $ InlineNotRendered inline - return mempty + (RawInline f str) -> do + ishtml <- isRawHtml f + if ishtml + then return $ preEscapedString str + else do + report $ InlineNotRendered inline + return mempty (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt lift $ obfuscateLink opts attr linkText s @@ -1129,3 +1135,9 @@ allowsMathEnvironments (MathJax _) = True allowsMathEnvironments (MathML) = True allowsMathEnvironments (WebTeX _) = True allowsMathEnvironments _ = False + +isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool +isRawHtml f = do + html5 <- gets stHtml5 + return $ f == Format "html" || + ((html5 && f == Format "html5") || f == Format "html4") -- cgit v1.2.3 From 57cc9a391c18977f229d7a5e15d0e9bcb861b684 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 23 Jun 2017 11:51:44 +0200 Subject: Markdown writer: make sure `plain`, `markdown_github`, etc. work for raw. Previously only `markdown` worked. Note: currently a raw block labeled `markdown_github` will be printed for any `markdown` format. --- src/Text/Pandoc/Writers/Markdown.hs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 8433f648f..6c7e662bf 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -433,8 +433,10 @@ blockToMarkdown' opts (LineBlock lns) = return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline else blockToMarkdown opts $ linesToPara lns blockToMarkdown' opts b@(RawBlock f str) - | f == "markdown" = return $ text str <> text "\n" - | f == "html" && isEnabled Ext_raw_html opts = do + | f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] + = return $ text str <> text "\n" + | f `elem` ["html", "html5", "html4"] && isEnabled Ext_raw_html opts = do plain <- asks envPlain return $ if plain then empty @@ -1053,10 +1055,12 @@ inlineToMarkdown opts (Math DisplayMath str) = (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) inlineToMarkdown opts il@(RawInline f str) = do plain <- asks envPlain - if not plain && - ( f == "markdown" || + if (plain && f == "plain") || (not plain && + ( f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] || (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) || - (isEnabled Ext_raw_html opts && f == "html") ) + (isEnabled Ext_raw_html opts && f `elem` ["html", "html4", "html5"]) + )) then return $ text str else do report $ InlineNotRendered il -- cgit v1.2.3 From 5812ac03902169e5ce8593c26fb2d8cffafbd828 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 23 Jun 2017 22:31:08 +0200 Subject: Markdown reader: interpret YAML metadata as Inlines when possible. If the metadata field is all on one line, we try to interpret it as Inlines, and only try parsing as Blocks if that fails. If it extends over one line (including possibly the `|` or `>` character signaling an indented block), then we parse as Blocks. This was motivated by some German users finding that date: '22. Juin 2017' got parsed as an ordered list. Closes #3755. --- src/Text/Pandoc/Readers/Markdown.hs | 25 +++++++++++++------------ test/command/3755.md | 23 +++++++++++++++++++++++ 2 files changed, 36 insertions(+), 12 deletions(-) create mode 100644 test/command/3755.md (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b91efcd8c..b3b275674 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -292,18 +292,19 @@ ignorable t = (T.pack "_") `T.isSuffixOf` t toMetaValue :: PandocMonad m => Text -> MarkdownParser m (F MetaValue) -toMetaValue x = toMeta <$> parseFromString' parseBlocks (T.unpack x) - where - toMeta p = do - p' <- p - return $ - case B.toList p' of - [Plain xs] -> MetaInlines xs - [Para xs] - | endsWithNewline x -> MetaBlocks [Para xs] - | otherwise -> MetaInlines xs - bs -> MetaBlocks bs - endsWithNewline t = T.pack "\n" `T.isSuffixOf` t +toMetaValue x = + parseFromString' parser' (T.unpack x) + where parser' = (asInlines <$> ((trimInlinesF . mconcat) + <$> (guard (not endsWithNewline) + *> manyTill inline eof))) + <|> (asBlocks <$> parseBlocks) + asBlocks p = do + p' <- p + return $ MetaBlocks (B.toList p') + asInlines p = do + p' <- p + return $ MetaInlines (B.toList p') + endsWithNewline = T.pack "\n" `T.isSuffixOf` x yamlToMeta :: PandocMonad m => Yaml.Value -> MarkdownParser m (F MetaValue) diff --git a/test/command/3755.md b/test/command/3755.md new file mode 100644 index 000000000..5e1ffc921 --- /dev/null +++ b/test/command/3755.md @@ -0,0 +1,23 @@ +``` +% pandoc -t native -s +--- +title: 'Titel' +date: '22. Juni 2017' +--- +^D +Pandoc (Meta {unMeta = fromList [("date",MetaInlines [Str "22.",Space,Str "Juni",Space,Str "2017"]),("title",MetaInlines [Str "Titel"])]}) +[] +``` + +``` +% pandoc -t native -s +--- +title: '
foo
' +date: | + 22. Juni 2017 +--- +^D +Pandoc (Meta {unMeta = fromList [("date",MetaBlocks [OrderedList (22,Decimal,Period) [[Plain [Str "Juni",Space,Str "2017"]]]]),("title",MetaBlocks [Div ("",[],[]) [Plain [Str "foo"]]])]}) +[] +``` + -- cgit v1.2.3 From a20302d9cfc3651a217bd76e9f9cce3e285433d4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 23 Jun 2017 22:36:43 +0200 Subject: Added comment in source. --- src/Text/Pandoc/Readers/Markdown.hs | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b3b275674..31b51f237 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -305,6 +305,9 @@ toMetaValue x = p' <- p return $ MetaInlines (B.toList p') endsWithNewline = T.pack "\n" `T.isSuffixOf` x + -- Note: a standard quoted or unquoted YAML value will + -- not end in a newline, but a "block" set off with + -- `|` or `>` will. yamlToMeta :: PandocMonad m => Yaml.Value -> MarkdownParser m (F MetaValue) -- cgit v1.2.3 From a9259c1501cdfd0d0b5c0f95f36497da82befb50 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 24 Jun 2017 13:20:30 +0200 Subject: Extensions: Monoid instance for Extensions. [API change] --- src/Text/Pandoc/Extensions.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 398944d47..79e3529e9 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -45,7 +45,7 @@ module Text.Pandoc.Extensions ( Extension(..) , githubMarkdownExtensions , multimarkdownExtensions ) where -import Data.Bits (clearBit, setBit, testBit) +import Data.Bits (clearBit, setBit, testBit, (.|.)) import Data.Data (Data) import Data.Typeable (Typeable) import GHC.Generics (Generic) @@ -55,6 +55,10 @@ import Text.Parsec newtype Extensions = Extensions Integer deriving (Show, Read, Eq, Ord, Data, Typeable, Generic) +instance Monoid Extensions where + mempty = Extensions 0 + mappend (Extensions a) (Extensions b) = Extensions (a .|. b) + extensionsFromList :: [Extension] -> Extensions extensionsFromList = foldr enableExtension emptyExtensions -- cgit v1.2.3 From 743419af5c0872d8e4b5fdf53d47080e8648b4a7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 24 Jun 2017 13:47:10 +0200 Subject: Readers.getReader, Writers.getWriter API change. Now these functions return a pair of a reader/writer and an Extensions, instead of building the extensions into the reader/writer. The calling code must explicitly set readerExtensions or writerExtensions using the Extensions returned. The point of the change is to make it possible for the calling code to determine what extensions are being used. See #3659. --- src/Text/Pandoc/App.hs | 14 +++++++++----- src/Text/Pandoc/Lua/PandocModule.hs | 5 +++-- src/Text/Pandoc/Readers.hs | 12 ++++-------- src/Text/Pandoc/Writers.hs | 11 ++++------- trypandoc/trypandoc.hs | 10 ++++++---- 5 files changed, 26 insertions(+), 26 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 3c259fce7..ee74d39c0 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -182,11 +182,12 @@ convertWithOpts opts = do let msOutput = format == "ms" -- disabling the custom writer for now - writer <- if ".lua" `isSuffixOf` format + (writer, writerExts) <- + if ".lua" `isSuffixOf` format -- note: use non-lowercased version writerName then return (TextWriter (\o d -> liftIO $ writeCustom writerName o d) - :: Writer PandocIO) + :: Writer PandocIO, mempty) else case getWriter writerName of Left e -> E.throwIO $ PandocAppError $ if format == "pdf" @@ -196,12 +197,13 @@ convertWithOpts opts = do "\nand specify an output file with " ++ ".pdf extension (-o filename.pdf)." else e - Right w -> return (w :: Writer PandocIO) + Right (w, es) -> return (w :: Writer PandocIO, es) -- TODO: we have to get the input and the output into the state for -- the sake of the text2tags reader. - reader <- case getReader readerName of - Right r -> return (r :: Reader PandocIO) + (reader, readerExts) <- + case getReader readerName of + Right (r, es) -> return (r :: Reader PandocIO, es) Left e -> E.throwIO $ PandocAppError e' where e' = case readerName of "pdf" -> e ++ @@ -310,6 +312,7 @@ convertWithOpts opts = do optDefaultImageExtension opts , readerTrackChanges = optTrackChanges opts , readerAbbreviations = abbrevs + , readerExtensions = readerExts } highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts @@ -340,6 +343,7 @@ convertWithOpts opts = do writerNumberSections = optNumberSections opts, writerNumberOffset = optNumberOffset opts, writerSectionDivs = optSectionDivs opts, + writerExtensions = writerExts, writerReferenceLinks = optReferenceLinks opts, writerReferenceLocation = optReferenceLocation opts, writerDpi = optDpi opts, diff --git a/src/Text/Pandoc/Lua/PandocModule.hs b/src/Text/Pandoc/Lua/PandocModule.hs index 27c19d4f0..fccfbebf3 100644 --- a/src/Text/Pandoc/Lua/PandocModule.hs +++ b/src/Text/Pandoc/Lua/PandocModule.hs @@ -34,6 +34,7 @@ import Data.Text (pack) import Scripting.Lua (LuaState, call, push, pushhsfunction, rawset) import Text.Pandoc.Class hiding (readDataFile) import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Options (ReaderOptions(readerExtensions)) import Text.Pandoc.Lua.Compat (loadstring) import Text.Pandoc.Lua.StackInstances () import Text.Pandoc.Readers (Reader (..), getReader) @@ -57,10 +58,10 @@ read_doc :: String -> String -> IO (Either String Pandoc) read_doc formatSpec content = do case getReader formatSpec of Left s -> return $ Left s - Right reader -> + Right (reader, es) -> case reader of TextReader r -> do - res <- runIO $ r def (pack content) + res <- runIO $ r def{ readerExtensions = es } (pack content) case res of Left s -> return . Left $ show s Right pd -> return $ Right pd diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 20e503a7e..0374d27d5 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -133,20 +133,16 @@ readers = [ ("native" , TextReader readNative) ,("muse" , TextReader readMuse) ] --- | Retrieve reader based on formatSpec (format+extensions). -getReader :: PandocMonad m => String -> Either String (Reader m) +-- | Retrieve reader, extensions based on formatSpec (format+extensions). +getReader :: PandocMonad m => String -> Either String (Reader m, Extensions) getReader s = case parseFormatSpec s of Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] Right (readerName, setExts) -> case lookup readerName readers of Nothing -> Left $ "Unknown reader: " ++ readerName - Just (TextReader r) -> Right $ TextReader $ \o -> - r o{ readerExtensions = setExts $ - getDefaultExtensions readerName } - Just (ByteStringReader r) -> Right $ ByteStringReader $ \o -> - r o{ readerExtensions = setExts $ - getDefaultExtensions readerName } + Just r -> Right (r, setExts $ + getDefaultExtensions readerName) -- | Read pandoc document from JSON format. readJSON :: ReaderOptions -> Text -> Either PandocError Pandoc diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index dbe55449f..6dfc1a7b3 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -176,19 +176,16 @@ writers = [ ,("muse" , TextWriter writeMuse) ] -getWriter :: PandocMonad m => String -> Either String (Writer m) +-- | Retrieve writer, extensions based on formatSpec (format+extensions). +getWriter :: PandocMonad m => String -> Either String (Writer m, Extensions) getWriter s = case parseFormatSpec s of Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] Right (writerName, setExts) -> case lookup writerName writers of Nothing -> Left $ "Unknown writer: " ++ writerName - Just (TextWriter r) -> Right $ TextWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - Just (ByteStringWriter r) -> Right $ ByteStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } + Just r -> Right (r, setExts $ + getDefaultExtensions writerName) writeJSON :: WriterOptions -> Pandoc -> Text writeJSON _ = UTF8.toText . BL.toStrict . encode diff --git a/trypandoc/trypandoc.hs b/trypandoc/trypandoc.hs index b8b821883..5a4828877 100644 --- a/trypandoc/trypandoc.hs +++ b/trypandoc/trypandoc.hs @@ -3,15 +3,15 @@ module Main where import Network.Wai.Handler.CGI import Network.Wai import Control.Applicative ((<$>)) -import Data.Maybe (mapMaybe, fromMaybe) +import Data.Maybe (fromMaybe) import Network.HTTP.Types.Status (status200) import Network.HTTP.Types.Header (hContentType) import Network.HTTP.Types.URI (queryToQueryText) import Text.Pandoc +import Text.Pandoc.Writers.Math (defaultMathJaxURL) import Text.Pandoc.Highlighting (pygments) import Text.Pandoc.Readers (getReader, Reader(..)) import Text.Pandoc.Writers (getWriter, Writer(..)) -import Text.Pandoc.Error (PandocError) import Text.Pandoc.Shared (tabFilter) import Data.Aeson import qualified Data.Text as T @@ -29,11 +29,13 @@ app req respond = do fromFormat <- fromMaybe "" <$> getParam "from" toFormat <- fromMaybe "" <$> getParam "to" let reader = case getReader (T.unpack fromFormat) of - Right (TextReader r) -> r readerOpts + Right (TextReader r, es) -> r readerOpts{ + readerExtensions = es } _ -> error $ "could not find reader for " ++ T.unpack fromFormat let writer = case getWriter (T.unpack toFormat) of - Right (TextWriter w) -> w writerOpts + Right (TextWriter w, es) -> w writerOpts{ + writerExtensions = es } _ -> error $ "could not find writer for " ++ T.unpack toFormat let result = case runPure $ reader (tabFilter 4 text) >>= writer of -- cgit v1.2.3 From f8877516e03ec678aeb735cdafe56e20ca52b235 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Sun, 25 Jun 2017 11:01:43 +0300 Subject: Muse reader: Require space before and after '=' for code (#3758) --- src/Text/Pandoc/Readers/Muse.hs | 13 ++++++++++--- test/Tests/Readers/Muse.hs | 8 ++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 7eee064a7..fe8a55f5c 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -442,8 +442,7 @@ tableParseCaption = try $ do -- inline :: PandocMonad m => MuseParser m (F Inlines) -inline = choice [ whitespace - , br +inline = choice [ br , footnote , strong , strongTag @@ -455,6 +454,7 @@ inline = choice [ whitespace , link , code , codeTag + , whitespace , str , symbol ] "inline" @@ -535,7 +535,14 @@ strikeoutTag :: PandocMonad m => MuseParser m (F Inlines) strikeoutTag = inlineTag B.strikeout "del" code :: PandocMonad m => MuseParser m (F Inlines) -code = return . B.code <$> verbatimBetween '=' +code = try $ do + pos <- getPosition + sp <- if sourceColumn pos == 1 + then pure mempty + else skipMany1 spaceChar >> pure B.space + cd <- verbatimBetween '=' + notFollowedBy nonspaceChar + return $ return (sp B.<> B.code cd) codeTag :: PandocMonad m => MuseParser m (F Inlines) codeTag = do diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index cac69dffa..bae389584 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -58,6 +58,8 @@ tests = , "Code" =: "=foo(bar)=" =?> para (code "foo(bar)") + , "Not code" =: "a=b= =c=d" =?> para (text "a=b= =c=d") + , "Code tag" =: "foo(bar)" =?> para (code "foo(bar)") , testGroup "Links" @@ -79,6 +81,12 @@ tests = , "Image link with description" =: "[[URL:image.jpg][Image]]" =?> para (link "image.jpg" "" (text "Image")) + -- Implicit links are supported in Emacs Muse, but not in Amusewiki: + -- https://github.com/melmothx/text-amuse/issues/18 + -- + -- This test also makes sure '=' without whitespace is not treated as code markup + , "No implicit links" =: "http://example.org/index.php?action=view&id=1" + =?> para "http://example.org/index.php?action=view&id=1" ] ] -- cgit v1.2.3 From 87e6235fe73fef3fd6bc6e374c9372b2254d2764 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 10:37:49 +0200 Subject: Text.Pandoc.Writers.Shared: added getLang. --- src/Text/Pandoc/Writers/Shared.hs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 2047285eb..6f7c9f75c 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -29,7 +29,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Shared utility functions for pandoc writers. -} module Text.Pandoc.Writers.Shared ( - metaToJSON + getLang + , metaToJSON , metaToJSON' , addVariablesToJSON , getField @@ -42,7 +43,7 @@ module Text.Pandoc.Writers.Shared ( , gridTable ) where -import Control.Monad (liftM, zipWithM) +import Control.Monad (liftM, zipWithM, mplus) import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object), encode, fromJSON) import qualified Data.HashMap.Strict as H @@ -57,6 +58,16 @@ import Text.Pandoc.Pretty import Text.Pandoc.UTF8 (toStringLazy) import Text.Pandoc.XML (escapeStringForXML) +-- | Get the contents of the `lang` metadata field or variable. +getLang :: WriterOptions -> Meta -> Maybe String +getLang opts meta = + lookup "lang" (writerVariables opts) + `mplus` + case lookupMeta "lang" meta of + Just (MetaInlines [Str s]) -> Just s + Just (MetaString s) -> Just s + _ -> Nothing + -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. -- Variables overwrite metadata fields with the same names. -- cgit v1.2.3 From 0c993a6c7b73bfd10c795f45abfed32772790999 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 12:45:25 +0200 Subject: Text.Pandoc.Writers.Shared: export splitLang. --- src/Text/Pandoc/Writers/Shared.hs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 6f7c9f75c..0b35d27f6 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -30,6 +30,7 @@ Shared utility functions for pandoc writers. -} module Text.Pandoc.Writers.Shared ( getLang + , splitLang , metaToJSON , metaToJSON' , addVariablesToJSON @@ -53,8 +54,11 @@ import Data.Maybe (isJust) import qualified Data.Text as T import qualified Data.Traversable as Traversable import Text.Pandoc.Definition +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty +import Text.Pandoc.Shared (splitBy) import Text.Pandoc.UTF8 (toStringLazy) import Text.Pandoc.XML (escapeStringForXML) @@ -68,6 +72,21 @@ getLang opts meta = Just (MetaString s) -> Just s _ -> Nothing +-- | Split `lang` field into lang and country, issuing warning +-- if it doesn't look valid. +splitLang :: PandocMonad m => String -> m (Maybe String, Maybe String) +splitLang lang = + case splitBy (== '-') lang of + [la,co] + | length la == 2 && length co == 2 + -> return (Just la, Just co) + [la] + | length la == 2 + -> return (Just la, Nothing) + _ -> do + report $ InvalidLang lang + return (Nothing, Nothing) + -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. -- Variables overwrite metadata fields with the same names. -- cgit v1.2.3 From a02f08c9fc608727da0ac3b65b39f627e8bb2033 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 12:45:42 +0200 Subject: Added InvalidLang to LogMessage. --- src/Text/Pandoc/Logging.hs | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 4090243ea..ad0fcdd2d 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -90,6 +90,7 @@ data LogMessage = | Extracting String | NoTitleElement String | NoLangSpecified + | InvalidLang String | CouldNotHighlight String | MissingCharacter String deriving (Show, Eq, Data, Ord, Typeable, Generic) @@ -178,6 +179,8 @@ instance ToJSON LogMessage where NoTitleElement fallback -> ["fallback" .= Text.pack fallback] NoLangSpecified -> [] + InvalidLang s -> + ["lang" .= Text.pack s] CouldNotHighlight msg -> ["message" .= Text.pack msg] MissingCharacter msg -> @@ -254,6 +257,9 @@ showLogMessage msg = NoLangSpecified -> "No value for 'lang' was specified in the metadata.\n" ++ "It is recommended that lang be specified for this format." + InvalidLang s -> + "Invalid 'lang' value '" ++ s ++ "'.\n" ++ + "Use ISO 8601 format like 'en-US'." CouldNotHighlight m -> "Could not highlight code block:\n" ++ m MissingCharacter m -> @@ -285,5 +291,6 @@ messageVerbosity msg = Extracting{} -> INFO NoTitleElement{} -> WARNING NoLangSpecified -> INFO + InvalidLang{} -> WARNING CouldNotHighlight{} -> WARNING MissingCharacter{} -> WARNING -- cgit v1.2.3 From 083a224d1e3d791c459a998d18aa9975b8816c15 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 10:38:11 +0200 Subject: Support `lang` attribute in OpenDocument and ODT writers. This adds the required attributes to the temporary styles, and also replaces existing language attributes in styles.xml. Support for lang attributes on Div and Span has also been added. Closes #1667. --- src/Text/Pandoc/Writers/Docx.hs | 7 ++--- src/Text/Pandoc/Writers/ODT.hs | 48 ++++++++++++++++++++++++++++----- src/Text/Pandoc/Writers/OpenDocument.hs | 35 +++++++++++++++++++----- 3 files changed, 72 insertions(+), 18 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index b488f2479..d93b99486 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -67,7 +67,7 @@ import Text.Pandoc.Shared hiding (Element) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk import Text.Pandoc.Writers.Math -import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang) import Text.Printf (printf) import Text.TeXMath import Text.XML.Light as XML @@ -257,10 +257,7 @@ writeDocx opts doc@(Pandoc meta _) = do ) -- styles - let lang = case lookupMeta "lang" meta of - Just (MetaInlines [Str s]) -> Just s - Just (MetaString s) -> Just s - _ -> Nothing + let lang = getLang opts meta let addLang :: Element -> Element addLang e = case lang >>= \l -> (return . XMLC.toTree . go l . XMLC.fromElement) e of Just (Elem e') -> e' diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index c9a7de642..dff4f8fcf 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -33,6 +33,7 @@ import Codec.Archive.Zip import Control.Monad.Except (catchError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B +import Data.Generics (everywhere', mkT) import Data.List (isPrefixOf) import Data.Maybe (fromMaybe) import qualified Data.Text.Lazy as TL @@ -46,13 +47,13 @@ import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.Pandoc.Pretty import Text.Pandoc.Shared (stringify) -import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy) +import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) -import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, splitLang) import Text.Pandoc.XML import Text.TeXMath -import Text.XML.Light.Output +import Text.XML.Light data ODTState = ODTState { stEntries :: [Entry] } @@ -78,6 +79,7 @@ pandocToODT :: PandocMonad m pandocToODT opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts let title = docTitle meta + let lang = getLang opts meta refArchive <- case writerReferenceDoc opts of Just f -> liftM toArchive $ lift $ P.readFileLazy f @@ -132,18 +134,50 @@ pandocToODT opts doc@(Pandoc meta _) = do ,("xmlns:ooo","http://openoffice.org/2004/office") ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#") ,("office:version","1.2")] - $ ( inTagsSimple "office:meta" - $ ( inTagsSimple "dc:title" (text $ escapeStringForXML (stringify title)) - ) + $ ( inTagsSimple "office:meta" $ + ( inTagsSimple "dc:title" + (text $ escapeStringForXML (stringify title)) + $$ + case lang of + Just l -> inTagsSimple "dc:language" + (text (escapeStringForXML l)) + Nothing -> empty + ) ) ) -- make sure mimetype is first let mimetypeEntry = toEntry "mimetype" epochtime $ fromStringLazy "application/vnd.oasis.opendocument.text" - let archive'' = addEntryToArchive mimetypeEntry + archive'' <- updateStyleWithLang lang + $ addEntryToArchive mimetypeEntry $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' +updateStyleWithLang :: PandocMonad m => Maybe String -> Archive -> O m Archive +updateStyleWithLang Nothing arch = return arch +updateStyleWithLang (Just l) arch = do + (mblang, mbcountry) <- splitLang l + epochtime <- floor `fmap` (lift P.getPOSIXTime) + return arch{ zEntries = [if eRelativePath e == "styles.xml" + then case parseXMLDoc + (toStringLazy (fromEntry e)) of + Nothing -> e + Just d -> + toEntry "styles.xml" epochtime + ( fromStringLazy + . ppTopElement + . addLang mblang mbcountry $ d ) + else e + | e <- zEntries arch] } + +addLang :: Maybe String -> Maybe String -> Element -> Element +addLang mblang mbcountry = everywhere' (mkT updateLangAttr) + where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) l) + = Attr n (maybe l id mblang) + updateLangAttr (Attr n@(QName "country" _ (Just "fo")) c) + = Attr n (maybe c id mbcountry) + updateLangAttr x = x + -- | transform both Image and Math elements transformPicMath :: PandocMonad m => WriterOptions ->Inline -> O m Inline transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 95a800c94..a4c9e0ef2 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -45,7 +45,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty -import Text.Pandoc.Shared (linesToPara) +import Text.Pandoc.Shared (linesToPara, splitBy) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared @@ -75,6 +75,8 @@ data WriterState = , stTight :: Bool , stFirstPara :: Bool , stImageId :: Int + , stLang :: Maybe String + , stCountry :: Maybe String } defaultWriterState :: WriterState @@ -90,6 +92,8 @@ defaultWriterState = , stTight = False , stFirstPara = False , stImageId = 1 + , stLang = Nothing + , stCountry = Nothing } when :: Bool -> Doc -> Doc @@ -155,6 +159,10 @@ withTextStyle s f = do inTextStyle :: PandocMonad m => Doc -> OD m Doc inTextStyle d = do at <- gets stTextStyleAttr + mblang <- gets stLang + mbcountry <- gets stCountry + let langat = maybe [] (\la -> [("fo:language", la)]) mblang + let countryat = maybe [] (\co -> [("fo:country", co)]) mbcountry if Set.null at then return d else do @@ -168,8 +176,9 @@ inTextStyle d = do inTags False "style:style" [("style:name", styleName) ,("style:family", "text")] - $ selfClosingTag "style:text-properties" - (concatMap textStyleAttr (Set.toList at))) + $ selfClosingTag "style:text-properties" + (langat ++ countryat ++ + concatMap textStyleAttr (Set.toList at))) return $ inTags False "text:span" [("text:style-name",styleName)] d @@ -203,8 +212,10 @@ writeOpenDocument opts (Pandoc meta blocks) = do else Nothing let render' :: Doc -> Text render' = render colwidth + let lang = getLang opts meta + (mblang, mbcountry) <- maybe (return (Nothing, Nothing)) splitLang lang ((body, metadata),s) <- flip runStateT - defaultWriterState $ do + defaultWriterState{ stLang = mblang, stCountry = mbcountry } $ do m <- metaToJSON opts (fmap render' . blocksToOpenDocument opts) (fmap render' . inlinesToOpenDocument opts) @@ -326,7 +337,8 @@ blockToOpenDocument o bs then return empty else inParagraphTags =<< inlinesToOpenDocument o b | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b - | Div _ xs <- bs = blocksToOpenDocument o xs + | Div attr xs <- bs = withLangFromAttr attr + (blocksToOpenDocument o xs) | Header i _ b <- bs = setFirstPara >> (inHeaderTags i =<< inlinesToOpenDocument o b) | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b @@ -444,7 +456,7 @@ inlineToOpenDocument o ils | writerWrapText o == WrapPreserve -> return $ preformatted "\n" | otherwise -> return $ space - Span _ xs -> inlinesToOpenDocument o xs + Span attr xs -> withLangFromAttr attr (inlinesToOpenDocument o xs) LineBreak -> return $ selfClosingTag "text:line-break" [] Str s -> return $ handleSpaces $ escapeStringForXML s Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l @@ -625,3 +637,14 @@ textStyleAttr s ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] | otherwise = [] + +withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a +withLangFromAttr (_,_,kvs) action = do + oldlang <- gets stLang + case lookup "lang" kvs of + Nothing -> action + Just l -> do + modify (\st -> st{ stLang = Just l}) + result <- action + modify (\st -> st{ stLang = oldlang}) + return result -- cgit v1.2.3 From 3ae4105d143dbec44afa713f6c3fa28f7a8c1d1f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 10:38:11 +0200 Subject: Fixed support for `lang` attribute in OpenDocument and ODT writers. This improves on the last commit, which didn't work in some important ways. See #1667. --- src/Text/Pandoc/Writers/OpenDocument.hs | 35 ++++++++++++++------------------- 1 file changed, 15 insertions(+), 20 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index a4c9e0ef2..3a720acdc 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -36,6 +36,7 @@ import Control.Arrow ((***), (>>>)) import Control.Monad.State.Strict hiding (when) import Data.Char (chr) import Data.List (sortBy) +import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Map as Map import Data.Ord (comparing) @@ -45,7 +46,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty -import Text.Pandoc.Shared (linesToPara, splitBy) +import Text.Pandoc.Shared (linesToPara) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared @@ -75,8 +76,6 @@ data WriterState = , stTight :: Bool , stFirstPara :: Bool , stImageId :: Int - , stLang :: Maybe String - , stCountry :: Maybe String } defaultWriterState :: WriterState @@ -92,8 +91,6 @@ defaultWriterState = , stTight = False , stFirstPara = False , stImageId = 1 - , stLang = Nothing - , stCountry = Nothing } when :: Bool -> Doc -> Doc @@ -159,10 +156,6 @@ withTextStyle s f = do inTextStyle :: PandocMonad m => Doc -> OD m Doc inTextStyle d = do at <- gets stTextStyleAttr - mblang <- gets stLang - mbcountry <- gets stCountry - let langat = maybe [] (\la -> [("fo:language", la)]) mblang - let countryat = maybe [] (\co -> [("fo:country", co)]) mbcountry if Set.null at then return d else do @@ -177,8 +170,7 @@ inTextStyle d = do [("style:name", styleName) ,("style:family", "text")] $ selfClosingTag "style:text-properties" - (langat ++ countryat ++ - concatMap textStyleAttr (Set.toList at))) + (concatMap textStyleAttr (Set.toList at))) return $ inTags False "text:span" [("text:style-name",styleName)] d @@ -212,10 +204,8 @@ writeOpenDocument opts (Pandoc meta blocks) = do else Nothing let render' :: Doc -> Text render' = render colwidth - let lang = getLang opts meta - (mblang, mbcountry) <- maybe (return (Nothing, Nothing)) splitLang lang ((body, metadata),s) <- flip runStateT - defaultWriterState{ stLang = mblang, stCountry = mbcountry } $ do + defaultWriterState $ do m <- metaToJSON opts (fmap render' . blocksToOpenDocument opts) (fmap render' . inlinesToOpenDocument opts) @@ -619,6 +609,7 @@ paraTableStyles t s (a:xs) , ("style:justify-single-word", "false")] data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre + | Lang String String deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] @@ -636,15 +627,19 @@ textStyleAttr s | Pre <- s = [("style:font-name" ,"Courier New") ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] + | Lang lang country <- s + = [("fo:language" ,lang) + ,("fo:country" ,country)] | otherwise = [] withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a -withLangFromAttr (_,_,kvs) action = do - oldlang <- gets stLang +withLangFromAttr (_,_,kvs) action = case lookup "lang" kvs of Nothing -> action Just l -> do - modify (\st -> st{ stLang = Just l}) - result <- action - modify (\st -> st{ stLang = oldlang}) - return result + (mblang, mbcountry) <- splitLang l + case (mblang, mbcountry) of + (Just lang, _) -> withTextStyle + (Lang lang (fromMaybe "" mbcountry)) + action + _ -> action -- cgit v1.2.3 From e7cd3cb4668b119b61eb69eed857b0254a614ad9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 15:36:30 +0200 Subject: Writers.Shared: refactored getLang, splitLang... into `Lang(..)`, `getLang`, `parceBCP47`. --- src/Text/Pandoc/Writers/Docx.hs | 8 +++++--- src/Text/Pandoc/Writers/ODT.hs | 26 ++++++++++++------------ src/Text/Pandoc/Writers/OpenDocument.hs | 22 ++++++++++++--------- src/Text/Pandoc/Writers/Shared.hs | 35 ++++++++++++++++++++++----------- 4 files changed, 55 insertions(+), 36 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index d93b99486..52ababb14 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -67,7 +67,7 @@ import Text.Pandoc.Shared hiding (Element) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk import Text.Pandoc.Writers.Math -import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang) +import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, renderLang) import Text.Printf (printf) import Text.TeXMath import Text.XML.Light as XML @@ -257,9 +257,11 @@ writeDocx opts doc@(Pandoc meta _) = do ) -- styles - let lang = getLang opts meta + lang <- getLang opts meta let addLang :: Element -> Element - addLang e = case lang >>= \l -> (return . XMLC.toTree . go l . XMLC.fromElement) e of + addLang e = case lang >>= \l -> + (return . XMLC.toTree . go (renderLang l) + . XMLC.fromElement) e of Just (Elem e') -> e' _ -> e -- return original where go :: String -> Cursor -> Cursor diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index dff4f8fcf..8573f5719 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -50,7 +50,8 @@ import Text.Pandoc.Shared (stringify) import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) -import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, splitLang) +import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, Lang(..), + renderLang) import Text.Pandoc.XML import Text.TeXMath import Text.XML.Light @@ -79,7 +80,7 @@ pandocToODT :: PandocMonad m pandocToODT opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts let title = docTitle meta - let lang = getLang opts meta + lang <- getLang opts meta refArchive <- case writerReferenceDoc opts of Just f -> liftM toArchive $ lift $ P.readFileLazy f @@ -140,7 +141,7 @@ pandocToODT opts doc@(Pandoc meta _) = do $$ case lang of Just l -> inTagsSimple "dc:language" - (text (escapeStringForXML l)) + (text (escapeStringForXML (renderLang l))) Nothing -> empty ) ) @@ -153,10 +154,9 @@ pandocToODT opts doc@(Pandoc meta _) = do $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' -updateStyleWithLang :: PandocMonad m => Maybe String -> Archive -> O m Archive +updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive updateStyleWithLang Nothing arch = return arch -updateStyleWithLang (Just l) arch = do - (mblang, mbcountry) <- splitLang l +updateStyleWithLang (Just lang) arch = do epochtime <- floor `fmap` (lift P.getPOSIXTime) return arch{ zEntries = [if eRelativePath e == "styles.xml" then case parseXMLDoc @@ -166,16 +166,16 @@ updateStyleWithLang (Just l) arch = do toEntry "styles.xml" epochtime ( fromStringLazy . ppTopElement - . addLang mblang mbcountry $ d ) + . addLang lang $ d ) else e | e <- zEntries arch] } -addLang :: Maybe String -> Maybe String -> Element -> Element -addLang mblang mbcountry = everywhere' (mkT updateLangAttr) - where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) l) - = Attr n (maybe l id mblang) - updateLangAttr (Attr n@(QName "country" _ (Just "fo")) c) - = Attr n (maybe c id mbcountry) +addLang :: Lang -> Element -> Element +addLang (Lang lang country) = everywhere' (mkT updateLangAttr) + where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) + = Attr n lang + updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) + = Attr n country updateLangAttr x = x -- | transform both Image and Math elements diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 3a720acdc..57f3c1194 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -36,7 +36,6 @@ import Control.Arrow ((***), (>>>)) import Control.Monad.State.Strict hiding (when) import Data.Char (chr) import Data.List (sortBy) -import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Map as Map import Data.Ord (comparing) @@ -608,8 +607,14 @@ paraTableStyles t s (a:xs) [ ("fo:text-align", x) , ("style:justify-single-word", "false")] -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre - | Lang String String +data TextStyle = Italic + | Bold + | Strike + | Sub + | Sup + | SmallC + | Pre + | Language String String deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] @@ -627,7 +632,7 @@ textStyleAttr s | Pre <- s = [("style:font-name" ,"Courier New") ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] - | Lang lang country <- s + | Language lang country <- s = [("fo:language" ,lang) ,("fo:country" ,country)] | otherwise = [] @@ -637,9 +642,8 @@ withLangFromAttr (_,_,kvs) action = case lookup "lang" kvs of Nothing -> action Just l -> do - (mblang, mbcountry) <- splitLang l - case (mblang, mbcountry) of - (Just lang, _) -> withTextStyle - (Lang lang (fromMaybe "" mbcountry)) - action + mblang <- parseBCP47 l + case mblang of + Just (Lang lang country) -> withTextStyle + (Language lang country) action _ -> action diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 0b35d27f6..efb553ac2 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -30,7 +30,9 @@ Shared utility functions for pandoc writers. -} module Text.Pandoc.Writers.Shared ( getLang - , splitLang + , parseBCP47 + , Lang(..) + , renderLang , metaToJSON , metaToJSON' , addVariablesToJSON @@ -62,30 +64,41 @@ import Text.Pandoc.Shared (splitBy) import Text.Pandoc.UTF8 (toStringLazy) import Text.Pandoc.XML (escapeStringForXML) +-- | Represents BCP 47 language/country code. +data Lang = Lang String String + +-- | Render a Lang as BCP 47. +renderLang :: Lang -> String +renderLang (Lang la co) = la ++ if null co + then "" + else '-':co + -- | Get the contents of the `lang` metadata field or variable. -getLang :: WriterOptions -> Meta -> Maybe String -getLang opts meta = - lookup "lang" (writerVariables opts) +getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang) +getLang opts meta = maybe (return Nothing) parseBCP47 $ + case lookup "lang" (writerVariables opts) of + Just s -> Just s + _ -> Nothing `mplus` case lookupMeta "lang" meta of Just (MetaInlines [Str s]) -> Just s Just (MetaString s) -> Just s _ -> Nothing --- | Split `lang` field into lang and country, issuing warning --- if it doesn't look valid. -splitLang :: PandocMonad m => String -> m (Maybe String, Maybe String) -splitLang lang = +-- | Parse a BCP 47 string as a Lang, issuing a warning if there +-- are issues. +parseBCP47 :: PandocMonad m => String -> m (Maybe Lang) +parseBCP47 lang = case splitBy (== '-') lang of [la,co] | length la == 2 && length co == 2 - -> return (Just la, Just co) + -> return $ Just $ Lang la co [la] | length la == 2 - -> return (Just la, Nothing) + -> return $ Just $ Lang la "" _ -> do report $ InvalidLang lang - return (Nothing, Nothing) + return Nothing -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. -- cgit v1.2.3 From a85d8335767b8acad7de36a16be1c6ae4bca9aff Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 15:52:30 +0200 Subject: Fixed log message for InvalidLang. --- src/Text/Pandoc/Logging.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index ad0fcdd2d..e31fb1521 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -259,7 +259,7 @@ showLogMessage msg = "It is recommended that lang be specified for this format." InvalidLang s -> "Invalid 'lang' value '" ++ s ++ "'.\n" ++ - "Use ISO 8601 format like 'en-US'." + "Use an IETF language tag like 'en-US'." CouldNotHighlight m -> "Could not highlight code block:\n" ++ m MissingCharacter m -> -- cgit v1.2.3 From 643cbdf1044623475cb6ade9c35de85148d0dff6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 18:31:59 +0200 Subject: Writers.Shared: improve type of Lang and bcp47 parser. Use a real parsec parser for BCP47, include variants. --- src/Text/Pandoc/Writers/ODT.hs | 6 +-- src/Text/Pandoc/Writers/OpenDocument.hs | 18 +++---- src/Text/Pandoc/Writers/Shared.hs | 96 +++++++++++++++++++++++---------- 3 files changed, 79 insertions(+), 41 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 8573f5719..54873efb2 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -171,11 +171,11 @@ updateStyleWithLang (Just lang) arch = do | e <- zEntries arch] } addLang :: Lang -> Element -> Element -addLang (Lang lang country) = everywhere' (mkT updateLangAttr) +addLang lang = everywhere' (mkT updateLangAttr) where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) - = Attr n lang + = Attr n (langLanguage lang) updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) - = Attr n country + = Attr n (langRegion lang) updateLangAttr x = x -- | transform both Image and Math elements diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 57f3c1194..763cea5ad 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -614,7 +614,7 @@ data TextStyle = Italic | Sup | SmallC | Pre - | Language String String + | Language Lang deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] @@ -632,9 +632,9 @@ textStyleAttr s | Pre <- s = [("style:font-name" ,"Courier New") ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] - | Language lang country <- s - = [("fo:language" ,lang) - ,("fo:country" ,country)] + | Language lang <- s + = [("fo:language" ,langLanguage lang) + ,("fo:country" ,langRegion lang)] | otherwise = [] withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a @@ -642,8 +642,8 @@ withLangFromAttr (_,_,kvs) action = case lookup "lang" kvs of Nothing -> action Just l -> do - mblang <- parseBCP47 l - case mblang of - Just (Lang lang country) -> withTextStyle - (Language lang country) action - _ -> action + case parseBCP47 l of + Right lang -> withTextStyle (Language lang) action + Left _ -> do + report $ InvalidLang l + action diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index efb553ac2..b56f2d468 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -46,11 +46,12 @@ module Text.Pandoc.Writers.Shared ( , gridTable ) where -import Control.Monad (liftM, zipWithM, mplus) +import Control.Monad (liftM, zipWithM, guard) import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object), encode, fromJSON) +import Data.Char (isAscii, isLetter, isUpper, isLower) import qualified Data.HashMap.Strict as H -import Data.List (groupBy, intersperse, transpose) +import Data.List (groupBy, intersperse, transpose, intercalate) import qualified Data.Map as M import Data.Maybe (isJust) import qualified Data.Text as T @@ -60,45 +61,82 @@ import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty -import Text.Pandoc.Shared (splitBy) import Text.Pandoc.UTF8 (toStringLazy) import Text.Pandoc.XML (escapeStringForXML) +import qualified Text.Parsec as P -- | Represents BCP 47 language/country code. -data Lang = Lang String String +data Lang = Lang{ langLanguage :: String + , langScript :: String + , langRegion :: String + , langVariants :: [String] } + deriving (Eq, Ord, Show) -- | Render a Lang as BCP 47. renderLang :: Lang -> String -renderLang (Lang la co) = la ++ if null co - then "" - else '-':co +renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) + ([langScript lang, langRegion lang] ++ langVariants lang)) -- | Get the contents of the `lang` metadata field or variable. getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang) -getLang opts meta = maybe (return Nothing) parseBCP47 $ - case lookup "lang" (writerVariables opts) of - Just s -> Just s - _ -> Nothing - `mplus` - case lookupMeta "lang" meta of - Just (MetaInlines [Str s]) -> Just s - Just (MetaString s) -> Just s - _ -> Nothing +getLang opts meta = case + (case lookup "lang" (writerVariables opts) of + Just s -> Just s + _ -> + case lookupMeta "lang" meta of + Just (MetaInlines [Str s]) -> Just s + Just (MetaString s) -> Just s + _ -> Nothing) of + Nothing -> return Nothing + Just s -> case parseBCP47 s of + Left _ -> do + report $ InvalidLang s + return Nothing + Right l -> return (Just l) --- | Parse a BCP 47 string as a Lang, issuing a warning if there --- are issues. -parseBCP47 :: PandocMonad m => String -> m (Maybe Lang) +-- | Parse a BCP 47 string as a Lang. +parseBCP47 :: String -> Either String Lang parseBCP47 lang = - case splitBy (== '-') lang of - [la,co] - | length la == 2 && length co == 2 - -> return $ Just $ Lang la co - [la] - | length la == 2 - -> return $ Just $ Lang la "" - _ -> do - report $ InvalidLang lang - return Nothing + case P.parse bcp47 "lang" lang of + Right r -> Right r + Left e -> Left $ show e + where bcp47 = do + language <- pLanguage + script <- P.option "" pScript + region <- P.option "" pRegion + variants <- P.many pVariant + () <$ P.char '-' P.<|> P.eof + return $ Lang{ langLanguage = language + , langScript = script + , langRegion = region + , langVariants = variants } + asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) + pLanguage = do + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return cs + pScript = P.try $ do + P.char '-' + x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) + xs <- P.count 3 + (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) + return (x:xs) + pRegion = P.try $ do + P.char '-' + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return cs + pVariant = P.try $ do + P.char '-' + ds <- P.option "" (P.count 1 P.digit) + cs <- P.many1 asciiLetter + let var = ds ++ cs + guard $ if null ds + then length var >= 5 && length var <= 8 + else length var == 4 + return var -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. -- cgit v1.2.3 From ac9423eccc76005f996a10a545594247ac753e02 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 21:00:35 +0200 Subject: Moved BCP47 specific functions from Writers.Shared to new module. Text.Pandoc.BCP47 (unexported, internal module). `getLang`, `Lang(..)`, `parseBCP47`. --- pandoc.cabal | 1 + src/Text/Pandoc/BCP47.hs | 117 ++++++++++++++++++++++++++++++++ src/Text/Pandoc/Writers/Docx.hs | 3 +- src/Text/Pandoc/Writers/ODT.hs | 4 +- src/Text/Pandoc/Writers/OpenDocument.hs | 1 + src/Text/Pandoc/Writers/Shared.hs | 87 +----------------------- 6 files changed, 126 insertions(+), 87 deletions(-) create mode 100644 src/Text/Pandoc/BCP47.hs (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index 3b644c7d0..5ae255284 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -463,6 +463,7 @@ Library Text.Pandoc.Lua.Util, Text.Pandoc.CSS, Text.Pandoc.UUID, + Text.Pandoc.BCP47 Text.Pandoc.Slides, Text.Pandoc.Compat.Time, Paths_pandoc diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs new file mode 100644 index 000000000..ae7f54473 --- /dev/null +++ b/src/Text/Pandoc/BCP47.hs @@ -0,0 +1,117 @@ +{- +Copyright (C) 2017 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.BCP47 + Copyright : Copyright (C) 2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Functions for parsing and rendering BCP47 language identifiers. +-} +module Text.Pandoc.BCP47 ( + getLang + , parseBCP47 + , Lang(..) + , renderLang + ) +where +import Control.Monad (guard) +import Data.Char (isAscii, isLetter, isUpper, isLower) +import Data.List (intercalate) +import Text.Pandoc.Definition +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Logging +import Text.Pandoc.Options +import qualified Text.Parsec as P + +-- | Represents BCP 47 language/country code. +data Lang = Lang{ langLanguage :: String + , langScript :: String + , langRegion :: String + , langVariants :: [String] } + deriving (Eq, Ord, Show) + +-- | Render a Lang as BCP 47. +renderLang :: Lang -> String +renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) + ([langScript lang, langRegion lang] ++ langVariants lang)) + +-- | Get the contents of the `lang` metadata field or variable. +getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang) +getLang opts meta = case + (case lookup "lang" (writerVariables opts) of + Just s -> Just s + _ -> + case lookupMeta "lang" meta of + Just (MetaInlines [Str s]) -> Just s + Just (MetaString s) -> Just s + _ -> Nothing) of + Nothing -> return Nothing + Just s -> case parseBCP47 s of + Left _ -> do + report $ InvalidLang s + return Nothing + Right l -> return (Just l) + +-- | Parse a BCP 47 string as a Lang. +parseBCP47 :: String -> Either String Lang +parseBCP47 lang = + case P.parse bcp47 "lang" lang of + Right r -> Right r + Left e -> Left $ show e + where bcp47 = do + language <- pLanguage + script <- P.option "" pScript + region <- P.option "" pRegion + variants <- P.many pVariant + () <$ P.char '-' P.<|> P.eof + return $ Lang{ langLanguage = language + , langScript = script + , langRegion = region + , langVariants = variants } + asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) + pLanguage = do + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return cs + pScript = P.try $ do + P.char '-' + x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) + xs <- P.count 3 + (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) + return (x:xs) + pRegion = P.try $ do + P.char '-' + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return cs + pVariant = P.try $ do + P.char '-' + ds <- P.option "" (P.count 1 P.digit) + cs <- P.many1 asciiLetter + let var = ds ++ cs + guard $ if null ds + then length var >= 5 && length var <= 8 + else length var == 4 + return var diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 52ababb14..bc8568cd1 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -67,7 +67,8 @@ import Text.Pandoc.Shared hiding (Element) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk import Text.Pandoc.Writers.Math -import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, renderLang) +import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.BCP47 (getLang, renderLang) import Text.Printf (printf) import Text.TeXMath import Text.XML.Light as XML diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 54873efb2..98aa3b30b 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -50,8 +50,8 @@ import Text.Pandoc.Shared (stringify) import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) -import Text.Pandoc.Writers.Shared (fixDisplayMath, getLang, Lang(..), - renderLang) +import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.BCP47 (getLang, Lang(..), renderLang) import Text.Pandoc.XML import Text.TeXMath import Text.XML.Light diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 763cea5ad..6c53ab4ab 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -50,6 +50,7 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import Text.Pandoc.XML +import Text.Pandoc.BCP47 (parseBCP47, Lang(..)) import Text.Printf (printf) -- | Auxiliary function to convert Plain block to Para. diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index b56f2d468..2047285eb 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -29,11 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Shared utility functions for pandoc writers. -} module Text.Pandoc.Writers.Shared ( - getLang - , parseBCP47 - , Lang(..) - , renderLang - , metaToJSON + metaToJSON , metaToJSON' , addVariablesToJSON , getField @@ -46,97 +42,20 @@ module Text.Pandoc.Writers.Shared ( , gridTable ) where -import Control.Monad (liftM, zipWithM, guard) +import Control.Monad (liftM, zipWithM) import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object), encode, fromJSON) -import Data.Char (isAscii, isLetter, isUpper, isLower) import qualified Data.HashMap.Strict as H -import Data.List (groupBy, intersperse, transpose, intercalate) +import Data.List (groupBy, intersperse, transpose) import qualified Data.Map as M import Data.Maybe (isJust) import qualified Data.Text as T import qualified Data.Traversable as Traversable import Text.Pandoc.Definition -import Text.Pandoc.Class (PandocMonad, report) -import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty import Text.Pandoc.UTF8 (toStringLazy) import Text.Pandoc.XML (escapeStringForXML) -import qualified Text.Parsec as P - --- | Represents BCP 47 language/country code. -data Lang = Lang{ langLanguage :: String - , langScript :: String - , langRegion :: String - , langVariants :: [String] } - deriving (Eq, Ord, Show) - --- | Render a Lang as BCP 47. -renderLang :: Lang -> String -renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) - ([langScript lang, langRegion lang] ++ langVariants lang)) - --- | Get the contents of the `lang` metadata field or variable. -getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang) -getLang opts meta = case - (case lookup "lang" (writerVariables opts) of - Just s -> Just s - _ -> - case lookupMeta "lang" meta of - Just (MetaInlines [Str s]) -> Just s - Just (MetaString s) -> Just s - _ -> Nothing) of - Nothing -> return Nothing - Just s -> case parseBCP47 s of - Left _ -> do - report $ InvalidLang s - return Nothing - Right l -> return (Just l) - --- | Parse a BCP 47 string as a Lang. -parseBCP47 :: String -> Either String Lang -parseBCP47 lang = - case P.parse bcp47 "lang" lang of - Right r -> Right r - Left e -> Left $ show e - where bcp47 = do - language <- pLanguage - script <- P.option "" pScript - region <- P.option "" pRegion - variants <- P.many pVariant - () <$ P.char '-' P.<|> P.eof - return $ Lang{ langLanguage = language - , langScript = script - , langRegion = region - , langVariants = variants } - asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) - pLanguage = do - cs <- P.many1 asciiLetter - let lcs = length cs - guard $ lcs == 2 || lcs == 3 - return cs - pScript = P.try $ do - P.char '-' - x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) - xs <- P.count 3 - (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) - return (x:xs) - pRegion = P.try $ do - P.char '-' - cs <- P.many1 asciiLetter - let lcs = length cs - guard $ lcs == 2 || lcs == 3 - return cs - pVariant = P.try $ do - P.char '-' - ds <- P.option "" (P.count 1 P.digit) - cs <- P.many1 asciiLetter - let var = ds ++ cs - guard $ if null ds - then length var >= 5 && length var <= 8 - else length var == 4 - return var -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. -- cgit v1.2.3 From d0d2443f2e069c9aa4510579f10ed8fe0b5f20ab Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 21:56:29 +0200 Subject: Refactored ConTeXt writer to use BCP47. BCP47 - consistent case for BCP47 fields (e.g. uppercase for region). --- src/Text/Pandoc/BCP47.hs | 10 +++--- src/Text/Pandoc/Writers/ConTeXt.hs | 68 +++++++++++++++++++------------------- 2 files changed, 39 insertions(+), 39 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index ae7f54473..956130fb7 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -35,7 +35,7 @@ module Text.Pandoc.BCP47 ( ) where import Control.Monad (guard) -import Data.Char (isAscii, isLetter, isUpper, isLower) +import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower) import Data.List (intercalate) import Text.Pandoc.Definition import Text.Pandoc.Class (PandocMonad, report) @@ -93,19 +93,19 @@ parseBCP47 lang = cs <- P.many1 asciiLetter let lcs = length cs guard $ lcs == 2 || lcs == 3 - return cs + return $ map toLower cs pScript = P.try $ do P.char '-' x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) xs <- P.count 3 (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) - return (x:xs) + return $ map toLower (x:xs) pRegion = P.try $ do P.char '-' cs <- P.many1 asciiLetter let lcs = length cs guard $ lcs == 2 || lcs == 3 - return cs + return $ map toUpper cs pVariant = P.try $ do P.char '-' ds <- P.option "" (P.count 1 P.digit) @@ -114,4 +114,4 @@ parseBCP47 lang = guard $ if null ds then length var >= 5 && length var <= 8 else length var == 4 - return var + return $ map toLower var diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 5a81aa8a0..ae6cb482f 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -35,6 +35,7 @@ import Data.List (intercalate, intersperse) import Data.Maybe (catMaybes) import Data.Text (Text) import Network.URI (unEscapeString) +import Text.Pandoc.BCP47 import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition @@ -88,6 +89,7 @@ pandocToConTeXt options (Pandoc meta blocks) = do ,("top","margin-top") ,("bottom","margin-bottom") ] + lang <- maybe "" fromBCP47 <$> getLang options meta let context = defField "toc" (writerTableOfContents options) $ defField "placelist" (intercalate ("," :: String) $ take (writerTOCDepth options + @@ -100,11 +102,10 @@ pandocToConTeXt options (Pandoc meta blocks) = do $ defField "body" main $ defField "layout" layoutFromMargins $ defField "number-sections" (writerNumberSections options) + $ defField "context-lang" lang $ metadata - let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $ - getField "lang" context) - $ defField "context-dir" (toContextDir $ getField "dir" context) - $ context + let context' = defField "context-dir" (toContextDir + $ getField "dir" context) context case writerTemplate options of Nothing -> return main Just tpl -> renderTemplate' tpl context' @@ -196,7 +197,7 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do _ -> id wrapLang txt = case lookup "lang" kvs of Just lng -> "\\start\\language[" - <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop" + <> text (fromBCP47' lng) <> "]" $$ txt $$ "\\stop" Nothing -> txt wrapBlank txt = blankline <> txt <> blankline fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs @@ -421,7 +422,7 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do Just "ltr" -> braces $ "\\lefttoright " <> txt _ -> txt wrapLang txt = case lookup "lang" kvs of - Just lng -> "\\start\\language[" <> text (fromBcp47' lng) + Just lng -> "\\start\\language[" <> text (fromBCP47' lng) <> "]" <> txt <> "\\stop " Nothing -> txt fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils @@ -458,36 +459,35 @@ sectionHeader (ident,classes,_) hdrLevel lst = do <> blankline _ -> contents <> blankline -fromBcp47' :: String -> String -fromBcp47' = fromBcp47 . splitBy (=='-') +fromBCP47' :: String -> String +fromBCP47' s = case parseBCP47 s of + Right r -> fromBCP47 r + Left _ -> "" -- Takes a list of the constituents of a BCP 47 language code -- and irons out ConTeXt's exceptions -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes -fromBcp47 :: [String] -> String -fromBcp47 [] = "" -fromBcp47 ("ar":"SY":_) = "ar-sy" -fromBcp47 ("ar":"IQ":_) = "ar-iq" -fromBcp47 ("ar":"JO":_) = "ar-jo" -fromBcp47 ("ar":"LB":_) = "ar-lb" -fromBcp47 ("ar":"DZ":_) = "ar-dz" -fromBcp47 ("ar":"MA":_) = "ar-ma" -fromBcp47 ("de":"1901":_) = "deo" -fromBcp47 ("de":"DE":_) = "de-de" -fromBcp47 ("de":"AT":_) = "de-at" -fromBcp47 ("de":"CH":_) = "de-ch" -fromBcp47 ("el":"poly":_) = "agr" -fromBcp47 ("en":"US":_) = "en-us" -fromBcp47 ("en":"GB":_) = "en-gb" -fromBcp47 ("grc":_) = "agr" -fromBcp47 x = fromIso $ head x - where - fromIso "el" = "gr" - fromIso "eu" = "ba" - fromIso "he" = "il" - fromIso "jp" = "ja" - fromIso "uk" = "ua" - fromIso "vi" = "vn" - fromIso "zh" = "cn" - fromIso l = l +fromBCP47 :: Lang -> String +fromBCP47 (Lang "ar" _ "SY" _) = "ar-sy" +fromBCP47 (Lang "ar" _ "IQ" _) = "ar-iq" +fromBCP47 (Lang "ar" _ "JO" _) = "ar-jo" +fromBCP47 (Lang "ar" _ "LB" _) = "ar-lb" +fromBCP47 (Lang "ar" _ "DZ" _) = "ar-dz" +fromBCP47 (Lang "ar" _ "MA" _) = "ar-ma" +fromBCP47 (Lang "de" _ _ ["1901"]) = "deo" +fromBCP47 (Lang "de" _ "DE" _) = "de-de" +fromBCP47 (Lang "de" _ "AT" _) = "de-at" +fromBCP47 (Lang "de" _ "CH" _) = "de-ch" +fromBCP47 (Lang "el" _ _ ["poly"]) = "agr" +fromBCP47 (Lang "en" _ "US" _) = "en-us" +fromBCP47 (Lang "en" _ "GB" _) = "en-gb" +fromBCP47 (Lang "grc"_ _ _) = "agr" +fromBCP47 (Lang "el" _ _ _) = "gr" +fromBCP47 (Lang "eu" _ _ _) = "ba" +fromBCP47 (Lang "he" _ _ _) = "il" +fromBCP47 (Lang "jp" _ _ _) = "ja" +fromBCP47 (Lang "uk" _ _ _) = "ua" +fromBCP47 (Lang "vi" _ _ _) = "vn" +fromBCP47 (Lang "zh" _ _ _) = "cn" +fromBCP47 (Lang l _ _ _) = l -- cgit v1.2.3 From 4cbbc9dd587d73d576b4c891f3f37a19f12cf10c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Jun 2017 23:16:55 +0200 Subject: BCP47: split toLang from getLang, rearranged types. --- src/Text/Pandoc/BCP47.hs | 26 +++++++++------ src/Text/Pandoc/Writers/ConTeXt.hs | 67 +++++++++++++++++++------------------- src/Text/Pandoc/Writers/Docx.hs | 6 ++-- src/Text/Pandoc/Writers/ODT.hs | 4 +-- 4 files changed, 55 insertions(+), 48 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index 956130fb7..16dd3a032 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -29,6 +29,7 @@ Functions for parsing and rendering BCP47 language identifiers. -} module Text.Pandoc.BCP47 ( getLang + , toLang , parseBCP47 , Lang(..) , renderLang @@ -56,21 +57,26 @@ renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) ([langScript lang, langRegion lang] ++ langVariants lang)) -- | Get the contents of the `lang` metadata field or variable. -getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang) -getLang opts meta = case - (case lookup "lang" (writerVariables opts) of +getLang :: WriterOptions -> Meta -> Maybe String +getLang opts meta = + case lookup "lang" (writerVariables opts) of Just s -> Just s _ -> case lookupMeta "lang" meta of Just (MetaInlines [Str s]) -> Just s Just (MetaString s) -> Just s - _ -> Nothing) of - Nothing -> return Nothing - Just s -> case parseBCP47 s of - Left _ -> do - report $ InvalidLang s - return Nothing - Right l -> return (Just l) + _ -> Nothing + +-- | Convert BCP47 string to a Lang, issuing warning +-- if there are problems. +toLang :: PandocMonad m => Maybe String -> m (Maybe Lang) +toLang Nothing = return Nothing +toLang (Just s) = + case parseBCP47 s of + Left _ -> do + report $ InvalidLang s + return Nothing + Right l -> return (Just l) -- | Parse a BCP 47 string as a Lang. parseBCP47 :: String -> Either String Lang diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index ae6cb482f..7886bc052 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -89,7 +89,7 @@ pandocToConTeXt options (Pandoc meta blocks) = do ,("top","margin-top") ,("bottom","margin-bottom") ] - lang <- maybe "" fromBCP47 <$> getLang options meta + mblang <- fromBCP47 (getLang options meta) let context = defField "toc" (writerTableOfContents options) $ defField "placelist" (intercalate ("," :: String) $ take (writerTOCDepth options + @@ -102,7 +102,7 @@ pandocToConTeXt options (Pandoc meta blocks) = do $ defField "body" main $ defField "layout" layoutFromMargins $ defField "number-sections" (writerNumberSections options) - $ defField "context-lang" lang + $ maybe id (defField "context-lang") mblang $ metadata let context' = defField "context-dir" (toContextDir $ getField "dir" context) context @@ -187,6 +187,7 @@ blockToConTeXt b@(RawBlock _ _ ) = do return empty blockToConTeXt (Div (ident,_,kvs) bs) = do let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment" + mblang <- fromBCP47 (lookup "lang" kvs) let wrapRef txt = if null ident then txt else ("\\reference" <> brackets (text $ toLabel ident) <> @@ -195,9 +196,9 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do Just "rtl" -> align "righttoleft" Just "ltr" -> align "lefttoright" _ -> id - wrapLang txt = case lookup "lang" kvs of + wrapLang txt = case mblang of Just lng -> "\\start\\language[" - <> text (fromBCP47' lng) <> "]" $$ txt $$ "\\stop" + <> text lng <> "]" $$ txt $$ "\\stop" Nothing -> txt wrapBlank txt = blankline <> txt <> blankline fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs @@ -417,12 +418,13 @@ inlineToConTeXt (Note contents) = do else text "\\startbuffer " <> nest 2 contents' <> text "\\stopbuffer\\footnote{\\getbuffer}" inlineToConTeXt (Span (_,_,kvs) ils) = do + mblang <- fromBCP47 (lookup "lang" kvs) let wrapDir txt = case lookup "dir" kvs of Just "rtl" -> braces $ "\\righttoleft " <> txt Just "ltr" -> braces $ "\\lefttoright " <> txt _ -> txt - wrapLang txt = case lookup "lang" kvs of - Just lng -> "\\start\\language[" <> text (fromBCP47' lng) + wrapLang txt = case mblang of + Just lng -> "\\start\\language[" <> text lng <> "]" <> txt <> "\\stop " Nothing -> txt fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils @@ -459,35 +461,34 @@ sectionHeader (ident,classes,_) hdrLevel lst = do <> blankline _ -> contents <> blankline -fromBCP47' :: String -> String -fromBCP47' s = case parseBCP47 s of - Right r -> fromBCP47 r - Left _ -> "" +fromBCP47 :: PandocMonad m => Maybe String -> WM m (Maybe String) +fromBCP47 mbs = fromBCP47' <$> toLang mbs -- Takes a list of the constituents of a BCP 47 language code -- and irons out ConTeXt's exceptions -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes -fromBCP47 :: Lang -> String -fromBCP47 (Lang "ar" _ "SY" _) = "ar-sy" -fromBCP47 (Lang "ar" _ "IQ" _) = "ar-iq" -fromBCP47 (Lang "ar" _ "JO" _) = "ar-jo" -fromBCP47 (Lang "ar" _ "LB" _) = "ar-lb" -fromBCP47 (Lang "ar" _ "DZ" _) = "ar-dz" -fromBCP47 (Lang "ar" _ "MA" _) = "ar-ma" -fromBCP47 (Lang "de" _ _ ["1901"]) = "deo" -fromBCP47 (Lang "de" _ "DE" _) = "de-de" -fromBCP47 (Lang "de" _ "AT" _) = "de-at" -fromBCP47 (Lang "de" _ "CH" _) = "de-ch" -fromBCP47 (Lang "el" _ _ ["poly"]) = "agr" -fromBCP47 (Lang "en" _ "US" _) = "en-us" -fromBCP47 (Lang "en" _ "GB" _) = "en-gb" -fromBCP47 (Lang "grc"_ _ _) = "agr" -fromBCP47 (Lang "el" _ _ _) = "gr" -fromBCP47 (Lang "eu" _ _ _) = "ba" -fromBCP47 (Lang "he" _ _ _) = "il" -fromBCP47 (Lang "jp" _ _ _) = "ja" -fromBCP47 (Lang "uk" _ _ _) = "ua" -fromBCP47 (Lang "vi" _ _ _) = "vn" -fromBCP47 (Lang "zh" _ _ _) = "cn" -fromBCP47 (Lang l _ _ _) = l +fromBCP47' :: Maybe Lang -> Maybe String +fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy" +fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq" +fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo" +fromBCP47' (Just (Lang "ar" _ "LB" _) ) = Just "ar-lb" +fromBCP47' (Just (Lang "ar" _ "DZ" _) ) = Just "ar-dz" +fromBCP47' (Just (Lang "ar" _ "MA" _) ) = Just "ar-ma" +fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo" +fromBCP47' (Just (Lang "de" _ "DE" _) ) = Just "de-de" +fromBCP47' (Just (Lang "de" _ "AT" _) ) = Just "de-at" +fromBCP47' (Just (Lang "de" _ "CH" _) ) = Just "de-ch" +fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr" +fromBCP47' (Just (Lang "en" _ "US" _) ) = Just "en-us" +fromBCP47' (Just (Lang "en" _ "GB" _) ) = Just "en-gb" +fromBCP47' (Just (Lang "grc"_ _ _) ) = Just "agr" +fromBCP47' (Just (Lang "el" _ _ _) ) = Just "gr" +fromBCP47' (Just (Lang "eu" _ _ _) ) = Just "ba" +fromBCP47' (Just (Lang "he" _ _ _) ) = Just "il" +fromBCP47' (Just (Lang "jp" _ _ _) ) = Just "ja" +fromBCP47' (Just (Lang "uk" _ _ _) ) = Just "ua" +fromBCP47' (Just (Lang "vi" _ _ _) ) = Just "vn" +fromBCP47' (Just (Lang "zh" _ _ _) ) = Just "cn" +fromBCP47' (Just (Lang l _ _ _) ) = Just l +fromBCP47' Nothing = Nothing diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index bc8568cd1..06318b20c 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -68,7 +68,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared (fixDisplayMath) -import Text.Pandoc.BCP47 (getLang, renderLang) +import Text.Pandoc.BCP47 (getLang, renderLang, toLang) import Text.Printf (printf) import Text.TeXMath import Text.XML.Light as XML @@ -258,9 +258,9 @@ writeDocx opts doc@(Pandoc meta _) = do ) -- styles - lang <- getLang opts meta + mblang <- toLang $ getLang opts meta let addLang :: Element -> Element - addLang e = case lang >>= \l -> + addLang e = case mblang >>= \l -> (return . XMLC.toTree . go (renderLang l) . XMLC.fromElement) e of Just (Elem e') -> e' diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 98aa3b30b..785891a9f 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -51,7 +51,7 @@ import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) import Text.Pandoc.Writers.Shared (fixDisplayMath) -import Text.Pandoc.BCP47 (getLang, Lang(..), renderLang) +import Text.Pandoc.BCP47 (getLang, toLang, Lang(..), renderLang) import Text.Pandoc.XML import Text.TeXMath import Text.XML.Light @@ -80,7 +80,7 @@ pandocToODT :: PandocMonad m pandocToODT opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts let title = docTitle meta - lang <- getLang opts meta + lang <- toLang (getLang opts meta) refArchive <- case writerReferenceDoc opts of Just f -> liftM toArchive $ lift $ P.readFileLazy f -- cgit v1.2.3 From b95f391bebdd6d79b11db4469d97640e80285ccc Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Mon, 26 Jun 2017 09:40:53 +0300 Subject: Muse reader: simplify para implementation (#3761) --- src/Text/Pandoc/Readers/Muse.hs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index fe8a55f5c..06d385222 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -247,9 +247,7 @@ commentTag :: PandocMonad m => MuseParser m (F Blocks) commentTag = parseHtmlContent "comment" block >> return mempty para :: PandocMonad m => MuseParser m (F Blocks) -para = do - res <- trimInlinesF . mconcat <$> many1Till inline endOfParaElement - return $ B.para <$> res +para = liftM B.para . trimInlinesF . mconcat <$> many1Till inline endOfParaElement where endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement endOfInput = try $ skipMany blankline >> skipSpaces >> eof -- cgit v1.2.3 From 492b3b129190be9742981493812894f888bb5f2d Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Mon, 26 Jun 2017 09:41:17 +0300 Subject: Muse reader: fix horizontal rule parsing (#3762) Do not parse 3 dashes as horizontal rule and allow whitespace after rule --- src/Text/Pandoc/Readers/Muse.hs | 6 ++++-- test/Tests/Readers/Muse.hs | 13 ++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 06d385222..eb0769e0b 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -200,8 +200,10 @@ comment = try $ do separator :: PandocMonad m => MuseParser m (F Blocks) separator = try $ do - string "---" - newline + string "----" + many $ char '-' + many spaceChar + void newline <|> eof return $ return B.horizontalRule header :: PandocMonad m => MuseParser m (F Blocks) diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index bae389584..3d7baf8f0 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -91,7 +91,18 @@ tests = ] , testGroup "Blocks" - [ "Quote" =: "Hello, world" =?> blockQuote (para $ text "Hello, world") + [ "Block elements end paragraphs" =: + T.unlines [ "First paragraph" + , "----" + , "Second paragraph" + ] =?> para (text "First paragraph") <> horizontalRule <> para (text "Second paragraph") + , testGroup "Horizontal rule" + [ "Less than 4 dashes is not a horizontal rule" =: "---" =?> para (text "---") + , "4 dashes is a horizontal rule" =: "----" =?> horizontalRule + , "5 dashes is a horizontal rule" =: "-----" =?> horizontalRule + , "4 dashes with spaces is a horizontal rule" =: "---- " =?> horizontalRule + ] + , "Quote" =: "Hello, world" =?> blockQuote (para $ text "Hello, world") , "Center" =: "
Hello, world
" =?> para (text "Hello, world") , "Right" =: "Hello, world" =?> para (text "Hello, world") , testGroup "Comments" -- cgit v1.2.3 From f09473eab70f3d540fe1586c0256336ab9679049 Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Mon, 26 Jun 2017 02:41:51 -0400 Subject: minor updates to vimwiki reader. (#3759) - updated comments in Vimwiki.hs to reflect current status of implementation - added vimwiki to trypandoc --- src/Text/Pandoc/Readers/Vimwiki.hs | 13 ++++++------- trypandoc/index.html | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 98f04eda9..11faedb24 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -28,20 +28,20 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of vimwiki text to 'Pandoc' document. -} {-- - progress: +[X]: implemented +[O]: not implemented * block parsers: * [X] header * [X] hrule * [X] comment * [X] blockquote - * [X] preformatted + * [X] preformatted -- using codeblock * [X] displaymath * [X] bulletlist / orderedlist - * [X] orderedlist with 1., i., a) etc identification. - * [X] todo lists -- not list builder with attributes? using span. + * [X] todo lists -- using span. * [X] table * [X] centered table -- using div - * [O] colspan and rowspan -- pandoc limitation, see issue #1024 + * [O] colspan and rowspan -- see issue #1024 * [X] paragraph * [X] definition list * inline parsers: @@ -58,8 +58,7 @@ Conversion of vimwiki text to 'Pandoc' document. * misc: * [X] `TODO:` mark * [X] metadata placeholders: %title and %date - * [O] control placeholders: %template and %nohtml -- %template added to - meta, %nohtml ignored + * [O] control placeholders: %template and %nohtml -- ignored --} module Text.Pandoc.Readers.Vimwiki ( readVimwiki diff --git a/trypandoc/index.html b/trypandoc/index.html index 9b84e14b7..fe4077ce2 100644 --- a/trypandoc/index.html +++ b/trypandoc/index.html @@ -94,6 +94,7 @@ $(document).ready(function() { +
-- cgit v1.2.3 From 700a0843b2310c6b319bf34d2aebd8470cc76b40 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 26 Jun 2017 15:03:51 +0200 Subject: parseBCP47: Parse extensions and private-use as variants. Even though officially they aren't. This suffices for our purposes. --- src/Text/Pandoc/BCP47.hs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index 16dd3a032..b4b55c5d4 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -36,7 +36,8 @@ module Text.Pandoc.BCP47 ( ) where import Control.Monad (guard) -import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower) +import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower, + isAlphaNum) import Data.List (intercalate) import Text.Pandoc.Definition import Text.Pandoc.Class (PandocMonad, report) @@ -78,7 +79,9 @@ toLang (Just s) = return Nothing Right l -> return (Just l) --- | Parse a BCP 47 string as a Lang. +-- | Parse a BCP 47 string as a Lang. Currently we parse +-- extensions and private-use fields as "variants," even +-- though officially they aren't. parseBCP47 :: String -> Either String Lang parseBCP47 lang = case P.parse bcp47 "lang" lang of @@ -88,8 +91,8 @@ parseBCP47 lang = language <- pLanguage script <- P.option "" pScript region <- P.option "" pRegion - variants <- P.many pVariant - () <$ P.char '-' P.<|> P.eof + variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse) + P.eof return $ Lang{ langLanguage = language , langScript = script , langRegion = region @@ -121,3 +124,16 @@ parseBCP47 lang = then length var >= 5 && length var <= 8 else length var == 4 return $ map toLower var + pExtension = P.try $ do + P.char '-' + cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) + guard $ length cs >= 2 && length cs <= 8 + return $ map toLower cs + pPrivateUse = P.try $ do + P.char '-' + P.char 'x' + P.char '-' + cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) + guard $ length cs >= 1 && length cs <= 8 + let var = "x-" ++ cs + return $ map toLower var -- cgit v1.2.3 From b2fe009d8fee618cbcd837976b6f2dea7c0a9837 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 26 Jun 2017 15:04:22 +0200 Subject: LaTeX writer: use BCP47 parser. --- src/Text/Pandoc/Writers/LaTeX.hs | 194 +++++++++++++++++++++------------------ 1 file changed, 105 insertions(+), 89 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 53a67a27a..5d505ed15 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -39,12 +39,13 @@ import Control.Monad.State.Strict import Data.Aeson (FromJSON, object, (.=)) import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord, toLower) -import Data.List (foldl', intercalate, intersperse, isInfixOf, nub, nubBy, +import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy, stripPrefix, (\\)) import Data.Maybe (catMaybes, fromMaybe, isJust) import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) +import Text.Pandoc.BCP47 (Lang(..), toLang, getLang, renderLang) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, @@ -188,7 +189,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do st <- get titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta - let docLangs = nub $ query (extract "lang") blocks + docLangs <- catMaybes <$> + mapM (toLang . Just) (ordNub (query (extract "lang") blocks)) let hasStringValue x = isJust (getField x metadata :: Maybe String) let geometryFromMargins = intercalate [','] $ catMaybes $ map (\(x,y) -> @@ -198,6 +200,18 @@ pandocToLaTeX options (Pandoc meta blocks) = do ,("tmargin","margin-top") ,("bmargin","margin-bottom") ] + let toPolyObj lang = object [ "name" .= T.pack name + , "options" .= T.pack opts ] + where + (name, opts) = toPolyglossia lang + mblang <- toLang $ case getLang options meta of + Just l -> Just l + Nothing | null docLangs -> Nothing + | otherwise -> Just "en" + -- we need a default here since lang is used in template conditionals + + let dirs = query (extract "dir") blocks + let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if stBook st @@ -235,26 +249,20 @@ pandocToLaTeX options (Pandoc meta blocks) = do Biblatex -> defField "biblio-title" biblioTitle . defField "biblatex" True _ -> id) $ - -- set lang to something so polyglossia/babel is included - defField "lang" (if null docLangs then ""::String else "en") $ - defField "otherlangs" docLangs $ defField "colorlinks" (any hasStringValue ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $ - defField "dir" (if (null $ query (extract "dir") blocks) - then ""::String - else "ltr") $ + (if null dirs + then id + else defField "dir" ("ltr" :: String)) $ defField "section-titles" True $ defField "geometry" geometryFromMargins $ metadata - let toPolyObj lang = object [ "name" .= T.pack name - , "options" .= T.pack opts ] - where - (name, opts) = toPolyglossia lang - let lang = maybe [] (splitBy (=='-')) $ getField "lang" context - otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context let context' = - defField "babel-lang" (toBabel lang) - $ defField "babel-otherlangs" (map toBabel otherlangs) + -- note: lang is used in some conditionals in the template, + -- so we need to set it if we have any babel/polyglossia: + maybe id (defField "lang" . renderLang) mblang + $ maybe id (defField "babel-lang" . toBabel) mblang + $ defField "babel-otherlangs" (map toBabel docLangs) $ defField "babel-newcommands" (concatMap (\(poly, babel) -> -- \textspanish and \textgalician are already used by babel -- save them as \oritext... and let babel use that @@ -274,16 +282,12 @@ pandocToLaTeX options (Pandoc meta blocks) = do -- eliminate duplicates that have same polyglossia name $ nubBy (\a b -> fst a == fst b) -- find polyglossia and babel names of languages used in the document - $ map (\l -> - let lng = splitBy (=='-') l - in (fst $ toPolyglossia lng, toBabel lng) - ) - docLangs ) - $ defField "polyglossia-lang" (toPolyObj lang) - $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs) - $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of - Just "rtl" -> True - _ -> False) + $ map (\l -> (fst $ toPolyglossia l, toBabel l)) docLangs + ) + $ maybe id (defField "polyglossia-lang" . toPolyObj) mblang + $ defField "polyglossia-otherlangs" (map toPolyObj docLangs) + $ defField "latex-dir-rtl" + (getField "dir" context == Just ("rtl" :: String)) $ context case writerTemplate options of Nothing -> return main @@ -443,11 +447,12 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do -> "\\leavevmode" <> linkAnchor' <> "%" _ -> linkAnchor' let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir + lang <- toLang $ lookup "lang" kvs let wrapDir = case lookup "dir" kvs of Just "rtl" -> align "RTL" Just "ltr" -> align "LTR" _ -> id - wrapLang txt = case lookup "lang" kvs of + wrapLang txt = case lang of Just lng -> let (l, o) = toPolyglossiaEnv lng ops = if null o then "" @@ -918,13 +923,14 @@ inlineToLaTeX :: PandocMonad m -> LW m Doc inlineToLaTeX (Span (id',classes,kvs) ils) = do linkAnchor <- hypertarget False id' empty + lang <- toLang $ lookup "lang" kvs let cmds = ["textup" | "csl-no-emph" `elem` classes] ++ ["textnormal" | "csl-no-strong" `elem` classes || "csl-no-smallcaps" `elem` classes] ++ ["RL" | ("dir", "rtl") `elem` kvs] ++ ["LR" | ("dir", "ltr") `elem` kvs] ++ - (case lookup "lang" kvs of - Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng + (case lang of + Just lng -> let (l, o) = toPolyglossia lng ops = if null o then "" else ("[" ++ o ++ "]") in ["text" ++ l ++ ops] Nothing -> []) @@ -1254,85 +1260,95 @@ lookKey :: String -> Attr -> [String] lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs -- In environments \Arabic instead of \arabic is used -toPolyglossiaEnv :: String -> (String, String) +toPolyglossiaEnv :: Lang -> (String, String) toPolyglossiaEnv l = - case toPolyglossia $ (splitBy (=='-')) l of + case toPolyglossia l of ("arabic", o) -> ("Arabic", o) x -> x -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Polyglossia (language, options) tuple -- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf -toPolyglossia :: [String] -> (String, String) -toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria") -toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya") -toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco") -toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania") -toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia") -toPolyglossia ("de":"1901":_) = ("german", "spelling=old") -toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old") -toPolyglossia ("de":"AT":_) = ("german", "variant=austrian") -toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old") -toPolyglossia ("de":"CH":_) = ("german", "variant=swiss") -toPolyglossia ("de":_) = ("german", "") -toPolyglossia ("dsb":_) = ("lsorbian", "") -toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly") -toPolyglossia ("en":"AU":_) = ("english", "variant=australian") -toPolyglossia ("en":"CA":_) = ("english", "variant=canadian") -toPolyglossia ("en":"GB":_) = ("english", "variant=british") -toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand") -toPolyglossia ("en":"UK":_) = ("english", "variant=british") -toPolyglossia ("en":"US":_) = ("english", "variant=american") -toPolyglossia ("grc":_) = ("greek", "variant=ancient") -toPolyglossia ("hsb":_) = ("usorbian", "") -toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic") -toPolyglossia ("sl":_) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") +toPolyglossia :: Lang -> (String, String) +toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") +toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya") +toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco") +toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania") +toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia") +toPolyglossia (Lang "de" _ _ vars) + | "1901" `elem` vars = ("german", "spelling=old") +toPolyglossia (Lang "de" _ "AT" vars) + | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") +toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian") +toPolyglossia (Lang "de" _ "CH" vars) + | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") +toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ _ _) = ("german", "") +toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "") +toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly") +toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian") +toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian") +toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand") +toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american") +toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient") +toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "") +toPolyglossia (Lang "la" _ _ vars) + | "x-classic" `elem` vars = ("latin", "variant=classic") +toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "") +toPolyglossia x = (commonFromBcp47 x, "") -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Babel language string. -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf -- List of supported languages (slightly outdated): -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf -toBabel :: [String] -> String -toBabel ("de":"1901":_) = "german" -toBabel ("de":"AT":"1901":_) = "austrian" -toBabel ("de":"AT":_) = "naustrian" -toBabel ("de":"CH":"1901":_) = "swissgerman" -toBabel ("de":"CH":_) = "nswissgerman" -toBabel ("de":_) = "ngerman" -toBabel ("dsb":_) = "lowersorbian" -toBabel ("el":"polyton":_) = "polutonikogreek" -toBabel ("en":"AU":_) = "australian" -toBabel ("en":"CA":_) = "canadian" -toBabel ("en":"GB":_) = "british" -toBabel ("en":"NZ":_) = "newzealand" -toBabel ("en":"UK":_) = "british" -toBabel ("en":"US":_) = "american" -toBabel ("fr":"CA":_) = "canadien" -toBabel ("fra":"aca":_) = "acadian" -toBabel ("grc":_) = "polutonikogreek" -toBabel ("hsb":_) = "uppersorbian" -toBabel ("la":"x":"classic":_) = "classiclatin" -toBabel ("sl":_) = "slovene" -toBabel x = commonFromBcp47 x +toBabel :: Lang -> String +toBabel (Lang "de" _ "AT" vars) + | "1901" `elem` vars = "austrian" + | otherwise = "naustrian" +toBabel (Lang "de" _ "CH" vars) + | "1901" `elem` vars = "swissgerman" + | otherwise = "nswissgerman" +toBabel (Lang "de" _ _ vars) + | "1901" `elem` vars = "german" + | otherwise = "ngerman" +toBabel (Lang "dsb" _ _ _) = "lowersorbian" +toBabel (Lang "el" _ _ vars) + | "polyton" `elem` vars = "polutonikogreek" +toBabel (Lang "en" _ "AU" _) = "australian" +toBabel (Lang "en" _ "CA" _) = "canadian" +toBabel (Lang "en" _ "GB" _) = "british" +toBabel (Lang "en" _ "NZ" _) = "newzealand" +toBabel (Lang "en" _ "UK" _) = "british" +toBabel (Lang "en" _ "US" _) = "american" +toBabel (Lang "fr" _ "CA" _) = "canadien" +toBabel (Lang "fra" _ _ vars) + | "aca" `elem` vars = "acadian" +toBabel (Lang "grc" _ _ _) = "polutonikogreek" +toBabel (Lang "hsb" _ _ _) = "uppersorbian" +toBabel (Lang "la" _ _ vars) + | "x-classic" `elem` vars = "classiclatin" +toBabel (Lang "sl" _ _ _) = "slovene" +toBabel x = commonFromBcp47 x -- Takes a list of the constituents of a BCP 47 language code -- and converts it to a string shared by Babel and Polyglossia. -- https://tools.ietf.org/html/bcp47#section-2.1 -commonFromBcp47 :: [String] -> String -commonFromBcp47 [] = "" -commonFromBcp47 ("pt":"BR":_) = "brazil" +commonFromBcp47 :: Lang -> String +commonFromBcp47 (Lang "pt" _ "BR" _) = "brazil" -- Note: documentation says "brazilian" works too, but it doesn't seem to work -- on some systems. See #2953. -commonFromBcp47 ("sr":"Cyrl":_) = "serbianc" -commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin" -commonFromBcp47 x = fromIso $ head x +commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc" +commonFromBcp47 (Lang "zh" "Latn" _ vars) + | "pinyin" `elem` vars = "pinyin" +commonFromBcp47 (Lang l _ _ _) = fromIso l where fromIso "af" = "afrikaans" fromIso "am" = "amharic" -- cgit v1.2.3 From fa515e46f36fa3e73b26b89b721a2de1738cf4e3 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Mon, 26 Jun 2017 16:07:45 +0300 Subject: Muse writer: fix hlint errors (#3764) --- src/Text/Pandoc/Writers/Muse.hs | 30 +++++++++++++----------------- test/Tests/Writers/Muse.hs | 4 ++-- 2 files changed, 15 insertions(+), 19 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index 3d9e232ae..b386a85b9 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -97,8 +97,7 @@ pandocToMuse (Pandoc meta blocks) = do body <- blockListToMuse blocks notes <- liftM (reverse . stNotes) get >>= notesToMuse let main = render colwidth $ body $+$ notes - let context = defField "body" main - $ metadata + let context = defField "body" main metadata case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context @@ -129,14 +128,14 @@ blockToMuse (Para inlines) = do blockToMuse (LineBlock lns) = do let splitStanza [] = [] splitStanza xs = case break (== mempty) xs of - (l, []) -> l : [] + (l, []) -> [l] (l, _:r) -> l : splitStanza r let joinWithLinefeeds = nowrap . mconcat . intersperse cr let joinWithBlankLines = mconcat . intersperse blankline let prettyfyStanza ls = joinWithLinefeeds <$> mapM inlineListToMuse ls contents <- joinWithBlankLines <$> mapM prettyfyStanza (splitStanza lns) return $ blankline $$ "" $$ contents $$ "" <> blankline -blockToMuse (CodeBlock (_,_,_) str) = do +blockToMuse (CodeBlock (_,_,_) str) = return $ "" $$ text str $$ "" $$ blankline blockToMuse (RawBlock (Format format) str) = return $ blankline $$ " text format <> "\">" $$ @@ -154,11 +153,10 @@ blockToMuse (OrderedList (start, style, _) items) = do let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToMuse item num) $ - zip markers' items + contents <- zipWithM orderedListItemToMuse markers' items -- ensure that sublists have preceding blank line topLevel <- gets stTopLevel - return $ cr $$ ((if topLevel then nest 1 else id) $ vcat contents) $$ blankline + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline where orderedListItemToMuse :: PandocMonad m => String -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) @@ -170,7 +168,7 @@ blockToMuse (BulletList items) = do contents <- mapM bulletListItemToMuse items -- ensure that sublists have preceding blank line topLevel <- gets stTopLevel - return $ cr $$ ((if topLevel then nest 1 else id) $ vcat contents) $$ blankline + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline where bulletListItemToMuse :: PandocMonad m => [Block] -> StateT WriterState m Doc @@ -179,7 +177,7 @@ blockToMuse (BulletList items) = do return $ hang 2 "- " contents blockToMuse (DefinitionList items) = do contents <- mapM definitionListItemToMuse items - return $ cr $$ (nest 1 $ vcat $ contents) $$ blankline + return $ cr $$ nest 1 (vcat contents) $$ blankline where definitionListItemToMuse :: PandocMonad m => ([Inline], [[Block]]) -> StateT WriterState m Doc @@ -218,8 +216,8 @@ blockToMuse (Table caption _ _ headers rows) = do -- FIXME: Muse doesn't allow blocks with height more than 1. let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks where h = maximum (1 : map height blocks) - sep' = lblock (length sep) $ vcat (map text $ replicate h sep) - let makeRow sep = (" " <>) . (hpipeBlocks sep . zipWith lblock widthsInChars) + sep' = lblock (length sep) $ vcat (replicate h (text sep)) + let makeRow sep = (" " <>) . hpipeBlocks sep . zipWith lblock widthsInChars let head' = makeRow " || " headers' let rowSeparator = if noHeaders then " | " else " | " rows'' <- mapM (\row -> do cols <- mapM blockListToMuse row @@ -236,9 +234,7 @@ blockToMuse Null = return empty notesToMuse :: PandocMonad m => Notes -> StateT WriterState m Doc -notesToMuse notes = - mapM (\(num, note) -> noteToMuse num note) (zip [1..] notes) >>= - return . vsep +notesToMuse notes = liftM vsep (zipWithM noteToMuse [1 ..] notes) -- | Return Muse representation of a note. noteToMuse :: PandocMonad m @@ -268,7 +264,7 @@ conditionalEscapeString s inlineListToMuse :: PandocMonad m => [Inline] -> StateT WriterState m Doc -inlineListToMuse lst = mapM inlineToMuse lst >>= return . hcat +inlineListToMuse lst = liftM hcat (mapM inlineToMuse lst) -- | Convert Pandoc inline element to Muse. inlineToMuse :: PandocMonad m @@ -316,7 +312,7 @@ inlineToMuse Space = return space inlineToMuse SoftBreak = do wrapText <- gets $ writerWrapText . stOptions return $ if wrapText == WrapPreserve then cr else space -inlineToMuse (Link _ txt (src, _)) = do +inlineToMuse (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> return $ "[[" <> text (escapeLink x) <> "]]" @@ -340,7 +336,7 @@ inlineToMuse (Note contents) = do -- add to notes in state notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } - let ref = show $ (length notes) + 1 + let ref = show $ length notes + 1 return $ "[" <> text ref <> "]" inlineToMuse (Span (_,name:_,_) inlines) = do contents <- inlineListToMuse inlines diff --git a/test/Tests/Writers/Muse.hs b/test/Tests/Writers/Muse.hs index 63fdd293c..d83cc5c9b 100644 --- a/test/Tests/Writers/Muse.hs +++ b/test/Tests/Writers/Muse.hs @@ -31,14 +31,14 @@ tests = [ testGroup "block elements" , "Second paragraph." ] ] - , "line block" =: lineBlock ([text "Foo", text "bar", text "baz"]) + , "line block" =: lineBlock [text "Foo", text "bar", text "baz"] =?> unlines [ "" , "Foo" , "bar" , "baz" , "" ] - , "code block" =: codeBlock ("int main(void) {\n\treturn 0;\n}") + , "code block" =: codeBlock "int main(void) {\n\treturn 0;\n}" =?> unlines [ "" , "int main(void) {" , "\treturn 0;" -- cgit v1.2.3 From 75f4e41d7d292e011a83d06efebc356060ea812b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 26 Jun 2017 16:07:59 +0200 Subject: Use `table-of-contents` for contents of toc, make `toc` a boolean. Changed markdown, rtf, and HTML-based templates accordingly. This allows you to set `toc: true` in the metadata; this previously produced strange results in some output formats. Closes #2872. For backwards compatibility, `toc` is still set to the toc contents. But it is recommended that you update templates to use `table-of-contents` for the toc contents and `toc` for a boolean flag. --- data/templates/default.commonmark | 2 +- data/templates/default.dzslides | 2 +- data/templates/default.html4 | 2 +- data/templates/default.html5 | 2 +- data/templates/default.markdown | 2 +- data/templates/default.plain | 2 +- data/templates/default.revealjs | 2 +- data/templates/default.rtf | 2 +- data/templates/default.s5 | 2 +- data/templates/default.slideous | 2 +- data/templates/default.slidy | 2 +- src/Text/Pandoc/Writers/HTML.hs | 8 ++++++-- src/Text/Pandoc/Writers/Markdown.hs | 10 +++++++--- src/Text/Pandoc/Writers/RTF.hs | 6 +++++- 14 files changed, 29 insertions(+), 17 deletions(-) (limited to 'src/Text') diff --git a/data/templates/default.commonmark b/data/templates/default.commonmark index 95d7e52cc..9f6ca96de 100644 --- a/data/templates/default.commonmark +++ b/data/templates/default.commonmark @@ -11,7 +11,7 @@ $include-before$ $endfor$ $if(toc)$ -$toc$ +$table-of-contents$ $endif$ $body$ diff --git a/data/templates/default.dzslides b/data/templates/default.dzslides index ef4f03c8d..479d2f06e 100644 --- a/data/templates/default.dzslides +++ b/data/templates/default.dzslides @@ -186,7 +186,7 @@ $endif$ $endif$ $if(toc)$
-$toc$ +$table-of-contents$
$endif$ $for(include-before)$ diff --git a/data/templates/default.html4 b/data/templates/default.html4 index 9745f3ba8..837b20080 100644 --- a/data/templates/default.html4 +++ b/data/templates/default.html4 @@ -57,7 +57,7 @@ $endif$ $endif$ $if(toc)$
-$toc$ +$table-of-contents$
$endif$ $body$ diff --git a/data/templates/default.html5 b/data/templates/default.html5 index dfc66cf4e..203001f21 100644 --- a/data/templates/default.html5 +++ b/data/templates/default.html5 @@ -60,7 +60,7 @@ $endif$ $endif$ $if(toc)$ $endif$ $body$ diff --git a/data/templates/default.markdown b/data/templates/default.markdown index 95d7e52cc..9f6ca96de 100644 --- a/data/templates/default.markdown +++ b/data/templates/default.markdown @@ -11,7 +11,7 @@ $include-before$ $endfor$ $if(toc)$ -$toc$ +$table-of-contents$ $endif$ $body$ diff --git a/data/templates/default.plain b/data/templates/default.plain index 95d7e52cc..9f6ca96de 100644 --- a/data/templates/default.plain +++ b/data/templates/default.plain @@ -11,7 +11,7 @@ $include-before$ $endfor$ $if(toc)$ -$toc$ +$table-of-contents$ $endif$ $body$ diff --git a/data/templates/default.revealjs b/data/templates/default.revealjs index a3f39885e..4350c80e8 100644 --- a/data/templates/default.revealjs +++ b/data/templates/default.revealjs @@ -79,7 +79,7 @@ $endif$ $endif$ $if(toc)$
-$toc$ +$table-of-contents$
$endif$ diff --git a/data/templates/default.rtf b/data/templates/default.rtf index 59e132b3f..a7f79376d 100644 --- a/data/templates/default.rtf +++ b/data/templates/default.rtf @@ -18,7 +18,7 @@ $if(spacer)$ {\pard \ql \f0 \sa180 \li0 \fi0 \par} $endif$ $if(toc)$ -$toc$ +$table-of-contents$ $endif$ $for(include-before)$ $include-before$ diff --git a/data/templates/default.s5 b/data/templates/default.s5 index e4fa970d3..1ba3998c3 100644 --- a/data/templates/default.s5 +++ b/data/templates/default.s5 @@ -78,7 +78,7 @@ $endif$ $endif$ $if(toc)$
-$toc$ +$table-of-contents$
$endif$ $body$ diff --git a/data/templates/default.slideous b/data/templates/default.slideous index c7dbb5969..7f6364cfc 100644 --- a/data/templates/default.slideous +++ b/data/templates/default.slideous @@ -83,7 +83,7 @@ $endif$ $endif$ $if(toc)$
-$toc$ +$table-of-contents$
$endif$ $body$ diff --git a/data/templates/default.slidy b/data/templates/default.slidy index e7b3278ff..48bb254ae 100644 --- a/data/templates/default.slidy +++ b/data/templates/default.slidy @@ -69,7 +69,7 @@ $endif$ $endif$ $if(toc)$
-$toc$ +$table-of-contents$
$endif$ $body$ diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 45c878781..451123a6d 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -241,7 +241,7 @@ pandocToHtml opts (Pandoc meta blocks) = do then blocks else prepSlides slideLevel blocks toc <- if writerTableOfContents opts && slideVariant /= S5Slides - then tableOfContents opts sects + then fmap renderHtml' <$> tableOfContents opts sects else return Nothing blocks' <- liftM (mconcat . intersperse (nl opts)) $ mapM (elementToHtml slideLevel opts) sects @@ -292,7 +292,11 @@ pandocToHtml opts (Pandoc meta blocks) = do MathJax _ -> True _ -> False) $ defField "quotes" (stQuotes st) $ - maybe id (defField "toc" . renderHtml') toc $ + -- for backwards compatibility we populate toc + -- with the contents of the toc, rather than a + -- boolean: + maybe id (defField "toc") toc $ + maybe id (defField "table-of-contents") toc $ defField "author-meta" authsMeta $ maybe id (defField "date-meta") (normalizeDate dateMeta) $ defField "pagetitle" (stringifyHTML (docTitle meta)) $ diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 6c7e662bf..b951288bc 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -209,8 +209,8 @@ pandocToMarkdown opts (Pandoc meta blocks) = do Nothing -> empty let headerBlocks = filter isHeaderBlock blocks toc <- if writerTableOfContents opts - then tableOfContents opts headerBlocks - else return empty + then render' <$> tableOfContents opts headerBlocks + else return "" -- Strip off final 'references' header if markdown citations enabled let blocks' = if isEnabled Ext_citations opts then case reverse blocks of @@ -220,7 +220,11 @@ pandocToMarkdown opts (Pandoc meta blocks) = do body <- blockListToMarkdown opts blocks' notesAndRefs' <- notesAndRefs opts let main = render' $ body <> notesAndRefs' - let context = defField "toc" (render' toc) + let context = -- for backwards compatibility we populate toc + -- with the contents of the toc, rather than a + -- boolean: + defField "toc" toc + $ defField "table-of-contents" toc $ defField "body" main $ (if isNullMeta meta then id diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 6666f6549..48d31c7bf 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -122,7 +122,11 @@ writeRTF options doc = do let context = defField "body" body $ defField "spacer" spacer $ (if writerTableOfContents options - then defField "toc" toc + then defField "table-of-contents" toc + -- for backwards compatibility, + -- we populate toc with the contents + -- of the toc rather than a boolean: + . defField "toc" toc else id) $ metadata T.pack <$> -- cgit v1.2.3 From 19d9482fc400cf486547b6a670c946d3634401cf Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 26 Jun 2017 16:46:56 +0200 Subject: OpenDocument/ODT writer: Added support for table of contents. Closes #2836. Thanks to @anayrat. --- MANUAL.txt | 3 ++- src/Text/Pandoc/Writers/OpenDocument.hs | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/MANUAL.txt b/MANUAL.txt index 6499426e1..b5cea779e 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1228,7 +1228,8 @@ as the following: : non-null value if `--toc/--table-of-contents` was specified `toc-title` -: title of table of contents (works only with EPUB and docx) +: title of table of contents (works only with EPUB, + opendocument, odt, docx) `include-before` : contents specified by `-B/--include-before-body` (may have diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 6c53ab4ab..ed3dabb87 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -220,6 +220,7 @@ writeOpenDocument opts (Pandoc meta blocks) = do let listStyles = map listStyle (stListStyles s) let automaticStyles = vcat $ reverse $ styles ++ listStyles let context = defField "body" body + $ defField "toc" (writerTableOfContents opts) $ defField "automatic-styles" (render' automaticStyles) $ metadata case writerTemplate opts of -- cgit v1.2.3 From 460b6c470bae26f31d7c0d72b85aef8eb254b7f7 Mon Sep 17 00:00:00 2001 From: bucklereed Date: Tue, 27 Jun 2017 09:19:37 +0100 Subject: HTML reader: Use the lang value of to set the lang meta value. (#3765) * HTML reader: Use the lang value of to set the lang meta value. * Fix for pre-AMP environments. --- src/Text/Pandoc/Readers/HTML.hs | 9 +++++++++ test/Tests/Readers/HTML.hs | 6 ++++++ 2 files changed, 15 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 301afa207..b07b65019 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -53,6 +53,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import qualified Data.Map as M +import Data.Foldable ( for_ ) import Data.Maybe ( fromMaybe, isJust) import Data.List ( intercalate, isPrefixOf ) import Data.Char ( isDigit, isLetter, isAlphaNum ) @@ -134,6 +135,13 @@ type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m) type TagParser m = HTMLParser m [Tag Text] +pHtml :: PandocMonad m => TagParser m Blocks +pHtml = try $ do + (TagOpen "html" attr) <- lookAhead $ pAnyTag + for_ (lookup "lang" attr) $ + updateState . B.setMeta "lang" . B.text . T.unpack + pInTags "html" block + pBody :: PandocMonad m => TagParser m Blocks pBody = pInTags "body" block @@ -175,6 +183,7 @@ block = do , pList , pHrule , pTable + , pHtml , pHead , pBody , pDiv diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index 8647540b6..da6298e76 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -30,4 +30,10 @@ tests = [ testGroup "base tag" [ test html "anchor without href" $ "" =?> plain (spanWith ("anchor",[],[]) mempty) ] + , testGroup "lang" + [ test html "lang on " $ "hola" =?> + setMeta "lang" (text "es") (doc (plain (text "hola"))) + , test html "xml:lang on " $ "hola" =?> + setMeta "lang" (text "es") (doc (plain (text "hola"))) + ] ] -- cgit v1.2.3 From a868b238f253423281b2648896f184e7cdc05014 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 Jun 2017 12:42:56 +0200 Subject: Docx writer: Allow 9 list levels. Closes #3519. --- src/Text/Pandoc/Writers/Docx.hs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 06318b20c..fb6b2013a 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -657,6 +657,9 @@ mkNumbering lists = do elts <- mapM mkAbstractNum (ordNub lists) return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] +maxListLevel :: Int +maxListLevel = 8 + mkNum :: ListMarker -> Int -> Element mkNum marker numid = mknode "w:num" [("w:numId",show numid)] @@ -666,7 +669,8 @@ mkNum marker numid = BulletMarker -> [] NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] - $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] + $ mknode "w:startOverride" [("w:val",show start)] ()) + [0..maxListLevel] mkAbstractNum :: (PandocMonad m) => ListMarker -> m Element mkAbstractNum marker = do @@ -675,7 +679,8 @@ mkAbstractNum marker = do return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () - : map (mkLvl marker) [0..6] + : map (mkLvl marker) + [0..maxListLevel] mkLvl :: ListMarker -> Int -> Element mkLvl marker lvl = @@ -706,7 +711,7 @@ mkLvl marker lvl = bulletFor 3 = "\x2013" bulletFor 4 = "\x2022" bulletFor 5 = "\x2013" - bulletFor _ = "\x2022" + bulletFor x = bulletFor (x `mod` 6) styleFor UpperAlpha _ = "upperLetter" styleFor LowerAlpha _ = "lowerLetter" styleFor UpperRoman _ = "upperRoman" @@ -718,6 +723,7 @@ mkLvl marker lvl = styleFor DefaultStyle 4 = "decimal" styleFor DefaultStyle 5 = "lowerLetter" styleFor DefaultStyle 6 = "lowerRoman" + styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 7) styleFor _ _ = "decimal" patternFor OneParen s = s ++ ")" patternFor TwoParens s = "(" ++ s ++ ")" -- cgit v1.2.3 From 563c9c8687a62acc7361fb49126a1d2030f3a11e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 Jun 2017 14:35:03 +0200 Subject: RST reader: Handle chained link definitions. For example, .. _hello: .. _goodbye: example.com Here both `hello` and `goodbye` should link to `example.com`. Fixes the first part of #262. --- src/Text/Pandoc/Readers/RST.hs | 27 ++++++++++++++++++++------- test/command/262.md | 9 +++++++++ 2 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 test/command/262.md (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index d13f697b7..c790d5188 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,7 +31,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} module Text.Pandoc.Readers.RST ( readRST ) where -import Control.Monad (guard, liftM, mzero, when) +import Control.Monad (guard, liftM, mzero, when, forM_) import Control.Monad.Identity (Identity(..)) import Control.Monad.Except (throwError) import Data.Char (isHexDigit, isSpace, toLower, toUpper) @@ -1054,16 +1054,29 @@ stripTicks = reverse . stripTick . reverse . stripTick where stripTick ('`':xs) = xs stripTick xs = xs +referenceNames :: PandocMonad m => RSTParser m [String] +referenceNames = do + let rn = try $ do + string ".. _" + (_, ref) <- withRaw referenceName + char ':' + return ref + first <- rn + rest <- many (try (blanklines *> rn)) + return (first:rest) + regularKey :: PandocMonad m => RSTParser m () regularKey = try $ do - string ".. _" - (_,ref) <- withRaw referenceName - char ':' + -- we allow several references to the same URL, e.g. + -- .. _hello: + -- .. _goodbye: url.com + refs <- referenceNames src <- targetURI - let key = toKey $ stripTicks ref --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ - stateKeys s } + let keys = map (toKey . stripTicks) refs + forM_ keys $ \key -> + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } headerBlock :: PandocMonad m => RSTParser m [Char] headerBlock = do diff --git a/test/command/262.md b/test/command/262.md new file mode 100644 index 000000000..e23e2d866 --- /dev/null +++ b/test/command/262.md @@ -0,0 +1,9 @@ +``` +% pandoc -f rst +`hello`_ and `goodbye`_ + +.. _hello: +.. _goodbye: example.com +^D +

hello and goodbye

+``` -- cgit v1.2.3 From 33a29fbf8720c0d7eec40b7014e3f819b05474ef Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 Jun 2017 15:03:16 +0200 Subject: RST reader: support anchors. E.g. `hello` .. _hello: paragraph This is supported by putting "paragraph" in a Div with id `hello`. Closes #262. --- src/Text/Pandoc/Readers/RST.hs | 24 +++++++++++++++++++++++- test/command/262.md | 17 +++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index c790d5188..2daf60a89 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -170,7 +170,8 @@ parseRST = do -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... docMinusKeys <- concat <$> - manyTill (referenceKey <|> noteBlock <|> citationBlock <|> + manyTill (referenceKey <|> anchorDef <|> + noteBlock <|> citationBlock <|> headerBlock <|> lineClump) eof setInput docMinusKeys setPosition startPos @@ -217,6 +218,7 @@ block = choice [ codeBlock , fieldList , include , directive + , anchor , comment , header , hrule @@ -1072,12 +1074,32 @@ regularKey = try $ do -- .. _goodbye: url.com refs <- referenceNames src <- targetURI + guard $ not (null src) --TODO: parse width, height, class and name attributes let keys = map (toKey . stripTicks) refs forM_ keys $ \key -> updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } +anchorDef :: PandocMonad m => RSTParser m [Char] +anchorDef = try $ do + (refs, raw) <- withRaw (try (referenceNames <* blanklines)) + let keys = map stripTicks refs + forM_ keys $ \rawkey -> + updateState $ \s -> s { stateKeys = + M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s } + -- keep this for 2nd round of parsing, where we'll add the divs (anchor) + return raw + +anchor :: PandocMonad m => RSTParser m Blocks +anchor = try $ do + refs <- referenceNames + blanklines + b <- block + -- put identifier on next block: + let addDiv ref = B.divWith (ref, [], []) + return $ foldr addDiv b refs + headerBlock :: PandocMonad m => RSTParser m [Char] headerBlock = do ((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader') diff --git a/test/command/262.md b/test/command/262.md index e23e2d866..bda2acb35 100644 --- a/test/command/262.md +++ b/test/command/262.md @@ -7,3 +7,20 @@ ^D

hello and goodbye

``` + +``` +% pandoc -f rst +`hello`_ `goodbye`_ + +.. _hello: +.. _goodbye: + +paragraph +^D +

hello goodbye

+
+
+

paragraph

+
+
+``` -- cgit v1.2.3 From 7d9d77ca44afa0c69abfefe07d7b027f81c8f1a0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 Jun 2017 15:25:37 +0200 Subject: Require nonempty alt text for `implicit_figures`. A figure with an empty caption doesn't make sense. Closes #2844. --- MANUAL.txt | 15 +++++++-------- src/Text/Pandoc/Readers/Markdown.hs | 3 ++- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/MANUAL.txt b/MANUAL.txt index b5cea779e..a5121dc2f 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3227,17 +3227,16 @@ The link text will be used as the image's alt text: #### Extension: `implicit_figures` #### -An image occurring by itself in a paragraph will be rendered as -a figure with a caption.[^5] (In LaTeX, a figure environment will be -used; in HTML, the image will be placed in a `div` with class -`figure`, together with a caption in a `p` with class `caption`.) -The image's alt text will be used as the caption. +An image with nonempty alt text, occurring by itself in a +paragraph, will be rendered as a figure with a caption. The +image's alt text will be used as the caption. ![This is the caption](/url/of/image.png) -[^5]: This feature is not yet implemented for RTF, OpenDocument, or - ODT. In those formats, you'll just get an image in a paragraph by - itself, with no caption. +How this is rendered depends on the output format. Some output +formats (e.g. RTF) do not yet support figures. In those +formats, you'll just get an image in a paragraph by itself, with +no caption. If you just want a regular inline image, just make sure it is not the only thing in the paragraph. One way to do this is to insert a diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 31b51f237..49007ad35 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1035,7 +1035,8 @@ para = try $ do result' <- result case B.toList result' of [Image attr alt (src,tit)] - | Ext_implicit_figures `extensionEnabled` exts -> + | not (null alt) && + Ext_implicit_figures `extensionEnabled` exts -> -- the fig: at beginning of title indicates a figure return $ B.para $ B.singleton $ Image attr alt (src,'f':'i':'g':':':tit) -- cgit v1.2.3 From beb78a552cb3480d55b8eca8c0c77bccd5804506 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 27 Jun 2017 17:11:42 +0200 Subject: Text.Pandoc.Lua: simplify filter function runner The code still allowed to pass an arbitrary number of arguments to the filter function, as element properties were passed as function arguments at some point. Now we only pass the element as the single arg, so the code to handle multiple arguments is no longer necessary. --- src/Text/Pandoc/Lua.hs | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 90f72d685..858212df1 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -188,34 +188,20 @@ instance StackValue LuaFilter where push = undefined peek lua idx = fmap (LuaFilter lua) <$> Lua.peek lua idx --- | Helper class for pushing a single value to the stack via a lua function. --- See @pushViaCall@. -class PushViaFilterFunction a where - pushViaFilterFunction' :: LuaState -> LuaFilterFunction -> IO () -> Int -> a - -instance StackValue a => PushViaFilterFunction (IO a) where - pushViaFilterFunction' lua lf pushArgs num = do - pushFilterFunction lua lf - pushArgs - Lua.call lua num 1 - mbres <- Lua.peek lua (-1) - case mbres of - Nothing -> throwIO $ LuaException - ("Error while trying to get a filter's return " - ++ "value from lua stack.") - Just res -> res <$ Lua.pop lua 1 - -instance (StackValue a, PushViaFilterFunction b) => - PushViaFilterFunction (a -> b) where - pushViaFilterFunction' lua lf pushArgs num x = - pushViaFilterFunction' lua lf (pushArgs *> push lua x) (num + 1) - -- | Push a value to the stack via a lua filter function. The filter function is -- called with all arguments that are passed to this function and is expected to -- return a single value. -runFilterFunction :: PushViaFilterFunction a - => LuaState -> LuaFilterFunction -> a -runFilterFunction lua lf = pushViaFilterFunction' lua lf (return ()) 0 +runFilterFunction :: StackValue a => LuaState -> LuaFilterFunction -> a -> IO a +runFilterFunction lua lf x = do + pushFilterFunction lua lf + Lua.push lua x + Lua.call lua 1 1 + mbres <- Lua.peek lua (-1) + case mbres of + Nothing -> throwIO $ LuaException + ("Error while trying to get a filter's return " + ++ "value from lua stack.") + Just res -> res <$ Lua.pop lua 1 -- | Push the filter function to the top of the stack. pushFilterFunction :: Lua.LuaState -> LuaFilterFunction -> IO () -- cgit v1.2.3 From 4282abbd0781cf5e6731a9b43dc8cfeb1dca58fa Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 27 Jun 2017 17:11:42 +0200 Subject: Text.Pandoc.Lua: keep element unchanged if filter returns nil This was suggested by jgm and is consistent with the behavior of other filtering libraries. --- src/Text/Pandoc/Lua.hs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 858212df1..3770880f3 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -189,19 +189,24 @@ instance StackValue LuaFilter where peek lua idx = fmap (LuaFilter lua) <$> Lua.peek lua idx -- | Push a value to the stack via a lua filter function. The filter function is --- called with all arguments that are passed to this function and is expected to --- return a single value. +-- called with given element as argument and is expected to return an element. +-- Alternatively, the function can return nothing or nil, in which case the +-- element is left unchanged. runFilterFunction :: StackValue a => LuaState -> LuaFilterFunction -> a -> IO a runFilterFunction lua lf x = do pushFilterFunction lua lf Lua.push lua x Lua.call lua 1 1 - mbres <- Lua.peek lua (-1) - case mbres of - Nothing -> throwIO $ LuaException - ("Error while trying to get a filter's return " - ++ "value from lua stack.") - Just res -> res <$ Lua.pop lua 1 + resType <- Lua.ltype lua (-1) + case resType of + Lua.TNIL -> Lua.pop lua 1 *> return x + _ -> do + mbres <- Lua.peek lua (-1) + case mbres of + Nothing -> throwIO $ LuaException + ("Error while trying to get a filter's return " + ++ "value from lua stack.") + Just res -> res <$ Lua.pop lua 1 -- | Push the filter function to the top of the stack. pushFilterFunction :: Lua.LuaState -> LuaFilterFunction -> IO () -- cgit v1.2.3 From f5f84859230568ddafb2e7e23b5d9b3e98fdbba5 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 27 Jun 2017 17:55:47 +0200 Subject: Text.Pandoc.Lua: catch lua errors in filter functions Replace lua errors with `LuaException`s. --- src/Text/Pandoc/Lua.hs | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 3770880f3..2ee8d0847 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -196,17 +196,26 @@ runFilterFunction :: StackValue a => LuaState -> LuaFilterFunction -> a -> IO a runFilterFunction lua lf x = do pushFilterFunction lua lf Lua.push lua x - Lua.call lua 1 1 - resType <- Lua.ltype lua (-1) - case resType of - Lua.TNIL -> Lua.pop lua 1 *> return x - _ -> do - mbres <- Lua.peek lua (-1) - case mbres of - Nothing -> throwIO $ LuaException - ("Error while trying to get a filter's return " - ++ "value from lua stack.") - Just res -> res <$ Lua.pop lua 1 + z <- Lua.pcall lua 1 1 0 + if (z /= 0) + then do + msg <- Lua.peek lua (-1) + let prefix = "Error while running filter function: " + throwIO . LuaException $ + case msg of + Nothing -> prefix ++ "could not read error message" + Just msg' -> prefix ++ msg' + else do + resType <- Lua.ltype lua (-1) + case resType of + Lua.TNIL -> Lua.pop lua 1 *> return x + _ -> do + mbres <- Lua.peek lua (-1) + case mbres of + Nothing -> throwIO $ LuaException + ("Error while trying to get a filter's return " + ++ "value from lua stack.") + Just res -> res <$ Lua.pop lua 1 -- | Push the filter function to the top of the stack. pushFilterFunction :: Lua.LuaState -> LuaFilterFunction -> IO () -- cgit v1.2.3 From cd690d04015431e89feefa7f68e9609efab1f16b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 28 Jun 2017 14:20:53 +0200 Subject: LaTeX writer: fixed detection of otherlangs. We weren't recursing into inline contexts. Closes #3770. --- src/Text/Pandoc/Writers/LaTeX.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 5d505ed15..07ddddcb0 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1245,9 +1245,9 @@ mbBraced x = if not (all isAlphaNum x) -- Extract a key from divs and spans extract :: String -> Block -> [String] extract key (Div attr _) = lookKey key attr -extract key (Plain ils) = concatMap (extractInline key) ils -extract key (Para ils) = concatMap (extractInline key) ils -extract key (Header _ _ ils) = concatMap (extractInline key) ils +extract key (Plain ils) = query (extractInline key) ils +extract key (Para ils) = query (extractInline key) ils +extract key (Header _ _ ils) = query (extractInline key) ils extract _ _ = [] -- Extract a key from spans -- cgit v1.2.3 From 79cc56726c7e876314c7c21f5bb5f65084e7d8b7 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Wed, 28 Jun 2017 15:32:53 +0300 Subject: Muse reader: parse indented blockquotes (#3769) --- src/Text/Pandoc/Readers/Muse.hs | 23 ++++++++++++++++++++++- test/Tests/Readers/Muse.hs | 7 ++++++- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index eb0769e0b..a51306347 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -187,6 +187,7 @@ blockElements = choice [ comment , orderedList , table , commentTag + , indentedBlock , noteBlock ] @@ -209,7 +210,8 @@ separator = try $ do header :: PandocMonad m => MuseParser m (F Blocks) header = try $ do st <- stateParserContext <$> getState - getPosition >>= \pos -> guard (st == NullState && sourceColumn pos == 1) + q <- stateQuoteContext <$> getState + getPosition >>= \pos -> guard (st == NullState && q == NoQuote && sourceColumn pos == 1) level <- liftM length $ many1 $ char '*' guard $ level <= 5 skipSpaces @@ -248,6 +250,25 @@ quoteTag = blockTag B.blockQuote "quote" commentTag :: PandocMonad m => MuseParser m (F Blocks) commentTag = parseHtmlContent "comment" block >> return mempty +-- Indented block is either center, right or quote +indentedLine :: PandocMonad m => MuseParser m (Int, String) +indentedLine = try $ do + indent <- length <$> many1 spaceChar + line <- anyLine + return (indent, line) + +rawIndentedBlock :: PandocMonad m => MuseParser m (Int, String) +rawIndentedBlock = try $ do + lns <- many1 indentedLine + let indent = minimum $ map fst lns + return (indent, unlines $ map snd lns) + +indentedBlock :: PandocMonad m => MuseParser m (F Blocks) +indentedBlock = try $ do + (indent, raw) <- rawIndentedBlock + contents <- withQuoteContext InDoubleQuote $ parseFromString parseBlocks raw + return $ (if indent >= 2 && indent < 6 then B.blockQuote else id) <$> contents + para :: PandocMonad m => MuseParser m (F Blocks) para = liftM B.para . trimInlinesF . mconcat <$> many1Till inline endOfParaElement where diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index 3d7baf8f0..fe0a59992 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -102,7 +102,12 @@ tests = , "5 dashes is a horizontal rule" =: "-----" =?> horizontalRule , "4 dashes with spaces is a horizontal rule" =: "---- " =?> horizontalRule ] - , "Quote" =: "Hello, world" =?> blockQuote (para $ text "Hello, world") + , "Quote tag" =: "Hello, world" =?> blockQuote (para $ text "Hello, world") + , "Quote" =: " This is a quotation\n" =?> blockQuote (para $ text "This is a quotation") + , "Multiline quote" =: T.unlines [ " This is a quotation" + , " with a continuation" + ] + =?> blockQuote (para $ text "This is a quotation with a continuation") , "Center" =: "
Hello, world
" =?> para (text "Hello, world") , "Right" =: "Hello, world" =?> para (text "Hello, world") , testGroup "Comments" -- cgit v1.2.3 From 2902260b636b36134c0157e32291900603e1011d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 28 Jun 2017 15:07:35 +0200 Subject: Make `papersize: a4` work regardless of the case of `a4`. It is converted to `a4` in LaTeX and `A4` in ConTeXt. --- src/Text/Pandoc/Writers/ConTeXt.hs | 5 +++++ src/Text/Pandoc/Writers/LaTeX.hs | 4 ++++ 2 files changed, 9 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 7886bc052..3c901cab6 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2007-2017 John MacFarlane @@ -103,6 +104,10 @@ pandocToConTeXt options (Pandoc meta blocks) = do $ defField "layout" layoutFromMargins $ defField "number-sections" (writerNumberSections options) $ maybe id (defField "context-lang") mblang + $ (case getField "papersize" metadata of + Just ("a4" :: String) -> resetField "papersize" + ("A4" :: String) + _ -> id) $ metadata let context' = defField "context-dir" (toContextDir $ getField "dir" context) context diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 07ddddcb0..55ecda819 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -256,6 +256,10 @@ pandocToLaTeX options (Pandoc meta blocks) = do else defField "dir" ("ltr" :: String)) $ defField "section-titles" True $ defField "geometry" geometryFromMargins $ + (case getField "papersize" metadata of + Just ("A4" :: String) -> resetField "papersize" + ("a4" :: String) + _ -> id) $ metadata let context' = -- note: lang is used in some conditionals in the template, -- cgit v1.2.3 From 6ad74815f66cb36ec4039c597b38473db853eb6c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 29 Jun 2017 14:00:26 +0200 Subject: Text.Pandoc.Lua: use generics to reduce boilerplate. I tested this with the str.lua filter on MANUAL.txt, and I could see no significant performance degradation. Doing things this way will ease maintenance, as we won't have to manually modify this module when types change. @tarleb, do we really need special cases for things like DoubleQuoted and InlineMath? --- src/Text/Pandoc/Lua.hs | 35 +++-------------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 2ee8d0847..85a080277 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -36,6 +36,7 @@ module Text.Pandoc.Lua ( LuaException(..), import Control.Exception import Control.Monad (unless, when, (>=>)) import Control.Monad.Trans (MonadIO (..)) +import Data.Data (toConstr) import Data.Map (Map) import Data.Typeable (Typeable) import Scripting.Lua (LuaState, StackValue (..)) @@ -129,21 +130,7 @@ execBlockLuaFilter lua fnMap x = do case Map.lookup filterFnName fnMap of Nothing -> return x Just fn -> runFilterFunction lua fn x - case x of - BlockQuote{} -> tryFilter "BlockQuote" - BulletList{} -> tryFilter "BulletList" - CodeBlock{} -> tryFilter "CodeBlock" - DefinitionList{} -> tryFilter "DefinitionList" - Div{} -> tryFilter "Div" - Header{} -> tryFilter "Header" - HorizontalRule -> tryFilter "HorizontalRule" - LineBlock{} -> tryFilter "LineBlock" - Null -> tryFilter "Null" - Para{} -> tryFilter "Para" - Plain{} -> tryFilter "Plain" - RawBlock{} -> tryFilter "RawBlock" - OrderedList{} -> tryFilter "OrderedList" - Table{} -> tryFilter "Table" + tryFilter (show (toConstr x)) execInlineLuaFilter :: LuaState -> FunctionMap @@ -161,27 +148,11 @@ execInlineLuaFilter lua fnMap x = do Nothing -> tryFilterAlternatives alternatives Just fn -> runFilterFunction lua fn x case x of - Cite{} -> tryFilter "Cite" - Code{} -> tryFilter "Code" - Emph{} -> tryFilter "Emph" - Image{} -> tryFilter "Image" - LineBreak -> tryFilter "LineBreak" - Link{} -> tryFilter "Link" Math DisplayMath _ -> tryFilterAlternatives ["DisplayMath", "Math"] Math InlineMath _ -> tryFilterAlternatives ["InlineMath", "Math"] - Note{} -> tryFilter "Note" Quoted DoubleQuote _ -> tryFilterAlternatives ["DoubleQuoted", "Quoted"] Quoted SingleQuote _ -> tryFilterAlternatives ["SingleQuoted", "Quoted"] - RawInline{} -> tryFilter "RawInline" - SmallCaps{} -> tryFilter "SmallCaps" - SoftBreak -> tryFilter "SoftBreak" - Space -> tryFilter "Space" - Span{} -> tryFilter "Span" - Str{} -> tryFilter "Str" - Strikeout{} -> tryFilter "Strikeout" - Strong{} -> tryFilter "Strong" - Subscript{} -> tryFilter "Subscript" - Superscript{} -> tryFilter "Superscript" + _ -> tryFilter (show (toConstr x)) instance StackValue LuaFilter where valuetype _ = Lua.TTABLE -- cgit v1.2.3 From 5c80aca0e20492eaa31b9280fb5524d76f5e8098 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 29 Jun 2017 14:31:02 +0200 Subject: Text.Pandoc.Lua: refactored to remove duplicated code. --- src/Text/Pandoc/Lua.hs | 59 +++++++++++++++++++++----------------------------- 1 file changed, 25 insertions(+), 34 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 85a080277..3bb11b705 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -104,55 +104,46 @@ newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int } execDocLuaFilter :: LuaState -> FunctionMap -> Pandoc -> IO Pandoc -execDocLuaFilter lua fnMap x = do - let docFnName = "Doc" - case Map.lookup docFnName fnMap of - Nothing -> return x - Just fn -> runFilterFunction lua fn x +execDocLuaFilter lua fnMap = tryFilter lua fnMap "Doc" execMetaLuaFilter :: LuaState -> FunctionMap -> Pandoc -> IO Pandoc -execMetaLuaFilter lua fnMap pd@(Pandoc meta blks) = do - let metaFnName = "Meta" - case Map.lookup metaFnName fnMap of - Nothing -> return pd - Just fn -> do - meta' <- runFilterFunction lua fn meta - return $ Pandoc meta' blks +execMetaLuaFilter lua fnMap (Pandoc meta blks) = do + meta' <- tryFilter lua fnMap "Meta" meta + return $ Pandoc meta' blks execBlockLuaFilter :: LuaState -> FunctionMap -> Block -> IO Block execBlockLuaFilter lua fnMap x = do - let tryFilter :: String -> IO Block - tryFilter filterFnName = - case Map.lookup filterFnName fnMap of - Nothing -> return x - Just fn -> runFilterFunction lua fn x - tryFilter (show (toConstr x)) + tryFilter lua fnMap (show (toConstr x)) x + +tryFilter :: StackValue a => LuaState -> FunctionMap -> String -> a -> IO a +tryFilter lua fnMap filterFnName x = + case Map.lookup filterFnName fnMap of + Nothing -> return x + Just fn -> runFilterFunction lua fn x + +tryFilterAlternatives :: StackValue a + => LuaState -> FunctionMap -> [String] -> a -> IO a +tryFilterAlternatives _ _ [] x = return x +tryFilterAlternatives lua fnMap (fnName : alternatives) x = + case Map.lookup fnName fnMap of + Nothing -> tryFilterAlternatives lua fnMap alternatives x + Just fn -> runFilterFunction lua fn x execInlineLuaFilter :: LuaState -> FunctionMap -> Inline -> IO Inline execInlineLuaFilter lua fnMap x = do - let tryFilter :: String -> IO Inline - tryFilter filterFnName = - case Map.lookup filterFnName fnMap of - Nothing -> return x - Just fn -> runFilterFunction lua fn x - let tryFilterAlternatives :: [String] -> IO Inline - tryFilterAlternatives [] = return x - tryFilterAlternatives (fnName : alternatives) = - case Map.lookup fnName fnMap of - Nothing -> tryFilterAlternatives alternatives - Just fn -> runFilterFunction lua fn x + let tryAlt = tryFilterAlternatives lua fnMap case x of - Math DisplayMath _ -> tryFilterAlternatives ["DisplayMath", "Math"] - Math InlineMath _ -> tryFilterAlternatives ["InlineMath", "Math"] - Quoted DoubleQuote _ -> tryFilterAlternatives ["DoubleQuoted", "Quoted"] - Quoted SingleQuote _ -> tryFilterAlternatives ["SingleQuoted", "Quoted"] - _ -> tryFilter (show (toConstr x)) + Math DisplayMath _ -> tryAlt ["DisplayMath", "Math"] x + Math InlineMath _ -> tryAlt ["InlineMath", "Math"] x + Quoted DoubleQuote _ -> tryAlt ["DoubleQuoted", "Quoted"] x + Quoted SingleQuote _ -> tryAlt ["SingleQuoted", "Quoted"] x + _ -> tryFilter lua fnMap (show (toConstr x)) x instance StackValue LuaFilter where valuetype _ = Lua.TTABLE -- cgit v1.2.3 From 780a65f8a87b40d1a9ee269cd7a51699c42d497e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 29 Jun 2017 15:47:27 +0200 Subject: Lua filters: Remove special treatment of Quoted, Math. No more SingleQuoted, DoubleQuoted, InlineMath, DisplayMath. This makes everything uniform and predictable, though it does open up a difference btw lua filters and custom writers. --- src/Text/Pandoc/Lua.hs | 32 ++++++++------------------------ test/lua/single-to-double-quoted.lua | 6 ++++-- 2 files changed, 12 insertions(+), 26 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 3bb11b705..fd7bba0ac 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -101,6 +101,12 @@ data LuaFilter = LuaFilter LuaState FunctionMap newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int } +tryFilter :: StackValue a => LuaState -> FunctionMap -> String -> a -> IO a +tryFilter lua fnMap filterFnName x = + case Map.lookup filterFnName fnMap of + Nothing -> return x + Just fn -> runFilterFunction lua fn x + execDocLuaFilter :: LuaState -> FunctionMap -> Pandoc -> IO Pandoc @@ -116,34 +122,12 @@ execMetaLuaFilter lua fnMap (Pandoc meta blks) = do execBlockLuaFilter :: LuaState -> FunctionMap -> Block -> IO Block -execBlockLuaFilter lua fnMap x = do - tryFilter lua fnMap (show (toConstr x)) x - -tryFilter :: StackValue a => LuaState -> FunctionMap -> String -> a -> IO a -tryFilter lua fnMap filterFnName x = - case Map.lookup filterFnName fnMap of - Nothing -> return x - Just fn -> runFilterFunction lua fn x - -tryFilterAlternatives :: StackValue a - => LuaState -> FunctionMap -> [String] -> a -> IO a -tryFilterAlternatives _ _ [] x = return x -tryFilterAlternatives lua fnMap (fnName : alternatives) x = - case Map.lookup fnName fnMap of - Nothing -> tryFilterAlternatives lua fnMap alternatives x - Just fn -> runFilterFunction lua fn x +execBlockLuaFilter lua fnMap x = tryFilter lua fnMap (show (toConstr x)) x execInlineLuaFilter :: LuaState -> FunctionMap -> Inline -> IO Inline -execInlineLuaFilter lua fnMap x = do - let tryAlt = tryFilterAlternatives lua fnMap - case x of - Math DisplayMath _ -> tryAlt ["DisplayMath", "Math"] x - Math InlineMath _ -> tryAlt ["InlineMath", "Math"] x - Quoted DoubleQuote _ -> tryAlt ["DoubleQuoted", "Quoted"] x - Quoted SingleQuote _ -> tryAlt ["SingleQuoted", "Quoted"] x - _ -> tryFilter lua fnMap (show (toConstr x)) x +execInlineLuaFilter lua fnMap x = tryFilter lua fnMap (show (toConstr x)) x instance StackValue LuaFilter where valuetype _ = Lua.TTABLE diff --git a/test/lua/single-to-double-quoted.lua b/test/lua/single-to-double-quoted.lua index 45c184c95..b985b215c 100644 --- a/test/lua/single-to-double-quoted.lua +++ b/test/lua/single-to-double-quoted.lua @@ -1,7 +1,9 @@ return { { - SingleQuoted = function (elem) - elem.quotetype = "DoubleQuote" + Quoted = function (elem) + if elem.quotetype == "SingleQuote" then + elem.quotetype = "DoubleQuote" + end return elem end, } -- cgit v1.2.3 From cb25326fa313690c3c67caa2a8b44642409fd24c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 29 Jun 2017 17:07:30 +0200 Subject: Text.Pandoc.Lua: more code simplification. Also, now we check before running walkM that the function table actually does contain something relevant. E.g. if your filter just defines Str, there's no need to run walkM for blocks, meta, or the whole document. This should help performance a bit (and it does, in my tests). --- src/Text/Pandoc/Lua.hs | 56 +++++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 30 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index fd7bba0ac..87fb8fd6b 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -2,6 +2,7 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE ScopedTypeVariables #-} {- Copyright © 2017 Albert Krewinkel @@ -34,10 +35,11 @@ module Text.Pandoc.Lua ( LuaException(..), pushPandocModule ) where import Control.Exception -import Control.Monad (unless, when, (>=>)) +import Control.Monad (unless, when, (>=>), mplus) import Control.Monad.Trans (MonadIO (..)) -import Data.Data (toConstr) +import Data.Data (toConstr, showConstr, dataTypeOf, dataTypeConstrs, Data) import Data.Map (Map) +import Data.Maybe (isJust) import Data.Typeable (Typeable) import Scripting.Lua (LuaState, StackValue (..)) import Text.Pandoc.Definition @@ -91,44 +93,38 @@ runAll = foldr ((>=>) . walkMWithLuaFilter) return walkMWithLuaFilter :: LuaFilter -> Pandoc -> IO Pandoc walkMWithLuaFilter (LuaFilter lua fnMap) = - walkM (execInlineLuaFilter lua fnMap) >=> - walkM (execBlockLuaFilter lua fnMap) >=> - walkM (execMetaLuaFilter lua fnMap) >=> - walkM (execDocLuaFilter lua fnMap) + (if hasOneOf (constructorsFor (dataTypeOf (Str []))) + then walkM (tryFilter lua fnMap :: Inline -> IO Inline) + else return) + >=> + (if hasOneOf (constructorsFor (dataTypeOf (Para []))) + then walkM (tryFilter lua fnMap :: Block -> IO Block) + else return) + >=> + (case Map.lookup "Meta" fnMap of + Just fn -> walkM (\(Pandoc meta blocks) -> do + meta' <- runFilterFunction lua fn meta + return $ Pandoc meta' blocks) + Nothing -> return) + >=> + (case Map.lookup "Pandoc" fnMap `mplus` Map.lookup "Doc" fnMap of + Just fn -> (runFilterFunction lua fn) :: Pandoc -> IO Pandoc + Nothing -> return) + where hasOneOf = any (\k -> isJust (Map.lookup k fnMap)) + constructorsFor x = map show (dataTypeConstrs x) type FunctionMap = Map String LuaFilterFunction data LuaFilter = LuaFilter LuaState FunctionMap newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int } -tryFilter :: StackValue a => LuaState -> FunctionMap -> String -> a -> IO a -tryFilter lua fnMap filterFnName x = +tryFilter :: (Data a, StackValue a) => LuaState -> FunctionMap -> a -> IO a +tryFilter lua fnMap x = + let filterFnName = showConstr (toConstr x) in case Map.lookup filterFnName fnMap of Nothing -> return x Just fn -> runFilterFunction lua fn x -execDocLuaFilter :: LuaState - -> FunctionMap - -> Pandoc -> IO Pandoc -execDocLuaFilter lua fnMap = tryFilter lua fnMap "Doc" - -execMetaLuaFilter :: LuaState - -> FunctionMap - -> Pandoc -> IO Pandoc -execMetaLuaFilter lua fnMap (Pandoc meta blks) = do - meta' <- tryFilter lua fnMap "Meta" meta - return $ Pandoc meta' blks - -execBlockLuaFilter :: LuaState - -> FunctionMap - -> Block -> IO Block -execBlockLuaFilter lua fnMap x = tryFilter lua fnMap (show (toConstr x)) x - -execInlineLuaFilter :: LuaState - -> FunctionMap - -> Inline -> IO Inline -execInlineLuaFilter lua fnMap x = tryFilter lua fnMap (show (toConstr x)) x - instance StackValue LuaFilter where valuetype _ = Lua.TTABLE push = undefined -- cgit v1.2.3 From 5e00cf8086e0960e81c31f7cd981ace646623f09 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 29 Jun 2017 17:13:19 +0200 Subject: Added parameter for user data directory to runLuaFilter. in Text.Pandoc.Lua. Also to pushPandocModule. This change allows users to override pandoc.lua with a file in their local data directory, adding custom functions, etc. @tarleb, if you think this is a bad idea, you can revert this. But in general our data files are all overridable. --- src/Text/Pandoc/App.hs | 2 +- src/Text/Pandoc/Lua.hs | 6 +++--- src/Text/Pandoc/Lua/PandocModule.hs | 10 +++++----- test/Tests/Lua.hs | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index ee74d39c0..c119fa255 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -795,7 +795,7 @@ applyLuaFilters :: MonadIO m applyLuaFilters mbDatadir filters args d = do expandedFilters <- mapM (expandFilterPath mbDatadir) filters let go f d' = liftIO $ do - res <- E.try (runLuaFilter f args d') + res <- E.try (runLuaFilter mbDatadir f args d') case res of Right x -> return x Left (LuaException s) -> E.throw (PandocFilterError f s) diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 87fb8fd6b..22b68d5e0 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -56,12 +56,12 @@ newtype LuaException = LuaException String instance Exception LuaException runLuaFilter :: (MonadIO m) - => FilePath -> [String] -> Pandoc -> m Pandoc -runLuaFilter filterPath args pd = liftIO $ do + => Maybe FilePath -> FilePath -> [String] -> Pandoc -> m Pandoc +runLuaFilter datadir filterPath args pd = liftIO $ do lua <- Lua.newstate Lua.openlibs lua -- store module in global "pandoc" - pushPandocModule lua + pushPandocModule datadir lua Lua.setglobal lua "pandoc" top <- Lua.gettop lua status <- Lua.loadfile lua filterPath diff --git a/src/Text/Pandoc/Lua/PandocModule.hs b/src/Text/Pandoc/Lua/PandocModule.hs index fccfbebf3..2d0baf4f8 100644 --- a/src/Text/Pandoc/Lua/PandocModule.hs +++ b/src/Text/Pandoc/Lua/PandocModule.hs @@ -41,9 +41,9 @@ import Text.Pandoc.Readers (Reader (..), getReader) import Text.Pandoc.Shared (readDataFile) -- | Push the "pandoc" on the lua stack. -pushPandocModule :: LuaState -> IO () -pushPandocModule lua = do - script <- pandocModuleScript +pushPandocModule :: Maybe FilePath -> LuaState -> IO () +pushPandocModule datadir lua = do + script <- pandocModuleScript datadir status <- loadstring lua script "pandoc.lua" unless (status /= 0) $ call lua 0 1 push lua "__read" @@ -51,8 +51,8 @@ pushPandocModule lua = do rawset lua (-3) -- | Get the string representation of the pandoc module -pandocModuleScript :: IO String -pandocModuleScript = unpack <$> readDataFile Nothing "pandoc.lua" +pandocModuleScript :: Maybe FilePath -> IO String +pandocModuleScript datadir = unpack <$> readDataFile datadir "pandoc.lua" read_doc :: String -> String -> IO (Either String Pandoc) read_doc formatSpec content = do diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs index cd8604ab9..ebd39366b 100644 --- a/test/Tests/Lua.hs +++ b/test/Tests/Lua.hs @@ -68,7 +68,7 @@ tests = map (localOption (QuickCheckTests 20)) assertFilterConversion :: String -> FilePath -> Pandoc -> Pandoc -> Assertion assertFilterConversion msg filterPath docIn docExpected = do - docRes <- runLuaFilter ("lua" filterPath) [] docIn + docRes <- runLuaFilter Nothing ("lua" filterPath) [] docIn assertEqual msg docExpected docRes roundtripEqual :: (Eq a, Lua.StackValue a) => a -> IO Bool @@ -78,7 +78,7 @@ roundtripEqual x = (x ==) <$> roundtripped roundtripped = do lua <- Lua.newstate Lua.openlibs lua - pushPandocModule lua + pushPandocModule Nothing lua Lua.setglobal lua "pandoc" oldSize <- Lua.gettop lua Lua.push lua x -- cgit v1.2.3 From e574d50b1cec1a8aea58db70a2c88ad10f1c4cb2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 30 Jun 2017 17:41:25 +0200 Subject: Markdown writer: Ensure that `+` and `-` are escaped properly... so they don't cause spurious lists. Previously they were only if succeeded by a space, not if they were at end of line. Closes #3773. --- src/Text/Pandoc/Writers/Markdown.hs | 3 +++ test/command/3773.md | 14 ++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 test/command/3773.md (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index b951288bc..1e0d8bde2 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -416,6 +416,9 @@ blockToMarkdown' opts (Plain inlines) = do '+':s:_ | not isPlain && isSpace s -> "\\" <> contents '*':s:_ | not isPlain && isSpace s -> "\\" <> contents '-':s:_ | not isPlain && isSpace s -> "\\" <> contents + '+':[] | not isPlain -> "\\" <> contents + '*':[] | not isPlain -> "\\" <> contents + '-':[] | not isPlain -> "\\" <> contents '|':_ | (isEnabled Ext_line_blocks opts || isEnabled Ext_pipe_tables opts) && isEnabled Ext_all_symbols_escapable opts diff --git a/test/command/3773.md b/test/command/3773.md new file mode 100644 index 000000000..7ee8a3941 --- /dev/null +++ b/test/command/3773.md @@ -0,0 +1,14 @@ +``` +% pandoc -t markdown +A. \# +B. \+ +C. \* +D. o +E. o or \* +^D +A. \# +B. \+ +C. \* +D. o +E. o or \* +``` -- cgit v1.2.3 From 69b2cb38a867cd8b761e4c6ec65020bedbafcda1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 30 Jun 2017 22:23:15 +0200 Subject: Make `east_asian_line_breaks` affect all readers/writers. Closes #3703. --- src/Text/Pandoc/App.hs | 17 +++++++++++++---- src/Text/Pandoc/Readers/Markdown.hs | 3 +-- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index c119fa255..6fdd2a44c 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -84,7 +84,8 @@ import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (headerShift, isURI, openURL, readDataFile, - readDataFileUTF8, safeRead, tabFilter) + readDataFileUTF8, safeRead, tabFilter, + eastAsianLineBreakFilter) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (toEntities) import Text.Printf @@ -381,9 +382,17 @@ convertWithOpts opts = do "Specify an output file using the -o option." - let transforms = case optBaseHeaderLevel opts of - x | x > 1 -> [headerShift (x - 1)] - | otherwise -> [] + let transforms = (case optBaseHeaderLevel opts of + x | x > 1 -> (headerShift (x - 1) :) + | otherwise -> id) $ + (if extensionEnabled Ext_east_asian_line_breaks + readerExts && + not (extensionEnabled Ext_east_asian_line_breaks + writerExts && + writerWrapText writerOptions == WrapPreserve) + then (eastAsianLineBreakFilter :) + else id) + [] let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" then 0 diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 49007ad35..c2342b9f3 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -380,8 +380,7 @@ parseMarkdown = do meta <- stateMeta' st return $ Pandoc meta bs) st reportLogMessages - (do guardEnabled Ext_east_asian_line_breaks - return $ eastAsianLineBreakFilter doc) <|> return doc + return doc referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) referenceKey = try $ do -- cgit v1.2.3 From d3dae1200adf8318fc033f6eed987507be85b71e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 30 Jun 2017 22:26:42 +0200 Subject: Removed `hard_line_breaks` extension from `markdown_github`. GitHub has two Markdown modes, one for long-form documents like READMEs and one for short things like issue coments. In issue comments, a line break is treated as a hard line break. In README, wikis, etc., it is treated as a space as in regular Markdown. Since pandoc is more likely to be used to convert long-form documents from GitHub Markdown, `-hard_line_breaks` is a better default. Closes #3594. --- src/Text/Pandoc/Extensions.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 79e3529e9..bd164635c 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -244,7 +244,6 @@ githubMarkdownExtensions = extensionsFromList , Ext_space_in_atx_header , Ext_intraword_underscores , Ext_strikeout - , Ext_hard_line_breaks , Ext_emoji , Ext_lists_without_preceding_blankline , Ext_shortcut_reference_links -- cgit v1.2.3 From 20103ac2bcf547fa201eb5d1e79989d3466a7563 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 30 Jun 2017 23:30:18 +0200 Subject: Allow ibooks-specific metadata in epubs. Closes #2693. You can now have the following fields in your YAML metadata, and it will be treated appropriately in the generated EPUB. ``` ibooks: version: 1.3.4 specified-fonts: false ipad-orientation-lock: portrait-only iphone-orientation-lock: landscape-only binding: true scroll-axis: vertical ``` This commit also fixes a regression in stylesheet paths. --- src/Text/Pandoc/Writers/EPUB.hs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index d20eb8a2f..a48fcf415 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -103,6 +103,7 @@ data EPUBMetadata = EPUBMetadata{ , epubCoverImage :: Maybe String , epubStylesheets :: [FilePath] , epubPageDirection :: Maybe ProgressionDirection + , epubIbooksFields :: [(String, String)] } deriving Show data Date = Date{ @@ -312,6 +313,7 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubCoverImage = coverImage , epubStylesheets = stylesheets , epubPageDirection = pageDirection + , epubIbooksFields = ibooksFields } where identifiers = getIdentifier meta titles = getTitle meta @@ -339,6 +341,10 @@ metadataFromMeta opts meta = EPUBMetadata{ Just "ltr" -> Just LTR Just "rtl" -> Just RTL _ -> Nothing + ibooksFields = case lookupMeta "ibooks" meta of + Just (MetaMap mp) + -> M.toList $ M.map metaValueToString mp + _ -> [] -- | Produce an EPUB2 file from a Pandoc document. writeEPUB2 :: PandocMonad m @@ -577,7 +583,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do EPUB2 -> "2.0" EPUB3 -> "3.0") ,("xmlns","http://www.idpf.org/2007/opf") - ,("unique-identifier","epub-id-1")] $ + ,("unique-identifier","epub-id-1") + ,("prefix","ibooks: http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/")] $ [ metadataElement version metadata currentTime , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") @@ -653,7 +660,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do navMapFormatter n tit src subs = unode "navPoint" ! [("id", "navPoint-" ++ show n)] $ [ unode "navLabel" $ unode "text" tit - , unode "content" ! [("src", src)] $ () + , unode "content" ! [("src", "text/" ++ src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ @@ -686,7 +693,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! [("id", "toc-li-" ++ show n)] $ - (unode "a" ! [("href",src)] + (unode "a" ! [("href", "text/" ++ + src)] $ tit) : case subs of [] -> [] @@ -719,7 +727,11 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do ] ] else [] - navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):vars } + navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"): + -- remove the leading ../ from stylesheet paths: + map (\(k,v) -> if k == "css" + then (k, drop 3 v) + else (k, v)) vars } (Pandoc (setMeta "title" (walk removeNote $ fromList $ docTitle' meta) nullMeta) (navBlocks ++ landmarks)) @@ -761,7 +773,8 @@ metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element metadataElement version md currentTime = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes - where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes + where mdNodes = identifierNodes ++ titleNodes ++ dateNodes + ++ languageNodes ++ ibooksNodes ++ creatorNodes ++ contributorNodes ++ subjectNodes ++ descriptionNodes ++ typeNodes ++ formatNodes ++ publisherNodes ++ sourceNodes ++ relationNodes @@ -780,6 +793,8 @@ metadataElement version md currentTime = [] -> [] (x:_) -> [dcNode "date" ! [("id","epub-date")] $ dateText x] + ibooksNodes = map ibooksNode (epubIbooksFields md) + ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ epubCreator md -- cgit v1.2.3 From 1dd769e55897757812a1d8188b80c5df7fcb2971 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 3 Jul 2017 12:36:12 +0200 Subject: Logging: added MacroAlreadyDefined. --- src/Text/Pandoc/Logging.hs | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index e31fb1521..1dcff7470 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -77,6 +77,7 @@ data LogMessage = | CircularReference String SourcePos | ParsingUnescaped String SourcePos | CouldNotLoadIncludeFile String SourcePos + | MacroAlreadyDefined String SourcePos | InlineNotRendered Inline | BlockNotRendered Block | DocxParserWarning String @@ -150,6 +151,11 @@ instance ToJSON LogMessage where "source" .= Text.pack (sourceName pos), "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] + MacroAlreadyDefined name pos -> + ["name" .= Text.pack name, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] InlineNotRendered il -> ["contents" .= toJSON il] BlockNotRendered bl -> @@ -224,6 +230,8 @@ showLogMessage msg = "Parsing unescaped '" ++ s ++ "' at " ++ showPos pos CouldNotLoadIncludeFile fp pos -> "Could not load include file '" ++ fp ++ "' at " ++ showPos pos + MacroAlreadyDefined name pos -> + "Macro '" ++ name ++ "' already defined, ignoring at " ++ showPos pos InlineNotRendered il -> "Not rendering " ++ show il BlockNotRendered bl -> @@ -277,6 +285,7 @@ messageVerbosity msg = ReferenceNotFound{} -> WARNING CircularReference{} -> WARNING CouldNotLoadIncludeFile{} -> WARNING + MacroAlreadyDefined{} -> WARNING ParsingUnescaped{} -> INFO InlineNotRendered{} -> INFO BlockNotRendered{} -> INFO -- cgit v1.2.3 From 0feb7504b1c68cef76b30ea9987e2eae3101714c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 1 Jul 2017 19:31:43 +0200 Subject: Rewrote LaTeX reader with proper tokenization. This rewrite is primarily motivated by the need to get macros working properly. A side benefit is that the reader is significantly faster (27s -> 19s in one benchmark, and there is a lot of room for further optimization). We now tokenize the input text, then parse the token stream. Macros modify the token stream, so they should now be effective in any context, including math. Thus, we no longer need the clunky macro processing capacities of texmath. A custom state LaTeXState is used instead of ParserState. This, plus the tokenization, will require some rewriting of the exported functions rawLaTeXInline, inlineCommand, rawLaTeXBlock. * Added Text.Pandoc.Readers.LaTeX.Types (new exported module). Exports Macro, Tok, TokType, Line, Column. [API change] * Text.Pandoc.Parsing: adjusted type of `insertIncludedFile` so it can be used with token parser. * Removed old texmath macro stuff from Parsing. Use Macro from Text.Pandoc.Readers.LaTeX.Types instead. * Removed texmath macro material from Markdown reader. * Changed types for Text.Pandoc.Readers.LaTeX's rawLaTeXInline and rawLaTeXBlock. (Both now return a String, and they are polymorphic in state.) * Added orgMacros field to OrgState. [API change] * Removed readerApplyMacros from ReaderOptions. Now we just check the `latex_macros` reader extension. * Allow `\newcommand\foo{blah}` without braces. Fixes #1390. Fixes #2118. Fixes #3236. Fixes #3779. Fixes #934. Fixes #982. --- pandoc.cabal | 1 + src/Text/Pandoc/App.hs | 1 - src/Text/Pandoc/Error.hs | 3 + src/Text/Pandoc/Extensions.hs | 1 + src/Text/Pandoc/Options.hs | 2 - src/Text/Pandoc/Parsing.hs | 67 +- src/Text/Pandoc/Readers/LaTeX.hs | 2777 +++++++++++++++++----------- src/Text/Pandoc/Readers/LaTeX/Types.hs | 48 + src/Text/Pandoc/Readers/Markdown.hs | 19 +- src/Text/Pandoc/Readers/Muse.hs | 2 +- src/Text/Pandoc/Readers/Org/Inlines.hs | 5 +- src/Text/Pandoc/Readers/Org/ParserState.hs | 10 + src/Text/Pandoc/Readers/TWiki.hs | 2 +- src/Text/Pandoc/Readers/Textile.hs | 2 +- src/Text/Pandoc/Readers/Txt2Tags.hs | 2 +- test/Tests/Readers/LaTeX.hs | 3 +- test/command/1390.md | 20 + test/command/2118.md | 11 + test/command/3113.md | 2 +- test/command/3236.md | 9 + test/command/3558.md | 8 +- test/command/3779.md | 28 + test/command/934.md | 12 + test/command/982.md | 11 + test/latex-reader.latex | 1 - test/latex-reader.native | 2 +- test/markdown-reader-more.native | 4 +- 27 files changed, 1876 insertions(+), 1177 deletions(-) create mode 100644 src/Text/Pandoc/Readers/LaTeX/Types.hs create mode 100644 test/command/1390.md create mode 100644 test/command/2118.md create mode 100644 test/command/3236.md create mode 100644 test/command/3779.md create mode 100644 test/command/934.md create mode 100644 test/command/982.md (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index 70475568e..dd92690ce 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -366,6 +366,7 @@ Library Text.Pandoc.Readers, Text.Pandoc.Readers.HTML, Text.Pandoc.Readers.LaTeX, + Text.Pandoc.Readers.LaTeX.Types, Text.Pandoc.Readers.Markdown, Text.Pandoc.Readers.CommonMark, Text.Pandoc.Readers.MediaWiki, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 6fdd2a44c..689c0a784 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -308,7 +308,6 @@ convertWithOpts opts = do , readerColumns = optColumns opts , readerTabStop = optTabStop opts , readerIndentedCodeClasses = optIndentedCodeClasses opts - , readerApplyMacros = not laTeXOutput , readerDefaultImageExtension = optDefaultImageExtension opts , readerTrackChanges = optTrackChanges opts diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 60bc699ab..24186720c 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -64,6 +64,7 @@ data PandocError = PandocIOError String IOError | PandocTemplateError String | PandocAppError String | PandocEpubSubdirectoryError String + | PandocMacroLoop String deriving (Show, Typeable, Generic) instance Exception PandocError @@ -107,6 +108,8 @@ handleError (Left e) = PandocAppError s -> err 1 s PandocEpubSubdirectoryError s -> err 31 $ "EPUB subdirectory name '" ++ s ++ "' contains illegal characters" + PandocMacroLoop s -> err 91 $ + "Loop encountered in expanding macro " ++ s err :: Int -> String -> IO a err exitCode msg = do diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index bd164635c..28459d4e6 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -318,6 +318,7 @@ getDefaultExtensions "epub2" = getDefaultExtensions "epub" getDefaultExtensions "epub3" = getDefaultExtensions "epub" getDefaultExtensions "latex" = extensionsFromList [Ext_smart, + Ext_latex_macros, Ext_auto_identifiers] getDefaultExtensions "context" = extensionsFromList [Ext_smart, diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 6519f807c..d7e77010e 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -61,7 +61,6 @@ data ReaderOptions = ReaderOptions{ , readerStandalone :: Bool -- ^ Standalone document with header , readerColumns :: Int -- ^ Number of columns in terminal , readerTabStop :: Int -- ^ Tab stop - , readerApplyMacros :: Bool -- ^ Apply macros to TeX math , readerIndentedCodeClasses :: [String] -- ^ Default classes for -- indented code blocks , readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations @@ -75,7 +74,6 @@ instance Default ReaderOptions , readerStandalone = False , readerColumns = 80 , readerTabStop = 4 - , readerApplyMacros = True , readerIndentedCodeClasses = [] , readerAbbreviations = defaultAbbrevs , readerDefaultImageExtension = "" diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index eb5b37f40..f6263c782 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -109,8 +109,6 @@ module Text.Pandoc.Parsing ( anyLine, dash, nested, citeKey, - macro, - applyMacros', Parser, ParserT, F, @@ -130,6 +128,7 @@ module Text.Pandoc.Parsing ( anyLine, runParser, runParserT, parse, + tokenPrim, anyToken, getInput, setInput, @@ -178,13 +177,16 @@ module Text.Pandoc.Parsing ( anyLine, sourceLine, setSourceColumn, setSourceLine, - newPos + newPos, + Line, + Column ) where +import Data.Text (Text) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..), trimInlines) +import Text.Pandoc.Builder (Blocks, Inlines, HasMeta(..), trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.XML (fromEntities) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) @@ -195,7 +197,7 @@ import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, import Data.List ( intercalate, transpose, isSuffixOf ) import Text.Pandoc.Shared import qualified Data.Map as M -import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro, pMacroDefinition) +import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.HTML.TagSoup.Entity ( lookupEntity ) import Text.Pandoc.Asciify (toAsciiChar) import Data.Monoid ((<>)) @@ -994,7 +996,7 @@ data ParserState = ParserState stateIdentifiers :: Set.Set String, -- ^ Header identifiers used stateNextExample :: Int, -- ^ Number of next example stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers - stateMacros :: [Macro], -- ^ List of macros defined so far + stateMacros :: M.Map Text Macro, -- ^ Table of macros defined so far stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles -- Triple represents: 1) Base role, 2) Optional format (only for :raw: @@ -1057,8 +1059,8 @@ instance HasIdentifierList ParserState where updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st } class HasMacros st where - extractMacros :: st -> [Macro] - updateMacros :: ([Macro] -> [Macro]) -> st -> st + extractMacros :: st -> M.Map Text Macro + updateMacros :: (M.Map Text Macro -> M.Map Text Macro) -> st -> st instance HasMacros ParserState where extractMacros = stateMacros @@ -1112,7 +1114,7 @@ defaultParserState = stateIdentifiers = Set.empty, stateNextExample = 1, stateExamples = M.empty, - stateMacros = [], + stateMacros = M.empty, stateRstDefaultRole = "title-reference", stateRstCustomRoles = M.empty, stateCaption = Nothing, @@ -1341,33 +1343,6 @@ token :: (Stream s m t) -> ParsecT s st m a token pp pos match = tokenPrim pp (\_ t _ -> pos t) match --- --- Macros --- - --- | Parse a \newcommand or \newenviroment macro definition. -macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st) - => ParserT [Char] st m Blocks -macro = do - apply <- getOption readerApplyMacros - (m, def') <- withRaw pMacroDefinition - if apply - then do - updateState $ \st -> updateMacros (m:) st - return mempty - else return $ rawBlock "latex" def' - --- | Apply current macros to string. -applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char) - => String - -> ParserT [Char] st m String -applyMacros' target = do - apply <- getOption readerApplyMacros - if apply - then do macros <- extractMacros <$> getState - return $ applyMacros macros target - else return target - infixr 5 <+?> (<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) @@ -1385,10 +1360,11 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs') insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st, Functor mf, Applicative mf, Monad mf) - => ParserT String st m (mf Blocks) + => ParserT [a] st m (mf Blocks) + -> (String -> [a]) -> [FilePath] -> FilePath - -> ParserT String st m (mf Blocks) -insertIncludedFile' blocks dirs f = do + -> ParserT [a] st m (mf Blocks) +insertIncludedFile' blocks totoks dirs f = do oldPos <- getPosition oldInput <- getInput containers <- getIncludeFiles <$> getState @@ -1402,7 +1378,7 @@ insertIncludedFile' blocks dirs f = do report $ CouldNotLoadIncludeFile f oldPos return "" setPosition $ newPos f 1 1 - setInput contents + setInput $ totoks contents bs <- blocks setInput oldInput setPosition oldPos @@ -1412,11 +1388,12 @@ insertIncludedFile' blocks dirs f = do -- | Parse content of include file as blocks. Circular includes result in an -- @PandocParseError@. insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) - => ParserT String st m Blocks + => ParserT [a] st m Blocks + -> (String -> [a]) -> [FilePath] -> FilePath - -> ParserT String st m Blocks -insertIncludedFile blocks dirs f = - runIdentity <$> insertIncludedFile' (Identity <$> blocks) dirs f + -> ParserT [a] st m Blocks +insertIncludedFile blocks totoks dirs f = + runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f -- | Parse content of include file as future blocks. Circular includes result in -- an @PandocParseError@. @@ -1424,4 +1401,4 @@ insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st) => ParserT String st m (Future st Blocks) -> [FilePath] -> FilePath -> ParserT String st m (Future st Blocks) -insertIncludedFileF = insertIncludedFile' +insertIncludedFileF p = insertIncludedFile' p id diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 090dc5fdb..d82e6a5dc 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2006-2017 John MacFarlane @@ -28,20 +31,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Portability : portable Conversion of LaTeX to 'Pandoc' document. + -} module Text.Pandoc.Readers.LaTeX ( readLaTeX, + applyMacros, rawLaTeXInline, rawLaTeXBlock, - inlineCommand, + macro, + inlineCommand ) where import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) -import Data.Char (chr, isAlphaNum, isLetter, ord) -import Data.Text (Text, unpack) +import Data.Char (chr, isAlphaNum, isLetter, ord, isDigit) +import Data.Default +import Data.Text (Text) +import qualified Data.Text as T import Data.List (intercalate, isPrefixOf) import qualified Data.Map as M +import qualified Data.Set as Set import Data.Maybe (fromMaybe, maybeToList) import Safe (minimumDef) import System.FilePath (addExtension, replaceExtension, takeExtension) @@ -52,10 +61,19 @@ import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (many, mathDisplay, mathInline, optional, - space, (<|>)) +import Text.Pandoc.Parsing hiding (many, optional, withRaw, + mathInline, mathDisplay, + space, (<|>), spaces, blankline) import Text.Pandoc.Shared +import Text.Pandoc.Readers.LaTeX.Types (Macro(..), Tok(..), + TokType(..), Line, Column) import Text.Pandoc.Walk +import Text.Pandoc.Error (PandocError(PandocParsecError, PandocMacroLoop)) + +-- for debugging: +-- import Text.Pandoc.Extensions (getDefaultExtensions) +-- import Text.Pandoc.Class (runIOorExplode, PandocIO) +-- import Debug.Trace (traceShowId) -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: PandocMonad m @@ -63,18 +81,18 @@ readLaTeX :: PandocMonad m -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readLaTeX opts ltx = do - parsed <- readWithM parseLaTeX def{ stateOptions = opts } - (unpack (crFilter ltx)) + parsed <- runParserT parseLaTeX def{ sOptions = opts } "source" + (tokenize (crFilter ltx)) case parsed of Right result -> return result - Left e -> throwError e + Left e -> throwError $ PandocParsecError (T.unpack ltx) e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do bs <- blocks eof st <- getState - let meta = stateMeta st + let meta = sMeta st let doc' = doc bs let headerLevel (Header n _ _) = [n] headerLevel _ = [] @@ -88,177 +106,476 @@ parseLaTeX = do else id) doc' return $ Pandoc meta bs' -type LP m = ParserT String ParserState m - -anyControlSeq :: PandocMonad m => LP m String -anyControlSeq = do - char '\\' - next <- option '\n' anyChar - case next of - '\n' -> return "" - c | isLetter c -> (c:) <$> (many letter <* optional sp) - | otherwise -> return [c] - -controlSeq :: PandocMonad m => String -> LP m String -controlSeq name = try $ do - char '\\' - case name of - "" -> mzero - [c] | not (isLetter c) -> string [c] - cs -> string cs <* notFollowedBy letter <* optional sp - return name - -dimenarg :: PandocMonad m => LP m String -dimenarg = try $ do - ch <- option "" $ string "=" - num <- many1 digit - dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"] - return $ ch ++ num ++ dim +-- testParser :: LP PandocIO a -> Text -> IO a +-- testParser p t = do +-- res <- runIOorExplode (runParserT p defaultLaTeXState{ +-- sOptions = def{ readerExtensions = +-- enableExtension Ext_raw_tex $ +-- getDefaultExtensions "latex" }} "source" (tokenize t)) +-- case res of +-- Left e -> error (show e) +-- Right r -> return r + +data LaTeXState = LaTeXState{ sOptions :: ReaderOptions + , sMeta :: Meta + , sQuoteContext :: QuoteContext + , sMacros :: M.Map Text Macro + , sContainers :: [String] + , sHeaders :: M.Map Inlines String + , sLogMessages :: [LogMessage] + , sIdentifiers :: Set.Set String + , sVerbatimMode :: Bool + , sCaption :: Maybe Inlines + , sInListItem :: Bool + , sInTableCell :: Bool + } + deriving Show + +defaultLaTeXState :: LaTeXState +defaultLaTeXState = LaTeXState{ sOptions = def + , sMeta = nullMeta + , sQuoteContext = NoQuote + , sMacros = M.empty + , sContainers = [] + , sHeaders = M.empty + , sLogMessages = [] + , sIdentifiers = Set.empty + , sVerbatimMode = False + , sCaption = Nothing + , sInListItem = False + , sInTableCell = False + } + +instance PandocMonad m => HasQuoteContext LaTeXState m where + getQuoteContext = sQuoteContext <$> getState + withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = sQuoteContext oldState + setState oldState { sQuoteContext = context } + result <- parser + newState <- getState + setState newState { sQuoteContext = oldQuoteContext } + return result + +instance HasLogMessages LaTeXState where + addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st } + getLogMessages st = reverse $ sLogMessages st + +instance HasIdentifierList LaTeXState where + extractIdentifierList = sIdentifiers + updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st } + +instance HasIncludeFiles LaTeXState where + getIncludeFiles = sContainers + addIncludeFile f s = s{ sContainers = f : sContainers s } + dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s } + +instance HasHeaderMap LaTeXState where + extractHeaderMap = sHeaders + updateHeaderMap f st = st{ sHeaders = f $ sHeaders st } + +instance HasMacros LaTeXState where + extractMacros st = sMacros st + updateMacros f st = st{ sMacros = f (sMacros st) } + +instance HasReaderOptions LaTeXState where + extractReaderOptions = sOptions + +instance HasMeta LaTeXState where + setMeta field val st = + st{ sMeta = setMeta field val $ sMeta st } + deleteMeta field st = + st{ sMeta = deleteMeta field $ sMeta st } + +instance Default LaTeXState where + def = defaultLaTeXState + +type LP m = ParserT [Tok] LaTeXState m + +withVerbatimMode :: PandocMonad m => LP m a -> LP m a +withVerbatimMode parser = do + updateState $ \st -> st{ sVerbatimMode = True } + result <- parser + updateState $ \st -> st{ sVerbatimMode = False } + return result + +rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m String +rawLaTeXBlock = do + lookAhead (try (char '\\' >> letter)) + inp <- getInput + let toks = tokenize $ T.pack inp + let rawblock = do + (_, raw) <- try $ + withRaw (environment <|> macroDef <|> blockCommand) + return raw + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate } + res <- runParserT rawblock lstate "source" toks + case res of + Left _ -> mzero + Right raw -> count (T.length (untokenize raw)) anyChar + +macro :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m Blocks +macro = do + guardEnabled Ext_latex_macros + lookAhead (char '\\' *> oneOfStrings ["new", "renew", "provide"] *> + oneOfStrings ["command", "environment"]) + inp <- getInput + let toks = tokenize $ T.pack inp + let rawblock = do + (_, raw) <- withRaw $ try macroDef + st <- getState + return (raw, st) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT rawblock lstate "source" toks + case res of + Left _ -> mzero + Right (raw, st) -> do + updateState (updateMacros (const $ sMacros st)) + mempty <$ count (T.length (untokenize raw)) anyChar + +applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => String -> ParserT String s m String +applyMacros s = do + (guardEnabled Ext_latex_macros >> + do let retokenize = doMacros 0 *> (toksToString <$> getInput) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT retokenize lstate "math" (tokenize (T.pack s)) + case res of + Left e -> fail (show e) + Right s' -> return s') <|> return s + +rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m String +rawLaTeXInline = do + lookAhead (try (char '\\' >> letter) <|> char '$') + inp <- getInput + let toks = tokenize $ T.pack inp + let rawinline = do + (_, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand') + st <- getState + return (raw, st) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT rawinline lstate "source" toks + case res of + Left _ -> mzero + Right (raw, s) -> do + updateState $ updateMacros (const $ sMacros s) + count (T.length (untokenize raw)) anyChar + +inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines +inlineCommand = do + lookAhead (try (char '\\' >> letter) <|> char '$') + inp <- getInput + let toks = tokenize $ T.pack inp + let rawinline = do + (il, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand') + st <- getState + return (il, raw, st) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT rawinline lstate "source" toks + case res of + Left _ -> mzero + Right (il, raw, s) -> do + updateState $ updateMacros (const $ sMacros s) + count (T.length (untokenize raw)) anyChar + return il + +tokenize :: Text -> [Tok] +tokenize = totoks (1, 1) + +totoks :: (Line, Column) -> Text -> [Tok] +totoks (lin,col) t = + case T.uncons t of + Nothing -> [] + Just (c, rest) + | c == '\n' -> + Tok (lin, col) Newline "\n" + : totoks (lin + 1,1) rest + | isSpaceOrTab c -> + let (sps, rest') = T.span isSpaceOrTab t + in Tok (lin, col) Spaces sps + : totoks (lin, col + T.length sps) rest' + | isAlphaNum c -> + let (ws, rest') = T.span isAlphaNum t + in Tok (lin, col) Word ws + : totoks (lin, col + T.length ws) rest' + | c == '%' -> + let (cs, rest') = T.break (== '\n') rest + in Tok (lin, col) Comment ("%" <> cs) + : totoks (lin, col + 1 + T.length cs) rest' + | c == '\\' -> + case T.uncons rest of + Nothing -> [Tok (lin, col) Symbol (T.singleton c)] + Just (d, rest') + | isLetter d -> + let (ws, rest'') = T.span isLetter rest + (ss, rest''') = T.span isSpaceOrTab rest'' + in Tok (lin, col) (CtrlSeq ws) ("\\" <> ws <> ss) + : totoks (lin, + col + 1 + T.length ws + T.length ss) rest''' + | d == '\t' || d == '\n' -> + Tok (lin, col) Symbol ("\\") + : totoks (lin, col + 1) rest + | otherwise -> + Tok (lin, col) (CtrlSeq (T.singleton d)) (T.pack [c,d]) + : totoks (lin, col + 2) rest' + | c == '#' -> + let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest + in case safeRead (T.unpack t1) of + Just i -> + Tok (lin, col) (Arg i) ("#" <> t1) + : totoks (lin, col + 1 + T.length t1) t2 + Nothing -> + Tok (lin, col) Symbol ("#") + : totoks (lin, col + 1) t2 + | c == '^' -> + case T.uncons rest of + Just ('^', rest') -> + case T.uncons rest' of + Just (d, rest'') + | isLowerHex d -> + case T.uncons rest'' of + Just (e, rest''') | isLowerHex e -> + Tok (lin, col) Esc2 (T.pack ['^','^',d,e]) + : totoks (lin, col + 4) rest''' + _ -> + Tok (lin, col) Esc1 (T.pack ['^','^',d]) + : totoks (lin, col + 3) rest'' + | d < '\128' -> + Tok (lin, col) Esc1 (T.pack ['^','^',d]) + : totoks (lin, col + 3) rest'' + _ -> [Tok (lin, col) Symbol ("^"), + Tok (lin, col + 1) Symbol ("^")] + _ -> Tok (lin, col) Symbol ("^") + : totoks (lin, col + 1) rest + | otherwise -> + Tok (lin, col) Symbol (T.singleton c) : totoks (lin, col + 1) rest + + where isSpaceOrTab ' ' = True + isSpaceOrTab '\t' = True + isSpaceOrTab _ = False + +isLowerHex :: Char -> Bool +isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' + +untokenize :: [Tok] -> Text +untokenize = mconcat . map untoken + +untoken :: Tok -> Text +untoken (Tok _ _ t) = t + +satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok +satisfyTok f = + try $ do + res <- tokenPrim (T.unpack . untoken) updatePos matcher + doMacros 0 -- apply macros on remaining input stream + return res + where matcher t | f t = Just t + | otherwise = Nothing + updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos + updatePos spos _ (Tok (lin,col) _ _ : _) = + setSourceColumn (setSourceLine spos lin) col + updatePos spos _ [] = spos + +doMacros :: PandocMonad m => Int -> LP m () +doMacros n = do + verbatimMode <- sVerbatimMode <$> getState + when (not verbatimMode) $ do + inp <- getInput + case inp of + Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos name ts + Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos ("end" <> name) ts + Tok spos (CtrlSeq name) _ : ts + -> handleMacros spos name ts + _ -> return () + where handleMacros spos name ts = do + macros <- sMacros <$> getState + case M.lookup name macros of + Nothing -> return () + Just (Macro numargs optarg newtoks) -> do + setInput ts + let getarg = spaces >> braced + args <- case optarg of + Nothing -> count numargs getarg + Just o -> + (:) <$> option o bracketedToks + <*> count (numargs - 1) getarg + let addTok (Tok _ (Arg i) _) acc | i > 0 + , i <= numargs = + map (setpos spos) (args !! (i - 1)) ++ acc + addTok t acc = setpos spos t : acc + ts' <- getInput + setInput $ foldr addTok ts' newtoks + if n > 20 -- detect macro expansion loops + then throwError $ PandocMacroLoop (T.unpack name) + else doMacros (n + 1) + +setpos :: (Line, Column) -> Tok -> Tok +setpos spos (Tok _ tt txt) = Tok spos tt txt + +anyControlSeq :: PandocMonad m => LP m Tok +anyControlSeq = satisfyTok isCtrlSeq + where isCtrlSeq (Tok _ (CtrlSeq _) _) = True + isCtrlSeq _ = False + +anySymbol :: PandocMonad m => LP m Tok +anySymbol = satisfyTok isSym + where isSym (Tok _ Symbol _) = True + isSym _ = False + +spaces :: PandocMonad m => LP m () +spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) + +spaces1 :: PandocMonad m => LP m () +spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) + +tokTypeIn :: [TokType] -> Tok -> Bool +tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes + +controlSeq :: PandocMonad m => Text -> LP m Tok +controlSeq name = satisfyTok isNamed + where isNamed (Tok _ (CtrlSeq n) _) = n == name + isNamed _ = False + +symbol :: PandocMonad m => Char -> LP m Tok +symbol c = satisfyTok isc + where isc (Tok _ Symbol d) = case T.uncons d of + Just (c',_) -> c == c' + _ -> False + isc _ = False + +symbolIn :: PandocMonad m => [Char] -> LP m Tok +symbolIn cs = satisfyTok isInCs + where isInCs (Tok _ Symbol d) = case T.uncons d of + Just (c,_) -> c `elem` cs + _ -> False + isInCs _ = False sp :: PandocMonad m => LP m () sp = whitespace <|> endline whitespace :: PandocMonad m => LP m () -whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') +whitespace = () <$ satisfyTok isSpaceTok + where isSpaceTok (Tok _ Spaces _) = True + isSpaceTok _ = False -endline :: PandocMonad m => LP m () -endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline) - -isLowerHex :: Char -> Bool -isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' +newlineTok :: PandocMonad m => LP m () +newlineTok = () <$ satisfyTok isNewlineTok -tildeEscape :: PandocMonad m => LP m Char -tildeEscape = try $ do - string "^^" - c <- satisfy (\x -> x >= '\0' && x <= '\128') - d <- if isLowerHex c - then option "" $ count 1 (satisfy isLowerHex) - else return "" - if null d - then case ord c of - x | x >= 64 && x <= 127 -> return $ chr (x - 64) - | otherwise -> return $ chr (x + 64) - else return $ chr $ read ('0':'x':c:d) +isNewlineTok :: Tok -> Bool +isNewlineTok (Tok _ Newline _) = True +isNewlineTok _ = False comment :: PandocMonad m => LP m () -comment = do - char '%' - skipMany (satisfy (/='\n')) - optional newline - return () +comment = () <$ satisfyTok isCommentTok + where isCommentTok (Tok _ Comment _) = True + isCommentTok _ = False + +anyTok :: PandocMonad m => LP m Tok +anyTok = satisfyTok (const True) -bgroup :: PandocMonad m => LP m () +endline :: PandocMonad m => LP m () +endline = try $ do + newlineTok + lookAhead anyTok + notFollowedBy blankline + +blankline :: PandocMonad m => LP m () +blankline = try $ skipMany whitespace *> newlineTok + +primEscape :: PandocMonad m => LP m Char +primEscape = do + Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2]) + case toktype of + Esc1 -> case T.uncons (T.drop 2 t) of + Just (c, _) + | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) + | otherwise -> return (chr (ord c + 64)) + Nothing -> fail "Empty content of Esc1" + Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of + Just x -> return (chr x) + Nothing -> fail $ "Could not read: " ++ T.unpack t + _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen + +bgroup :: PandocMonad m => LP m Tok bgroup = try $ do - skipMany (spaceChar <|> try (newline <* notFollowedBy blankline)) - () <$ char '{' - <|> () <$ controlSeq "bgroup" - <|> () <$ controlSeq "begingroup" + skipMany sp + symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" -egroup :: PandocMonad m => LP m () -egroup = () <$ char '}' - <|> () <$ controlSeq "egroup" - <|> () <$ controlSeq "endgroup" +egroup :: PandocMonad m => LP m Tok +egroup = (symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup") -grouped :: PandocMonad m => Monoid a => LP m a -> LP m a +grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a grouped parser = try $ do bgroup -- first we check for an inner 'grouped', because -- {{a,b}} should be parsed the same as {a,b} - try (grouped parser <* egroup) - <|> (mconcat <$> manyTill parser egroup) - -braced :: PandocMonad m => LP m String -braced = grouped chunk - where chunk = - many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{')) - <|> try (string "\\}") - <|> try (string "\\{") - <|> try (string "\\\\") - <|> ((\x -> "{" ++ x ++ "}") <$> braced) - <|> count 1 anyChar + try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup) + +braced :: PandocMonad m => LP m [Tok] +braced = bgroup *> braced' 1 + where braced' (n :: Int) = + handleEgroup n <|> handleBgroup n <|> handleOther n + handleEgroup n = do + t <- egroup + if n == 1 + then return [] + else (t:) <$> braced' (n - 1) + handleBgroup n = do + t <- bgroup + (t:) <$> braced' (n + 1) + handleOther n = do + t <- anyTok + (t:) <$> braced' n bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a -bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']')) - -mathDisplay :: PandocMonad m => LP m String -> LP m Inlines -mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim) - -mathInline :: PandocMonad m => LP m String -> LP m Inlines -mathInline p = math <$> (try p >>= applyMacros') - -mathChars :: PandocMonad m => LP m String -mathChars = - concat <$> many (escapedChar - <|> (snd <$> withRaw braced) - <|> many1 (satisfy isOrdChar)) - where escapedChar = try $ do char '\\' - c <- anyChar - return ['\\',c] - isOrdChar '$' = False - isOrdChar '{' = False - isOrdChar '}' = False - isOrdChar '\\' = False - isOrdChar _ = True - -quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines -quoted' f starter ender = do - startchs <- starter - smart <- extensionEnabled Ext_smart <$> getOption readerExtensions - if smart - then do - ils <- many (notFollowedBy ender >> inline) - (ender >> return (f (mconcat ils))) <|> - (<> mconcat ils) <$> - lit (case startchs of - "``" -> "“" - "`" -> "‘" - _ -> startchs) - else lit startchs +bracketed parser = try $ do + symbol '[' + mconcat <$> manyTill parser (symbol ']') -doubleQuote :: PandocMonad m => LP m Inlines -doubleQuote = do - quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") - <|> quoted' doubleQuoted (string "“") (void $ char '”') - -- the following is used by babel for localized quotes: - <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") - <|> quoted' doubleQuoted (string "\"") (void $ char '"') +dimenarg :: PandocMonad m => LP m Text +dimenarg = try $ do + ch <- option False $ True <$ symbol '=' + Tok _ _ s <- satisfyTok isWordTok + guard $ (T.take 2 (T.reverse s)) `elem` + ["pt","pc","in","bp","cm","mm","dd","cc","sp"] + let num = T.take (T.length s - 2) s + guard $ T.length num > 0 + guard $ T.all isDigit num + return $ T.pack ['=' | ch] <> s -singleQuote :: PandocMonad m => LP m Inlines -singleQuote = do - smart <- extensionEnabled Ext_smart <$> getOption readerExtensions - if smart - then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) - <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) - else str <$> many1 (oneOf "`\'‘’") +-- inline elements: -inline :: PandocMonad m => LP m Inlines -inline = (mempty <$ comment) - <|> (space <$ whitespace) - <|> (softbreak <$ endline) - <|> inlineText - <|> inlineCommand - <|> inlineEnvironment - <|> inlineGroup - <|> (char '-' *> option (str "-") - (char '-' *> option (str "–") (str "—" <$ char '-'))) - <|> doubleQuote - <|> singleQuote - <|> (str "”" <$ try (string "''")) - <|> (str "”" <$ char '”') - <|> (str "’" <$ char '\'') - <|> (str "’" <$ char '’') - <|> (str "\160" <$ char '~') - <|> mathDisplay (string "$$" *> mathChars <* string "$$") - <|> mathInline (char '$' *> mathChars <* char '$') - <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) - <|> (str . (:[]) <$> tildeEscape) - <|> (do res <- oneOf "#&~^'`\"[]" - pos <- getPosition - report $ ParsingUnescaped [res] pos - return $ str [res]) +word :: PandocMonad m => LP m Inlines +word = (str . T.unpack . untoken) <$> satisfyTok isWordTok -inlines :: PandocMonad m => LP m Inlines -inlines = mconcat <$> many (notFollowedBy (char '}') *> inline) +regularSymbol :: PandocMonad m => LP m Inlines +regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol + where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t + isRegularSymbol _ = False + isSpecial c = c `Set.member` specialChars + +specialChars :: Set.Set Char +specialChars = Set.fromList "#$%&~_^\\{}" + +isWordTok :: Tok -> Bool +isWordTok (Tok _ Word _) = True +isWordTok _ = False inlineGroup :: PandocMonad m => LP m Inlines inlineGroup = do @@ -269,233 +586,564 @@ inlineGroup = do -- we need the span so we can detitlecase bibtex entries; -- we need to know when something is {C}apitalized -block :: PandocMonad m => LP m Blocks -block = (mempty <$ comment) - <|> (mempty <$ ((spaceChar <|> newline) *> spaces)) - <|> environment - <|> include - <|> macro - <|> blockCommand - <|> paragraph - <|> grouped block - -blocks :: PandocMonad m => LP m Blocks -blocks = mconcat <$> many block +doLHSverb :: PandocMonad m => LP m Inlines +doLHSverb = + (codeWith ("",["haskell"],[]) . T.unpack . untokenize) + <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|') -getRawCommand :: PandocMonad m => String -> LP m String -getRawCommand name' = do - rawargs <- withRaw (many (try (optional sp *> opt)) *> - option "" (try (optional sp *> dimenarg)) *> - many braced) - return $ '\\' : name' ++ snd rawargs +mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines +mkImage options src = do + let replaceTextwidth (k,v) = + case numUnit v of + Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + _ -> (k, v) + let kvs = map replaceTextwidth + $ filter (\(k,_) -> k `elem` ["width", "height"]) options + let attr = ("",[], kvs) + let alt = str "image" + case takeExtension src of + "" -> do + defaultExt <- getOption readerDefaultImageExtension + return $ imageWith attr (addExtension src defaultExt) "" alt + _ -> return $ imageWith attr src "" alt -lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v -lookupListDefault d = (fromMaybe d .) . lookupList - where - lookupList l m = msum $ map (`M.lookup` m) l +-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" +dosiunitx :: PandocMonad m => LP m Inlines +dosiunitx = do + skipopts + value <- tok + valueprefix <- option "" $ bracketed tok + unit <- tok + let emptyOr160 "" = "" + emptyOr160 _ = "\160" + return . mconcat $ [valueprefix, + emptyOr160 valueprefix, + value, + emptyOr160 unit, + unit] -blockCommand :: PandocMonad m => LP m Blocks -blockCommand = try $ do - name <- anyControlSeq - guard $ name /= "begin" && name /= "end" - star <- option "" (string "*" <* optional sp) - let name' = name ++ star - let raw = do - rawcommand <- getRawCommand name' - transformed <- applyMacros' rawcommand - guard $ transformed /= rawcommand - notFollowedBy $ parseFromString' inlines transformed - parseFromString' blocks transformed - lookupListDefault raw [name',name] blockCommands +lit :: String -> LP m Inlines +lit = pure . str -inBrackets :: Inlines -> Inlines -inBrackets x = str "[" <> x <> str "]" +removeDoubleQuotes :: Text -> Text +removeDoubleQuotes t = + maybe t id $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" --- eat an optional argument and one or more arguments in braces -ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines) -ignoreInlines name = (name, p) - where - p = do oa <- optargs - let rawCommand = '\\':name ++ oa - let doraw = guardRaw >> return (rawInline "latex" rawCommand) - doraw <|> ignore rawCommand +doubleQuote :: PandocMonad m => LP m Inlines +doubleQuote = do + quoted' doubleQuoted (try $ count 2 $ symbol '`') + (void $ try $ count 2 $ symbol '\'') + <|> quoted' doubleQuoted ((:[]) <$> symbol '“') (void $ symbol '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ sequence [symbol '"', symbol '`']) + (void $ try $ sequence [symbol '"', symbol '\'']) + <|> quoted' doubleQuoted ((:[]) <$> symbol '"') + (void $ symbol '"') -guardRaw :: PandocMonad m => LP m () -guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex +singleQuote :: PandocMonad m => LP m Inlines +singleQuote = do + quoted' singleQuoted ((:[]) <$> symbol '`') + (try $ symbol '\'' >> + notFollowedBy (satisfyTok startsWithLetter)) + <|> quoted' singleQuoted ((:[]) <$> symbol '‘') + (try $ symbol '’' >> + notFollowedBy (satisfyTok startsWithLetter)) + where startsWithLetter (Tok _ Word t) = + case T.uncons t of + Just (c, _) | isLetter c -> True + _ -> False + startsWithLetter _ = False + +quoted' :: PandocMonad m + => (Inlines -> Inlines) + -> LP m [Tok] + -> LP m () + -> LP m Inlines +quoted' f starter ender = do + startchs <- (T.unpack . untokenize) <$> starter + smart <- extensionEnabled Ext_smart <$> getOption readerExtensions + if smart + then do + ils <- many (notFollowedBy ender >> inline) + (ender >> return (f (mconcat ils))) <|> + (<> mconcat ils) <$> + lit (case startchs of + "``" -> "“" + "`" -> "‘" + cs -> cs) + else lit startchs -optargs :: PandocMonad m => LP m String -optargs = snd <$> withRaw (skipopts *> skipMany (try $ optional sp *> braced)) +enquote :: PandocMonad m => LP m Inlines +enquote = do + skipopts + quoteContext <- sQuoteContext <$> getState + if quoteContext == InDoubleQuote + then singleQuoted <$> withQuoteContext InSingleQuote tok + else doubleQuoted <$> withQuoteContext InDoubleQuote tok -ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a -ignore raw = do - pos <- getPosition - report $ SkippedContent raw pos - return mempty +doverb :: PandocMonad m => LP m Inlines +doverb = do + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + withVerbatimMode $ + (code . T.unpack . untokenize) <$> + manyTill (verbTok marker) (symbol marker) + +verbTok :: PandocMonad m => Char -> LP m Tok +verbTok stopchar = do + t@(Tok (lin, col) toktype txt) <- satisfyTok (not . isNewlineTok) + case T.findIndex (== stopchar) txt of + Nothing -> return t + Just i -> do + let (t1, t2) = T.splitAt i txt + inp <- getInput + setInput $ Tok (lin, col + i) Symbol (T.singleton stopchar) + : (totoks (lin, col + i + 1) (T.drop 1 t2)) ++ inp + return $ Tok (lin, col) toktype t1 -ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks) -ignoreBlocks name = (name, p) - where - p = do oa <- optargs - let rawCommand = '\\':name ++ oa - let doraw = guardRaw >> return (rawBlock "latex" rawCommand) - doraw <|> ignore rawCommand +dolstinline :: PandocMonad m => LP m Inlines +dolstinline = do + options <- option [] keyvals + let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + let stopchar = if marker == '{' then '}' else marker + withVerbatimMode $ + (codeWith ("",classes,[]) . T.unpack . untokenize) <$> + manyTill (verbTok stopchar) (symbol stopchar) -blockCommands :: PandocMonad m => M.Map String (LP m Blocks) -blockCommands = M.fromList $ - [ ("par", mempty <$ skipopts) - , ("parbox", braced >> grouped blocks) - , ("title", mempty <$ (skipopts *> - (grouped inline >>= addMeta "title") - <|> (grouped block >>= addMeta "title"))) - , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) - , ("author", mempty <$ (skipopts *> authors)) - -- -- in letter class, temp. store address & sig as title, author - , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) - , ("signature", mempty <$ (skipopts *> authors)) - , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) - -- Koma-script metadata commands - , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication")) - -- sectioning - , ("part", section nullAttr (-1)) - , ("part*", section nullAttr (-1)) - , ("chapter", section nullAttr 0) - , ("chapter*", section ("",["unnumbered"],[]) 0) - , ("section", section nullAttr 1) - , ("section*", section ("",["unnumbered"],[]) 1) - , ("subsection", section nullAttr 2) - , ("subsection*", section ("",["unnumbered"],[]) 2) - , ("subsubsection", section nullAttr 3) - , ("subsubsection*", section ("",["unnumbered"],[]) 3) - , ("paragraph", section nullAttr 4) - , ("paragraph*", section ("",["unnumbered"],[]) 4) - , ("subparagraph", section nullAttr 5) - , ("subparagraph*", section ("",["unnumbered"],[]) 5) - -- beamer slides - , ("frametitle", section nullAttr 3) - , ("framesubtitle", section nullAttr 4) - -- letters - , ("opening", (para . trimInlines) <$> (skipopts *> tok)) - , ("closing", skipopts *> closing) - -- - , ("hrule", pure horizontalRule) - , ("strut", pure mempty) - , ("rule", skipopts *> tok *> tok *> pure horizontalRule) - , ("item", skipopts *> looseItem) - , ("documentclass", skipopts *> braced *> preamble) - , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) - , ("caption", skipopts *> setCaption) - , ("bibliography", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs)) - , ("addbibresource", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs)) - -- includes - , ("lstinputlisting", inputListing) - , ("graphicspath", graphicsPath) - -- hyperlink - , ("hypertarget", braced >> grouped block) - ] ++ map ignoreBlocks - -- these commands will be ignored unless --parse-raw is specified, - -- in which case they will appear as raw latex blocks - [ "newcommand", "renewcommand", "newenvironment", "renewenvironment" - -- newcommand, etc. should be parsed by macro, but we need this - -- here so these aren't parsed as inline commands to ignore - , "special", "pdfannot", "pdfstringdef" - , "bibliographystyle" - , "maketitle", "makeindex", "makeglossary" - , "addcontentsline", "addtocontents", "addtocounter" - -- \ignore{} is used conventionally in literate haskell for definitions - -- that are to be processed by the compiler but not printed. - , "ignore" - , "hyperdef" - , "markboth", "markright", "markleft" - , "hspace", "vspace" - , "newpage" - , "clearpage" - , "pagebreak" - ] +keyval :: PandocMonad m => LP m (String, String) +keyval = try $ do + Tok _ Word key <- satisfyTok isWordTok + let isSpecSym (Tok _ Symbol t) = t `elem` [".",":","-","|","\\"] + isSpecSym _ = False + val <- option [] $ do + symbol '=' + braced <|> (many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym + <|> anyControlSeq)) + optional sp + optional (symbol ',') + optional sp + return (T.unpack key, T.unpack . untokenize $ val) -graphicsPath :: PandocMonad m => LP m Blocks -graphicsPath = do - ps <- bgroup *> (manyTill braced egroup) - getResourcePath >>= setResourcePath . (++ ps) - return mempty +keyvals :: PandocMonad m => LP m [(String, String)] +keyvals = try $ symbol '[' >> manyTill keyval (symbol ']') -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () -addMeta field val = updateState $ \st -> - st{ stateMeta = addMetaField field val $ stateMeta st } +accent :: (Char -> String) -> Inlines -> LP m Inlines +accent f ils = + case toList ils of + (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) + [] -> mzero + _ -> return ils -splitBibs :: String -> [Inlines] -splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') +grave :: Char -> String +grave 'A' = "À" +grave 'E' = "È" +grave 'I' = "Ì" +grave 'O' = "Ò" +grave 'U' = "Ù" +grave 'a' = "à" +grave 'e' = "è" +grave 'i' = "ì" +grave 'o' = "ò" +grave 'u' = "ù" +grave c = [c] -setCaption :: PandocMonad m => LP m Blocks -setCaption = do - ils <- tok - mblabel <- option Nothing $ - try $ spaces' >> controlSeq "label" >> (Just <$> tok) - let ils' = case mblabel of - Just lab -> ils <> spanWith - ("",[],[("data-label", stringify lab)]) mempty - Nothing -> ils - updateState $ \st -> st{ stateCaption = Just ils' } - return mempty +acute :: Char -> String +acute 'A' = "Á" +acute 'E' = "É" +acute 'I' = "Í" +acute 'O' = "Ó" +acute 'U' = "Ú" +acute 'Y' = "Ý" +acute 'a' = "á" +acute 'e' = "é" +acute 'i' = "í" +acute 'o' = "ó" +acute 'u' = "ú" +acute 'y' = "ý" +acute 'C' = "Ć" +acute 'c' = "ć" +acute 'L' = "Ĺ" +acute 'l' = "ĺ" +acute 'N' = "Ń" +acute 'n' = "ń" +acute 'R' = "Ŕ" +acute 'r' = "ŕ" +acute 'S' = "Ś" +acute 's' = "ś" +acute 'Z' = "Ź" +acute 'z' = "ź" +acute c = [c] -resetCaption :: PandocMonad m => LP m () -resetCaption = updateState $ \st -> st{ stateCaption = Nothing } +circ :: Char -> String +circ 'A' = "Â" +circ 'E' = "Ê" +circ 'I' = "Î" +circ 'O' = "Ô" +circ 'U' = "Û" +circ 'a' = "â" +circ 'e' = "ê" +circ 'i' = "î" +circ 'o' = "ô" +circ 'u' = "û" +circ 'C' = "Ĉ" +circ 'c' = "ĉ" +circ 'G' = "Ĝ" +circ 'g' = "ĝ" +circ 'H' = "Ĥ" +circ 'h' = "ĥ" +circ 'J' = "Ĵ" +circ 'j' = "ĵ" +circ 'S' = "Ŝ" +circ 's' = "ŝ" +circ 'W' = "Ŵ" +circ 'w' = "ŵ" +circ 'Y' = "Ŷ" +circ 'y' = "ŷ" +circ c = [c] -authors :: PandocMonad m => LP m () -authors = try $ do - bgroup - let oneAuthor = mconcat <$> - many1 (notFollowedBy' (controlSeq "and") >> - (inline <|> mempty <$ blockCommand)) - -- skip e.g. \vspace{10pt} - auths <- sepBy oneAuthor (controlSeq "and") - egroup - addMeta "author" (map trimInlines auths) +tilde :: Char -> String +tilde 'A' = "Ã" +tilde 'a' = "ã" +tilde 'O' = "Õ" +tilde 'o' = "õ" +tilde 'I' = "Ĩ" +tilde 'i' = "ĩ" +tilde 'U' = "Ũ" +tilde 'u' = "ũ" +tilde 'N' = "Ñ" +tilde 'n' = "ñ" +tilde c = [c] -section :: PandocMonad m => Attr -> Int -> LP m Blocks -section (ident, classes, kvs) lvl = do - skipopts - contents <- grouped inline - lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced) - attr' <- registerHeader (lab, classes, kvs) contents - return $ headerWith attr' lvl contents +umlaut :: Char -> String +umlaut 'A' = "Ä" +umlaut 'E' = "Ë" +umlaut 'I' = "Ï" +umlaut 'O' = "Ö" +umlaut 'U' = "Ü" +umlaut 'a' = "ä" +umlaut 'e' = "ë" +umlaut 'i' = "ï" +umlaut 'o' = "ö" +umlaut 'u' = "ü" +umlaut c = [c] -inlineCommand :: PandocMonad m => LP m Inlines -inlineCommand = try $ do - (name, raw') <- withRaw anyControlSeq - guard $ name /= "begin" && name /= "end" - star <- option "" (string "*") - let name' = name ++ star +hungarumlaut :: Char -> String +hungarumlaut 'A' = "A̋" +hungarumlaut 'E' = "E̋" +hungarumlaut 'I' = "I̋" +hungarumlaut 'O' = "Ő" +hungarumlaut 'U' = "Ű" +hungarumlaut 'Y' = "ӳ" +hungarumlaut 'a' = "a̋" +hungarumlaut 'e' = "e̋" +hungarumlaut 'i' = "i̋" +hungarumlaut 'o' = "ő" +hungarumlaut 'u' = "ű" +hungarumlaut 'y' = "ӳ" +hungarumlaut c = [c] + +dot :: Char -> String +dot 'C' = "Ċ" +dot 'c' = "ċ" +dot 'E' = "Ė" +dot 'e' = "ė" +dot 'G' = "Ġ" +dot 'g' = "ġ" +dot 'I' = "İ" +dot 'Z' = "Ż" +dot 'z' = "ż" +dot c = [c] + +macron :: Char -> String +macron 'A' = "Ā" +macron 'E' = "Ē" +macron 'I' = "Ī" +macron 'O' = "Ō" +macron 'U' = "Ū" +macron 'a' = "ā" +macron 'e' = "ē" +macron 'i' = "ī" +macron 'o' = "ō" +macron 'u' = "ū" +macron c = [c] + +cedilla :: Char -> String +cedilla 'c' = "ç" +cedilla 'C' = "Ç" +cedilla 's' = "ş" +cedilla 'S' = "Ş" +cedilla 't' = "ţ" +cedilla 'T' = "Ţ" +cedilla 'e' = "ȩ" +cedilla 'E' = "Ȩ" +cedilla 'h' = "ḩ" +cedilla 'H' = "Ḩ" +cedilla 'o' = "o̧" +cedilla 'O' = "O̧" +cedilla c = [c] + +hacek :: Char -> String +hacek 'A' = "Ǎ" +hacek 'a' = "ǎ" +hacek 'C' = "Č" +hacek 'c' = "č" +hacek 'D' = "Ď" +hacek 'd' = "ď" +hacek 'E' = "Ě" +hacek 'e' = "ě" +hacek 'G' = "Ǧ" +hacek 'g' = "ǧ" +hacek 'H' = "Ȟ" +hacek 'h' = "ȟ" +hacek 'I' = "Ǐ" +hacek 'i' = "ǐ" +hacek 'j' = "ǰ" +hacek 'K' = "Ǩ" +hacek 'k' = "ǩ" +hacek 'L' = "Ľ" +hacek 'l' = "ľ" +hacek 'N' = "Ň" +hacek 'n' = "ň" +hacek 'O' = "Ǒ" +hacek 'o' = "ǒ" +hacek 'R' = "Ř" +hacek 'r' = "ř" +hacek 'S' = "Š" +hacek 's' = "š" +hacek 'T' = "Ť" +hacek 't' = "ť" +hacek 'U' = "Ǔ" +hacek 'u' = "ǔ" +hacek 'Z' = "Ž" +hacek 'z' = "ž" +hacek c = [c] + +breve :: Char -> String +breve 'A' = "Ă" +breve 'a' = "ă" +breve 'E' = "Ĕ" +breve 'e' = "ĕ" +breve 'G' = "Ğ" +breve 'g' = "ğ" +breve 'I' = "Ĭ" +breve 'i' = "ĭ" +breve 'O' = "Ŏ" +breve 'o' = "ŏ" +breve 'U' = "Ŭ" +breve 'u' = "ŭ" +breve c = [c] + +toksToString :: [Tok] -> String +toksToString = T.unpack . untokenize + +mathDisplay :: String -> Inlines +mathDisplay = displayMath . trim + +mathInline :: String -> Inlines +mathInline = math . trim + +dollarsMath :: PandocMonad m => LP m Inlines +dollarsMath = do + symbol '$' + display <- option False (True <$ symbol '$') + contents <- trim . toksToString <$> + many (notFollowedBy (symbol '$') >> anyTok) + if display + then do + mathDisplay contents <$ try (symbol '$' >> symbol '$') + <|> (guard (null contents) >> return (mathInline "")) + else mathInline contents <$ (symbol '$') + +-- citations + +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _ = [] + +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = + let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] +addSuffix _ _ = [] + +simpleCiteArgs :: PandocMonad m => LP m [Citation] +simpleCiteArgs = try $ do + first <- optionMaybe $ toList <$> opt + second <- optionMaybe $ toList <$> opt + keys <- try $ bgroup *> (manyTill citationLabel egroup) + let (pre, suf) = case (first , second ) of + (Just s , Nothing) -> (mempty, s ) + (Just s , Just t ) -> (s , t ) + _ -> (mempty, mempty) + conv k = Citation { citationId = k + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationHash = 0 + , citationNoteNum = 0 + } + return $ addPrefix pre $ addSuffix suf $ map conv keys + +citationLabel :: PandocMonad m => LP m String +citationLabel = do + optional sp + toksToString <$> + (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) + <* optional sp + <* optional (symbol ',') + <* optional sp) + where bibtexKeyChar = ".:;?!`'()/*@_+=-[]" :: [Char] + +cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] +cites mode multi = try $ do + cits <- if multi + then many1 simpleCiteArgs + else count 1 simpleCiteArgs + let cs = concat cits + return $ case mode of + AuthorInText -> case cs of + (c:rest) -> c {citationMode = mode} : rest + [] -> [] + _ -> map (\a -> a {citationMode = mode}) cs + +citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines +citation name mode multi = do + (c,raw) <- withRaw $ cites mode multi + return $ cite c (rawInline "latex" $ "\\" ++ name ++ (toksToString raw)) + +handleCitationPart :: Inlines -> [Citation] +handleCitationPart ils = + let isCite Cite{} = True + isCite _ = False + (pref, rest) = break isCite (toList ils) + in case rest of + (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs + _ -> [] + +complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines +complexNatbibCitation mode = try $ do + (cs, raw) <- + withRaw $ concat <$> do + bgroup + items <- mconcat <$> + many1 (notFollowedBy (symbol ';') >> inline) + `sepBy1` (symbol ';') + egroup + return $ map handleCitationPart items + case cs of + [] -> mzero + (c:cits) -> return $ cite (c{ citationMode = mode }:cits) + (rawInline "latex" $ "\\citetext" ++ toksToString raw) + +inNote :: Inlines -> Inlines +inNote ils = + note $ para $ ils <> str "." + +inlineCommand' :: PandocMonad m => LP m Inlines +inlineCommand' = try $ do + Tok _ (CtrlSeq name) cmd <- anyControlSeq + guard $ name /= "begin" && name /= "end" + (star, rawstar) <- withRaw $ option "" ("*" <$ symbol '*' <* optional sp) + let name' = name <> star + let names = ordNub [name', name] -- check non-starred as fallback let raw = do - guard $ not (isBlockCommand name) - rawargs <- withRaw - (skipangles *> skipopts *> option "" dimenarg *> many braced) - let rawcommand = raw' ++ star ++ snd rawargs - transformed <- applyMacros' rawcommand - exts <- getOption readerExtensions - if transformed /= rawcommand - then parseFromString' inlines transformed - else if extensionEnabled Ext_raw_tex exts - then return $ rawInline "latex" rawcommand - else ignore rawcommand - (lookupListDefault raw [name',name] inlineCommands <* - optional (try (string "{}"))) - -rawInlineOr :: PandocMonad m => String -> LP m Inlines -> LP m Inlines -rawInlineOr name' fallback = do - parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions - if parseRaw - then rawInline "latex" <$> getRawCommand name' - else fallback + guard $ isInlineCommand name || not (isBlockCommand name) + (_, rawargs) <- withRaw + (skipangles *> skipopts *> option "" dimenarg *> many braced) + let rawcommand = T.unpack $ cmd <> untokenize (rawstar ++ rawargs) + (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand)) + <|> ignore rawcommand + lookupListDefault raw names inlineCommands + +tok :: PandocMonad m => LP m Inlines +tok = grouped inline <|> inlineCommand' <|> singleChar + where singleChar = try $ do + Tok (lin,col) toktype t <- satisfyTok (tokTypeIn [Word, Symbol]) + guard $ not $ toktype == Symbol && + T.any (`Set.member` specialChars) t + if T.length t > 1 + then do + let (t1, t2) = (T.take 1 t, T.drop 1 t) + inp <- getInput + setInput $ (Tok (lin, col + 1) toktype t2) : inp + return $ str (T.unpack t1) + else return $ str (T.unpack t) + +opt :: PandocMonad m => LP m Inlines +opt = bracketed inline + +rawopt :: PandocMonad m => LP m Text +rawopt = do + symbol '[' + inner <- untokenize <$> manyTill anyTok (symbol ']') + optional sp + return $ "[" <> inner <> "]" + +skipopts :: PandocMonad m => LP m () +skipopts = skipMany rawopt + +-- opts in angle brackets are used in beamer +rawangle :: PandocMonad m => LP m () +rawangle = try $ do + symbol '<' + () <$ manyTill anyTok (symbol '>') + +skipangles :: PandocMonad m => LP m () +skipangles = skipMany rawangle + +ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a +ignore raw = do + pos <- getPosition + report $ SkippedContent raw pos + return mempty + +withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok]) +withRaw parser = do + inp <- getInput + result <- parser + nxt <- option (Tok (0,0) Word "") (lookAhead anyTok) + let raw = takeWhile (/= nxt) inp + return (result, raw) + +inBrackets :: Inlines -> Inlines +inBrackets x = str "[" <> x <> str "]" + +unescapeURL :: String -> String +unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs + where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) +unescapeURL (x:xs) = x:unescapeURL xs +unescapeURL [] = "" + +mathEnvWith :: PandocMonad m + => (Inlines -> a) -> Maybe Text -> Text -> LP m a +mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name + where inner x = case innerEnv of + Nothing -> x + Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++ + "\\end{" ++ T.unpack y ++ "}" -isBlockCommand :: String -> Bool -isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks)) +mathEnv :: PandocMonad m => Text -> LP m String +mathEnv name = do + skipopts + optional blankline + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ T.unpack $ untokenize res +inlineEnvironment :: PandocMonad m => LP m Inlines +inlineEnvironment = try $ do + controlSeq "begin" + name <- untokenize <$> braced + M.findWithDefault mzero name inlineEnvironments -inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines) -inlineEnvironments = M.fromList - [ ("displaymath", mathEnvWith id Nothing "displaymath") +inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines) +inlineEnvironments = M.fromList [ + ("displaymath", mathEnvWith id Nothing "displaymath") , ("math", math <$> mathEnv "math") , ("equation", mathEnvWith id Nothing "equation") , ("equation*", mathEnvWith id Nothing "equation*") @@ -511,7 +1159,7 @@ inlineEnvironments = M.fromList , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") ] -inlineCommands :: PandocMonad m => M.Map String (LP m Inlines) +inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) inlineCommands = M.fromList $ [ ("emph", extractSpaces emph <$> tok) , ("textit", extractSpaces emph <$> tok) @@ -540,9 +1188,9 @@ inlineCommands = M.fromList $ , ("textgreek", tok) , ("sep", lit ",") , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty - , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) - , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) - , ("ensuremath", mathInline braced) + , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")")) + , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]")) + , ("ensuremath", mathInline . toksToString <$> braced) , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) , ("P", lit "¶") , ("S", lit "§") @@ -592,7 +1240,10 @@ inlineCommands = M.fromList $ , ("v", option (str "v") $ try $ tok >>= accent hacek) , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") - , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) + , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState + guard $ not inTableCell + optional (bracketed inline) + spaces)) , (",", lit "\8198") , ("@", pure mempty) , (" ", lit "\160") @@ -607,13 +1258,14 @@ inlineCommands = M.fromList $ , ("verb", doverb) , ("lstinline", dolstinline) , ("Verb", doverb) - , ("url", (unescapeURL <$> braced) >>= \url -> - pure (link url "" (str url))) - , ("href", (unescapeURL <$> braced <* optional sp) >>= \url -> - tok >>= \lab -> - pure (link url "" lab)) + , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url -> + pure (link url "" (str url))) + , ("href", (unescapeURL . toksToString <$> + braced <* optional sp) >>= \url -> + tok >>= \lab -> pure (link url "" lab)) , ("includegraphics", do options <- option [] keyvals - src <- unescapeURL . removeDoubleQuotes <$> braced + src <- unescapeURL . T.unpack . + removeDoubleQuotes . untokenize <$> braced mkImage options src) , ("enquote", enquote) , ("cite", citation "cite" NormalCitation False) @@ -686,362 +1338,456 @@ inlineCommands = M.fromList $ -- fontawesome , ("faCheck", lit "\10003") , ("faClose", lit "\10007") - ] ++ map ignoreInlines - -- these commands will be ignored unless --parse-raw is specified, - -- in which case they will appear as raw latex blocks: + ] + +ttfamily :: PandocMonad m => LP m Inlines +ttfamily = (code . stringify . toList) <$> tok + +rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines +rawInlineOr name' fallback = do + parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions + if parseRaw + then rawInline "latex" <$> getRawCommand name' + else fallback + +getRawCommand :: PandocMonad m => Text -> LP m String +getRawCommand txt = do + (_, rawargs) <- withRaw + (many (try (optional sp *> opt)) *> + option "" (try (optional sp *> dimenarg)) *> + many braced) + return $ T.unpack (txt <> untokenize rawargs) + +isBlockCommand :: Text -> Bool +isBlockCommand s = + s `M.member` (blockCommands :: M.Map Text (LP PandocPure Blocks)) + || s `Set.member` treatAsBlock + +treatAsBlock :: Set.Set Text +treatAsBlock = Set.fromList + [ "newcommand", "renewcommand" + , "newenvironment", "renewenvironment" + , "providecommand", "provideenvironment" + -- newcommand, etc. should be parsed by macroDef, but we need this + -- here so these aren't parsed as inline commands to ignore + , "special", "pdfannot", "pdfstringdef" + , "bibliographystyle" + , "maketitle", "makeindex", "makeglossary" + , "addcontentsline", "addtocontents", "addtocounter" + -- \ignore{} is used conventionally in literate haskell for definitions + -- that are to be processed by the compiler but not printed. + , "ignore" + , "hyperdef" + , "markboth", "markright", "markleft" + , "hspace", "vspace" + , "newpage" + , "clearpage" + , "pagebreak" + ] + +isInlineCommand :: Text -> Bool +isInlineCommand s = + s `M.member` (inlineCommands :: M.Map Text (LP PandocPure Inlines)) + || s `Set.member` treatAsInline + +treatAsInline :: Set.Set Text +treatAsInline = Set.fromList [ "index" , "hspace" , "vspace" + , "noindent" , "newpage" , "clearpage" , "pagebreak" ] -ttfamily :: PandocMonad m => LP m Inlines -ttfamily = (code . stringify . toList) <$> tok +lookupListDefault :: (Show k, Ord k) => v -> [k] -> M.Map k v -> v +lookupListDefault d = (fromMaybe d .) . lookupList + where lookupList l m = msum $ map (`M.lookup` m) l -mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines -mkImage options src = do - let replaceTextwidth (k,v) = case numUnit v of - Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") - _ -> (k, v) - let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options - let attr = ("",[], kvs) - let alt = str "image" - case takeExtension src of - "" -> do - defaultExt <- getOption readerDefaultImageExtension - return $ imageWith attr (addExtension src defaultExt) "" alt - _ -> return $ imageWith attr src "" alt +inline :: PandocMonad m => LP m Inlines +inline = (mempty <$ comment) + <|> (space <$ whitespace) + <|> (softbreak <$ endline) + <|> word + <|> inlineCommand' + <|> inlineEnvironment + <|> inlineGroup + <|> (symbol '-' *> + option (str "-") (symbol '-' *> + option (str "–") (str "—" <$ symbol '-'))) + <|> doubleQuote + <|> singleQuote + <|> (str "”" <$ try (symbol '\'' >> symbol '\'')) + <|> (str "”" <$ symbol '”') + <|> (str "’" <$ symbol '\'') + <|> (str "’" <$ symbol '’') + <|> (str "\160" <$ symbol '~') + <|> dollarsMath + <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) + <|> (str . (:[]) <$> primEscape) + <|> regularSymbol + <|> (do res <- symbolIn "#^'`\"[]" + pos <- getPosition + let s = T.unpack (untoken res) + report $ ParsingUnescaped s pos + return $ str s) -inNote :: Inlines -> Inlines -inNote ils = - note $ para $ ils <> str "." +inlines :: PandocMonad m => LP m Inlines +inlines = mconcat <$> many inline -unescapeURL :: String -> String -unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) -unescapeURL (x:xs) = x:unescapeURL xs -unescapeURL [] = "" - -enquote :: PandocMonad m => LP m Inlines -enquote = do - skipopts - context <- stateQuoteContext <$> getState - if context == InDoubleQuote - then singleQuoted <$> withQuoteContext InSingleQuote tok - else doubleQuoted <$> withQuoteContext InDoubleQuote tok - -doverb :: PandocMonad m => LP m Inlines -doverb = do - marker <- anyChar - code <$> manyTill (satisfy (/='\n')) (char marker) - -dolstinline :: PandocMonad m => LP m Inlines -dolstinline = do - options <- option [] keyvals - let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage - marker <- char '{' <|> anyChar - codeWith ("",classes,[]) <$> manyTill (satisfy (/='\n')) (char '}' <|> char marker) - -doLHSverb :: PandocMonad m => LP m Inlines -doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|') - --- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" -dosiunitx :: PandocMonad m => LP m Inlines -dosiunitx = do - skipopts - value <- tok - valueprefix <- option "" $ char '[' >> (mconcat <$> manyTill tok (char ']')) - unit <- tok - let emptyOr160 "" = "" - emptyOr160 _ = "\160" - return . mconcat $ [valueprefix, - emptyOr160 valueprefix, - value, - emptyOr160 unit, - unit] - -lit :: String -> LP m Inlines -lit = pure . str - -accent :: (Char -> String) -> Inlines -> LP m Inlines -accent f ils = - case toList ils of - (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) - [] -> mzero - _ -> return ils - -grave :: Char -> String -grave 'A' = "À" -grave 'E' = "È" -grave 'I' = "Ì" -grave 'O' = "Ò" -grave 'U' = "Ù" -grave 'a' = "à" -grave 'e' = "è" -grave 'i' = "ì" -grave 'o' = "ò" -grave 'u' = "ù" -grave c = [c] - -acute :: Char -> String -acute 'A' = "Á" -acute 'E' = "É" -acute 'I' = "Í" -acute 'O' = "Ó" -acute 'U' = "Ú" -acute 'Y' = "Ý" -acute 'a' = "á" -acute 'e' = "é" -acute 'i' = "í" -acute 'o' = "ó" -acute 'u' = "ú" -acute 'y' = "ý" -acute 'C' = "Ć" -acute 'c' = "ć" -acute 'L' = "Ĺ" -acute 'l' = "ĺ" -acute 'N' = "Ń" -acute 'n' = "ń" -acute 'R' = "Ŕ" -acute 'r' = "ŕ" -acute 'S' = "Ś" -acute 's' = "ś" -acute 'Z' = "Ź" -acute 'z' = "ź" -acute c = [c] - -circ :: Char -> String -circ 'A' = "Â" -circ 'E' = "Ê" -circ 'I' = "Î" -circ 'O' = "Ô" -circ 'U' = "Û" -circ 'a' = "â" -circ 'e' = "ê" -circ 'i' = "î" -circ 'o' = "ô" -circ 'u' = "û" -circ 'C' = "Ĉ" -circ 'c' = "ĉ" -circ 'G' = "Ĝ" -circ 'g' = "ĝ" -circ 'H' = "Ĥ" -circ 'h' = "ĥ" -circ 'J' = "Ĵ" -circ 'j' = "ĵ" -circ 'S' = "Ŝ" -circ 's' = "ŝ" -circ 'W' = "Ŵ" -circ 'w' = "ŵ" -circ 'Y' = "Ŷ" -circ 'y' = "ŷ" -circ c = [c] +-- block elements: -tilde :: Char -> String -tilde 'A' = "Ã" -tilde 'a' = "ã" -tilde 'O' = "Õ" -tilde 'o' = "õ" -tilde 'I' = "Ĩ" -tilde 'i' = "ĩ" -tilde 'U' = "Ũ" -tilde 'u' = "ũ" -tilde 'N' = "Ñ" -tilde 'n' = "ñ" -tilde c = [c] +begin_ :: PandocMonad m => Text -> LP m () +begin_ t = (try $ do + controlSeq "begin" + spaces + symbol '{' + spaces + Tok _ Word txt <- satisfyTok isWordTok + spaces + symbol '}' + guard (t == txt)) ("\\begin{" ++ T.unpack t ++ "}") -umlaut :: Char -> String -umlaut 'A' = "Ä" -umlaut 'E' = "Ë" -umlaut 'I' = "Ï" -umlaut 'O' = "Ö" -umlaut 'U' = "Ü" -umlaut 'a' = "ä" -umlaut 'e' = "ë" -umlaut 'i' = "ï" -umlaut 'o' = "ö" -umlaut 'u' = "ü" -umlaut c = [c] +end_ :: PandocMonad m => Text -> LP m () +end_ t = (try $ do + controlSeq "end" + spaces + symbol '{' + spaces + Tok _ Word txt <- satisfyTok isWordTok + spaces + symbol '}' + guard $ t == txt) ("\\end{" ++ T.unpack t ++ "}") -hungarumlaut :: Char -> String -hungarumlaut 'A' = "A̋" -hungarumlaut 'E' = "E̋" -hungarumlaut 'I' = "I̋" -hungarumlaut 'O' = "Ő" -hungarumlaut 'U' = "Ű" -hungarumlaut 'Y' = "ӳ" -hungarumlaut 'a' = "a̋" -hungarumlaut 'e' = "e̋" -hungarumlaut 'i' = "i̋" -hungarumlaut 'o' = "ő" -hungarumlaut 'u' = "ű" -hungarumlaut 'y' = "ӳ" -hungarumlaut c = [c] +preamble :: PandocMonad m => LP m Blocks +preamble = mempty <$ many preambleBlock + where preambleBlock = spaces1 + <|> void include + <|> void macroDef + <|> void blockCommand + <|> void braced + <|> (notFollowedBy (begin_ "document") >> void anyTok) -dot :: Char -> String -dot 'C' = "Ċ" -dot 'c' = "ċ" -dot 'E' = "Ė" -dot 'e' = "ė" -dot 'G' = "Ġ" -dot 'g' = "ġ" -dot 'I' = "İ" -dot 'Z' = "Ż" -dot 'z' = "ż" -dot c = [c] +paragraph :: PandocMonad m => LP m Blocks +paragraph = do + x <- trimInlines . mconcat <$> many1 inline + if x == mempty + then return mempty + else return $ para x -macron :: Char -> String -macron 'A' = "Ā" -macron 'E' = "Ē" -macron 'I' = "Ī" -macron 'O' = "Ō" -macron 'U' = "Ū" -macron 'a' = "ā" -macron 'e' = "ē" -macron 'i' = "ī" -macron 'o' = "ō" -macron 'u' = "ū" -macron c = [c] +include :: PandocMonad m => LP m Blocks +include = do + (Tok _ (CtrlSeq name) _) <- + controlSeq "include" <|> controlSeq "input" <|> + controlSeq "subfile" <|> controlSeq "usepackage" + skipMany $ bracketed inline -- skip options + fs <- (map trim . splitBy (==',') . T.unpack . untokenize) <$> braced + let fs' = if name == "usepackage" + then map (maybeAddExtension ".sty") fs + else map (maybeAddExtension ".tex") fs + dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mconcat <$> mapM (insertIncludedFile blocks (tokenize . T.pack) dirs) fs' -cedilla :: Char -> String -cedilla 'c' = "ç" -cedilla 'C' = "Ç" -cedilla 's' = "ş" -cedilla 'S' = "Ş" -cedilla 't' = "ţ" -cedilla 'T' = "Ţ" -cedilla 'e' = "ȩ" -cedilla 'E' = "Ȩ" -cedilla 'h' = "ḩ" -cedilla 'H' = "Ḩ" -cedilla 'o' = "o̧" -cedilla 'O' = "O̧" -cedilla c = [c] +maybeAddExtension :: String -> FilePath -> FilePath +maybeAddExtension ext fp = + if null (takeExtension fp) + then addExtension fp ext + else fp -hacek :: Char -> String -hacek 'A' = "Ǎ" -hacek 'a' = "ǎ" -hacek 'C' = "Č" -hacek 'c' = "č" -hacek 'D' = "Ď" -hacek 'd' = "ď" -hacek 'E' = "Ě" -hacek 'e' = "ě" -hacek 'G' = "Ǧ" -hacek 'g' = "ǧ" -hacek 'H' = "Ȟ" -hacek 'h' = "ȟ" -hacek 'I' = "Ǐ" -hacek 'i' = "ǐ" -hacek 'j' = "ǰ" -hacek 'K' = "Ǩ" -hacek 'k' = "ǩ" -hacek 'L' = "Ľ" -hacek 'l' = "ľ" -hacek 'N' = "Ň" -hacek 'n' = "ň" -hacek 'O' = "Ǒ" -hacek 'o' = "ǒ" -hacek 'R' = "Ř" -hacek 'r' = "ř" -hacek 'S' = "Š" -hacek 's' = "š" -hacek 'T' = "Ť" -hacek 't' = "ť" -hacek 'U' = "Ǔ" -hacek 'u' = "ǔ" -hacek 'Z' = "Ž" -hacek 'z' = "ž" -hacek c = [c] +addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () +addMeta field val = updateState $ \st -> + st{ sMeta = addMetaField field val $ sMeta st } -breve :: Char -> String -breve 'A' = "Ă" -breve 'a' = "ă" -breve 'E' = "Ĕ" -breve 'e' = "ĕ" -breve 'G' = "Ğ" -breve 'g' = "ğ" -breve 'I' = "Ĭ" -breve 'i' = "ĭ" -breve 'O' = "Ŏ" -breve 'o' = "ŏ" -breve 'U' = "Ŭ" -breve 'u' = "ŭ" -breve c = [c] +authors :: PandocMonad m => LP m () +authors = try $ do + bgroup + let oneAuthor = mconcat <$> + many1 (notFollowedBy' (controlSeq "and") >> + (inline <|> mempty <$ blockCommand)) + -- skip e.g. \vspace{10pt} + auths <- sepBy oneAuthor (controlSeq "and") + egroup + addMeta "author" (map trimInlines auths) -tok :: PandocMonad m => LP m Inlines -tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar +macroDef :: PandocMonad m => LP m Blocks +macroDef = do + guardEnabled Ext_latex_macros + mempty <$ ((commandDef <|> environmentDef) <* doMacros 0) + where commandDef = do + (name, macro') <- newcommand + updateState $ \s -> s{ sMacros = M.insert name macro' (sMacros s) } + environmentDef = do + (name, macro1, macro2) <- newenvironment + updateState $ \s -> s{ sMacros = + M.insert name macro1 (sMacros s) } + updateState $ \s -> s{ sMacros = + M.insert ("end" <> name) macro2 (sMacros s) } + -- @\newenvironment{envname}[n-args][default]{begin}{end}@ + -- is equivalent to + -- @\newcommand{\envname}[n-args][default]{begin}@ + -- @\newcommand{\endenvname}@ + +newcommand :: PandocMonad m => LP m (Text, Macro) +newcommand = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> + controlSeq "renewcommand" <|> + controlSeq "providecommand" + optional $ symbol '*' + Tok _ (CtrlSeq name) txt <- withVerbatimMode $ anyControlSeq <|> + (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + contents <- braced + when (mtype == "newcommand") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos + Nothing -> return () + return (name, Macro numargs optarg contents) + +newenvironment :: PandocMonad m => LP m (Text, Macro, Macro) +newenvironment = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> + controlSeq "renewenvironment" <|> + controlSeq "provideenvironment" + optional $ symbol '*' + symbol '{' + spaces + Tok _ Word name <- satisfyTok isWordTok + spaces + symbol '}' + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + startcontents <- braced + spaces + endcontents <- braced + when (mtype == "newenvironment") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos + Nothing -> return () + return (name, Macro numargs optarg startcontents, + Macro 0 Nothing endcontents) + +bracketedToks :: PandocMonad m => LP m [Tok] +bracketedToks = do + symbol '[' + manyTill anyTok (symbol ']') + +bracketedNum :: PandocMonad m => LP m Int +bracketedNum = do + ds <- untokenize <$> bracketedToks + case safeRead (T.unpack ds) of + Just i -> return i + _ -> return 0 -opt :: PandocMonad m => LP m Inlines -opt = bracketed inline +setCaption :: PandocMonad m => LP m Blocks +setCaption = do + ils <- tok + mblabel <- option Nothing $ + try $ spaces >> controlSeq "label" >> (Just <$> tok) + let ils' = case mblabel of + Just lab -> ils <> spanWith + ("",[],[("data-label", stringify lab)]) mempty + Nothing -> ils + updateState $ \st -> st{ sCaption = Just ils' } + return mempty -rawopt :: PandocMonad m => LP m String -rawopt = do - contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|> - try (string "\\[") <|> rawopt) - optional sp - return $ "[" ++ contents ++ "]" +looseItem :: PandocMonad m => LP m Blocks +looseItem = do + inListItem <- sInListItem <$> getState + guard $ not inListItem + skipopts + return mempty -skipopts :: PandocMonad m => LP m () -skipopts = skipMany rawopt +resetCaption :: PandocMonad m => LP m () +resetCaption = updateState $ \st -> st{ sCaption = Nothing } --- opts in angle brackets are used in beamer -rawangle :: PandocMonad m => LP m () -rawangle = try $ do - char '<' - skipMany (noneOf ">") - char '>' - return () +section :: PandocMonad m => Attr -> Int -> LP m Blocks +section (ident, classes, kvs) lvl = do + skipopts + contents <- grouped inline + lab <- option ident $ + try (spaces >> controlSeq "label" + >> spaces >> toksToString <$> braced) + attr' <- registerHeader (lab, classes, kvs) contents + return $ headerWith attr' lvl contents -skipangles :: PandocMonad m => LP m () -skipangles = skipMany rawangle +blockCommand :: PandocMonad m => LP m Blocks +blockCommand = try $ do + Tok _ (CtrlSeq name) txt <- anyControlSeq + guard $ name /= "begin" && name /= "end" + star <- option "" ("*" <$ symbol '*' <* optional sp) + let name' = name <> star + let names = ordNub [name', name] + let raw = do + guard $ isBlockCommand name || not (isInlineCommand name) + rawBlock "latex" <$> getRawCommand txt + lookupListDefault raw names blockCommands -inlineText :: PandocMonad m => LP m Inlines -inlineText = str <$> many1 inlineChar +closing :: PandocMonad m => LP m Blocks +closing = do + contents <- tok + st <- getState + let extractInlines (MetaBlocks [Plain ys]) = ys + extractInlines (MetaBlocks [Para ys ]) = ys + extractInlines _ = [] + let sigs = case lookupMeta "author" (sMeta st) of + Just (MetaList xs) -> + para $ trimInlines $ fromList $ + intercalate [LineBreak] $ map extractInlines xs + _ -> mempty + return $ para (trimInlines contents) <> sigs -inlineChar :: PandocMonad m => LP m Char -inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n" +blockCommands :: PandocMonad m => M.Map Text (LP m Blocks) +blockCommands = M.fromList $ + [ ("par", mempty <$ skipopts) + , ("parbox", braced >> grouped blocks) + , ("title", mempty <$ (skipopts *> + (grouped inline >>= addMeta "title") + <|> (grouped block >>= addMeta "title"))) + , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) + , ("author", mempty <$ (skipopts *> authors)) + -- -- in letter class, temp. store address & sig as title, author + , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) + , ("signature", mempty <$ (skipopts *> authors)) + , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) + -- Koma-script metadata commands + , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication")) + -- sectioning + , ("part", section nullAttr (-1)) + , ("part*", section nullAttr (-1)) + , ("chapter", section nullAttr 0) + , ("chapter*", section ("",["unnumbered"],[]) 0) + , ("section", section nullAttr 1) + , ("section*", section ("",["unnumbered"],[]) 1) + , ("subsection", section nullAttr 2) + , ("subsection*", section ("",["unnumbered"],[]) 2) + , ("subsubsection", section nullAttr 3) + , ("subsubsection*", section ("",["unnumbered"],[]) 3) + , ("paragraph", section nullAttr 4) + , ("paragraph*", section ("",["unnumbered"],[]) 4) + , ("subparagraph", section nullAttr 5) + , ("subparagraph*", section ("",["unnumbered"],[]) 5) + -- beamer slides + , ("frametitle", section nullAttr 3) + , ("framesubtitle", section nullAttr 4) + -- letters + , ("opening", (para . trimInlines) <$> (skipopts *> tok)) + , ("closing", skipopts *> closing) + -- + , ("hrule", pure horizontalRule) + , ("strut", pure mempty) + , ("rule", skipopts *> tok *> tok *> pure horizontalRule) + , ("item", looseItem) + , ("documentclass", skipopts *> braced *> preamble) + , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) + , ("caption", skipopts *> setCaption) + , ("bibliography", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs . toksToString)) + , ("addbibresource", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs . toksToString)) + -- includes + , ("lstinputlisting", inputListing) + , ("graphicspath", graphicsPath) + -- hyperlink + , ("hypertarget", try $ braced >> grouped block) + ] + + +environments :: PandocMonad m => M.Map Text (LP m Blocks) +environments = M.fromList + [ ("document", env "document" blocks) + , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) + , ("letter", env "letter" letterContents) + , ("minipage", env "minipage" $ + skipopts *> spaces *> optional braced *> spaces *> blocks) + , ("figure", env "figure" $ skipopts *> figure) + , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) + , ("center", env "center" blocks) + , ("longtable", env "longtable" $ + resetCaption *> simpTable "longtable" False >>= addTableCaption) + , ("table", env "table" $ + resetCaption *> skipopts *> blocks >>= addTableCaption) + , ("tabular*", env "tabular" $ simpTable "tabular*" True) + , ("tabularx", env "tabularx" $ simpTable "tabularx" True) + , ("tabular", env "tabular" $ simpTable "tabular" False) + , ("quote", blockQuote <$> env "quote" blocks) + , ("quotation", blockQuote <$> env "quotation" blocks) + , ("verse", blockQuote <$> env "verse" blocks) + , ("itemize", bulletList <$> listenv "itemize" (many item)) + , ("description", definitionList <$> listenv "description" (many descItem)) + , ("enumerate", orderedList') + , ("alltt", alltt <$> env "alltt" blocks) + , ("code", guardEnabled Ext_literate_haskell *> + (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + verbEnv "code")) + , ("comment", mempty <$ verbEnv "comment") + , ("verbatim", codeBlock <$> verbEnv "verbatim") + , ("Verbatim", fancyverbEnv "Verbatim") + , ("BVerbatim", fancyverbEnv "BVerbatim") + , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals + codeBlockWith attr <$> verbEnv "lstlisting") + , ("minted", minted) + , ("obeylines", obeylines) + , ("displaymath", mathEnvWith para Nothing "displaymath") + , ("equation", mathEnvWith para Nothing "equation") + , ("equation*", mathEnvWith para Nothing "equation*") + , ("gather", mathEnvWith para (Just "gathered") "gather") + , ("gather*", mathEnvWith para (Just "gathered") "gather*") + , ("multline", mathEnvWith para (Just "gathered") "multline") + , ("multline*", mathEnvWith para (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") + , ("align", mathEnvWith para (Just "aligned") "align") + , ("align*", mathEnvWith para (Just "aligned") "align*") + , ("alignat", mathEnvWith para (Just "aligned") "alignat") + , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") + , ("tikzpicture", rawVerbEnv "tikzpicture") + ] environment :: PandocMonad m => LP m Blocks environment = do controlSeq "begin" - name <- braced + name <- untokenize <$> braced M.findWithDefault mzero name environments <|> rawEnv name -inlineEnvironment :: PandocMonad m => LP m Inlines -inlineEnvironment = try $ do - controlSeq "begin" - name <- braced - M.findWithDefault mzero name inlineEnvironments +env :: PandocMonad m => Text -> LP m a -> LP m a +env name p = p <* end_ name -rawEnv :: PandocMonad m => String -> LP m Blocks +rawEnv :: PandocMonad m => Text -> LP m Blocks rawEnv name = do exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts rawOptions <- mconcat <$> many rawopt - let beginCommand = "\\begin{" ++ name ++ "}" ++ rawOptions + let beginCommand = "\\begin{" <> name <> "}" <> rawOptions pos1 <- getPosition (bs, raw) <- withRaw $ env name blocks - raw' <- applyMacros' $ beginCommand ++ raw - if raw' /= beginCommand ++ raw - then parseFromString' blocks raw' - else if parseRaw - then return $ rawBlock "latex" $ beginCommand ++ raw' - else do - unless parseRaw $ do - report $ SkippedContent beginCommand pos1 - pos2 <- getPosition - report $ SkippedContent ("\\end{" ++ name ++ "}") pos2 - return bs - -rawVerbEnv :: PandocMonad m => String -> LP m Blocks + if parseRaw + then return $ rawBlock "latex" + $ T.unpack $ beginCommand <> untokenize raw + else do + unless parseRaw $ do + report $ SkippedContent (T.unpack beginCommand) pos1 + pos2 <- getPosition + report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2 + return bs + +rawVerbEnv :: PandocMonad m => Text -> LP m Blocks rawVerbEnv name = do pos <- getPosition (_, raw) <- withRaw $ verbEnv name - let raw' = "\\begin{tikzpicture}" ++ raw + let raw' = "\\begin{tikzpicture}" ++ toksToString raw exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts if parseRaw @@ -1050,36 +1796,106 @@ rawVerbEnv name = do report $ SkippedContent raw' pos return mempty ----- +verbEnv :: PandocMonad m => Text -> LP m String +verbEnv name = withVerbatimMode $ do + skipopts + optional blankline + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ toksToString res + +fancyverbEnv :: PandocMonad m => Text -> LP m Blocks +fancyverbEnv name = do + options <- option [] keyvals + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + let classes = [ "numberLines" | + lookup "numbers" options == Just "left" ] + let attr = ("",classes,kvs) + codeBlockWith attr <$> verbEnv name + +obeylines :: PandocMonad m => LP m Blocks +obeylines = do + para . fromList . removeLeadingTrailingBreaks . + walk softBreakToHard . toList <$> env "obeylines" inlines + where softBreakToHard SoftBreak = LineBreak + softBreakToHard x = x + removeLeadingTrailingBreaks = reverse . dropWhile isLineBreak . + reverse . dropWhile isLineBreak + isLineBreak LineBreak = True + isLineBreak _ = False + +minted :: PandocMonad m => LP m Blocks +minted = do + options <- option [] keyvals + lang <- toksToString <$> braced + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + let classes = [ lang | not (null lang) ] ++ + [ "numberLines" | + lookup "linenos" options == Just "true" ] + let attr = ("",classes,kvs) + codeBlockWith attr <$> verbEnv "minted" + +letterContents :: PandocMonad m => LP m Blocks +letterContents = do + bs <- blocks + st <- getState + -- add signature (author) and address (title) + let addr = case lookupMeta "address" (sMeta st) of + Just (MetaBlocks [Plain xs]) -> + para $ trimInlines $ fromList xs + _ -> mempty + return $ addr <> bs -- sig added by \closing + +figure :: PandocMonad m => LP m Blocks +figure = try $ do + resetCaption + blocks >>= addImageCaption + +addImageCaption :: PandocMonad m => Blocks -> LP m Blocks +addImageCaption = walkM go + where go (Image attr alt (src,tit)) + | not ("fig:" `isPrefixOf` tit) = do + mbcapt <- sCaption <$> getState + return $ case mbcapt of + Just ils -> Image attr (toList ils) (src, "fig:" ++ tit) + Nothing -> Image attr alt (src,tit) + go x = return x -maybeAddExtension :: String -> FilePath -> FilePath -maybeAddExtension ext fp = - if null (takeExtension fp) - then addExtension fp ext - else fp +graphicsPath :: PandocMonad m => LP m Blocks +graphicsPath = do + ps <- map toksToString <$> (bgroup *> manyTill braced egroup) + getResourcePath >>= setResourcePath . (++ ps) + return mempty -include :: PandocMonad m => LP m Blocks -include = do - fs' <- try $ do - char '\\' - name <- try (string "include") - <|> try (string "input") - <|> try (string "subfile") - <|> string "usepackage" - -- skip options - skipMany $ try $ char '[' *> manyTill anyChar (char ']') - fs <- (map trim . splitBy (==',')) <$> braced - return $ if name == "usepackage" - then map (maybeAddExtension ".sty") fs - else map (maybeAddExtension ".tex") fs - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mconcat <$> mapM (insertIncludedFile blocks dirs) fs' +splitBibs :: String -> [Inlines] +splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') + +alltt :: Blocks -> Blocks +alltt = walk strToCode + where strToCode (Str s) = Code nullAttr s + strToCode Space = RawInline (Format "latex") "\\ " + strToCode SoftBreak = LineBreak + strToCode x = x + +parseListingsOptions :: [(String, String)] -> Attr +parseListingsOptions options = + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + classes = [ "numberLines" | + lookup "numbers" options == Just "left" ] + ++ maybeToList (lookup "language" options + >>= fromListingsLanguage) + in (fromMaybe "" (lookup "label" options), classes, kvs) inputListing :: PandocMonad m => LP m Blocks inputListing = do pos <- getPosition options <- option [] keyvals - f <- filter (/='"') <$> braced + f <- filter (/='"') . toksToString <$> braced dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" mbCode <- readFileFromDirs dirs f codeLines <- case mbCode of @@ -1098,169 +1914,10 @@ inputListing = do drop (firstline - 1) codeLines return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents -parseListingsOptions :: [(String, String)] -> Attr -parseListingsOptions options = - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - classes = [ "numberLines" | - lookup "numbers" options == Just "left" ] - ++ maybeToList (lookup "language" options - >>= fromListingsLanguage) - in (fromMaybe "" (lookup "label" options), classes, kvs) - ----- - -keyval :: PandocMonad m => LP m (String, String) -keyval = try $ do - key <- many1 alphaNum - val <- option "" $ char '=' >> braced <|> (many1 (alphaNum <|> oneOf ".:-|\\")) - skipMany spaceChar - optional (char ',') - skipMany spaceChar - return (key, val) - - -keyvals :: PandocMonad m => LP m [(String, String)] -keyvals = try $ char '[' *> manyTill keyval (char ']') - -alltt :: PandocMonad m => String -> LP m Blocks -alltt t = walk strToCode <$> parseFromString' blocks - (substitute " " "\\ " $ substitute "%" "\\%" $ - intercalate "\\\\\n" $ lines t) - where strToCode (Str s) = Code nullAttr s - strToCode x = x - -rawLaTeXBlock :: PandocMonad m => LP m String -rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) - -rawLaTeXInline :: PandocMonad m => LP m Inline -rawLaTeXInline = do - raw <- (snd <$> withRaw inlineCommand) - <|> (snd <$> withRaw inlineEnvironment) - <|> (snd <$> withRaw blockCommand) - RawInline "latex" <$> applyMacros' raw - -addImageCaption :: PandocMonad m => Blocks -> LP m Blocks -addImageCaption = walkM go - where go (Image attr alt (src,tit)) - | not ("fig:" `isPrefixOf` tit) = do - mbcapt <- stateCaption <$> getState - return $ case mbcapt of - Just ils -> Image attr (toList ils) (src, "fig:" ++ tit) - Nothing -> Image attr alt (src,tit) - go x = return x - -addTableCaption :: PandocMonad m => Blocks -> LP m Blocks -addTableCaption = walkM go - where go (Table c als ws hs rs) = do - mbcapt <- stateCaption <$> getState - return $ case mbcapt of - Just ils -> Table (toList ils) als ws hs rs - Nothing -> Table c als ws hs rs - go x = return x - -environments :: PandocMonad m => M.Map String (LP m Blocks) -environments = M.fromList - [ ("document", env "document" blocks <* skipMany anyChar) - , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) - , ("letter", env "letter" letterContents) - , ("minipage", env "minipage" $ - skipopts *> spaces' *> optional braced *> spaces' *> blocks) - , ("figure", env "figure" $ skipopts *> figure) - , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) - , ("center", env "center" blocks) - , ("longtable", env "longtable" $ - resetCaption *> simpTable "longtable" False >>= addTableCaption) - , ("table", env "table" $ - resetCaption *> skipopts *> blocks >>= addTableCaption) - , ("tabular*", env "tabular" $ simpTable "tabular*" True) - , ("tabularx", env "tabularx" $ simpTable "tabularx" True) - , ("tabular", env "tabular" $ simpTable "tabular" False) - , ("quote", blockQuote <$> env "quote" blocks) - , ("quotation", blockQuote <$> env "quotation" blocks) - , ("verse", blockQuote <$> env "verse" blocks) - , ("itemize", bulletList <$> listenv "itemize" (many item)) - , ("description", definitionList <$> listenv "description" (many descItem)) - , ("enumerate", orderedList') - , ("alltt", alltt =<< verbEnv "alltt") - , ("code", guardEnabled Ext_literate_haskell *> - (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> - verbEnv "code")) - , ("comment", mempty <$ verbEnv "comment") - , ("verbatim", codeBlock <$> verbEnv "verbatim") - , ("Verbatim", fancyverbEnv "Verbatim") - , ("BVerbatim", fancyverbEnv "BVerbatim") - , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals - codeBlockWith attr <$> verbEnv "lstlisting") - , ("minted", do options <- option [] keyvals - lang <- grouped (many1 $ satisfy (/='}')) - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - let classes = [ lang | not (null lang) ] ++ - [ "numberLines" | - lookup "linenos" options == Just "true" ] - let attr = ("",classes,kvs) - codeBlockWith attr <$> verbEnv "minted") - , ("obeylines", parseFromString - (para . trimInlines . mconcat <$> many inline) =<< - intercalate "\\\\\n" . lines <$> verbEnv "obeylines") - , ("displaymath", mathEnvWith para Nothing "displaymath") - , ("equation", mathEnvWith para Nothing "equation") - , ("equation*", mathEnvWith para Nothing "equation*") - , ("gather", mathEnvWith para (Just "gathered") "gather") - , ("gather*", mathEnvWith para (Just "gathered") "gather*") - , ("multline", mathEnvWith para (Just "gathered") "multline") - , ("multline*", mathEnvWith para (Just "gathered") "multline*") - , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") - , ("align", mathEnvWith para (Just "aligned") "align") - , ("align*", mathEnvWith para (Just "aligned") "align*") - , ("alignat", mathEnvWith para (Just "aligned") "alignat") - , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") - , ("tikzpicture", rawVerbEnv "tikzpicture") - ] - -figure :: PandocMonad m => LP m Blocks -figure = try $ do - resetCaption - blocks >>= addImageCaption - -letterContents :: PandocMonad m => LP m Blocks -letterContents = do - bs <- blocks - st <- getState - -- add signature (author) and address (title) - let addr = case lookupMeta "address" (stateMeta st) of - Just (MetaBlocks [Plain xs]) -> - para $ trimInlines $ fromList xs - _ -> mempty - return $ addr <> bs -- sig added by \closing - -closing :: PandocMonad m => LP m Blocks -closing = do - contents <- tok - st <- getState - let extractInlines (MetaBlocks [Plain ys]) = ys - extractInlines (MetaBlocks [Para ys ]) = ys - extractInlines _ = [] - let sigs = case lookupMeta "author" (stateMeta st) of - Just (MetaList xs) -> - para $ trimInlines $ fromList $ - intercalate [LineBreak] $ map extractInlines xs - _ -> mempty - return $ para (trimInlines contents) <> sigs +-- lists item :: PandocMonad m => LP m Blocks -item = blocks *> controlSeq "item" *> skipopts *> blocks - -looseItem :: PandocMonad m => LP m Blocks -looseItem = do - ctx <- stateParserContext `fmap` getState - if ctx == ListItemState - then mzero - else return mempty +item = void blocks *> controlSeq "item" *> skipopts *> blocks descItem :: PandocMonad m => LP m (Inlines, [Blocks]) descItem = do @@ -1271,302 +1928,210 @@ descItem = do bs <- blocks return (ils, [bs]) -env :: PandocMonad m => String -> LP m a -> LP m a -env name p = p <* - (try (controlSeq "end" *> braced >>= guard . (== name)) - ("\\end{" ++ name ++ "}")) - -listenv :: PandocMonad m => String -> LP m a -> LP m a +listenv :: PandocMonad m => Text -> LP m a -> LP m a listenv name p = try $ do - oldCtx <- stateParserContext `fmap` getState - updateState $ \st -> st{ stateParserContext = ListItemState } + oldInListItem <- sInListItem `fmap` getState + updateState $ \st -> st{ sInListItem = True } res <- env name p - updateState $ \st -> st{ stateParserContext = oldCtx } + updateState $ \st -> st{ sInListItem = oldInListItem } return res -mathEnvWith :: PandocMonad m - => (Inlines -> a) -> Maybe String -> String -> LP m a -mathEnvWith f innerEnv name = f <$> mathDisplay (inner <$> mathEnv name) - where inner x = case innerEnv of - Nothing -> x - Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ - "\\end{" ++ y ++ "}" - -mathEnv :: PandocMonad m => String -> LP m String -mathEnv name = do - skipopts - optional blankline - let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) - charMuncher = skipMany comment *> - (many1 (noneOf "\\%") <|> try (string "\\%") - <|> try (string "\\\\") <|> count 1 anyChar) - res <- concat <$> manyTill charMuncher endEnv - return $ stripTrailingNewlines res - -verbEnv :: PandocMonad m => String -> LP m String -verbEnv name = do - skipopts - optional blankline - let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) - charMuncher = anyChar - res <- manyTill charMuncher endEnv - return $ stripTrailingNewlines res - -fancyverbEnv :: PandocMonad m => String -> LP m Blocks -fancyverbEnv name = do - options <- option [] keyvals - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - let classes = [ "numberLines" | - lookup "numbers" options == Just "left" ] - let attr = ("",classes,kvs) - codeBlockWith attr <$> verbEnv name - orderedList' :: PandocMonad m => LP m Blocks orderedList' = try $ do - optional sp - (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ - try $ char '[' *> anyOrderedListMarker <* char ']' spaces - optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced + let markerSpec = do + symbol '[' + ts <- toksToString <$> manyTill anyTok (symbol ']') + case runParser anyOrderedListMarker def "option" ts of + Right r -> return r + Left _ -> do + pos <- getPosition + report $ SkippedContent ("[" ++ ts ++ "]") pos + return (1, DefaultStyle, DefaultDelim) + (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec spaces - start <- option 1 $ try $ do controlSeq "setcounter" - grouped (string "enum" *> many1 (oneOf "iv")) + optional $ try $ controlSeq "setlength" + *> grouped (count 1 $ controlSeq "itemindent") + *> braced + spaces + start <- option 1 $ try $ do pos <- getPosition + controlSeq "setcounter" + ctr <- toksToString <$> braced + guard $ "enum" `isPrefixOf` ctr + guard $ all (`elem` ['i','v']) (drop 4 ctr) optional sp - num <- grouped (many1 digit) - spaces - return (read num + 1 :: Int) + num <- toksToString <$> braced + case safeRead num of + Just i -> return (i + 1 :: Int) + Nothing -> do + report $ SkippedContent + ("\\setcounter{" ++ ctr ++ + "}{" ++ num ++ "}") pos + return 1 bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs -paragraph :: PandocMonad m => LP m Blocks -paragraph = do - x <- trimInlines . mconcat <$> many1 inline - if x == mempty - then return mempty - else return $ para x - -preamble :: PandocMonad m => LP m Blocks -preamble = mempty <$> manyTill preambleBlock beginDoc - where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" - preambleBlock = void comment - <|> void sp - <|> void blanklines - <|> void include - <|> void macro - <|> void blockCommand - <|> void anyControlSeq - <|> void braced - <|> void anyChar - -------- - --- citations - -addPrefix :: [Inline] -> [Citation] -> [Citation] -addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks -addPrefix _ _ = [] - -addSuffix :: [Inline] -> [Citation] -> [Citation] -addSuffix s ks@(_:_) = - let k = last ks - in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] -addSuffix _ _ = [] - -simpleCiteArgs :: PandocMonad m => LP m [Citation] -simpleCiteArgs = try $ do - first <- optionMaybe $ toList <$> opt - second <- optionMaybe $ toList <$> opt - keys <- try $ bgroup *> (manyTill citationLabel egroup) - let (pre, suf) = case (first , second ) of - (Just s , Nothing) -> (mempty, s ) - (Just s , Just t ) -> (s , t ) - _ -> (mempty, mempty) - conv k = Citation { citationId = k - , citationPrefix = [] - , citationSuffix = [] - , citationMode = NormalCitation - , citationHash = 0 - , citationNoteNum = 0 - } - return $ addPrefix pre $ addSuffix suf $ map conv keys - -citationLabel :: PandocMonad m => LP m String -citationLabel = optional sp *> - (many1 (satisfy isBibtexKeyChar) - <* optional sp - <* optional (char ',') - <* optional sp) - where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String) +-- tables -cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] -cites mode multi = try $ do - cits <- if multi - then many1 simpleCiteArgs - else count 1 simpleCiteArgs - let cs = concat cits - return $ case mode of - AuthorInText -> case cs of - (c:rest) -> c {citationMode = mode} : rest - [] -> [] - _ -> map (\a -> a {citationMode = mode}) cs +hline :: PandocMonad m => LP m () +hline = try $ do + spaces + controlSeq "hline" <|> + -- booktabs rules: + controlSeq "toprule" <|> + controlSeq "bottomrule" <|> + controlSeq "midrule" <|> + controlSeq "endhead" <|> + controlSeq "endfirsthead" + spaces + optional $ bracketed inline + return () -citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines -citation name mode multi = do - (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw) +lbreak :: PandocMonad m => LP m Tok +lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") <* spaces -complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines -complexNatbibCitation mode = try $ do - let ils = (toList . trimInlines . mconcat) <$> - many (notFollowedBy (oneOf "\\};") >> inline) - let parseOne = try $ do - skipSpaces - pref <- ils - cit' <- inline -- expect a citation - let citlist = toList cit' - cits' <- case citlist of - [Cite cs _] -> return cs - _ -> mzero - suff <- ils - skipSpaces - optional $ char ';' - return $ addPrefix pref $ addSuffix suff cits' - (c:cits, raw) <- withRaw $ grouped parseOne - return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" ++ raw) +amp :: PandocMonad m => LP m Tok +amp = symbol '&' --- tables +-- Split a Word into individual Symbols (for parseAligns) +splitWordTok :: PandocMonad m => LP m () +splitWordTok = do + inp <- getInput + case inp of + (Tok spos Word t : rest) -> do + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest + _ -> return () -parseAligns :: PandocMonad m => LP m [(Alignment, Double, (String, String))] +parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))] parseAligns = try $ do - bgroup - let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) - maybeBar - let cAlign = AlignCenter <$ char 'c' - let lAlign = AlignLeft <$ char 'l' - let rAlign = AlignRight <$ char 'r' - let parAlign = AlignLeft <$ char 'p' - -- algins from tabularx - let xAlign = AlignLeft <$ char 'X' - let mAlign = AlignLeft <$ char 'm' - let bAlign = AlignLeft <$ char 'b' - let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign - <|> xAlign <|> mAlign <|> bAlign - let alignPrefix = char '>' >> braced - let alignSuffix = char '<' >> braced + let maybeBar = skipMany $ + sp <|> () <$ symbol '|' <|> () <$ (symbol '@' >> braced) + let cAlign = AlignCenter <$ symbol 'c' + let lAlign = AlignLeft <$ symbol 'l' + let rAlign = AlignRight <$ symbol 'r' + let parAlign = AlignLeft <$ symbol 'p' + -- aligns from tabularx + let xAlign = AlignLeft <$ symbol 'X' + let mAlign = AlignLeft <$ symbol 'm' + let bAlign = AlignLeft <$ symbol 'b' + let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign + <|> xAlign <|> mAlign <|> bAlign ) + let alignPrefix = symbol '>' >> braced + let alignSuffix = symbol '<' >> braced let colWidth = try $ do - char '{' - ds <- many1 (oneOf "0123456789.") + symbol '{' + ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth") spaces - string "\\linewidth" - char '}' + symbol '}' case safeRead ds of Just w -> return w Nothing -> return 0.0 - let alignSpec = do + let alignSpec = try $ do spaces - pref <- option "" alignPrefix + pref <- option [] alignPrefix spaces al <- alignChar - width <- colWidth <|> option 0.0 (do s <- braced + width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced pos <- getPosition report $ SkippedContent s pos return 0.0) spaces - suff <- option "" alignSuffix + suff <- option [] alignSuffix return (al, width, (pref, suff)) - aligns' <- sepEndBy alignSpec maybeBar + bgroup + spaces + maybeBar + aligns' <- many (alignSpec <* maybeBar) spaces egroup spaces - return $ aligns' - -hline :: PandocMonad m => LP m () -hline = try $ do - spaces' - controlSeq "hline" <|> - -- booktabs rules: - controlSeq "toprule" <|> - controlSeq "bottomrule" <|> - controlSeq "midrule" <|> - controlSeq "endhead" <|> - controlSeq "endfirsthead" - spaces' - optional $ bracketed (many1 (satisfy (/=']'))) - return () - -lbreak :: PandocMonad m => LP m () -lbreak = () <$ try (spaces' *> - (controlSeq "\\" <|> controlSeq "tabularnewline") <* - spaces') - -amp :: PandocMonad m => LP m () -amp = () <$ try (spaces' *> char '&' <* spaces') + return aligns' parseTableRow :: PandocMonad m - => String -- ^ table environment name - -> [(String, String)] -- ^ pref/suffixes + => Text -- ^ table environment name + -> [([Tok], [Tok])] -- ^ pref/suffixes -> LP m [Blocks] -parseTableRow envname prefsufs = try $ do +parseTableRow envname prefsufs = do + notFollowedBy (spaces *> end_ envname) let cols = length prefsufs - let tableCellRaw = concat <$> many - (do notFollowedBy amp - notFollowedBy lbreak - notFollowedBy $ () <$ try (string ("\\end{" ++ envname ++ "}")) - many1 (noneOf "&%\n\r\\") - <|> try (string "\\&") - <|> count 1 anyChar) - let plainify bs = case toList bs of - [Para ils] -> plain (fromList ils) - _ -> bs - rawcells <- sepBy1 tableCellRaw amp - guard $ length rawcells == cols - let rawcells' = zipWith (\c (p, s) -> p ++ trim c ++ s) rawcells prefsufs - let tableCell = plainify <$> blocks - cells' <- mapM (parseFromString' tableCell) rawcells' - let numcells = length cells' + -- add prefixes and suffixes in token stream: + let celltoks (pref, suff) = do + prefpos <- getPosition + contents <- many (notFollowedBy + (() <$ amp <|> () <$ lbreak <|> end_ envname) + >> anyTok) + suffpos <- getPosition + option [] (count 1 amp) + return $ map (setpos (sourceLine prefpos, sourceColumn prefpos)) pref + ++ contents ++ + map (setpos (sourceLine suffpos, sourceColumn suffpos)) suff + rawcells <- sequence (map celltoks prefsufs) + oldInput <- getInput + cells <- sequence $ map (\ts -> setInput ts >> parseTableCell) rawcells + setInput oldInput + spaces + let numcells = length cells guard $ numcells <= cols && numcells >= 1 - guard $ cells' /= [mempty] + guard $ cells /= [mempty] -- note: a & b in a three-column table leaves an empty 3rd cell: - let cells'' = cells' ++ replicate (cols - numcells) mempty - spaces' - return cells'' + return $ cells ++ replicate (cols - numcells) mempty -spaces' :: PandocMonad m => LP m () -spaces' = spaces *> skipMany (comment *> spaces) +parseTableCell :: PandocMonad m => LP m Blocks +parseTableCell = do + let plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs + updateState $ \st -> st{ sInTableCell = True } + cells <- plainify <$> blocks + updateState $ \st -> st{ sInTableCell = False } + return cells -simpTable :: PandocMonad m => String -> Bool -> LP m Blocks +simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks simpTable envname hasWidthParameter = try $ do - when hasWidthParameter $ () <$ (spaces' >> tok) + when hasWidthParameter $ () <$ (spaces >> tok) skipopts colspecs <- parseAligns let (aligns, widths, prefsufs) = unzip3 colspecs let cols = length colspecs optional $ controlSeq "caption" *> skipopts *> setCaption optional lbreak - spaces' + spaces skipMany hline - spaces' + spaces header' <- option [] $ try (parseTableRow envname prefsufs <* lbreak <* many1 hline) - spaces' + spaces rows <- sepEndBy (parseTableRow envname prefsufs) (lbreak <* optional (skipMany hline)) - spaces' + spaces optional $ controlSeq "caption" *> skipopts *> setCaption optional lbreak - spaces' + spaces let header'' = if null header' then replicate cols mempty else header' lookAhead $ controlSeq "end" -- make sure we're at end return $ table mempty (zip aligns widths) header'' rows -removeDoubleQuotes :: String -> String -removeDoubleQuotes ('"':xs) = - case reverse xs of - '"':ys -> reverse ys - _ -> '"':xs -removeDoubleQuotes xs = xs +addTableCaption :: PandocMonad m => Blocks -> LP m Blocks +addTableCaption = walkM go + where go (Table c als ws hs rs) = do + mbcapt <- sCaption <$> getState + return $ case mbcapt of + Just ils -> Table (toList ils) als ws hs rs + Nothing -> Table c als ws hs rs + go x = return x + + +block :: PandocMonad m => LP m Blocks +block = (mempty <$ spaces1) + <|> environment + <|> include + <|> macroDef + <|> paragraph + <|> blockCommand + <|> grouped block + +blocks :: PandocMonad m => LP m Blocks +blocks = mconcat <$> many block + diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs new file mode 100644 index 000000000..6f84ae1f1 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -0,0 +1,48 @@ +{- +Copyright (C) 2017 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.LaTeX.Types + Copyright : Copyright (C) 2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Types for LaTeX tokens and macros. +-} +module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) + , TokType(..) + , Macro(..) + , Line + , Column ) +where +import Data.Text (Text) +import Text.Parsec.Pos (Line, Column) + +data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | + Esc1 | Esc2 | Arg Int + deriving (Eq, Ord, Show) + +data Tok = Tok (Line, Column) TokType Text + deriving (Eq, Ord, Show) + +data Macro = Macro Int (Maybe [Tok]) [Tok] + deriving Show + diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index c2342b9f3..ab6a32b78 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -61,7 +61,8 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, isCommentTag, isInlineTag, isTextTag) -import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) +import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline, applyMacros, + macro) import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) @@ -1105,10 +1106,11 @@ latexMacro = try $ do rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "latex" . concat <$> - rawLaTeXBlock `sepEndBy1` blankline) - <|> (B.rawBlock "context" . concat <$> + result <- (B.rawBlock "context" . concat <$> rawConTeXtEnvironment `sepEndBy1` blankline) + <|> (B.rawBlock "latex" . concat <$> + rawLaTeXBlock `sepEndBy1` blankline) + spaces return $ return result @@ -1553,8 +1555,8 @@ code = try $ do Right attr -> B.codeWith attr result math :: PandocMonad m => MarkdownParser m (F Inlines) -math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> +math = (return . B.displayMath <$> (mathDisplay >>= applyMacros)) + <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?> (guardEnabled Ext_smart *> (return <$> apostrophe) <* notFollowedBy (space <|> satisfy isPunctuation)) @@ -1878,9 +1880,8 @@ rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex lookAhead (char '\\') notFollowedBy' rawConTeXtEnvironment - RawInline _ s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s - -- "tex" because it might be context or latex + s <- rawLaTeXInline + return $ return $ B.rawInline "tex" s -- "tex" because it might be context rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String rawConTeXtEnvironment = try $ do diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index a51306347..1ae73c148 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -58,7 +58,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter) -import Text.Pandoc.Parsing hiding (macro, nested) +import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag) import Text.Pandoc.XML (fromEntities) import System.FilePath (takeExtension) diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 66273e05d..42fdfd4dd 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -826,9 +826,10 @@ maybeRight = either (const Nothing) Just inlineLaTeXCommand :: PandocMonad m => OrgParser m String inlineLaTeXCommand = try $ do rest <- getInput - parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest + st <- getState + parsed <- (lift . lift) $ runParserT rawLaTeXInline st "source" rest case parsed of - Right (RawInline _ cs) -> do + Right cs -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. let cmdNoSpc = dropWhileEnd isSpace cs diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 92f868516..fc98213fb 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState , OrgNoteRecord , HasReaderOptions (..) , HasQuoteContext (..) + , HasMacros (..) , TodoMarker (..) , TodoSequence , TodoState (..) @@ -57,14 +58,17 @@ import Control.Monad.Reader (ReaderT, asks, local) import Data.Default (Default (..)) import qualified Data.Map as M import qualified Data.Set as Set +import Data.Text (Text) import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Definition (Meta (..), nullMeta) import Text.Pandoc.Logging import Text.Pandoc.Options (ReaderOptions (..)) +import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..), HasIncludeFiles (..), HasLastStrPosition (..), HasLogMessages (..), HasQuoteContext (..), + HasMacros (..), HasReaderOptions (..), ParserContext (..), QuoteContext (..), SourcePos, askF, asksF, returnF, runF, trimInlinesF) @@ -118,6 +122,7 @@ data OrgParserState = OrgParserState , orgStateParserContext :: ParserContext , orgStateTodoSequences :: [TodoSequence] , orgLogMessages :: [LogMessage] + , orgMacros :: M.Map Text Macro } data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } @@ -148,6 +153,10 @@ instance HasLogMessages OrgParserState where addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st } getLogMessages st = reverse $ orgLogMessages st +instance HasMacros OrgParserState where + extractMacros st = orgMacros st + updateMacros f st = st{ orgMacros = f (orgMacros st) } + instance HasIncludeFiles OrgParserState where getIncludeFiles = orgStateIncludeFiles addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st } @@ -178,6 +187,7 @@ defaultOrgParserState = OrgParserState , orgStateParserContext = NullState , orgStateTodoSequences = [] , orgLogMessages = [] + , orgMacros = M.empty } optionsToParserState :: ReaderOptions -> OrgParserState diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 210d3e5aa..d41152de5 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -45,7 +45,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (enclosed, macro, nested) +import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Shared (crFilter) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index a80d75340..853d2768f 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -573,7 +573,7 @@ rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex - B.singleton <$> rawLaTeXInline + B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 5708358f6..f000646c2 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -40,7 +40,7 @@ import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (macro, space, spaces, uri) +import Text.Pandoc.Parsing hiding (space, spaces, uri) import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI, crFilter) import Control.Monad (guard, void, when) import Control.Monad.Reader (Reader, asks, runReader) diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index afac9e8cb..f2be6de5f 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -58,7 +58,8 @@ tests = [ testGroup "basic" , "blank lines + space + comments" =: "% my comment\n\n \n % another\n\nhi" =?> para "hi" , "comment in paragraph" =: - "hi % this is a comment\nthere\n" =?> para "hi there" + "hi % this is a comment\nthere\n" =?> + para ("hi" <> softbreak <> "there") ] , testGroup "code blocks" diff --git a/test/command/1390.md b/test/command/1390.md new file mode 100644 index 000000000..ffd2cef8d --- /dev/null +++ b/test/command/1390.md @@ -0,0 +1,20 @@ +``` +% pandoc -f latex -t native +\newcommand\foo{+} +Testing: $\mu\foo\eta$. +^D +[Para [Str "Testing:",Space,Math InlineMath "\\mu+\\eta",Str "."]] +``` + + + diff --git a/test/command/2118.md b/test/command/2118.md new file mode 100644 index 000000000..d640e2e2b --- /dev/null +++ b/test/command/2118.md @@ -0,0 +1,11 @@ +``` +% pandoc -f latex -t native +\newcommand{\inclgraph}{\includegraphics[width=0.8\textwidth]} +\begin{figure}[ht] + \inclgraph{setminus.png} + \caption{Set subtraction} + \label{fig:setminus} +\end{figure} +^D +[Para [Image ("",[],[("width","80%")]) [Str "Set",Space,Str "subtraction",Span ("",[],[("data-label","fig:setminus")]) []] ("setminus.png","fig:")]] +``` diff --git a/test/command/3113.md b/test/command/3113.md index f44e25709..5ca171d97 100644 --- a/test/command/3113.md +++ b/test/command/3113.md @@ -8,6 +8,6 @@ C&=&D,\\ E&=&F \end{eqnarray} ^D -[Para [Math DisplayMath "\\begin{aligned}\nA&=&B,\\\\\nC&=&D,\\\\\nE&=&F\\end{aligned}"]] +[Para [Math DisplayMath "\\begin{aligned}\nA&=&B,\\\\\nC&=&D,\\\\\n%\\end{eqnarray}\n%\\begin{eqnarray}\nE&=&F\\end{aligned}"]] ``` diff --git a/test/command/3236.md b/test/command/3236.md new file mode 100644 index 000000000..1d1a9b2c3 --- /dev/null +++ b/test/command/3236.md @@ -0,0 +1,9 @@ +``` +pandoc -f latex -t native +\newcommand{\mycolor}{red} + +\includegraphics[width=17cm]{\mycolor /header} +Magnificent \mycolor{} header. +^D +[Para [Image ("",[],[("width","17cm")]) [Str "image"] ("red/header",""),SoftBreak,Str "Magnificent",Space,Str "red",Space,Str "header."]] +``` diff --git a/test/command/3558.md b/test/command/3558.md index 3f4079064..795858b78 100644 --- a/test/command/3558.md +++ b/test/command/3558.md @@ -1,8 +1,12 @@ ``` % pandoc -t native -\startmulti +\multi + hello + \endmulti ^D -[Para [RawInline (Format "tex") "\\startmulti\n",Str "hello",SoftBreak,RawInline (Format "tex") "\\endmulti"]] +[RawBlock (Format "latex") "\\multi" +,Para [Str "hello"] +,RawBlock (Format "latex") "\\endmulti"] ``` diff --git a/test/command/3779.md b/test/command/3779.md new file mode 100644 index 000000000..1097123f0 --- /dev/null +++ b/test/command/3779.md @@ -0,0 +1,28 @@ +``` +% pandoc -f latex -t native +\newcommand{\fakeitemize}[1]{ + \begin{itemize} + #1 + \end{itemize} +} +\newcommand{\testcmd}[1]{ + #1 +} +\fakeitemize{ + \item Pandoc is 100\% awesome. +} + +\begin{itemize} + \item Pandoc is 200\% awesome. +\end{itemize} + +\testcmd{ + Pandoc is 300\% awesome. +} +^D +[BulletList + [[Para [Str "Pandoc",Space,Str "is",Space,Str "100%",Space,Str "awesome."]]] +,BulletList + [[Para [Str "Pandoc",Space,Str "is",Space,Str "200%",Space,Str "awesome."]]] +,Para [Str "Pandoc",Space,Str "is",Space,Str "300%",Space,Str "awesome."]] +``` diff --git a/test/command/934.md b/test/command/934.md new file mode 100644 index 000000000..ef99abdf9 --- /dev/null +++ b/test/command/934.md @@ -0,0 +1,12 @@ +``` +% pandoc -f latex -t native +\newcommand{\ddb}[2]{ + \textit{``#1''} + + \textbf{#2} +} +\ddb{This should be italic and in quotes}{And this is the attribution} +^D +[Para [Emph [Quoted DoubleQuote [Str "This",Space,Str "should",Space,Str "be",Space,Str "italic",Space,Str "and",Space,Str "in",Space,Str "quotes"]]] +,Para [Strong [Str "And",Space,Str "this",Space,Str "is",Space,Str "the",Space,Str "attribution"]]] +``` diff --git a/test/command/982.md b/test/command/982.md new file mode 100644 index 000000000..5f54f7713 --- /dev/null +++ b/test/command/982.md @@ -0,0 +1,11 @@ +``` +% pandoc -f latex -t native +\newcommand{\BEQ}{\begin{equation}} +\newcommand{\EEQ}{\end{equation}} + +\BEQ +y=x^2 +\EEQ +^D +[Para [Math DisplayMath "y=x^2"]] +``` diff --git a/test/latex-reader.latex b/test/latex-reader.latex index 2ebdfed99..7cbcc9672 100644 --- a/test/latex-reader.latex +++ b/test/latex-reader.latex @@ -4,7 +4,6 @@ \setlength{\parindent}{0pt} \setlength{\parskip}{6pt plus 2pt minus 1pt} -\newcommand{\textsubscript}[1]{\ensuremath{_{\scriptsize\textrm{#1}}}} \usepackage[breaklinks=true,unicode=true]{hyperref} \usepackage[normalem]{ulem} % avoid problems with \sout in headers with hyperref: diff --git a/test/latex-reader.native b/test/latex-reader.native index d481a714d..04be2538e 100644 --- a/test/latex-reader.native +++ b/test/latex-reader.native @@ -261,7 +261,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [RawInline (Format "latex") "\\cite[22-23]{smith.1899}"]]] - ,[Para [RawInline (Format "latex") "\\doublespacing\n"]] + ,[Para [RawInline (Format "latex") "\\doublespacing"]] ,[Para [Math InlineMath "2+2=4"]] ,[Para [Math InlineMath "x \\in y"]] ,[Para [Math InlineMath "\\alpha \\wedge \\omega"]] diff --git a/test/markdown-reader-more.native b/test/markdown-reader-more.native index 1007dbac7..5d63a21de 100644 --- a/test/markdown-reader-more.native +++ b/test/markdown-reader-more.native @@ -3,7 +3,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S ,Header 2 ("blank-line-before-url-in-link-reference",[],[]) [Str "Blank",Space,Str "line",Space,Str "before",Space,Str "URL",Space,Str "in",Space,Str "link",Space,Str "reference"] ,Para [Link ("",[],[]) [Str "foo"] ("/url",""),Space,Str "and",Space,Link ("",[],[]) [Str "bar"] ("/url","title")] ,Header 2 ("raw-context-environments",[],[]) [Str "Raw",Space,Str "ConTeXt",Space,Str "environments"] -,Plain [RawInline (Format "tex") "\\placeformula "] +,RawBlock (Format "latex") "\\placeformula " ,RawBlock (Format "context") "\\startformula\n L_{1} = L_{2}\n \\stopformula" ,RawBlock (Format "context") "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]" ,Header 2 ("raw-latex-environments",[],[]) [Str "Raw",Space,Str "LaTeX",Space,Str "environments"] @@ -56,7 +56,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S ,OrderedList (3,Example,TwoParens) [[Plain [Str "Third",Space,Str "example."]]] ,Header 2 ("macros",[],[]) [Str "Macros"] -,Para [Math InlineMath "{\\langle x,y \\rangle}"] +,Para [Math InlineMath "\\langle x,y \\rangle"] ,Header 2 ("case-insensitive-references",[],[]) [Str "Case-insensitive",Space,Str "references"] ,Para [Link ("",[],[]) [Str "Fum"] ("/fum","")] ,Para [Link ("",[],[]) [Str "FUM"] ("/fum","")] -- cgit v1.2.3 From 6f6e83a06e9793d26cb622024098af39c14cb60a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 7 Jul 2017 11:41:28 +0200 Subject: Parsing: added takeP, takeWhileP for efficient parsing of [Char]. --- src/Text/Pandoc/Parsing.hs | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index f6263c782..549042d14 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -35,7 +35,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA A utility library with parsers used in pandoc readers. -} -module Text.Pandoc.Parsing ( anyLine, +module Text.Pandoc.Parsing ( takeWhileP, + takeP, + anyLine, anyLineNewline, indentWith, many1Till, @@ -191,7 +193,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.XML (fromEntities) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.Parsec hiding (token) -import Text.Parsec.Pos (newPos, initialPos) +import Text.Parsec.Pos (newPos, initialPos, updatePosString) import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isHexDigit, isSpace, isPunctuation ) import Data.List ( intercalate, transpose, isSuffixOf ) @@ -244,6 +246,35 @@ instance Monoid a => Monoid (Future s a) where mappend = liftM2 mappend mconcat = liftM mconcat . sequence +-- | Parse characters while a predicate is true. +takeWhileP :: Stream [Char] m Char + => (Char -> Bool) -> ParserT [Char] st m [Char] +takeWhileP f = do + -- faster than 'many (satisfy f)' + inp <- getInput + pos <- getPosition + let (xs, rest) = span f inp + -- needed to persuade parsec that this won't match an empty string: + anyChar + setInput rest + setPosition $ updatePosString pos xs + return xs + +-- Parse n characters of input (or the rest of the input if +-- there aren't n characters). +takeP :: Stream [Char] m Char => Int -> ParserT [Char] st m [Char] +takeP n = do + guard (n > 0) + -- faster than 'count n anyChar' + inp <- getInput + pos <- getPosition + let (xs, rest) = splitAt n inp + -- needed to persuade parsec that this won't match an empty string: + anyChar + setInput rest + setPosition $ updatePosString pos xs + return xs + -- | Parse any line of text anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char] anyLine = do -- cgit v1.2.3 From 770e0cccc1d028415cc9e180b08b396fb0bc379b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 7 Jul 2017 12:34:42 +0200 Subject: Use takeP in LaTeX reader. --- src/Text/Pandoc/Readers/LaTeX.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index d82e6a5dc..fde177f14 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -214,7 +214,7 @@ rawLaTeXBlock = do res <- runParserT rawblock lstate "source" toks case res of Left _ -> mzero - Right raw -> count (T.length (untokenize raw)) anyChar + Right raw -> takeP (T.length (untokenize raw)) macro :: (PandocMonad m, HasMacros s, HasReaderOptions s) => ParserT String s m Blocks @@ -236,7 +236,7 @@ macro = do Left _ -> mzero Right (raw, st) -> do updateState (updateMacros (const $ sMacros st)) - mempty <$ count (T.length (untokenize raw)) anyChar + mempty <$ takeP (T.length (untokenize raw)) applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) => String -> ParserT String s m String @@ -269,7 +269,7 @@ rawLaTeXInline = do Left _ -> mzero Right (raw, s) -> do updateState $ updateMacros (const $ sMacros s) - count (T.length (untokenize raw)) anyChar + takeP (T.length (untokenize raw)) inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines inlineCommand = do @@ -288,7 +288,7 @@ inlineCommand = do Left _ -> mzero Right (il, raw, s) -> do updateState $ updateMacros (const $ sMacros s) - count (T.length (untokenize raw)) anyChar + takeP (T.length (untokenize raw)) return il tokenize :: Text -> [Tok] -- cgit v1.2.3 From 41209ea6765e9898d7e15c4c945c06275b6c0420 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 11 Jul 2017 15:52:38 +0200 Subject: HTML reader: Ensure that paragraphs are closed properly... when the parent block element closes, even without `

`. Closes #3794. --- src/Text/Pandoc/Readers/HTML.hs | 2 ++ test/command/3794.md | 7 +++++++ 2 files changed, 9 insertions(+) create mode 100644 test/command/3794.md (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index b07b65019..734973e33 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -805,6 +805,8 @@ pCloses tagtype = try $ do (TagClose "dl") | tagtype == "dd" -> return () (TagClose "table") | tagtype == "td" -> return () (TagClose "table") | tagtype == "tr" -> return () + (TagClose t') | tagtype == "p" && t' `Set.member` blockHtmlTags + -> return () -- see #3794 _ -> mzero pTagText :: PandocMonad m => TagParser m Inlines diff --git a/test/command/3794.md b/test/command/3794.md new file mode 100644 index 000000000..b56e7b504 --- /dev/null +++ b/test/command/3794.md @@ -0,0 +1,7 @@ +``` +% pandoc -f html -t native +

hello

+^D +[Div ("",[],[]) + [Para [Str "hello"]]] +``` -- cgit v1.2.3 From 013fd1c6b68f2c061202d931f541aa4877ae543f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 12 Jul 2017 13:58:47 +0200 Subject: Make sure \write18 is parsed as raw LaTeX. The change is in the LaTeX reader's treatment of raw commands, but it also affects the Markdown reader. --- src/Text/Pandoc/Readers/LaTeX.hs | 16 +++++++++------- test/command/3494.md | 3 ++- test/command/3577.md | 2 ++ test/command/write18.md | 14 ++++++++++++++ test/latex-reader.native | 2 +- 5 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 test/command/write18.md (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index fde177f14..cd2c7c7f8 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1049,14 +1049,12 @@ inlineCommand' :: PandocMonad m => LP m Inlines inlineCommand' = try $ do Tok _ (CtrlSeq name) cmd <- anyControlSeq guard $ name /= "begin" && name /= "end" - (star, rawstar) <- withRaw $ option "" ("*" <$ symbol '*' <* optional sp) + star <- option "" ("*" <$ symbol '*' <* optional sp) let name' = name <> star let names = ordNub [name', name] -- check non-starred as fallback let raw = do guard $ isInlineCommand name || not (isBlockCommand name) - (_, rawargs) <- withRaw - (skipangles *> skipopts *> option "" dimenarg *> many braced) - let rawcommand = T.unpack $ cmd <> untokenize (rawstar ++ rawargs) + rawcommand <- getRawCommand (cmd <> star) (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand)) <|> ignore rawcommand lookupListDefault raw names inlineCommands @@ -1353,7 +1351,11 @@ rawInlineOr name' fallback = do getRawCommand :: PandocMonad m => Text -> LP m String getRawCommand txt = do (_, rawargs) <- withRaw - (many (try (optional sp *> opt)) *> + ((if txt == "\\write" + then () <$ satisfyTok isWordTok -- digits + else return ()) *> + skipangles *> + skipopts *> option "" (try (optional sp *> dimenarg)) *> many braced) return $ T.unpack (txt <> untokenize rawargs) @@ -1631,7 +1633,7 @@ blockCommand = try $ do let names = ordNub [name', name] let raw = do guard $ isBlockCommand name || not (isInlineCommand name) - rawBlock "latex" <$> getRawCommand txt + rawBlock "latex" <$> getRawCommand (txt <> star) lookupListDefault raw names blockCommands closing :: PandocMonad m => LP m Blocks @@ -2128,8 +2130,8 @@ block = (mempty <$ spaces1) <|> environment <|> include <|> macroDef - <|> paragraph <|> blockCommand + <|> paragraph <|> grouped block blocks :: PandocMonad m => LP m Blocks diff --git a/test/command/3494.md b/test/command/3494.md index 7c480fde6..534041246 100644 --- a/test/command/3494.md +++ b/test/command/3494.md @@ -25,7 +25,8 @@ thank you -blah + +

blah

blah blah diff --git a/test/command/3577.md b/test/command/3577.md index dc88937e9..ca9dba97c 100644 --- a/test/command/3577.md +++ b/test/command/3577.md @@ -15,9 +15,11 @@ \caption{Subfigure with Subfloat} \end{figure} ^D +
Caption 1
Caption 1
+
Caption 2
Caption 2
diff --git a/test/command/write18.md b/test/command/write18.md new file mode 100644 index 000000000..344dfc8cf --- /dev/null +++ b/test/command/write18.md @@ -0,0 +1,14 @@ +Handle \write18{..} as raw tex: +``` +% pandoc -t native +\write18{git --version} +^D +[RawBlock (Format "latex") "\\write18{git --version}"] +``` + +``` +% pandoc -f latex+raw_tex -t native +\write18{git --version} +^D +[RawBlock (Format "latex") "\\write18{git --version}"] +``` diff --git a/test/latex-reader.native b/test/latex-reader.native index 04be2538e..a62f2069e 100644 --- a/test/latex-reader.native +++ b/test/latex-reader.native @@ -261,7 +261,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [RawInline (Format "latex") "\\cite[22-23]{smith.1899}"]]] - ,[Para [RawInline (Format "latex") "\\doublespacing"]] + ,[RawBlock (Format "latex") "\\doublespacing"] ,[Para [Math InlineMath "2+2=4"]] ,[Para [Math InlineMath "x \\in y"]] ,[Para [Math InlineMath "\\alpha \\wedge \\omega"]] -- cgit v1.2.3 From 050036c036bea4dba65efd033230d552ef637abc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 12 Jul 2017 16:51:30 +0200 Subject: Print informative message when failing with use of `--normalize`. We may want to think of some kind of graceful fallback, but the present behavior has the advantage of forcing people to update scripts when updating to pandoc 2.0. See #3786. --- src/Text/Pandoc/App.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 689c0a784..68bdc1432 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -1564,6 +1564,8 @@ handleUnrecognizedOption :: String -> [String] -> [String] handleUnrecognizedOption "--smart" = (("--smart/-S has been removed. Use +smart or -smart extension instead.\n" ++ "For example: pandoc -f markdown+smart -t markdown-smart.") :) +handleUnrecognizedOption "--normalize" = + ("--normalize has been removed. Normalization is now automatic." :) handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart" handleUnrecognizedOption "--old-dashes" = ("--old-dashes has been removed. Use +old_dashes extension instead." :) -- cgit v1.2.3 From e0025cf4f18e335917f57814a1854f85ce1b6236 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Wed, 12 Jul 2017 18:14:10 +0300 Subject: Remove redundant imports (#3796) --- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index cd2c7c7f8..9ec84b3f6 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -66,7 +66,7 @@ import Text.Pandoc.Parsing hiding (many, optional, withRaw, space, (<|>), spaces, blankline) import Text.Pandoc.Shared import Text.Pandoc.Readers.LaTeX.Types (Macro(..), Tok(..), - TokType(..), Line, Column) + TokType(..)) import Text.Pandoc.Walk import Text.Pandoc.Error (PandocError(PandocParsecError, PandocMacroLoop)) -- cgit v1.2.3 From de117fbd9e32e890663eb831b47fd91fcd6419a0 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Wed, 12 Jul 2017 18:16:02 +0300 Subject: Muse writer: indent lists inside with at least one space (#3795) --- src/Text/Pandoc/Writers/Muse.hs | 15 +++++++++++---- test/Tests/Writers/Muse.hs | 11 +++++++++++ test/writer.muse | 6 +++--- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index b386a85b9..0383d9d86 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -102,6 +102,13 @@ pandocToMuse (Pandoc meta blocks) = do Nothing -> return main Just tpl -> renderTemplate' tpl context +-- | Convert list of Pandoc block elements to Muse +-- | without setting stTopLevel. +flatBlockListToMuse :: PandocMonad m + => [Block] -- ^ List of block elements + -> StateT WriterState m Doc +flatBlockListToMuse blocks = cat <$> mapM blockToMuse blocks + -- | Convert list of Pandoc block elements to Muse. blockListToMuse :: PandocMonad m => [Block] -- ^ List of block elements @@ -111,11 +118,11 @@ blockListToMuse blocks = do modify $ \s -> s { stTopLevel = not $ stInsideBlock s , stInsideBlock = True } - contents <- mapM blockToMuse blocks + result <- flatBlockListToMuse blocks modify $ \s -> s { stTopLevel = stTopLevel oldState , stInsideBlock = stInsideBlock oldState } - return $ cat contents + return result -- | Convert Pandoc block element to Muse. blockToMuse :: PandocMonad m @@ -141,10 +148,10 @@ blockToMuse (RawBlock (Format format) str) = return $ blankline $$ " text format <> "\">" $$ text str $$ "" $$ blankline blockToMuse (BlockQuote blocks) = do - contents <- blockListToMuse blocks + contents <- flatBlockListToMuse blocks return $ blankline <> "" - $$ flush contents -- flush to drop blanklines + $$ nest 0 contents -- nest 0 to remove trailing blank lines $$ "" <> blankline blockToMuse (OrderedList (start, style, _) items) = do diff --git a/test/Tests/Writers/Muse.hs b/test/Tests/Writers/Muse.hs index d83cc5c9b..ebe5d45cd 100644 --- a/test/Tests/Writers/Muse.hs +++ b/test/Tests/Writers/Muse.hs @@ -137,6 +137,17 @@ tests = [ testGroup "block elements" , " second inner definition :: second inner description" ] ] + -- Check that list is intended with one space even inside a quote + , "List inside block quote" =: blockQuote (orderedList [ plain $ text "first" + , plain $ text "second" + , plain $ text "third" + ]) + =?> unlines [ "" + , " 1. first" + , " 2. second" + , " 3. third" + , "" + ] ] , testGroup "headings" [ "normal heading" =: diff --git a/test/writer.muse b/test/writer.muse index 41d1c9a5b..fda025812 100644 --- a/test/writer.muse +++ b/test/writer.muse @@ -65,8 +65,8 @@ sub status { A list: -1. item one -2. item two + 1. item one + 2. item two Nested block quotes: @@ -281,7 +281,7 @@ Multiple blocks with italics:
-orange block quote + orange block quote Multiple definitions, tight: -- cgit v1.2.3 From 8b502dd50ff842bdbbf346a67a607d1a7905bda3 Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Wed, 12 Jul 2017 11:19:49 -0400 Subject: Fixed #3760. (#3784) Using the same solution as in the LaTeX reader: equation -> displaymath align -> displaymath \begin{aligned} ... \end{aligned} etc.. --- src/Text/Pandoc/Readers/Vimwiki.hs | 32 ++++++++++++++++++++++++++------ test/vimwiki-reader.native | 12 ++++++++---- test/vimwiki-reader.wiki | 10 ++++++++++ 3 files changed, 44 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 11faedb24..52bf37d35 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -75,7 +75,8 @@ import qualified Text.Pandoc.Builder as B (headerWith, str, space, strong, emph, strikeout, code, link, image, spanWith, para, horizontalRule, blockQuote, bulletList, plain, orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, - setMeta, definitionList, superscript, subscript) + setMeta, definitionList, superscript, subscript, displayMath, + math) import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition (Pandoc(..), Inline(Space), Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), @@ -265,13 +266,32 @@ displayMath :: PandocMonad m => VwParser m Blocks displayMath = try $ do many spaceChar >> string "{{$" mathTag <- option "" mathTagParser + many space contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$" >> many spaceChar >> newline)) let contentsWithTags - | mathTag == "" = "\\[" ++ contents ++ "\n\\]" - | otherwise = "\\begin{" ++ mathTag ++ "}" ++ contents + | mathTag == "" = contents + | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents ++ "\n\\end{" ++ mathTag ++ "}" - return $ B.plain $ B.str contentsWithTags + return $ B.para $ B.displayMath contentsWithTags + + +mathTagLaTeX :: String -> String +mathTagLaTeX s = case s of + "equation" -> "" + "equation*" -> "" + "gather" -> "gathered" + "gather*" -> "gathered" + "multline" -> "gathered" + "multline*" -> "gathered" + "eqnarray" -> "aligned" + "eqnarray*" -> "aligned" + "align" -> "aligned" + "align*" -> "aligned" + "alignat" -> "aligned" + "alignat*" -> "aligned" + _ -> s + mixedList :: PandocMonad m => VwParser m Blocks mixedList = try $ do @@ -598,7 +618,7 @@ inlineMath :: PandocMonad m => VwParser m Inlines inlineMath = try $ do char '$' contents <- many1Till (noneOf "\n") (char '$') - return $ B.str $ "\\(" ++ contents ++ "\\)" + return $ B.math contents tag :: PandocMonad m => VwParser m Inlines tag = try $ do @@ -650,4 +670,4 @@ mathTagParser = do s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space))) char '%' >> string s >> char '%' - return s + return $ mathTagLaTeX s diff --git a/test/vimwiki-reader.native b/test/vimwiki-reader.native index 26388b71a..8c9bff3f6 100644 --- a/test/vimwiki-reader.native +++ b/test/vimwiki-reader.native @@ -81,7 +81,7 @@ Pandoc (Meta {unMeta = fromList [("date",MetaInlines [Str "2017-05-01"]),("title ,Header 2 ("lists",[],[]) [Str "lists"] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "ordered",Space,Str "list",Space,Str "item",Space,Str "1,",Space,Str "and",Space,Str "here",Space,Str "is",Space,Str "some",Space,Str "math",Space,Str "belonging",Space,Str "to",Space,Str "list",Space,Str "item",Space,Str "1"] - ,Plain [Str "\\[\n a^2 + b^2 = c^2\n\\]"] + ,Para [Math DisplayMath "a^2 + b^2 = c^2"] ,Plain [Str "and",Space,Str "some",Space,Str "preformatted",Space,Str "and",Space,Str "tables",Space,Str "belonging",Space,Str "to",Space,Str "item",Space,Str "1",Space,Str "as",Space,Str "well"] ,CodeBlock ("",[],[]) "I'm part of item 1." ,Table [] [AlignDefault,AlignDefault] [0.0,0.0] @@ -184,9 +184,13 @@ Pandoc (Meta {unMeta = fromList [("date",MetaInlines [Str "2017-05-01"]),("title ,[Plain [Str "b"]]]]]]] ,[Plain [Span ("",["done4"],[]) [],Str "task",Space,Str "2"]]] ,Header 2 ("math",[],[]) [Str "math"] -,Para [Str "\\( \\sum_i a_i^2 = 1 \\)"] -,Plain [Str "\\[\n\\sum_i a_i^2\n=\n1\n\\]"] -,Plain [Str "\\begin{align}\n\\sum_i a_i^2 &= 1 + 1 \\\\\n&= 2.\n\\end{align}"] +,Para [Math InlineMath " \\sum_i a_i^2 = 1 "] +,Para [Math DisplayMath "\\sum_i a_i^2\n=\n1"] +,Para [Math DisplayMath "\\begin{aligned}\n\\sum_i a_i^2 &= 1 + 1 \\\\\n&= 2.\n\\end{aligned}"] +,Para [Str "edge",Space,Str "case",Space,Str "(the",Space,Code ("",[],[]) "c^2 + ",Space,Str "after",Space,Str "the",Space,Str "multline",Space,Str "tag",Space,Str "is",Space,Str "in",Space,Str "the",Space,Str "equation):"] +,Para [Math DisplayMath "\\begin{gathered}\nc^2 + \na^2 + b^2\n\\end{gathered}"] +,Para [Str "edge",Space,Str "case",Space,Str "(the",Space,Str "tag",Space,Str "is",Space,Code ("",[],[]) "hello%bye",Str ")"] +,Para [Math DisplayMath "\\begin{hello%bye}\n\\int_a^b f(x) dx\n\\end{hello%bye}"] ,Para [Str "Just",Space,Str "two",Space,Str "dollar",Space,Str "signs:",Space,Str "$$"] ,Para [Str "[not",Space,Str "math]",Space,Str "You",Space,Str "have",Space,Str "$1",SoftBreak,Str "and",Space,Str "I",Space,Str "have",Space,Str "$1."] ,Header 2 ("tags",[],[]) [Str "tags"] diff --git a/test/vimwiki-reader.wiki b/test/vimwiki-reader.wiki index ad724e090..63d39b146 100644 --- a/test/vimwiki-reader.wiki +++ b/test/vimwiki-reader.wiki @@ -297,6 +297,16 @@ $ \sum_i a_i^2 = 1 $ &= 2. }}$ +edge case (the `c^2 + ` after the multline tag is in the equation): +{{$%multline%c^2 + +a^2 + b^2 +}}$ + +edge case (the tag is `hello%bye`) +{{$%hello%bye% +\int_a^b f(x) dx +}}$ + Just two dollar signs: $$ [not math] You have $1 -- cgit v1.2.3