diff options
Diffstat (limited to 'src/Text')
78 files changed, 7865 insertions, 1432 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index fd849316b..a4d963221 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -37,10 +37,12 @@ inline links: > module Main where > import Text.Pandoc +> import Text.Pandoc.Error (handleError) > > markdownToRST :: String -> String -> markdownToRST = -> (writeRST def {writerReferenceLinks = True}) . readMarkdown def +> markdownToRST = handleError . +> writeRST def {writerReferenceLinks = True} . +> readMarkdown def > > main = getContents >>= putStrLn . markdownToRST @@ -65,7 +67,9 @@ module Text.Pandoc , Reader (..) , mkStringReader , readDocx + , readOdt , readMarkdown + , readCommonMark , readMediaWiki , readRST , readOrg @@ -77,11 +81,12 @@ module Text.Pandoc , readHaddock , readNative , readJSON + , readTWiki , readTxt2Tags , readTxt2TagsNoMacros , readEPUB -- * Writers: converting /from/ Pandoc format - , Writer (..) + , Writer (..) , writeNative , writeJSON , writeMarkdown @@ -108,6 +113,7 @@ module Text.Pandoc , writeOrg , writeAsciiDoc , writeHaddock + , writeCommonMark , writeCustom -- * Rendering templates and default templates , module Text.Pandoc.Templates @@ -123,6 +129,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.JSON import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Readers.CommonMark import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.Org @@ -133,7 +140,9 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Haddock +import Text.Pandoc.Readers.TWiki import Text.Pandoc.Readers.Docx +import Text.Pandoc.Readers.Odt import Text.Pandoc.Readers.Txt2Tags import Text.Pandoc.Readers.EPUB import Text.Pandoc.Writers.Native @@ -159,11 +168,13 @@ import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.Org import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Writers.Haddock +import Text.Pandoc.Writers.CommonMark import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options -import Text.Pandoc.Shared (safeRead, warn) +import Text.Pandoc.Shared (safeRead, warn, mapLeft) import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.Error import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) @@ -199,32 +210,35 @@ parseFormatSpec = parse formatSpec "" '-' -> Set.delete ext _ -> Set.insert ext --- auxiliary function for readers: -markdown :: ReaderOptions -> String -> IO Pandoc -markdown o s = do - let (doc, warnings) = readMarkdownWithWarnings o s - mapM_ warn warnings - return doc -data Reader = StringReader (ReaderOptions -> String -> IO Pandoc) - | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag)) +data Reader = StringReader (ReaderOptions -> String -> IO (Either PandocError Pandoc)) + | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Either PandocError (Pandoc,MediaBag))) -mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader +mkStringReader :: (ReaderOptions -> String -> (Either PandocError Pandoc)) -> Reader mkStringReader r = StringReader (\o s -> return $ r o s) -mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader +mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader +mkStringReaderWithWarnings r = StringReader $ \o s -> do + case r o s of + Left err -> return $ Left err + Right (doc, warnings) -> do + mapM_ warn warnings + return (Right doc) + +mkBSReader :: (ReaderOptions -> BL.ByteString -> (Either PandocError (Pandoc, MediaBag))) -> Reader mkBSReader r = ByteStringReader (\o s -> return $ r o s) -- | Association list of formats and readers. readers :: [(String, Reader)] readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("json" , mkStringReader readJSON ) - ,("markdown" , StringReader markdown) - ,("markdown_strict" , StringReader markdown) - ,("markdown_phpextra" , StringReader markdown) - ,("markdown_github" , StringReader markdown) - ,("markdown_mmd", StringReader markdown) - ,("rst" , mkStringReader readRST ) + ,("markdown" , mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("markdown_strict" , mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("markdown_phpextra" , mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("markdown_github" , mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("markdown_mmd", mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("commonmark" , mkStringReader readCommonMark) + ,("rst" , mkStringReaderWithWarnings readRSTWithWarnings ) ,("mediawiki" , mkStringReader readMediaWiki) ,("docbook" , mkStringReader readDocBook) ,("opml" , mkStringReader readOPML) @@ -233,7 +247,9 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("html" , mkStringReader readHtml) ,("latex" , mkStringReader readLaTeX) ,("haddock" , mkStringReader readHaddock) + ,("twiki" , mkStringReader readTWiki) ,("docx" , mkBSReader readDocx) + ,("odt" , mkBSReader readOdt) ,("t2t" , mkStringReader readTxt2TagsNoMacros) ,("epub" , mkBSReader readEPUB) ] @@ -294,6 +310,7 @@ writers = [ ,("org" , PureStringWriter writeOrg) ,("asciidoc" , PureStringWriter writeAsciiDoc) ,("haddock" , PureStringWriter writeHaddock) + ,("commonmark" , PureStringWriter writeCommonMark) ] getDefaultExtensions :: String -> Set Extension @@ -302,7 +319,7 @@ getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions -getDefaultExtensions "plain" = pandocExtensions +getDefaultExtensions "plain" = plainExtensions getDefaultExtensions "org" = Set.fromList [Ext_citations] getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers] getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers, @@ -355,8 +372,8 @@ class ToJSONFilter a => ToJsonFilter a where toJsonFilter :: a -> IO () toJsonFilter = toJSONFilter -readJSON :: ReaderOptions -> String -> Pandoc -readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy +readJSON :: ReaderOptions -> String -> Either PandocError Pandoc +readJSON _ = mapLeft ParseFailure . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs index 66490d5c6..c183458e4 100644 --- a/src/Text/Pandoc/Asciify.hs +++ b/src/Text/Pandoc/Asciify.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Asciify - Copyright : Copyright (C) 2013-2014 John MacFarlane + Copyright : Copyright (C) 2013-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Compat/Locale.hs b/src/Text/Pandoc/Compat/Locale.hs new file mode 100644 index 000000000..ac791136c --- /dev/null +++ b/src/Text/Pandoc/Compat/Locale.hs @@ -0,0 +1,9 @@ +{-# LANGUAGE CPP #-} +module Text.Pandoc.Compat.Locale ( defaultTimeLocale ) +where + +#if MIN_VERSION_time(1,5,0) +import Data.Time.Format ( defaultTimeLocale ) +#else +import System.Locale ( defaultTimeLocale ) +#endif diff --git a/src/Text/Pandoc/Data.hsb b/src/Text/Pandoc/Data.hsb index 28e7f5112..3a0bf8ac4 100644 --- a/src/Text/Pandoc/Data.hsb +++ b/src/Text/Pandoc/Data.hsb @@ -4,4 +4,4 @@ module Text.Pandoc.Data (dataFiles) where import qualified Data.ByteString as B dataFiles :: [(FilePath, B.ByteString)] -dataFiles = %blobs "data" +dataFiles = ("README", %blob "README") : %blobs "data"
\ No newline at end of file diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs new file mode 100644 index 000000000..73d1e8f08 --- /dev/null +++ b/src/Text/Pandoc/Error.hs @@ -0,0 +1,64 @@ +{- +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Error + Copyright : Copyright (C) 2006-2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +This module provides a standard way to deal with possible errors encounted +during parsing. + +-} +module Text.Pandoc.Error (PandocError(..), handleError) where + +import Text.Parsec.Error +import Text.Parsec.Pos hiding (Line) +import Text.Pandoc.Compat.Except + +type Input = String + +data PandocError = -- | Generic parse failure + ParseFailure String + -- | Error thrown by a Parsec parser + | ParsecError Input ParseError + deriving (Show) + + +instance Error PandocError where + strMsg = ParseFailure + + +-- | An unsafe method to handle `PandocError`s. +handleError :: Either PandocError a -> a +handleError (Right r) = r +handleError (Left err) = + case err of + ParseFailure string -> error string + ParsecError input err' -> + let errPos = errorPos err' + errLine = sourceLine errPos + errColumn = sourceColumn errPos + theline = (lines input ++ [""]) !! (errLine - 1) + in error $ "\nError at " ++ show err' ++ "\n" ++ + theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ + "^" + diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 7f975d4c6..d0b945d45 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Highlighting - Copyright : Copyright (C) 2008-2014 John MacFarlane + Copyright : Copyright (C) 2008-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 68b34dcf3..09c1dd443 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-} +{-# OPTIONS_GHC -fno-warn-type-defaults #-} {- - Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> + Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ {- | Module : Text.Pandoc.ImageSize -Copyright : Copyright (C) 2011-2014 John MacFarlane +Copyright : Copyright (C) 2011-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -38,8 +39,11 @@ import Control.Monad import Data.Bits import Data.Binary import Data.Binary.Get -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, hush) import qualified Data.Map as M +import Text.Pandoc.Compat.Except +import Control.Monad.Trans +import Data.Maybe (fromMaybe) -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl @@ -64,17 +68,19 @@ imageType img = case B.take 4 img of "%!PS" | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" -> return Eps - _ -> fail "Unknown image type" + _ -> mzero -imageSize :: ByteString -> Maybe ImageSize -imageSize img = do - t <- imageType img - case t of - Png -> pngSize img - Gif -> gifSize img - Jpeg -> jpegSize img - Eps -> epsSize img - Pdf -> Nothing -- TODO +imageSize :: ByteString -> Either String ImageSize +imageSize img = + case imageType img of + Just Png -> mbToEither "could not determine PNG size" $ pngSize img + Just Gif -> mbToEither "could not determine GIF size" $ gifSize img + Just Jpeg -> jpegSize img + Just Eps -> mbToEither "could not determine EPS size" $ epsSize img + Just Pdf -> Left "could not determine PDF size" -- TODO + Nothing -> Left "could not determine image type" + where mbToEither msg Nothing = Left msg + mbToEither _ (Just x) = Right x defaultSize :: (Integer, Integer) defaultSize = (72, 72) @@ -114,7 +120,7 @@ pngSize img = do ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4, (shift h1 24) + (shift h2 16) + (shift h3 8) + h4) - _ -> fail "PNG parse error" + _ -> (hush . Left) "PNG parse error" let (dpix, dpiy) = findpHYs rest'' return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } @@ -143,67 +149,84 @@ gifSize img = do dpiX = 72, dpiY = 72 } - _ -> fail "GIF parse error" + _ -> (hush . Left) "GIF parse error" -jpegSize :: ByteString -> Maybe ImageSize -jpegSize img = do +jpegSize :: ByteString -> Either String ImageSize +jpegSize img = let (hdr, rest) = B.splitAt 4 img - guard $ B.length rest >= 14 - case hdr of - "\xff\xd8\xff\xe0" -> jfifSize rest - "\xff\xd8\xff\xe1" -> exifSize $ B.takeWhile (/= '\xff') rest - _ -> mzero + in if B.length rest < 14 + then Left "unable to determine JPEG size" + else case hdr of + "\xff\xd8\xff\xe0" -> jfifSize rest + "\xff\xd8\xff\xe1" -> exifSize rest + _ -> Left "unable to determine JPEG size" -jfifSize :: ByteString -> Maybe ImageSize -jfifSize rest = do +jfifSize :: ByteString -> Either String ImageSize +jfifSize rest = let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral $ unpack $ B.take 5 $ B.drop 9 $ rest - let factor = case dpiDensity of + factor = case dpiDensity of 1 -> id 2 -> \x -> (x * 254 `div` 10) _ -> const 72 - let dpix = factor (shift dpix1 8 + dpix2) - let dpiy = factor (shift dpiy1 8 + dpiy2) - (w,h) <- findJfifSize rest - return $ ImageSize { pxX = w, pxY = h, dpiX = dpix, dpiY = dpiy } + dpix = factor (shift dpix1 8 + dpix2) + dpiy = factor (shift dpiy1 8 + dpiy2) + in case findJfifSize rest of + Left msg -> Left msg + Right (w,h) -> Right $ ImageSize { pxX = w + , pxY = h + , dpiX = dpix + , dpiY = dpiy } -findJfifSize :: ByteString -> Maybe (Integer,Integer) -findJfifSize bs = do +findJfifSize :: ByteString -> Either String (Integer,Integer) +findJfifSize bs = let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs - case B.uncons bs' of - Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do + in case B.uncons bs' of + Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of - [h1,h2,w1,w2] -> return (shift w1 8 + w2, shift h1 8 + h2) - _ -> fail "JPEG parse error" - Just (_,bs'') -> do + [h1,h2,w1,w2] -> Right (shift w1 8 + w2, shift h1 8 + h2) + _ -> Left "JFIF parse error" + Just (_,bs'') -> case map fromIntegral $ unpack $ B.take 2 bs'' of - [c1,c2] -> do + [c1,c2] -> let len = shift c1 8 + c2 -- skip variables - findJfifSize $ B.drop len bs'' - _ -> fail "JPEG parse error" - Nothing -> fail "Did not find length record" + in findJfifSize $ B.drop len bs'' + _ -> Left "JFIF parse error" + Nothing -> Left "Did not find JFIF length record" -exifSize :: ByteString -> Maybe ImageSize -exifSize bs = runGet (Just <$> exifHeader bl) bl +runGet' :: Get (Either String a) -> BL.ByteString -> Either String a +runGet' p bl = +#if MIN_VERSION_binary(0,7,0) + case runGetOrFail p bl of + Left (_,_,msg) -> Left msg + Right (_,_,x) -> x +#else + runGet p bl +#endif + + +exifSize :: ByteString -> Either String ImageSize +exifSize bs = runGet' header $ bl where bl = BL.fromChunks [bs] + header = runExceptT $ exifHeader bl -- NOTE: It would be nicer to do -- runGet ((Just <$> exifHeader) <|> return Nothing) -- which would prevent pandoc from raising an error when an exif header can't -- be parsed. But we only get an Alternative instance for Get in binary 0.6, -- and binary 0.5 ships with ghc 7.6. -exifHeader :: BL.ByteString -> Get ImageSize +exifHeader :: BL.ByteString -> ExceptT String Get ImageSize exifHeader hdr = do - _app1DataSize <- getWord16be - exifHdr <- getWord32be - unless (exifHdr == 0x45786966) $ fail "Did not find exif header" - zeros <- getWord16be - unless (zeros == 0) $ fail "Expected zeros after exif header" + _app1DataSize <- lift getWord16be + exifHdr <- lift getWord32be + unless (exifHdr == 0x45786966) $ throwError "Did not find exif header" + zeros <- lift getWord16be + unless (zeros == 0) $ throwError "Expected zeros after exif header" -- beginning of tiff header -- we read whole thing to use -- in getting data from offsets: let tiffHeader = BL.drop 8 hdr - byteAlign <- getWord16be + byteAlign <- lift getWord16be let bigEndian = byteAlign == 0x4d4d let (getWord16, getWord32, getWord64) = if bigEndian @@ -213,48 +236,53 @@ exifHeader hdr = do num <- getWord32 den <- getWord32 return $ fromIntegral num / fromIntegral den - tagmark <- getWord16 - unless (tagmark == 0x002a) $ fail "Failed alignment sanity check" - ifdOffset <- getWord32 - skip (fromIntegral ifdOffset - 8) -- skip to IDF - numentries <- getWord16 - let ifdEntry = do - tag <- getWord16 >>= \t -> - maybe (return UnknownTagType) return - (M.lookup t tagTypeTable) - dataFormat <- getWord16 - numComponents <- getWord32 + tagmark <- lift getWord16 + unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check" + ifdOffset <- lift getWord32 + lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF + numentries <- lift getWord16 + let ifdEntry :: ExceptT String Get (TagType, DataFormat) + ifdEntry = do + tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable + <$> lift getWord16 + dataFormat <- lift getWord16 + numComponents <- lift getWord32 (fmt, bytesPerComponent) <- case dataFormat of - 1 -> return (UnsignedByte . runGet getWord8, 1) - 2 -> return (AsciiString, 1) - 3 -> return (UnsignedShort . runGet getWord16, 2) - 4 -> return (UnsignedLong . runGet getWord32, 4) - 5 -> return (UnsignedRational . runGet getRational, 8) - 6 -> return (SignedByte . runGet getWord8, 1) - 7 -> return (Undefined . runGet getWord8, 1) - 8 -> return (SignedShort . runGet getWord16, 2) - 9 -> return (SignedLong . runGet getWord32, 4) - 10 -> return (SignedRational . runGet getRational, 8) - 11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4) - 12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8) - _ -> fail $ "Unknown data format " ++ show dataFormat + 1 -> return (UnsignedByte <$> getWord8, 1) + 2 -> return (AsciiString <$> + getLazyByteString + (fromIntegral numComponents), 1) + 3 -> return (UnsignedShort <$> getWord16, 2) + 4 -> return (UnsignedLong <$> getWord32, 4) + 5 -> return (UnsignedRational <$> getRational, 8) + 6 -> return (SignedByte <$> getWord8, 1) + 7 -> return (Undefined <$> getLazyByteString + (fromIntegral numComponents), 1) + 8 -> return (SignedShort <$> getWord16, 2) + 9 -> return (SignedLong <$> getWord32, 4) + 10 -> return (SignedRational <$> getRational, 8) + 11 -> return (SingleFloat <$> getWord32 {- TODO -}, 4) + 12 -> return (DoubleFloat <$> getWord64 {- TODO -}, 8) + _ -> throwError $ "Unknown data format " ++ show dataFormat let totalBytes = fromIntegral $ numComponents * bytesPerComponent payload <- if totalBytes <= 4 -- data is right here - then fmt <$> - (getLazyByteString (fromIntegral totalBytes) <* - skip (4 - totalBytes)) + then lift $ fmt <* skip (4 - totalBytes) else do -- get data from offset - offs <- getWord32 - return $ fmt $ BL.take (fromIntegral totalBytes) $ - BL.drop (fromIntegral offs) tiffHeader + offs <- lift getWord32 + let bytesAtOffset = + BL.take (fromIntegral totalBytes) + $ BL.drop (fromIntegral offs) tiffHeader + case runGet' (Right <$> fmt) bytesAtOffset of + Left msg -> throwError msg + Right x -> return x return (tag, payload) entries <- sequence $ replicate (fromIntegral numentries) ifdEntry subentries <- case lookup ExifOffset entries of Just (UnsignedLong offset) -> do - pos <- bytesRead - skip (fromIntegral offset - (fromIntegral pos - 8)) - numsubentries <- getWord16 + pos <- lift bytesRead + lift $ skip (fromIntegral offset - (fromIntegral pos - 8)) + numsubentries <- lift getWord16 sequence $ replicate (fromIntegral numsubentries) ifdEntry _ -> return [] @@ -285,7 +313,7 @@ data DataFormat = UnsignedByte Word8 | UnsignedLong Word32 | UnsignedRational Rational | SignedByte Word8 - | Undefined Word8 + | Undefined BL.ByteString | SignedShort Word16 | SignedLong Word32 | SignedRational Rational diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 75b4ff0d2..6fd9ac373 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.MIME - Copyright : Copyright (C) 2011-2014 John MacFarlane + Copyright : Copyright (C) 2011-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -328,7 +328,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("oth","application/vnd.oasis.opendocument.text-web") ,("otp","application/vnd.oasis.opendocument.presentation-template") ,("ots","application/vnd.oasis.opendocument.spreadsheet-template") - ,("otf","application/x-font-opentype") + ,("otf","application/vnd.ms-opentype") ,("ott","application/vnd.oasis.opendocument.text-template") ,("oza","application/x-oz-application") ,("p","text/x-pascal") @@ -433,7 +433,9 @@ mimeTypesList = -- List borrowed from happstack-server. ,("sv4cpio","application/x-sv4cpio") ,("sv4crc","application/x-sv4crc") ,("svg","image/svg+xml") - ,("svgz","image/svg+xml") + -- removed for now, since it causes problems with + -- extensionFromMimeType: see #2183. + -- ,("svgz","image/svg+xml") ,("sw","chemical/x-swissprot") ,("swf","application/x-shockwave-flash") ,("swfl","application/x-shockwave-flash") @@ -497,6 +499,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("wmx","video/x-ms-wmx") ,("wmz","application/x-ms-wmz") ,("woff","application/x-font-woff") + ,("woff2","application/x-font-woff2") ,("wp5","application/wordperfect5.1") ,("wpd","application/wordperfect") ,("wrl","model/vrml") diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index a55d5417e..1246cdc8f 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-} {- Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> @@ -46,13 +46,15 @@ import Text.Pandoc.MIME (MimeType, getMimeTypeDef) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Maybe (fromMaybe) import System.IO (stderr) +import Data.Data (Data) +import Data.Typeable (Typeable) -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString)) - deriving (Monoid) + deriving (Monoid, Data, Typeable) instance Show MediaBag where show bag = "MediaBag " ++ show (mediaDirectory bag) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index ebfd8f8a9..17eb4a15c 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE DeriveDataTypeable #-} {- -Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Options - Copyright : Copyright (C) 2012-2014 John MacFarlane + Copyright : Copyright (C) 2012-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,6 +31,7 @@ options. -} module Text.Pandoc.Options ( Extension(..) , pandocExtensions + , plainExtensions , strictExtensions , phpMarkdownExtraExtensions , githubMarkdownExtensions @@ -51,6 +53,8 @@ import Data.Default import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.MediaBag (MediaBag) import Data.Monoid +import Data.Data (Data) +import Data.Typeable (Typeable) -- | Individually selectable syntax extensions. data Extension = @@ -74,7 +78,7 @@ data Extension = | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) | Ext_fenced_code_blocks -- ^ Parse fenced code blocks | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks - | Ext_backtick_code_blocks -- ^ Github style ``` code blocks + | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks | Ext_inline_code_attributes -- ^ Allow attributes on inline code | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags @@ -109,7 +113,8 @@ data Extension = | Ext_implicit_header_references -- ^ Implicit reference links for headers | Ext_line_blocks -- ^ RST style line blocks | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML - deriving (Show, Read, Enum, Eq, Ord, Bounded) + | Ext_shortcut_reference_links -- ^ Shortcut reference links + deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable) pandocExtensions :: Set Extension pandocExtensions = Set.fromList @@ -151,6 +156,25 @@ pandocExtensions = Set.fromList , Ext_header_attributes , Ext_implicit_header_references , Ext_line_blocks + , Ext_shortcut_reference_links + ] + +plainExtensions :: Set Extension +plainExtensions = Set.fromList + [ Ext_table_captions + , Ext_implicit_figures + , Ext_simple_tables + , Ext_multiline_tables + , Ext_grid_tables + , Ext_latex_macros + , Ext_fancy_lists + , Ext_startnum + , Ext_definition_lists + , Ext_example_lists + , Ext_intraword_underscores + , Ext_blank_before_blockquote + , Ext_blank_before_header + , Ext_strikeout ] phpMarkdownExtraExtensions :: Set Extension @@ -164,6 +188,7 @@ phpMarkdownExtraExtensions = Set.fromList , Ext_intraword_underscores , Ext_header_attributes , Ext_abbreviations + , Ext_shortcut_reference_links ] githubMarkdownExtensions :: Set Extension @@ -180,6 +205,7 @@ githubMarkdownExtensions = Set.fromList , Ext_strikeout , Ext_hard_line_breaks , Ext_lists_without_preceding_blankline + , Ext_shortcut_reference_links ] multimarkdownExtensions :: Set Extension @@ -202,7 +228,9 @@ multimarkdownExtensions = Set.fromList strictExtensions :: Set Extension strictExtensions = Set.fromList - [ Ext_raw_html ] + [ Ext_raw_html + , Ext_shortcut_reference_links + ] data ReaderOptions = ReaderOptions{ readerExtensions :: Set Extension -- ^ Syntax extensions @@ -220,7 +248,7 @@ data ReaderOptions = ReaderOptions{ , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrace :: Bool -- ^ Print debugging info , readerTrackChanges :: TrackChanges -} deriving (Show, Read) +} deriving (Show, Read, Data, Typeable) instance Default ReaderOptions where def = ReaderOptions{ @@ -242,7 +270,7 @@ instance Default ReaderOptions -- Writer options -- -data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read) +data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable) data HTMLMathMethod = PlainMath | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js @@ -252,18 +280,18 @@ data HTMLMathMethod = PlainMath | MathML (Maybe String) -- url of MathMLinHTML.js | MathJax String -- url of MathJax.js | KaTeX String String -- url of stylesheet and katex.js - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) data CiteMethod = Citeproc -- use citeproc to render them | Natbib -- output natbib cite commands | Biblatex -- output biblatex cite commands - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Methods for obfuscating email addresses in HTML. data ObfuscationMethod = NoObfuscation | ReferenceObfuscation | JavascriptObfuscation - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Varieties of HTML slide shows. data HTMLSlideVariant = S5Slides @@ -272,13 +300,13 @@ data HTMLSlideVariant = S5Slides | DZSlides | RevealJsSlides | NoSlides - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Options for accepting or rejecting MS Word track-changes. data TrackChanges = AcceptChanges | RejectChanges | AllChanges - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Options for writers data WriterOptions = WriterOptions @@ -323,7 +351,9 @@ data WriterOptions = WriterOptions , writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified , writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader - } deriving Show + , writerVerbose :: Bool -- ^ Verbose debugging output + , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine + } deriving (Show, Data, Typeable) instance Default WriterOptions where def = WriterOptions { writerStandalone = False @@ -366,6 +396,8 @@ instance Default WriterOptions where , writerReferenceODT = Nothing , writerReferenceDocx = Nothing , writerMediaBag = mempty + , writerVerbose = False + , writerLaTeXArgs = [] } -- | Returns True if the given extension is enabled. diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index d5f7c609d..8f92a3321 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, CPP, ScopedTypeVariables #-} {- -Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.PDF - Copyright : Copyright (C) 2012-2014 John MacFarlane + Copyright : Copyright (C) 2012-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -36,10 +36,11 @@ import qualified Data.ByteString.Lazy.Char8 as BC import qualified Data.ByteString as BS import System.Exit (ExitCode (..)) import System.FilePath +import System.IO (stderr, stdout) import System.Directory import Data.Digest.Pure.SHA (showDigest, sha1) import System.Environment -import Control.Monad (unless, (<=<)) +import Control.Monad (unless, when, (<=<)) import qualified Control.Exception as E import Control.Applicative ((<$)) import Data.List (isInfixOf) @@ -70,7 +71,8 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do doc' <- handleImages opts tmpdir doc let source = writer opts doc' - tex2pdf' tmpdir program source + args = writerLaTeXArgs opts + tex2pdf' (writerVerbose opts) args tmpdir program source handleImages :: WriterOptions -> FilePath -- ^ temp dir to store images @@ -106,8 +108,7 @@ convertImages tmpdir (Image ils (src, tit)) = do img <- convertImage tmpdir src newPath <- case img of - Left e -> src <$ - warn ("Unable to convert image `" ++ src ++ "':\n" ++ e) + Left e -> src <$ warn e Right fp -> return fp return (Image ils (newPath, tit)) convertImages _ x = return x @@ -121,7 +122,8 @@ convertImage tmpdir fname = Just "application/pdf" -> doNothing _ -> JP.readImage fname >>= \res -> case res of - Left msg -> return $ Left msg + Left _ -> return $ Left $ "Unable to convert `" ++ + fname ++ "' for use with pdflatex." Right img -> E.catch (Right fileOut <$ JP.savePngImage fileOut img) $ \(e :: E.SomeException) -> return (Left (show e)) @@ -130,22 +132,25 @@ convertImage tmpdir fname = mime = getMimeType fname doNothing = return (Right fname) -tex2pdf' :: FilePath -- ^ temp directory for output +tex2pdf' :: Bool -- ^ Verbose output + -> [String] -- ^ Arguments to the latex-engine + -> FilePath -- ^ temp directory for output -> String -- ^ tex program -> String -- ^ tex source -> IO (Either ByteString ByteString) -tex2pdf' tmpDir program source = do +tex2pdf' verbose args tmpDir program source = do let numruns = if "\\tableofcontents" `isInfixOf` source then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks - (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source + (exit, log', mbPdf) <- runTeXProgram verbose program args 1 numruns tmpDir source case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' let extramsg = case logmsg of - x | "! Package inputenc Error" `BC.isPrefixOf` x -> - "\nTry running pandoc with --latex-engine=xelatex." + x | ("! Package inputenc Error" `BC.isPrefixOf` x + && program /= "xelatex") + -> "\nTry running pandoc with --latex-engine=xelatex." _ -> "" return $ Left $ logmsg <> extramsg (ExitSuccess, Nothing) -> return $ Left "" @@ -170,9 +175,9 @@ extractMsg log' = do -- Run a TeX program on an input bytestring and return (exit code, -- contents of stdout, contents of produced PDF if any). Rerun -- a fixed number of times to resolve references. -runTeXProgram :: String -> Int -> FilePath -> String +runTeXProgram :: Bool -> String -> [String] -> Int -> Int -> FilePath -> String -> IO (ExitCode, ByteString, Maybe ByteString) -runTeXProgram program runsLeft tmpDir source = do +runTeXProgram verbose program args runNumber numRuns tmpDir source = do let file = tmpDir </> "input.tex" exists <- doesFileExist file unless exists $ UTF8.writeFile file source @@ -185,16 +190,33 @@ runTeXProgram program runsLeft tmpDir source = do let file' = file #endif let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", - "-output-directory", tmpDir', file'] + "-output-directory", tmpDir'] ++ args ++ [file'] env' <- getEnvironment let sep = searchPathSeparator:[] let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++) $ lookup "TEXINPUTS" env' let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] + when (verbose && runNumber == 1) $ do + putStrLn $ "[makePDF] temp dir:" + putStrLn tmpDir' + putStrLn $ "[makePDF] Command line:" + putStrLn $ program ++ " " ++ unwords (map show programArgs) + putStr "\n" + putStrLn $ "[makePDF] Environment:" + mapM_ print env'' + putStr "\n" + putStrLn $ "[makePDF] Contents of " ++ file' ++ ":" + B.readFile file' >>= B.putStr + putStr "\n" (exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty - if runsLeft > 1 - then runTeXProgram program (runsLeft - 1) tmpDir source + when verbose $ do + putStrLn $ "[makePDF] Run #" ++ show runNumber + B.hPutStr stdout out + B.hPutStr stderr err + putStr "\n" + if runNumber <= numRuns + then runTeXProgram verbose program args (runNumber + 1) numRuns tmpDir source else do let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir pdfExists <- doesFileExist pdfFile diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index d1fba1e21..c316e9220 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -5,7 +5,7 @@ , MultiParamTypeClasses , FlexibleInstances #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Parsing - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -65,6 +65,7 @@ module Text.Pandoc.Parsing ( anyLine, widthsFromIndices, gridTableWith, readWith, + readWithWarnings, readWithM, testStringWith, guardEnabled, @@ -162,6 +163,8 @@ module Text.Pandoc.Parsing ( anyLine, setSourceColumn, setSourceLine, newPos, + addWarning, + (<+?>) ) where @@ -175,7 +178,7 @@ import Text.Parsec hiding (token) import Text.Parsec.Pos (newPos) import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isHexDigit, isSpace ) -import Data.List ( intercalate, transpose ) +import Data.List ( intercalate, transpose, isSuffixOf ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro, @@ -190,6 +193,8 @@ import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative) import Data.Monoid import Data.Maybe (catMaybes) +import Text.Pandoc.Error + type Parser t s = Parsec t s type ParserT = ParsecT @@ -312,12 +317,14 @@ stringAnyCase (x:xs) = do return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: Stream s m t => ParserT s st m a -> s -> ParserT s st m a +parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a parseFromString parser str = do oldPos <- getPosition oldInput <- getInput setInput str result <- parser + spaces + eof setInput oldInput setPosition oldPos return result @@ -441,18 +448,17 @@ uri :: Stream [Char] m Char => ParserT [Char] st m (String, String) uri = try $ do scheme <- uriScheme char ':' - -- We allow punctuation except at the end, since + -- We allow sentence punctuation except at the end, since -- we don't want the trailing '.' in 'http://google.com.' We want to allow -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) -- as a URL, while NOT picking up the closing paren in -- (http://wikipedia.org). So we include balanced parens in the URL. - let isWordChar c = isAlphaNum c || c == '_' || c == '/' || c == '+' || - not (isAscii c) + let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-" let wordChar = satisfy isWordChar let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit) let entity = () <$ characterReference let punct = skipMany1 (char ',') - <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<')) + <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>')) let uriChunk = skipMany1 wordChar <|> percentEscaped <|> entity @@ -472,7 +478,12 @@ mathInlineWith op cl = try $ do string op notFollowedBy space words' <- many1Till (count 1 (noneOf " \t\n\\") - <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> (char '\\' >> + -- This next clause is needed because \text{..} can + -- contain $, \(\), etc. + (try (string "text" >> + (("\\text" ++) <$> inBalancedBraces 0 "")) + <|> (\c -> ['\\',c]) <$> anyChar)) <|> do (blankline <* notFollowedBy' blankline) <|> (oneOf " \t" <* skipMany (oneOf " \t")) notFollowedBy (char '$') @@ -480,6 +491,23 @@ mathInlineWith op cl = try $ do ) (try $ string cl) notFollowedBy digit -- to prevent capture of $5 return $ concat words' + where + inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String + inBalancedBraces 0 "" = do + c <- anyChar + if c == '{' + then inBalancedBraces 1 "{" + else mzero + inBalancedBraces 0 s = return $ reverse s + inBalancedBraces numOpen ('\\':xs) = do + c <- anyChar + inBalancedBraces numOpen (c:'\\':xs) + inBalancedBraces numOpen xs = do + c <- anyChar + case c of + '}' -> inBalancedBraces (numOpen - 1) (c:xs) + '{' -> inBalancedBraces (numOpen + 1) (c:xs) + _ -> inBalancedBraces numOpen (c:xs) mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String mathDisplayWith op cl = try $ do @@ -837,27 +865,27 @@ readWithM :: (Monad m, Functor m) => ParserT [Char] st m a -- ^ parser -> st -- ^ initial state -> String -- ^ input - -> m a + -> m (Either PandocError a) readWithM parser state input = - handleError <$> (runParserT parser state "source" input) - where - handleError (Left err') = - let errPos = errorPos err' - errLine = sourceLine errPos - errColumn = sourceColumn errPos - theline = (lines input ++ [""]) !! (errLine - 1) - in error $ "\nError at " ++ show err' ++ "\n" ++ - theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ - "^" - handleError (Right result) = result + mapLeft (ParsecError input) <$> runParserT parser state "source" input + -- | Parse a string with a given parser and state readWith :: Parser [Char] st a -> st -> String - -> a + -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp +readWithWarnings :: Parser [Char] ParserState a + -> ParserState + -> String + -> Either PandocError (a, [String]) +readWithWarnings p = readWith $ do + doc <- p + warnings <- stateWarnings <$> getState + return (doc, warnings) + -- | Parse a string with @parser@ (for testing). testStringWith :: (Show a, Stream [Char] Identity Char) => ParserT [Char] ParserState Identity a @@ -874,7 +902,8 @@ data ParserState = ParserState stateAllowLinks :: Bool, -- ^ Allow parsing of links stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed - stateKeys :: KeyTable, -- ^ List of reference keys (with fallbacks) + stateKeys :: KeyTable, -- ^ List of reference keys + stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys stateSubstitutions :: SubstTable, -- ^ List of substitution references stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) @@ -888,10 +917,9 @@ data ParserState = ParserState stateHasChapters :: Bool, -- ^ True if \chapter encountered stateMacros :: [Macro], -- ^ List of macros defined so far stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role - stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles + stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles -- Triple represents: 1) Base role, 2) Optional format (only for :raw: - -- roles), 3) Source language annotation for code (could be used to - -- annotate role classes too). + -- roles), 3) Additional classes (rest of Attr is unused)). stateCaption :: Maybe Inlines, -- ^ Caption in current environment stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context @@ -973,6 +1001,7 @@ defaultParserState = stateMaxNestingLevel = 6, stateLastStrPos = Nothing, stateKeys = M.empty, + stateHeaderKeys = M.empty, stateSubstitutions = M.empty, stateNotes = [], stateNotes' = [], @@ -1034,7 +1063,9 @@ type NoteTable' = [(String, F Blocks)] -- used in markdown reader newtype Key = Key String deriving (Show, Read, Eq, Ord) toKey :: String -> Key -toKey = Key . map toLower . unwords . words +toKey = Key . map toLower . unwords . words . unbracket + where unbracket ('[':xs) | "]" `isSuffixOf` xs = take (length xs - 1) xs + unbracket xs = xs type KeyTable = M.Map Key Target @@ -1178,7 +1209,7 @@ citeKey = try $ do guard =<< notAfterString suppress_author <- option False (char '-' *> return True) char '@' - firstChar <- letter <|> char '_' + firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite let regchar = satisfy (\c -> isAlphaNum c || c == '_') let internal p = try $ p <* lookAhead regchar rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") @@ -1223,3 +1254,13 @@ applyMacros' target = do then do macros <- extractMacros <$> getState return $ applyMacros macros target else return target + +-- | Append a warning to the log. +addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState () +addWarning mbpos msg = + updateState $ \st -> st{ + stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : + stateWarnings st } +infixr 5 <+?> +(<+?>) :: (Monoid a, Monad m) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a +a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 1e72c2040..5e6450746 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,6 +1,6 @@ {-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-} {- -Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA {- | Module : Text.Pandoc.Pretty - Copyright : Copyright (C) 2010-2014 John MacFarlane + Copyright : Copyright (C) 2010-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -250,6 +250,11 @@ renderDoc :: (IsString a, Monoid a) => Doc -> DocState a renderDoc = renderList . toList . unDoc +data IsBlock = IsBlock Int [String] + +-- This would be nicer with a pattern synonym +-- pattern VBlock i s <- mkIsBlock -> Just (IsBlock ..) + renderList :: (IsString a, Monoid a) => [D] -> DocState a renderList [] = return () @@ -286,6 +291,9 @@ renderList (BlankLines num : xs) = do | otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n") renderList xs +renderList (CarriageReturn : BlankLines m : xs) = + renderList (BlankLines m : xs) + renderList (CarriageReturn : xs) = do st <- get if newlines st > 0 || null xs @@ -320,11 +328,11 @@ renderList (BreakingSpace : xs) = do outp 1 " " renderList xs' -renderList (b1@Block{} : b2@Block{} : xs) = - renderList (mergeBlocks False b1 b2 : xs) +renderList (Block i1 s1 : Block i2 s2 : xs) = + renderList (mergeBlocks False (IsBlock i1 s1) (IsBlock i2 s2) : xs) -renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) = - renderList (mergeBlocks True b1 b2 : xs) +renderList (Block i1 s1 : BreakingSpace : Block i2 s2 : xs) = + renderList (mergeBlocks True (IsBlock i1 s1) (IsBlock i2 s2) : xs) renderList (Block width lns : xs) = do st <- get @@ -336,15 +344,14 @@ renderList (Block width lns : xs) = do modify $ \s -> s{ prefix = oldPref } renderList xs -mergeBlocks :: Bool -> D -> D -> D -mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) = +mergeBlocks :: Bool -> IsBlock -> IsBlock -> D +mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) = Block (w1 + w2 + if addSpace then 1 else 0) $ zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties) where empties = replicate (abs $ length lns1 - length lns2) "" pad n s = s ++ replicate (n - realLength s) ' ' sp "" = "" sp xs = if addSpace then (' ' : xs) else xs -mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!" blockToDoc :: Int -> [String] -> Doc blockToDoc _ lns = text $ intercalate "\n" lns @@ -531,4 +538,4 @@ charWidth c = -- | Get real length of string, taking into account combining and double-wide -- characters. realLength :: String -> Int -realLength = sum . map charWidth +realLength = foldr (\a b -> charWidth a + b) 0 diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs index 19872b405..e5245638d 100644 --- a/src/Text/Pandoc/Process.hs +++ b/src/Text/Pandoc/Process.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Process - Copyright : Copyright (C) 2013-2014 John MacFarlane + Copyright : Copyright (C) 2013-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs new file mode 100644 index 000000000..51a35c8ad --- /dev/null +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -0,0 +1,119 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.CommonMark + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of CommonMark-formatted plain text to 'Pandoc' document. + +CommonMark is a strongly specified variant of Markdown: http://commonmark.org. +-} +module Text.Pandoc.Readers.CommonMark (readCommonMark) +where + +import CMark +import Data.Text (unpack, pack) +import Data.List (groupBy) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Error + +-- | Parse a CommonMark formatted string into a 'Pandoc' structure. +readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc +readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack + where opts' = if readerSmart opts + then [optNormalize, optSmart] + else [optNormalize] + +nodeToPandoc :: Node -> Pandoc +nodeToPandoc (Node _ DOCUMENT nodes) = + Pandoc nullMeta $ foldr addBlock [] nodes +nodeToPandoc n = -- shouldn't happen + Pandoc nullMeta $ foldr addBlock [] [n] + +addBlocks :: [Node] -> [Block] +addBlocks = foldr addBlock [] + +addBlock :: Node -> [Block] -> [Block] +addBlock (Node _ PARAGRAPH nodes) = + (Para (addInlines nodes) :) +addBlock (Node _ HRULE _) = + (HorizontalRule :) +addBlock (Node _ BLOCK_QUOTE nodes) = + (BlockQuote (addBlocks nodes) :) +addBlock (Node _ (HTML t) _) = + (RawBlock (Format "html") (unpack t) :) +addBlock (Node _ (CODE_BLOCK info t) _) = + (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) +addBlock (Node _ (HEADER lev) nodes) = + (Header lev ("",[],[]) (addInlines nodes) :) +addBlock (Node _ (LIST listAttrs) nodes) = + (constructor (map (setTightness . addBlocks . children) nodes) :) + where constructor = case listType listAttrs of + BULLET_LIST -> BulletList + ORDERED_LIST -> OrderedList + (start, DefaultStyle, delim) + start = listStart listAttrs + setTightness = if listTight listAttrs + then map paraToPlain + else id + paraToPlain (Para xs) = Plain (xs) + paraToPlain x = x + delim = case listDelim listAttrs of + PERIOD_DELIM -> Period + PAREN_DELIM -> OneParen +addBlock (Node _ ITEM _) = id -- handled in LIST +addBlock _ = id + +children :: Node -> [Node] +children (Node _ _ ns) = ns + +addInlines :: [Node] -> [Inline] +addInlines = foldr addInline [] + +addInline :: Node -> [Inline] -> [Inline] +addInline (Node _ (TEXT t) _) = (map toinl clumps ++) + where raw = unpack t + clumps = groupBy samekind raw + samekind ' ' ' ' = True + samekind ' ' _ = False + samekind _ ' ' = False + samekind _ _ = True + toinl (' ':_) = Space + toinl xs = Str xs +addInline (Node _ LINEBREAK _) = (LineBreak :) +addInline (Node _ SOFTBREAK _) = (Space :) +addInline (Node _ (INLINE_HTML t) _) = + (RawInline (Format "html") (unpack t) :) +addInline (Node _ (CODE t) _) = + (Code ("",[],[]) (unpack t) :) +addInline (Node _ EMPH nodes) = + (Emph (addInlines nodes) :) +addInline (Node _ STRONG nodes) = + (Strong (addInlines nodes) :) +addInline (Node _ (LINK url title) nodes) = + (Link (addInlines nodes) (unpack url, unpack title) :) +addInline (Node _ (IMAGE url title) nodes) = + (Image (addInlines nodes) (unpack url, unpack title) :) +addInline _ = id diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 59ff3e717..3cc2a4479 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -15,6 +15,9 @@ import Control.Applicative ((<$>)) import Data.List (intersperse) import Data.Maybe (fromMaybe) import Text.TeXMath (readMathML, writeTeX) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Compat.Except +import Data.Default {- @@ -70,8 +73,8 @@ List of all DocBook tags, with [x] indicating implemented, [x] book - A book [x] bookinfo - Meta-information for a Book [x] bridgehead - A free-floating heading -[ ] callout - A “called out” description of a marked Area -[ ] calloutlist - A list of Callouts +[x] callout - A “called out” description of a marked Area +[x] calloutlist - A list of Callouts [x] caption - A caption [x] caution - A note of caution [x] chapter - A chapter, as of a book @@ -81,7 +84,7 @@ List of all DocBook tags, with [x] indicating implemented, [ ] citerefentry - A citation to a reference page [ ] citetitle - The title of a cited work [ ] city - The name of a city in an address -[ ] classname - The name of a class, in the object-oriented programming sense +[x] classname - The name of a class, in the object-oriented programming sense [ ] classsynopsis - The syntax summary for a class definition [ ] classsynopsisinfo - Information supplementing the contents of a ClassSynopsis @@ -164,21 +167,24 @@ List of all DocBook tags, with [x] indicating implemented, [x] glossseealso - A cross-reference from one GlossEntry to another [x] glossterm - A glossary term [ ] graphic - A displayed graphical object (not an inline) + Note: in DocBook v5 `graphic` is discarded [ ] graphicco - A graphic that contains callout areas + Note: in DocBook v5 `graphicco` is discarded [ ] group - A group of elements in a CmdSynopsis [ ] guibutton - The text on a button in a GUI [ ] guiicon - Graphic and/or text appearing as a icon in a GUI [ ] guilabel - The text of a label in a GUI -[ ] guimenu - The name of a menu in a GUI -[ ] guimenuitem - The name of a terminal menu item in a GUI -[ ] guisubmenu - The name of a submenu in a GUI +[x] guimenu - The name of a menu in a GUI +[x] guimenuitem - The name of a terminal menu item in a GUI +[x] guisubmenu - The name of a submenu in a GUI [ ] hardware - A physical part of a computer system [ ] highlights - A summary of the main points of the discussed component [ ] holder - The name of the individual or organization that holds a copyright [o] honorific - The title of a person [ ] html:form - An HTML form -[ ] imagedata - Pointer to external image data -[ ] imageobject - A wrapper for image data and its associated meta-information +[x] imagedata - Pointer to external image data (only `fileref` attribute + implemented but not `entityref` which would require parsing of the DTD) +[x] imageobject - A wrapper for image data and its associated meta-information [ ] imageobjectco - A wrapper for an image object with callouts [x] important - An admonition set off from the text [x] index - An index @@ -206,10 +212,10 @@ List of all DocBook tags, with [x] indicating implemented, other dingbat [ ] itermset - A set of index terms in the meta-information of a document [ ] jobtitle - The title of an individual in an organization -[ ] keycap - The text printed on a key on a keyboard +[x] keycap - The text printed on a key on a keyboard [ ] keycode - The internal, frequently numeric, identifier for a key on a keyboard -[ ] keycombo - A combination of input actions +[x] keycombo - A combination of input actions [ ] keysym - The symbolic name of a key on a keyboard [ ] keyword - One of a set of keywords describing the content of a document [ ] keywordset - A set of keywords describing the content of a document @@ -237,7 +243,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] mediaobject - A displayed media object (video, audio, image, etc.) [ ] mediaobjectco - A media object that contains callouts [x] member - An element of a simple list -[ ] menuchoice - A selection or series of selections from a menu +[x] menuchoice - A selection or series of selections from a menu [ ] methodname - The name of a method [ ] methodparam - Parameters to a method [ ] methodsynopsis - A syntax summary for a method @@ -471,7 +477,7 @@ List of all DocBook tags, with [x] indicating implemented, [ ] token - A unit of information [x] tr - A row in an HTML table [ ] trademark - A trademark -[ ] type - The classification of a value +[x] type - The classification of a value [x] ulink - A link that addresses its target by means of a URL (Uniform Resource Locator) [x] uri - A Uniform Resource Identifier @@ -497,7 +503,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] ?asciidoc-br? - line break from asciidoc docbook output -} -type DB = State DBState +type DB = ExceptT PandocError (State DBState) data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType @@ -507,16 +513,18 @@ data DBState = DBState{ dbSectionLevel :: Int , dbFigureTitle :: Inlines } deriving Show -readDocBook :: ReaderOptions -> String -> Pandoc -readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp') - DBState{ dbSectionLevel = 0 - , dbQuoteType = DoubleQuote - , dbMeta = mempty - , dbAcceptsMeta = False - , dbBook = False - , dbFigureTitle = mempty - } +instance Default DBState where + def = DBState{ dbSectionLevel = 0 + , dbQuoteType = DoubleQuote + , dbMeta = mempty + , dbAcceptsMeta = False + , dbBook = False + , dbFigureTitle = mempty } + + +readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc +readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs + where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp' inp' = handleInstructions inp -- We treat <?asciidoc-br?> specially (issue #1236), converting it @@ -603,7 +611,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags "important","caution","note","tip","warning","qandadiv", "question","answer","abstract","itemizedlist","orderedlist", "variablelist","article","book","table","informaltable", - "screen","programlisting","example"] + "screen","programlisting","example","calloutlist"] isBlockElement _ = False -- Trim leading and trailing newline characters @@ -622,18 +630,24 @@ addToStart toadd bs = -- function that is used by both mediaobject (in parseBlock) -- and inlinemediaobject (in parseInline) -getImage :: Element -> DB Inlines -getImage e = do +-- A DocBook mediaobject is a wrapper around a set of alternative presentations +getMediaobject :: Element -> DB Inlines +getMediaobject e = do imageUrl <- case filterChild (named "imageobject") e of Nothing -> return mempty Just z -> case filterChild (named "imagedata") z of Nothing -> return mempty Just i -> return $ attrValue "fileref" i - caption <- case filterChild - (\x -> named "caption" x || named "textobject" x) e of - Nothing -> gets dbFigureTitle - Just z -> mconcat <$> (mapM parseInline $ elContent z) - return $ image imageUrl "" caption + let getCaption el = case filterChild (\x -> named "caption" x + || named "textobject" x + || named "alt" x) el of + Nothing -> return mempty + Just z -> mconcat <$> (mapM parseInline $ elContent z) + figTitle <- gets dbFigureTitle + let (caption, title) = if isNull figTitle + then (getCaption e, "") + else (return figTitle, "fig:") + liftM (image imageUrl title) caption getBlocks :: Element -> DB Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -712,6 +726,7 @@ parseBlock (Elem e) = "question" -> addToStart (strong (str "Q:") <> str " ") <$> getBlocks e "answer" -> addToStart (strong (str "A:") <> str " ") <$> getBlocks e "abstract" -> blockQuote <$> getBlocks e + "calloutlist" -> bulletList <$> callouts "itemizedlist" -> bulletList <$> listitems "orderedlist" -> do let listStyle = case attrValue "numeration" e of @@ -728,7 +743,7 @@ parseBlock (Elem e) = <$> listitems "variablelist" -> definitionList <$> deflistitems "figure" -> getFigure e - "mediaobject" -> para <$> getImage e + "mediaobject" -> para <$> getMediaobject e "caption" -> return mempty "info" -> metaBlock "articleinfo" -> metaBlock @@ -772,11 +787,6 @@ parseBlock (Elem e) = x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) $ trimNl $ strContentRecursive e - strContentRecursive = strContent . (\e' -> e'{ elContent = - map elementToStr $ elContent e' }) - elementToStr :: Content -> Content - elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing - elementToStr x = x parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -785,6 +795,7 @@ parseBlock (Elem e) = contents <- getBlocks e return $ blockQuote (contents <> attrib) listitems = mapM getBlocks $ filterChildren (named "listitem") e + callouts = mapM getBlocks $ filterChildren (named "callout") e deflistitems = mapM parseVarListEntry $ filterChildren (named "varlistentry") e parseVarListEntry e' = do @@ -866,18 +877,29 @@ parseBlock (Elem e) = parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry sect n = do isbook <- gets dbBook let n' = if isbook || n == 0 then n + 1 else n - headerText <- case filterChild (named "title") e of + headerText <- case filterChild (named "title") e `mplus` + (filterChild (named "info") e >>= + filterChild (named "title")) of Just t -> getInlines t Nothing -> return mempty modify $ \st -> st{ dbSectionLevel = n } b <- getBlocks e + let ident = attrValue "id" e modify $ \st -> st{ dbSectionLevel = n - 1 } - return $ header n' headerText <> b + return $ headerWith (ident,[],[]) n' headerText <> b metaBlock = acceptingMetadata (getBlocks e) >> return mempty getInlines :: Element -> DB Inlines getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') +strContentRecursive :: Element -> String +strContentRecursive = strContent . + (\e' -> e'{ elContent = map elementToStr $ elContent e' }) + +elementToStr :: Content -> Content +elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing +elementToStr x = x + parseInline :: Content -> DB Inlines parseInline (Text (CData _ s _)) = return $ text s parseInline (CRef ref) = @@ -889,7 +911,7 @@ parseInline (Elem e) = "inlineequation" -> equation math "subscript" -> subscript <$> innerInlines "superscript" -> superscript <$> innerInlines - "inlinemediaobject" -> getImage e + "inlinemediaobject" -> getMediaobject e "quote" -> do qt <- gets dbQuoteType let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote @@ -901,6 +923,7 @@ parseInline (Elem e) = else doubleQuoted contents "simplelist" -> simpleList "segmentedlist" -> segmentedList + "classname" -> codeWithLang "code" -> codeWithLang "filename" -> codeWithLang "literal" -> codeWithLang @@ -920,6 +943,10 @@ parseInline (Elem e) = "constant" -> codeWithLang "userinput" -> codeWithLang "varargs" -> return $ code "(...)" + "keycap" -> return (str $ strContent e) + "keycombo" -> keycombo <$> (mapM parseInline $ elContent e) + "menuchoice" -> menuchoice <$> (mapM parseInline $ + filter isGuiMenu $ elContent e) "xref" -> return $ str "?" -- so at least you know something is there "email" -> return $ link ("mailto:" ++ strContent e) "" $ str $ strContent e @@ -959,7 +986,7 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContent e + return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines (filterChildren (named "member") e) segmentedList = do @@ -974,3 +1001,10 @@ parseInline (Elem e) = then mempty else strong tit <> linebreak return $ linebreak <> tit' <> segs + keycombo = spanWith ("",["keycombo"],[]) . + mconcat . intersperse (str "+") + menuchoice = spanWith ("",["menuchoice"],[]) . + mconcat . intersperse (text " > ") + isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x || + named "guimenuitem" x + isGuiMenu _ = False diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 4b5fbfdfc..67a97ae85 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -84,8 +84,7 @@ import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Shared import Text.Pandoc.MediaBag (insertMedia, MediaBag) -import Data.Maybe (isJust) -import Data.List (delete, stripPrefix, (\\), intersect, isPrefixOf) +import Data.List (delete, (\\), intersect) import Data.Monoid import Text.TeXMath (writeTeX) import Data.Default (Default) @@ -97,14 +96,17 @@ import Control.Applicative ((<$>)) import Data.Sequence (ViewL(..), viewl) import qualified Data.Sequence as Seq (null) +import Text.Pandoc.Error +import Text.Pandoc.Compat.Except + readDocx :: ReaderOptions -> B.ByteString - -> (Pandoc, MediaBag) + -> Either PandocError (Pandoc, MediaBag) readDocx opts bytes = case archiveToDocx (toArchive bytes) of - Right docx -> (Pandoc meta blks, mediaBag) where - (meta, blks, mediaBag) = (docxToOutput opts docx) - Left _ -> error $ "couldn't parse docx file" + Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag)) + <$> (docxToOutput opts docx) + Left _ -> Left (ParseFailure "couldn't parse docx file") data DState = DState { docxAnchorMap :: M.Map String String , docxMediaBag :: MediaBag @@ -123,10 +125,10 @@ data DEnv = DEnv { docxOptions :: ReaderOptions instance Default DEnv where def = DEnv def False -type DocxContext = ReaderT DEnv (State DState) +type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState)) -evalDocxContext :: DocxContext a -> DEnv -> DState -> a -evalDocxContext ctx env st = evalState (runReaderT ctx env) st +evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a +evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx -- This is empty, but we put it in for future-proofing. spansToKeep :: [String] @@ -197,19 +199,9 @@ fixAuthors mv = mv codeStyles :: [String] codeStyles = ["VerbatimChar"] -blockQuoteDivs :: [String] -blockQuoteDivs = ["Quote", "BlockQuote", "BlockQuotation"] - codeDivs :: [String] codeDivs = ["SourceCode"] - --- For the moment, we have English, Danish, German, and French. This --- is fairly ad-hoc, and there might be a more systematic way to do --- it, but it's better than nothing. -headerPrefixes :: [String] -headerPrefixes = ["Heading", "Overskrift", "berschrift", "Titre"] - runElemToInlines :: RunElem -> Inlines runElemToInlines (TextRun s) = text s runElemToInlines (LnBrk) = linebreak @@ -288,7 +280,13 @@ runToInlines :: Run -> DocxContext Inlines runToInlines (Run rs runElems) | Just (s, _) <- rStyle rs , s `elem` codeStyles = - return $ code $ concatMap runElemToString runElems + let rPr = resolveDependentRunStyle rs + codeString = code $ concatMap runElemToString runElems + in + return $ case rVertAlign rPr of + Just SupScrpt -> superscript codeString + Just SubScrpt -> subscript codeString + _ -> codeString | otherwise = do let ils = concatReduce (map runElemToInlines runElems) return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils @@ -408,7 +406,9 @@ singleParaToPlain blks singleParaToPlain blks = blks cellToBlocks :: Cell -> DocxContext Blocks -cellToBlocks (Cell bps) = concatReduce <$> mapM bodyPartToBlocks bps +cellToBlocks (Cell bps) = do + blks <- concatReduce <$> mapM bodyPartToBlocks bps + return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks rowToBlocksList :: Row -> DocxContext [Blocks] rowToBlocksList (Row cells) = do @@ -434,9 +434,9 @@ parStyleToTransform pPr let pPr' = pPr { pStyle = cs, indentation = Nothing} in (divWith ("", [c], [])) . (parStyleToTransform pPr') - | (c:cs) <- pStyle pPr - , c `elem` blockQuoteDivs = - let pPr' = pPr { pStyle = cs \\ blockQuoteDivs } + | (_:cs) <- pStyle pPr + , Just True <- pBlockQuote pPr = + let pPr' = pPr { pStyle = cs } in blockQuote . (parStyleToTransform pPr') | (_:cs) <- pStyle pPr = @@ -467,12 +467,11 @@ bodyPartToBlocks (Paragraph pPr parparts) $ parStyleToTransform pPr $ codeBlock $ concatMap parPartToString parparts - | (c : cs) <- filter (isJust . isHeaderClass) $ pStyle pPr - , Just (prefix, n) <- isHeaderClass c = do + | Just (style, n) <- pHeading pPr = do ils <- local (\s-> s{docxInHeaderBlock=True}) $ (concatReduce <$> mapM parPartToInlines parparts) makeHeaderAnchor $ - headerWith ("", delete (prefix ++ show n) cs, []) n ils + headerWith ("", delete style (pStyle pPr), []) n ils | otherwise = do ils <- concatReduce <$> mapM parPartToInlines parparts >>= (return . fromList . trimLineBreaks . normalizeSpaces . toList) @@ -555,16 +554,7 @@ bodyToOutput (Body bps) = do blks', mediaBag) -docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag) +docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag) docxToOutput opts (Docx (Document _ body)) = let dEnv = def { docxOptions = opts} in evalDocxContext (bodyToOutput body) dEnv def - -isHeaderClass :: String -> Maybe (String, Int) -isHeaderClass s | (pref:_) <- filter (\h -> isPrefixOf h s) headerPrefixes - , Just s' <- stripPrefix pref s = - case reads s' :: [(Int, String)] of - [] -> Nothing - ((n, "") : []) -> Just (pref, n) - _ -> Nothing -isHeaderClass _ = Nothing diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 2945a1eda..cce80fb48 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, ViewPatterns #-} +{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-} {- Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu> @@ -65,7 +65,8 @@ import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) import Text.TeXMath (Exp) -import Data.Char (readLitChar, ord, chr) +import Text.Pandoc.Readers.Docx.Util +import Data.Char (readLitChar, ord, chr, isDigit) data ReaderEnv = ReaderEnv { envNotes :: Notes , envNumbering :: Numbering @@ -73,6 +74,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes , envMedia :: Media , envFont :: Maybe Font , envCharStyles :: CharStyleMap + , envParStyles :: ParStyleMap } deriving Show @@ -107,8 +109,6 @@ mapD f xs = in concatMapM handler xs -type NameSpaces = [(String, String)] - data Docx = Docx Document deriving Show @@ -122,8 +122,12 @@ type Media = [(FilePath, B.ByteString)] type CharStyle = (String, RunStyle) +type ParStyle = (String, ParStyleData) + type CharStyleMap = M.Map String RunStyle +type ParStyleMap = M.Map String ParStyleData + data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] deriving Show @@ -152,6 +156,9 @@ data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer data ParagraphStyle = ParagraphStyle { pStyle :: [String] , indentation :: Maybe ParIndentation , dropCap :: Bool + , pHeading :: Maybe (String, Int) + , pNumInfo :: Maybe (String, String) + , pBlockQuote :: Maybe Bool } deriving Show @@ -159,6 +166,9 @@ defaultParagraphStyle :: ParagraphStyle defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing , dropCap = False + , pHeading = Nothing + , pNumInfo = Nothing + , pBlockQuote = Nothing } @@ -213,6 +223,12 @@ data RunStyle = RunStyle { isBold :: Maybe Bool , rStyle :: Maybe CharStyle} deriving Show +data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int) + , isBlockQuote :: Maybe Bool + , numInfo :: Maybe (String, String) + , psStyle :: Maybe ParStyle} + deriving Show + defaultRunStyle :: RunStyle defaultRunStyle = RunStyle { isBold = Nothing , isItalic = Nothing @@ -232,18 +248,14 @@ type ChangeId = String type Author = String type ChangeDate = String -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = do let notes = archiveToNotes archive numbering = archiveToNumbering archive rels = archiveToRelationships archive media = archiveToMedia archive - styles = archiveToStyles archive - rEnv = ReaderEnv notes numbering rels media Nothing styles + (styles, parstyles) = archiveToStyles archive + rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles doc <- runD (archiveToDocument archive) rEnv return $ Docx doc @@ -252,7 +264,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do entry <- maybeToD $ findEntryByPath "word/document.xml" zf docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem body <- elemToBody namespaces bodyElem return $ Document namespaces body @@ -263,47 +275,69 @@ elemToBody ns element | isElem ns "w" "body" element = (\bps -> return $ Body bps) elemToBody _ _ = throwError WrongElem -archiveToStyles :: Archive -> CharStyleMap +archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap) archiveToStyles zf = let stylesElem = findEntryByPath "word/styles.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) in case stylesElem of - Nothing -> M.empty + Nothing -> (M.empty, M.empty) Just styElem -> - let namespaces = mapMaybe attrToNSPair (elAttribs styElem) + let namespaces = elemToNameSpaces styElem in - M.fromList $ buildBasedOnList namespaces styElem Nothing + ( M.fromList $ buildBasedOnList namespaces styElem + (Nothing :: Maybe CharStyle), + M.fromList $ buildBasedOnList namespaces styElem + (Nothing :: Maybe ParStyle) ) -isBasedOnStyle :: NameSpaces -> Element -> Maybe CharStyle -> Bool +isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool isBasedOnStyle ns element parentStyle | isElem ns "w" "style" element - , Just "character" <- findAttr (elemName ns "w" "type") element + , Just styleType <- findAttr (elemName ns "w" "type") element + , styleType == cStyleType parentStyle , Just basedOnVal <- findChild (elemName ns "w" "basedOn") element >>= findAttr (elemName ns "w" "val") - , Just (parentId, _) <- parentStyle = (basedOnVal == parentId) + , Just ps <- parentStyle = (basedOnVal == getStyleId ps) | isElem ns "w" "style" element - , Just "character" <- findAttr (elemName ns "w" "type") element + , Just styleType <- findAttr (elemName ns "w" "type") element + , styleType == cStyleType parentStyle , Nothing <- findChild (elemName ns "w" "basedOn") element , Nothing <- parentStyle = True | otherwise = False -elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle -elemToCharStyle ns element parentStyle - | isElem ns "w" "style" element - , Just "character" <- findAttr (elemName ns "w" "type") element - , Just styleId <- findAttr (elemName ns "w" "styleId") element = - Just (styleId, elemToRunStyle ns element parentStyle) - | otherwise = Nothing - -getStyleChildren :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle] +class ElemToStyle a where + cStyleType :: Maybe a -> String + elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a + getStyleId :: a -> String + +instance ElemToStyle CharStyle where + cStyleType _ = "character" + elemToStyle ns element parentStyle + | isElem ns "w" "style" element + , Just "character" <- findAttr (elemName ns "w" "type") element + , Just styleId <- findAttr (elemName ns "w" "styleId") element = + Just (styleId, elemToRunStyle ns element parentStyle) + | otherwise = Nothing + getStyleId s = fst s + +instance ElemToStyle ParStyle where + cStyleType _ = "paragraph" + elemToStyle ns element parentStyle + | isElem ns "w" "style" element + , Just "paragraph" <- findAttr (elemName ns "w" "type") element + , Just styleId <- findAttr (elemName ns "w" "styleId") element = + Just (styleId, elemToParStyleData ns element parentStyle) + | otherwise = Nothing + getStyleId s = fst s + +getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] getStyleChildren ns element parentStyle | isElem ns "w" "styles" element = - mapMaybe (\e -> elemToCharStyle ns e parentStyle) $ + mapMaybe (\e -> elemToStyle ns e parentStyle) $ filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element | otherwise = [] -buildBasedOnList :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle] +buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] buildBasedOnList ns element rootStyle = case (getStyleChildren ns element rootStyle) of [] -> [] @@ -317,10 +351,10 @@ archiveToNotes zf = enElem = findEntryByPath "word/endnotes.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces fn = fnElem >>= (elemToNotes ns "footnote") @@ -420,7 +454,7 @@ archiveToNumbering' zf = do Nothing -> Just $ Numbering [] [] [] Just entry -> do numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + let namespaces = elemToNameSpaces numberingElem numElems = findChildren (QName "num" (lookup "w" namespaces) (Just "w")) numberingElem @@ -449,15 +483,6 @@ elemToNotes _ _ _ = Nothing --------------------------------------------- --------------------------------------------- -elemName :: NameSpaces -> String -> String -> QName -elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix)) - -isElem :: NameSpaces -> String -> String -> Element -> Bool -isElem ns prefix name element = - qName (elName element) == name && - qURI (elName element) == (lookup prefix ns) - - elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = let cols = findChildren (elemName ns "w" "gridCol") element @@ -510,20 +535,6 @@ elemToParIndentation ns element | isElem ns "w" "ind" element = stringToInteger} elemToParIndentation _ _ = Nothing - -elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) -elemToNumInfo ns element | isElem ns "w" "p" element = do - let pPr = findChild (elemName ns "w" "pPr") element - numPr = pPr >>= findChild (elemName ns "w" "numPr") - lvl <- numPr >>= - findChild (elemName ns "w" "ilvl") >>= - findAttr (elemName ns "w" "val") - numId <- numPr >>= - findChild (elemName ns "w" "numId") >>= - findAttr (elemName ns "w" "val") - return (numId, lvl) -elemToNumInfo _ _ = Nothing - testBitMask :: String -> Int -> Bool testBitMask bitMaskS n = case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of @@ -542,18 +553,28 @@ elemToBodyPart ns element return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element - , Just (numId, lvl) <- elemToNumInfo ns element = do - let parstyle = elemToParagraphStyle ns element + , Just (numId, lvl) <- getNumInfo ns element = do + sty <- asks envParStyles + let parstyle = elemToParagraphStyle ns element sty parparts <- mapD (elemToParPart ns) (elChildren element) num <- asks envNumbering case lookupLevel numId lvl num of - Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> throwError WrongElem + Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts + Nothing -> throwError WrongElem elemToBodyPart ns element | isElem ns "w" "p" element = do - let parstyle = elemToParagraphStyle ns element - parparts <- mapD (elemToParPart ns) (elChildren element) - return $ Paragraph parstyle parparts + sty <- asks envParStyles + let parstyle = elemToParagraphStyle ns element sty + parparts <- mapD (elemToParPart ns) (elChildren element) + case pNumInfo parstyle of + Just (numId, lvl) -> do + num <- asks envNumbering + case lookupLevel numId lvl num of + Just levelInfo -> + return $ ListItem parstyle numId lvl levelInfo parparts + Nothing -> + throwError WrongElem + Nothing -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChild (elemName ns "w" "tblPr") element @@ -601,6 +622,16 @@ elemToParPart ns element case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs) Nothing -> throwError WrongElem +-- The below is an attempt to deal with images in deprecated vml format. +elemToParPart ns element + | isElem ns "w" "r" element + , Just _ <- findChild (elemName ns "w" "pict") element = + let drawing = findElement (elemName ns "v" "imagedata") element + >>= findAttr (elemName ns "r" "id") + in + case drawing of + Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs) + Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "r" element = elemToRun ns element >>= (\r -> return $ PlainRun r) @@ -625,17 +656,20 @@ elemToParPart ns element return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttr (elemName ns "w" "anchor") element = do + , Just relId <- findAttr (elemName ns "r" "id") element = do runs <- mapD (elemToRun ns) (elChildren element) - return $ InternalHyperLink anchor runs + rels <- asks envRelationships + case lookupRelationship relId rels of + Just target -> do + case findAttr (elemName ns "w" "anchor") element of + Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs + Nothing -> return $ ExternalHyperLink target runs + Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttr (elemName ns "r" "id") element = do + , Just anchor <- findAttr (elemName ns "w" "anchor") element = do runs <- mapD (elemToRun ns) (elChildren element) - rels <- asks envRelationships - return $ case lookupRelationship relId rels of - Just target -> ExternalHyperLink target runs - Nothing -> ExternalHyperLink "" runs + return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "m" "oMath" element = (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath) @@ -684,14 +718,30 @@ elemToRun ns element return $ Run runStyle runElems elemToRun _ _ = throwError WrongElem -elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle -elemToParagraphStyle ns element +getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a +getParentStyleValue field style + | Just value <- field style = Just value + | Just parentStyle <- psStyle style + = getParentStyleValue field (snd parentStyle) +getParentStyleValue _ _ = Nothing + +getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] -> + Maybe a +getParStyleField field stylemap styles + | x <- mapMaybe (\x -> M.lookup x stylemap) styles + , (y:_) <- mapMaybe (getParentStyleValue field) x + = Just y +getParStyleField _ _ _ = Nothing + +elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle +elemToParagraphStyle ns element sty | Just pPr <- findChild (elemName ns "w" "pPr") element = - ParagraphStyle - {pStyle = + let style = mapMaybe (findAttr (elemName ns "w" "val")) (findChildren (elemName ns "w" "pStyle") pPr) + in ParagraphStyle + {pStyle = style , indentation = findChild (elemName ns "w" "ind") pPr >>= elemToParIndentation ns @@ -703,8 +753,11 @@ elemToParagraphStyle ns element Just "none" -> False Just _ -> True Nothing -> False + , pHeading = getParStyleField headingLev sty style + , pNumInfo = getParStyleField numInfo sty style + , pBlockQuote = getParStyleField isBlockQuote sty style } -elemToParagraphStyle _ _ = defaultParagraphStyle +elemToParagraphStyle _ _ _ = defaultParagraphStyle checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool checkOnOff ns rPr tag @@ -758,6 +811,59 @@ elemToRunStyle ns element parentStyle } elemToRunStyle _ _ _ = defaultRunStyle +isNumericNotNull :: String -> Bool +isNumericNotNull str = (str /= []) && (all isDigit str) + +getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int) +getHeaderLevel ns element + | Just styleId <- findAttr (elemName ns "w" "styleId") element + , Just index <- stripPrefix "Heading" styleId + , isNumericNotNull index = Just (styleId, read index) + | Just styleId <- findAttr (elemName ns "w" "styleId") element + , Just index <- findChild (elemName ns "w" "name") element >>= + findAttr (elemName ns "w" "val") >>= + stripPrefix "heading " + , isNumericNotNull index = Just (styleId, read index) +getHeaderLevel _ _ = Nothing + +blockQuoteStyleIds :: [String] +blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"] + +blockQuoteStyleNames :: [String] +blockQuoteStyleNames = ["Quote", "Block Text"] + +getBlockQuote :: NameSpaces -> Element -> Maybe Bool +getBlockQuote ns element + | Just styleId <- findAttr (elemName ns "w" "styleId") element + , styleId `elem` blockQuoteStyleIds = Just True + | Just styleName <- findChild (elemName ns "w" "name") element >>= + findAttr (elemName ns "w" "val") + , styleName `elem` blockQuoteStyleNames = Just True +getBlockQuote _ _ = Nothing + +getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo ns element = do + let numPr = findChild (elemName ns "w" "pPr") element >>= + findChild (elemName ns "w" "numPr") + lvl = fromMaybe "0" (numPr >>= + findChild (elemName ns "w" "ilvl") >>= + findAttr (elemName ns "w" "val")) + numId <- numPr >>= + findChild (elemName ns "w" "numId") >>= + findAttr (elemName ns "w" "val") + return (numId, lvl) + + +elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData +elemToParStyleData ns element parentStyle = + ParStyleData + { + headingLev = getHeaderLevel ns element + , isBlockQuote = getBlockQuote ns element + , numInfo = getNumInfo ns element + , psStyle = parentStyle + } + elemToRunElem :: NameSpaces -> Element -> D RunElem elemToRunElem ns element | isElem ns "w" "t" element diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs new file mode 100644 index 000000000..2901ea2a3 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -0,0 +1,106 @@ +module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) + , defaultStyleMaps + , getStyleMaps + , getStyleId + , hasStyleName + ) where + +import Text.XML.Light +import Text.Pandoc.Readers.Docx.Util +import Control.Monad.State +import Data.Char (toLower) +import qualified Data.Map as M + +newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) +newtype CharStyleMap = CharStyleMap ( M.Map String String ) + +class StyleMap a where + alterMap :: (M.Map String String -> M.Map String String) -> a -> a + getMap :: a -> M.Map String String + +instance StyleMap ParaStyleMap where + alterMap f (ParaStyleMap m) = ParaStyleMap $ f m + getMap (ParaStyleMap m) = m + +instance StyleMap CharStyleMap where + alterMap f (CharStyleMap m) = CharStyleMap $ f m + getMap (CharStyleMap m) = m + +insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a +insert (Just k) (Just v) m = alterMap (M.insert k v) m +insert _ _ m = m + +getStyleId :: (StyleMap a) => String -> a -> String +getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap + +hasStyleName :: (StyleMap a) => String -> a -> Bool +hasStyleName styleName = M.member (map toLower styleName) . getMap + +data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces + , sParaStyleMap :: ParaStyleMap + , sCharStyleMap :: CharStyleMap + } + +data StyleType = ParaStyle | CharStyle + +defaultStyleMaps :: StyleMaps +defaultStyleMaps = StyleMaps { sNameSpaces = [] + , sParaStyleMap = ParaStyleMap M.empty + , sCharStyleMap = CharStyleMap M.empty + } + +type StateM a = State StyleMaps a + +getStyleMaps :: Element -> StyleMaps +getStyleMaps docElem = execState genStyleMap state' + where + state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} + genStyleItem e = do + styleType <- getStyleType e + styleId <- getAttrStyleId e + nameValLowercase <- fmap (map toLower) `fmap` getNameVal e + case styleType of + Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId + Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId + _ -> return () + genStyleMap = do + style <- elemName' "style" + let styles = findChildren style docElem + forM_ styles genStyleItem + +modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM () +modParaStyleMap f = modify $ \s -> + s {sParaStyleMap = f $ sParaStyleMap s} + +modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM () +modCharStyleMap f = modify $ \s -> + s {sCharStyleMap = f $ sCharStyleMap s} + +getStyleType :: Element -> StateM (Maybe StyleType) +getStyleType e = do + styleTypeStr <- getAttrType e + case styleTypeStr of + Just "paragraph" -> return $ Just ParaStyle + Just "character" -> return $ Just CharStyle + _ -> return Nothing + +getAttrType :: Element -> StateM (Maybe String) +getAttrType el = do + name <- elemName' "type" + return $ findAttr name el + +getAttrStyleId :: Element -> StateM (Maybe String) +getAttrStyleId el = do + name <- elemName' "styleId" + return $ findAttr name el + +getNameVal :: Element -> StateM (Maybe String) +getNameVal el = do + name <- elemName' "name" + val <- elemName' "val" + return $ findChild name el >>= findAttr val + +elemName' :: String -> StateM QName +elemName' name = do + namespaces <- gets sNameSpaces + return $ elemName namespaces "w" name diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs new file mode 100644 index 000000000..891f107b0 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -0,0 +1,26 @@ +module Text.Pandoc.Readers.Docx.Util ( + NameSpaces + , elemName + , isElem + , elemToNameSpaces + ) where + +import Text.XML.Light +import Data.Maybe (mapMaybe) + +type NameSpaces = [(String, String)] + +elemToNameSpaces :: Element -> NameSpaces +elemToNameSpaces = mapMaybe attrToNSPair . elAttribs + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = QName name (lookup prefix ns) (Just prefix) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + qName (elName element) == name && + qURI (elName element) == lookup prefix ns diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index b061d8683..338540533 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -35,18 +35,20 @@ import Control.DeepSeq.Generics (deepseq, NFData) import Debug.Trace (trace) +import Text.Pandoc.Error + type Items = M.Map String (FilePath, MimeType) -readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag) +readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag) readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes) -runEPUB :: Except String a -> a -runEPUB = either error id . runExcept +runEPUB :: Except PandocError a -> Either PandocError a +runEPUB = runExcept -- Note that internal reference are aggresively normalised so that all ids -- are of the form "filename#id" -- -archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) +archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) archiveToEPUB os archive = do -- root is path to folder with manifest file in (root, content) <- getManifest archive @@ -64,19 +66,20 @@ archiveToEPUB os archive = do return $ (ast, mediaBag) where os' = os {readerParseRaw = True} - parseSpineElem :: MonadError String m => FilePath -> (FilePath, MimeType) -> m Pandoc + parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do when (readerTrace os) (traceM path) doc <- mimeToReader mime r path let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty return $ docSpan <> doc - mimeToReader :: MonadError String m => MimeType -> FilePath -> FilePath -> m Pandoc + mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do fname <- findEntryByPathE (root </> path) archive - return $ fixInternalReferences path . + html <- either throwError return . readHtml os' . UTF8.toStringLazy $ fromEntry fname + return $ fixInternalReferences path html mimeToReader s _ path | s `elem` imageMimes = return $ imageToPandoc path | otherwise = return $ mempty @@ -114,7 +117,7 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"] type CoverImage = FilePath -parseManifest :: (MonadError String m) => Element -> m (Maybe CoverImage, Items) +parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items) parseManifest content = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest @@ -130,7 +133,7 @@ parseManifest content = do mime <- findAttrE (emptyName "media-type") e return (uid, (href, mime)) -parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MimeType)] +parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do spine <- findElementE (dfName "spine") e let itemRefs = findChildren (dfName "itemref") spine @@ -141,7 +144,7 @@ parseSpine is e = do guard linear findAttr (emptyName "idref") ref -parseMeta :: MonadError String m => Element -> m Meta +parseMeta :: MonadError PandocError m => Element -> m Meta parseMeta content = do meta <- findElementE (dfName "metadata") content let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True @@ -159,7 +162,7 @@ renameMeta :: String -> String renameMeta "creator" = "author" renameMeta s = s -getManifest :: MonadError String m => Archive -> m (String, Element) +getManifest :: MonadError PandocError m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry @@ -266,18 +269,18 @@ emptyName s = QName s Nothing Nothing -- Convert Maybe interface to Either -findAttrE :: MonadError String m => QName -> Element -> m String +findAttrE :: MonadError PandocError m => QName -> Element -> m String findAttrE q e = mkE "findAttr" $ findAttr q e -findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry +findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry findEntryByPathE (normalise -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: MonadError String m => String -> m Element +parseXMLDocE :: MonadError PandocError m => String -> m Element parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc -findElementE :: MonadError String m => QName -> Element -> m Element +findElementE :: MonadError PandocError m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x -mkE :: MonadError String m => String -> Maybe a -> m a -mkE s = maybe (throwError s) return +mkE :: MonadError PandocError m => String -> Maybe a -> m a +mkE s = maybe (throwError . ParseFailure $ s) return diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4e0bb375a..fcba16e04 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, +ViewPatterns#-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -43,14 +44,14 @@ import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, renderTags' - , escapeURI, safeRead ) + , escapeURI, safeRead, mapLeft ) import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) , Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans)) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import Data.Maybe ( fromMaybe, isJust) -import Data.List ( intercalate, isInfixOf ) +import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero, void, unless ) import Control.Arrow ((***)) @@ -61,16 +62,20 @@ import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) +import Network.URI (isURI) +import Text.Pandoc.Error + +import Text.Parsec.Error -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readHtml opts inp = - case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags of - Left err' -> error $ "\nError at " ++ show err' - Right result -> result + mapLeft (ParseFailure . getError) . flip runReader def $ + runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing) + "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -78,6 +83,9 @@ readHtml opts inp = meta <- stateMeta . parserState <$> getState bs' <- replaceNotes (B.toList blocks) return $ Pandoc meta bs' + getError (errorMessages -> ms) = case ms of + [] -> "" + (m:_) -> messageString m replaceNotes :: [Block] -> TagParser [Block] replaceNotes = walkM replaceNotes' @@ -91,7 +99,8 @@ replaceNotes' x = return x data HTMLState = HTMLState { parserState :: ParserState, - noteTable :: [(String, Blocks)] + noteTable :: [(String, Blocks)], + baseHref :: Maybe String } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -113,7 +122,7 @@ pBody :: TagParser Blocks pBody = pInTags "body" block pHead :: TagParser Blocks -pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . trimInlines setTitle t = mempty <$ (updateState $ B.setMeta "title" t) pMetaTag = do @@ -125,6 +134,17 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) let content = fromAttrib "content" mt updateState $ B.setMeta name (B.text content) return mempty + pBaseTag = do + bt <- pSatisfy (~== TagOpen "base" []) + let baseH = fromAttrib "href" bt + if null baseH + then return mempty + else do + let baseH' = case reverse baseH of + '/':_ -> baseH + _ -> baseH ++ "/" + updateState $ \st -> st{ baseHref = Just baseH' } + return mempty block :: TagParser Blocks block = do @@ -250,7 +270,14 @@ pOrderedList = try $ do "lower-alpha" -> LowerAlpha "upper-alpha" -> UpperAlpha "decimal" -> Decimal - _ -> DefaultStyle + _ -> + case lookup "type" attribs of + Just "1" -> Decimal + Just "I" -> UpperRoman + Just "i" -> LowerRoman + Just "A" -> UpperAlpha + Just "a" -> LowerAlpha + _ -> DefaultStyle let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && not (t ~== TagClose "ol")) @@ -373,13 +400,21 @@ pTable = try $ do skipMany pBlank caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank -- TODO actually read these and take width information from them - widths' <- pColgroup <|> many pCol - head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") - skipMany pBlank - rows <- pOptInTag "tbody" - $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") - skipMany pBlank + widths' <- (mconcat <$> many1 pColgroup) <|> many pCol + let pTh = option [] $ pInTags "tr" (pCell "th") + pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th") + pTBody = do pOptInTag "tbody" $ many1 pTr + head'' <- pOptInTag "thead" pTh + head' <- pOptInTag "tbody" $ do + if null head'' + then pTh + else return head'' + rowsLs <- many pTBody + rows' <- pOptInTag "tfoot" $ many pTr TagClose _ <- pSatisfy (~== TagClose "table") + let rows = (concat rowsLs) ++ rows' + -- fail on empty table + guard $ not $ null head' && null rows let isSinglePlain x = case B.toList x of [Plain _] -> True _ -> False @@ -551,7 +586,11 @@ pAnchor = try $ do pRelLink :: TagParser Inlines pRelLink = try $ do tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href")) - let url = fromAttrib "href" tag + mbBaseHref <- baseHref <$> getState + let url' = fromAttrib "href" tag + let url = case (isURI url', mbBaseHref) of + (False, Just h) -> h ++ url' + _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag let spanC = case uid of @@ -563,7 +602,11 @@ pRelLink = try $ do pImage :: TagParser Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") - let url = fromAttrib "src" tag + mbBaseHref <- baseHref <$> getState + let url' = fromAttrib "src" tag + let url = case (isURI url', mbBaseHref) of + (False, Just h) -> h ++ url' + _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag return $ B.image (escapeURI url) title (B.text alt) @@ -624,14 +667,17 @@ pInTags tagtype parser = try $ do pSatisfy (~== TagOpen tagtype []) mconcat <$> manyTill parser (pCloses tagtype <|> eof) -pOptInTag :: String -> TagParser a - -> TagParser a -pOptInTag tagtype parser = try $ do - open <- option False (pSatisfy (~== TagOpen tagtype []) >> return True) +-- parses p, preceeded by an optional opening tag +-- and followed by an optional closing tags +pOptInTag :: String -> TagParser a -> TagParser a +pOptInTag tagtype p = try $ do + skipMany pBlank + optional $ pSatisfy (~== TagOpen tagtype []) + skipMany pBlank + x <- p skipMany pBlank - x <- parser + optional $ pSatisfy (~== TagClose tagtype) skipMany pBlank - when open $ pCloses tagtype return x pCloses :: String -> TagParser () @@ -740,7 +786,7 @@ pSpace = many1 (satisfy isSpace) >> return B.space -- eitherBlockOrInline :: [String] -eitherBlockOrInline = ["audio", "applet", "button", "iframe", +eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed", "del", "ins", "progress", "map", "area", "noscript", "script", "object", "svg", "video", "source"] @@ -758,7 +804,7 @@ blockHtmlTags :: [String] blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside", "blockquote", "body", "button", "canvas", "caption", "center", "col", "colgroup", "dd", "dir", "div", - "dl", "dt", "embed", "fieldset", "figcaption", "figure", + "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "isindex", "menu", "noframes", "ol", "output", "p", "pre", @@ -815,6 +861,7 @@ isCommentTag = tagComment (const True) closes :: String -> String -> Bool _ `closes` "body" = False _ `closes` "html" = False +"body" `closes` "head" = True "a" `closes` "a" = True "li" `closes` "li" = True "th" `closes` t | t `elem` ["th","td"] = True @@ -856,7 +903,7 @@ htmlInBalanced :: (Monad m) -> ParserT String st m String htmlInBalanced f = try $ do (TagOpen t _, tag) <- htmlTag f - guard $ '/' `notElem` tag -- not a self-closing tag + guard $ not $ "/>" `isSuffixOf` tag -- not a self-closing tag let stopper = htmlTag (~== TagClose t) let anytag = snd <$> htmlTag (const True) contents <- many $ notFollowedBy' stopper >> @@ -869,17 +916,26 @@ htmlTag :: Monad m => (Tag String -> Bool) -> ParserT [Char] st m (Tag String, String) htmlTag f = try $ do - lookAhead $ char '<' >> (oneOf "/!?" <|> letter) - (next : _) <- getInput >>= return . canonicalizeTags . parseTags + lookAhead (char '<') + inp <- getInput + let hasTagWarning (TagWarning _:_) = True + hasTagWarning _ = False + let (next : rest) = canonicalizeTags $ parseTagsOptions + parseOptions{ optTagWarning = True } inp guard $ f next - -- advance the parser case next of - TagComment s -> do + TagComment s + | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar skipMany (satisfy (/='>')) char '>' return (next, "<!--" ++ s ++ "-->") + | otherwise -> fail "bogus comment mode, HTML5 parse error" _ -> do + -- we get a TagWarning on things like + -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66> + -- which should NOT be parsed as an HTML tag, see #2277 + guard $ not $ hasTagWarning rest rendered <- manyTill anyChar (char '>') return (next, rendered ++ ">") @@ -925,7 +981,7 @@ instance HasReaderOptions HTMLState where extractReaderOptions = extractReaderOptions . parserState instance Default HTMLState where - def = HTMLState def [] + def = HTMLState def [] Nothing instance HasMeta HTMLState where setMeta s b st = st {parserState = setMeta s b $ parserState st} diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 4b46c869d..aa2534afc 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {- | Module : Text.Pandoc.Readers.Haddock Copyright : Copyright (C) 2013 David Lazar @@ -25,11 +26,18 @@ import Documentation.Haddock.Parser import Documentation.Haddock.Types import Debug.Trace (trace) +import Text.Pandoc.Error + -- | Parse Haddock markup and return a 'Pandoc' document. readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse - -> Pandoc -readHaddock opts = B.doc . docHToBlocks . trace' . parseParas + -> Either PandocError Pandoc +readHaddock opts = +#if MIN_VERSION_haddock_library(1,2,0) + Right . B.doc . docHToBlocks . trace' . _doc . parseParas +#else + Right . B.doc . docHToBlocks . trace' . parseParas +#endif where trace' x = if readerTrace opts then trace (show x) x else x diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 9f51e9a8f..0da912ea6 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -42,26 +42,25 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, mathDisplay, mathInline) import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Char ( chr, ord ) +import Data.Char ( chr, ord, isLetter, isAlphaNum ) import Control.Monad.Trans (lift) import Control.Monad import Text.Pandoc.Builder -import Data.Char (isLetter, isAlphaNum) import Control.Applicative import Data.Monoid -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, maybeToList) import System.Environment (getEnv) -import System.FilePath (replaceExtension, (</>)) -import Data.List (intercalate, intersperse) +import System.FilePath (replaceExtension, (</>), takeExtension, addExtension) +import Data.List (intercalate) import qualified Data.Map as M import qualified Control.Exception as E -import System.FilePath (takeExtension, addExtension) import Text.Pandoc.Highlighting (fromListingsLanguage) +import Text.Pandoc.Error -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts } parseLaTeX :: LP Pandoc @@ -73,17 +72,16 @@ parseLaTeX = do let (Pandoc _ bs') = doc bs return $ Pandoc meta bs' -type LP = Parser [Char] ParserState +type LP = Parser String ParserState anyControlSeq :: LP String anyControlSeq = do char '\\' next <- option '\n' anyChar - name <- case next of - '\n' -> return "" - c | isLetter c -> (c:) <$> (many letter <* optional sp) - | otherwise -> return [c] - return name + case next of + '\n' -> return "" + c | isLetter c -> (c:) <$> (many letter <* optional sp) + | otherwise -> return [c] controlSeq :: String -> LP String controlSeq name = try $ do @@ -103,7 +101,7 @@ dimenarg = try $ do sp :: LP () sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') - <|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline) + <|> try (newline <* lookAhead anyChar <* notFollowedBy blankline) isLowerHex :: Char -> Bool isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' @@ -161,50 +159,47 @@ mathInline :: LP String -> LP Inlines mathInline p = math <$> (try p >>= applyMacros') mathChars :: LP String -mathChars = concat <$> - many ( many1 (satisfy (\c -> c /= '$' && c /='\\')) - <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar) - ) +mathChars = (concat <$>) $ + many $ + many1 (satisfy (\c -> c /= '$' && c /='\\')) + <|> (\c -> ['\\',c]) <$> try (char '\\' *> anyChar) quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines quoted' f starter ender = do startchs <- starter try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs -double_quote :: LP Inlines -double_quote = - ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") +doubleQuote :: LP Inlines +doubleQuote = + quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") <|> quoted' doubleQuoted (string "“") (void $ char '”') -- the following is used by babel for localized quotes: <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") <|> quoted' doubleQuoted (string "\"") (void $ char '"') - ) -single_quote :: LP Inlines -single_quote = - ( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) +singleQuote :: LP Inlines +singleQuote = + quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) - ) inline :: LP Inlines inline = (mempty <$ comment) <|> (space <$ sp) <|> inlineText <|> inlineCommand + <|> inlineEnvironment <|> inlineGroup <|> (char '-' *> option (str "-") - ((char '-') *> option (str "–") (str "—" <$ char '-'))) - <|> double_quote - <|> single_quote + (char '-' *> option (str "–") (str "—" <$ char '-'))) + <|> doubleQuote + <|> singleQuote <|> (str "”" <$ try (string "''")) <|> (str "”" <$ char '”') <|> (str "’" <$ char '\'') <|> (str "’" <$ char '’') <|> (str "\160" <$ char '~') - <|> (mathDisplay $ string "$$" *> mathChars <* string "$$") - <|> (mathInline $ char '$' *> mathChars <* char '$') - <|> (superscript <$> (char '^' *> tok)) - <|> (subscript <$> (char '_' *> tok)) + <|> mathDisplay (string "$$" *> mathChars <* string "$$") + <|> mathInline (char '$' *> mathChars <* char '$') <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) <|> (str . (:[]) <$> tildeEscape) <|> (str . (:[]) <$> oneOf "[]") @@ -237,20 +232,32 @@ block = (mempty <$ comment) blocks :: LP Blocks blocks = mconcat <$> many block +getRawCommand :: String -> LP String +getRawCommand name' = do + rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) + return $ '\\' : name' ++ snd rawargs + +lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v +lookupListDefault d = (fromMaybe d .) . lookupList + where + lookupList l m = msum $ map (`M.lookup` m) l + blockCommand :: LP Blocks blockCommand = try $ do name <- anyControlSeq guard $ name /= "begin" && name /= "end" star <- option "" (string "*" <* optional sp) let name' = name ++ star - case M.lookup name' blockCommands of - Just p -> p - Nothing -> case M.lookup name blockCommands of - Just p -> p - Nothing -> mzero + let raw = do + rawcommand <- getRawCommand name' + transformed <- applyMacros' rawcommand + guard $ transformed /= rawcommand + notFollowedBy $ parseFromString inlines transformed + parseFromString blocks transformed + lookupListDefault raw [name',name] blockCommands inBrackets :: Inlines -> Inlines -inBrackets x = (str "[") <> x <> (str "]") +inBrackets x = str "[" <> x <> str "]" -- eat an optional argument and one or more arguments in braces ignoreInlines :: String -> (String, LP Inlines) @@ -258,19 +265,21 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawInline "latex" . (contseq ++) . snd) <$> - (getOption readerParseRaw >>= guard >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> withRaw optargs) ignoreBlocks :: String -> (String, LP Blocks) ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawBlock "latex" . (contseq ++) . snd) <$> - (getOption readerParseRaw >>= guard >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> withRaw optargs) blockCommands :: M.Map String (LP Blocks) blockCommands = M.fromList $ [ ("par", mempty <$ skipopts) - , ("title", mempty <$ (skipopts *> tok >>= addMeta "title")) + , ("title", mempty <$ (skipopts *> + (grouped inline >>= addMeta "title") + <|> (grouped block >>= addMeta "title"))) , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) , ("author", mempty <$ (skipopts *> authors)) -- -- in letter class, temp. store address & sig as title, author @@ -301,10 +310,10 @@ blockCommands = M.fromList $ -- , ("hrule", pure horizontalRule) , ("rule", skipopts *> tok *> tok *> pure horizontalRule) - , ("item", skipopts *> loose_item) + , ("item", skipopts *> looseItem) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) - , ("caption", skipopts *> tok >>= setCaption) + , ("caption", skipopts *> setCaption) , ("PandocStartInclude", startInclude) , ("PandocEndInclude", endInclude) , ("bibliography", mempty <$ (skipopts *> braced >>= @@ -327,6 +336,7 @@ blockCommands = M.fromList $ , "hyperdef" , "markboth", "markright", "markleft" , "hspace", "vspace" + , "newpage" ] addMeta :: ToMetaValue a => String -> a -> LP () @@ -336,9 +346,16 @@ addMeta field val = updateState $ \st -> splitBibs :: String -> [Inlines] splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') -setCaption :: Inlines -> LP Blocks -setCaption ils = do - updateState $ \st -> st{ stateCaption = Just ils } +setCaption :: LP Blocks +setCaption = do + ils <- tok + mblabel <- option Nothing $ + try $ spaces' >> controlSeq "label" >> (Just <$> tok) + let ils' = case mblabel of + Just lab -> ils <> spanWith + ("",[],[("data-label", stringify lab)]) mempty + Nothing -> ils + updateState $ \st -> st{ stateCaption = Just ils' } return mempty resetCaption :: LP () @@ -361,7 +378,7 @@ section (ident, classes, kvs) lvl = do let lvl' = if hasChapters then lvl + 1 else lvl skipopts contents <- grouped inline - lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced) + lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced) attr' <- registerHeader (lab, classes, kvs) contents return $ headerWith attr' lvl' contents @@ -374,25 +391,39 @@ inlineCommand = try $ do star <- option "" (string "*") let name' = name ++ star let raw = do - rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) - let rawcommand = '\\' : name ++ star ++ snd rawargs + rawcommand <- getRawCommand name' transformed <- applyMacros' rawcommand if transformed /= rawcommand then parseFromString inlines transformed else if parseRaw then return $ rawInline "latex" rawcommand else return mempty - case M.lookup name' inlineCommands of - Just p -> p <|> raw - Nothing -> case M.lookup name inlineCommands of - Just p -> p <|> raw - Nothing -> raw + lookupListDefault mzero [name',name] inlineCommands + <|> raw unlessParseRaw :: LP () unlessParseRaw = getOption readerParseRaw >>= guard . not isBlockCommand :: String -> Bool -isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands +isBlockCommand s = s `M.member` blockCommands + + +inlineEnvironments :: M.Map String (LP Inlines) +inlineEnvironments = M.fromList + [ ("displaymath", mathEnv id Nothing "displaymath") + , ("equation", mathEnv id Nothing "equation") + , ("equation*", mathEnv id Nothing "equation*") + , ("gather", mathEnv id (Just "gathered") "gather") + , ("gather*", mathEnv id (Just "gathered") "gather*") + , ("multline", mathEnv id (Just "gathered") "multline") + , ("multline*", mathEnv id (Just "gathered") "multline*") + , ("eqnarray", mathEnv id (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*") + , ("align", mathEnv id (Just "aligned") "align") + , ("align*", mathEnv id (Just "aligned") "align*") + , ("alignat", mathEnv id (Just "aligned") "alignat") + , ("alignat*", mathEnv id (Just "aligned") "alignat*") + ] inlineCommands :: M.Map String (LP Inlines) inlineCommands = M.fromList $ @@ -414,9 +445,14 @@ inlineCommands = M.fromList $ , ("sim", lit "~") , ("label", unlessParseRaw >> (inBrackets <$> tok)) , ("ref", unlessParseRaw >> (inBrackets <$> tok)) + , ("noindent", unlessParseRaw >> return mempty) + , ("textgreek", tok) + , ("sep", lit ",") + , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) - , ("ensuremath", mathInline $ braced) + , ("ensuremath", mathInline braced) + , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) , ("P", lit "¶") , ("S", lit "§") , ("$", lit "$") @@ -464,7 +500,7 @@ inlineCommands = M.fromList $ , ("v", option (str "v") $ try $ tok >>= accent hacek) , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") - , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp)) + , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) , (",", pure mempty) , ("@", pure mempty) , (" ", lit "\160") @@ -477,7 +513,7 @@ inlineCommands = M.fromList $ , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("verb", doverb) - , ("lstinline", doverb) + , ("lstinline", skipopts *> doverb) , ("Verb", doverb) , ("texttt", (code . stringify . toList) <$> tok) , ("url", (unescapeURL <$> braced) >>= \url -> @@ -494,6 +530,7 @@ inlineCommands = M.fromList $ , ("citealp", citation "citealp" NormalCitation False) , ("citealp*", citation "citealp*" NormalCitation False) , ("autocite", citation "autocite" NormalCitation False) + , ("smartcite", citation "smartcite" NormalCitation False) , ("footcite", inNote <$> citation "footcite" NormalCitation False) , ("parencite", citation "parencite" NormalCitation False) , ("supercite", citation "supercite" NormalCitation False) @@ -516,6 +553,7 @@ inlineCommands = M.fromList $ , ("supercites", citation "supercites" NormalCitation True) , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) , ("Autocite", citation "Autocite" NormalCitation False) + , ("Smartcite", citation "Smartcite" NormalCitation False) , ("Footcite", citation "Footcite" NormalCitation False) , ("Parencite", citation "Parencite" NormalCitation False) , ("Supercite", citation "Supercite" NormalCitation False) @@ -542,7 +580,7 @@ inlineCommands = M.fromList $ ] ++ map ignoreInlines -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks: - [ "noindent", "index" ] + [ "index" ] mkImage :: String -> LP Inlines mkImage src = do @@ -559,7 +597,7 @@ inNote ils = unescapeURL :: String -> String unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` "#$%&~_^\\{}" + where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) unescapeURL (x:xs) = x:unescapeURL xs unescapeURL [] = "" @@ -585,7 +623,7 @@ lit = pure . str accent :: (Char -> String) -> Inlines -> LP Inlines accent f ils = case toList ils of - (Str (x:xs) : ys) -> return $ fromList $ (Str (f x ++ xs) : ys) + (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) [] -> mzero _ -> return ils @@ -774,7 +812,7 @@ breve 'u' = "ŭ" breve c = [c] tok :: LP Inlines -tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) +tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar opt :: LP Inlines opt = bracketed inline <* optional sp @@ -786,15 +824,20 @@ inlineText :: LP Inlines inlineText = str <$> many1 inlineChar inlineChar :: LP Char -inlineChar = noneOf "\\$%^_&~#{}^'`\"‘’“”-[] \t\n" +inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n" environment :: LP Blocks environment = do controlSeq "begin" name <- braced - case M.lookup name environments of - Just p -> p <|> rawEnv name - Nothing -> rawEnv name + M.findWithDefault mzero name environments + <|> rawEnv name + +inlineEnvironment :: LP Inlines +inlineEnvironment = try $ do + controlSeq "begin" + name <- braced + M.findWithDefault mzero name inlineEnvironments rawEnv :: String -> LP Blocks rawEnv name = do @@ -807,15 +850,11 @@ rawEnv name = do ---- -type IncludeParser = ParserT [Char] [String] IO String +type IncludeParser = ParserT String [String] IO String -- | Replace "include" commands with file contents. -handleIncludes :: String -> IO String -handleIncludes s = do - res <- runParserT includeParser' [] "input" s - case res of - Right s' -> return s' - Left e -> error $ show e +handleIncludes :: String -> IO (Either PandocError String) +handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s includeParser' :: IncludeParser includeParser' = @@ -857,6 +896,12 @@ backslash' = string "\\" braced' :: IncludeParser braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}') +maybeAddExtension :: String -> FilePath -> FilePath +maybeAddExtension ext fp = + if null (takeExtension fp) + then addExtension fp ext + else fp + include' :: IncludeParser include' = do fs' <- try $ do @@ -865,11 +910,11 @@ include' = do <|> try (string "input") <|> string "usepackage" -- skip options - skipMany $ try $ char '[' *> (manyTill anyChar (char ']')) + skipMany $ try $ char '[' *> manyTill anyChar (char ']') fs <- (map trim . splitBy (==',')) <$> braced' return $ if name == "usepackage" - then map (flip replaceExtension ".sty") fs - else map (flip replaceExtension ".tex") fs + then map (maybeAddExtension ".sty") fs + else map (maybeAddExtension ".tex") fs pos <- getPosition containers <- getState let fn = case containers of @@ -938,14 +983,14 @@ keyvals = try $ char '[' *> manyTill keyval (char ']') alltt :: String -> LP Blocks alltt t = walk strToCode <$> parseFromString blocks (substitute " " "\\ " $ substitute "%" "\\%" $ - concat $ intersperse "\\\\\n" $ lines t) + intercalate "\\\\\n" $ lines t) where strToCode (Str s) = Code nullAttr s strToCode x = x -rawLaTeXBlock :: Parser [Char] ParserState String +rawLaTeXBlock :: LP String rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) -rawLaTeXInline :: Parser [Char] ParserState Inline +rawLaTeXInline :: LP Inline rawLaTeXInline = do raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand) RawInline "latex" <$> applyMacros' raw @@ -954,41 +999,43 @@ addImageCaption :: Blocks -> LP Blocks addImageCaption = walkM go where go (Image alt (src,tit)) = do mbcapt <- stateCaption <$> getState - case mbcapt of - Just ils -> return (Image (toList ils) (src, "fig:")) - Nothing -> return (Image alt (src,tit)) + return $ case mbcapt of + Just ils -> Image (toList ils) (src, "fig:") + Nothing -> Image alt (src,tit) go x = return x addTableCaption :: Blocks -> LP Blocks addTableCaption = walkM go where go (Table c als ws hs rs) = do mbcapt <- stateCaption <$> getState - case mbcapt of - Just ils -> return (Table (toList ils) als ws hs rs) - Nothing -> return (Table c als ws hs rs) + return $ case mbcapt of + Just ils -> Table (toList ils) als ws hs rs + Nothing -> Table c als ws hs rs go x = return x environments :: M.Map String (LP Blocks) environments = M.fromList [ ("document", env "document" blocks <* skipMany anyChar) - , ("letter", env "letter" letter_contents) + , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) + , ("letter", env "letter" letterContents) , ("figure", env "figure" $ resetCaption *> skipopts *> blocks >>= addImageCaption) , ("center", env "center" blocks) , ("table", env "table" $ resetCaption *> skipopts *> blocks >>= addTableCaption) - , ("tabular", env "tabular" simpTable) + , ("tabular*", env "tabular" $ simpTable True) + , ("tabular", env "tabular" $ simpTable False) , ("quote", blockQuote <$> env "quote" blocks) , ("quotation", blockQuote <$> env "quotation" blocks) , ("verse", blockQuote <$> env "verse" blocks) , ("itemize", bulletList <$> listenv "itemize" (many item)) , ("description", definitionList <$> listenv "description" (many descItem)) - , ("enumerate", ordered_list) + , ("enumerate", orderedList') , ("alltt", alltt =<< verbEnv "alltt") , ("code", guardEnabled Ext_literate_haskell *> (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code")) - , ("verbatim", codeBlock <$> (verbEnv "verbatim")) + , ("verbatim", codeBlock <$> verbEnv "verbatim") , ("Verbatim", do options <- option [] keyvals let kvs = [ (if k == "firstnumber" then "startFrom" @@ -996,17 +1043,17 @@ environments = M.fromList let classes = [ "numberLines" | lookup "numbers" options == Just "left" ] let attr = ("",classes,kvs) - codeBlockWith attr <$> (verbEnv "Verbatim")) + codeBlockWith attr <$> verbEnv "Verbatim") , ("lstlisting", do options <- option [] keyvals let kvs = [ (if k == "firstnumber" then "startFrom" else k, v) | (k,v) <- options ] let classes = [ "numberLines" | lookup "numbers" options == Just "left" ] - ++ maybe [] (:[]) (lookup "language" options + ++ maybeToList (lookup "language" options >>= fromListingsLanguage) let attr = (fromMaybe "" (lookup "label" options),classes,kvs) - codeBlockWith attr <$> (verbEnv "lstlisting")) + codeBlockWith attr <$> verbEnv "lstlisting") , ("minted", do options <- option [] keyvals lang <- grouped (many1 $ satisfy (/='}')) let kvs = [ (if k == "firstnumber" @@ -1016,27 +1063,27 @@ environments = M.fromList [ "numberLines" | lookup "linenos" options == Just "true" ] let attr = ("",classes,kvs) - codeBlockWith attr <$> (verbEnv "minted")) + codeBlockWith attr <$> verbEnv "minted") , ("obeylines", parseFromString (para . trimInlines . mconcat <$> many inline) =<< intercalate "\\\\\n" . lines <$> verbEnv "obeylines") - , ("displaymath", mathEnv Nothing "displaymath") - , ("equation", mathEnv Nothing "equation") - , ("equation*", mathEnv Nothing "equation*") - , ("gather", mathEnv (Just "gathered") "gather") - , ("gather*", mathEnv (Just "gathered") "gather*") - , ("multline", mathEnv (Just "gathered") "multline") - , ("multline*", mathEnv (Just "gathered") "multline*") - , ("eqnarray", mathEnv (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnv (Just "aligned") "eqnarray*") - , ("align", mathEnv (Just "aligned") "align") - , ("align*", mathEnv (Just "aligned") "align*") - , ("alignat", mathEnv (Just "aligned") "alignat") - , ("alignat*", mathEnv (Just "aligned") "alignat*") + , ("displaymath", mathEnv para Nothing "displaymath") + , ("equation", mathEnv para Nothing "equation") + , ("equation*", mathEnv para Nothing "equation*") + , ("gather", mathEnv para (Just "gathered") "gather") + , ("gather*", mathEnv para (Just "gathered") "gather*") + , ("multline", mathEnv para (Just "gathered") "multline") + , ("multline*", mathEnv para (Just "gathered") "multline*") + , ("eqnarray", mathEnv para (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*") + , ("align", mathEnv para (Just "aligned") "align") + , ("align*", mathEnv para (Just "aligned") "align*") + , ("alignat", mathEnv para (Just "aligned") "alignat") + , ("alignat*", mathEnv para (Just "aligned") "alignat*") ] -letter_contents :: LP Blocks -letter_contents = do +letterContents :: LP Blocks +letterContents = do bs <- blocks st <- getState -- add signature (author) and address (title) @@ -1063,8 +1110,8 @@ closing = do item :: LP Blocks item = blocks *> controlSeq "item" *> skipopts *> blocks -loose_item :: LP Blocks -loose_item = do +looseItem :: LP Blocks +looseItem = do ctx <- stateParserContext `fmap` getState if ctx == ListItemState then mzero @@ -1092,8 +1139,8 @@ listenv name p = try $ do updateState $ \st -> st{ stateParserContext = oldCtx } return res -mathEnv :: Maybe String -> String -> LP Blocks -mathEnv innerEnv name = para <$> mathDisplay (inner <$> verbEnv name) +mathEnv :: (Inlines -> a) -> Maybe String -> String -> LP a +mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name) where inner x = case innerEnv of Nothing -> x Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ @@ -1107,8 +1154,8 @@ verbEnv name = do res <- manyTill anyChar endEnv return $ stripTrailingNewlines res -ordered_list :: LP Blocks -ordered_list = do +orderedList' :: LP Blocks +orderedList' = do optional sp (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ try $ char '[' *> anyOrderedListMarker <* char ']' @@ -1120,7 +1167,7 @@ ordered_list = do optional sp num <- grouped (many1 digit) spaces - return $ (read num + 1 :: Int) + return (read num + 1 :: Int) bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs @@ -1134,14 +1181,14 @@ paragraph = do preamble :: LP Blocks preamble = mempty <$> manyTill preambleBlock beginDoc where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" - preambleBlock = (void comment) - <|> (void sp) - <|> (void blanklines) - <|> (void macro) - <|> (void blockCommand) - <|> (void anyControlSeq) - <|> (void braced) - <|> (void anyChar) + preambleBlock = void comment + <|> void sp + <|> void blanklines + <|> void macro + <|> void blockCommand + <|> void anyControlSeq + <|> void braced + <|> void anyChar ------- @@ -1183,7 +1230,7 @@ citationLabel = optional sp *> <* optional sp <* optional (char ',') <* optional sp) - where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*" + where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*" :: String) cites :: CitationMode -> Bool -> LP [Citation] cites mode multi = try $ do @@ -1217,7 +1264,7 @@ complexNatbibCitation mode = try $ do suff <- ils skipSpaces optional $ char ';' - return $ addPrefix pref $ addSuffix suff $ cits' + return $ addPrefix pref $ addSuffix suff cits' (c:cits, raw) <- withRaw $ grouped parseOne return $ cite (c{ citationMode = mode }:cits) (rawInline "latex" $ "\\citetext" ++ raw) @@ -1227,7 +1274,7 @@ complexNatbibCitation mode = try $ do parseAligns :: LP [Alignment] parseAligns = try $ do char '{' - let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ try (string "@{}") + let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) maybeBar let cAlign = AlignCenter <$ char 'c' let lAlign = AlignLeft <$ char 'l' @@ -1241,13 +1288,22 @@ parseAligns = try $ do return aligns' hline :: LP () -hline = () <$ (try $ spaces >> controlSeq "hline") +hline = try $ do + spaces' + controlSeq "hline" <|> + -- booktabs rules: + controlSeq "toprule" <|> + controlSeq "bottomrule" <|> + controlSeq "midrule" + spaces' + optional $ bracketed (many1 (satisfy (/=']'))) + return () lbreak :: LP () -lbreak = () <$ (try $ spaces *> controlSeq "\\") +lbreak = () <$ try (spaces' *> controlSeq "\\" <* spaces') amp :: LP () -amp = () <$ (try $ spaces *> char '&') +amp = () <$ try (spaces' *> char '&') parseTableRow :: Int -- ^ number of columns -> LP [Blocks] @@ -1260,19 +1316,22 @@ parseTableRow cols = try $ do guard $ cells' /= [mempty] -- note: a & b in a three-column table leaves an empty 3rd cell: let cells'' = cells' ++ replicate (cols - numcells) mempty - spaces + spaces' return cells'' -simpTable :: LP Blocks -simpTable = try $ do - spaces +spaces' :: LP () +spaces' = spaces *> skipMany (comment *> spaces) + +simpTable :: Bool -> LP Blocks +simpTable hasWidthParameter = try $ do + when hasWidthParameter $ () <$ (spaces' >> tok) + skipopts aligns <- parseAligns let cols = length aligns optional hline header' <- option [] $ try (parseTableRow cols <* lbreak <* hline) rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline) - spaces - skipMany (comment *> spaces) + spaces' let header'' = if null header' then replicate cols mempty else header' diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 25a303f52..ae81ae7fc 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,6 +1,7 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -35,7 +36,7 @@ import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate ) import qualified Data.Map as M import Data.Scientific (coefficient, base10Exponent) import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum, toLower ) +import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition import qualified Data.Text as T @@ -55,7 +56,7 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) import Data.Monoid (mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>)) import Control.Monad import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup @@ -63,13 +64,14 @@ import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Error type MarkdownParser = Parser [Char] ParserState -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readMarkdown opts s = (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") @@ -77,13 +79,9 @@ readMarkdown opts s = -- and a list of warnings. readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> (Pandoc, [String]) + -> Either PandocError (Pandoc, [String]) readMarkdownWithWarnings opts s = - (readWith parseMarkdownWithWarnings) def{ stateOptions = opts } (s ++ "\n\n") - where parseMarkdownWithWarnings = do - doc <- parseMarkdown - warnings <- stateWarnings <$> getState - return (doc, warnings) + (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") trimInlinesF :: F Inlines -> F Inlines trimInlinesF = liftM trimInlines @@ -117,6 +115,12 @@ isBlank _ = False -- auxiliary functions -- +-- | Succeeds when we're in list context. +inList :: MarkdownParser () +inList = do + ctx <- stateParserContext <$> getState + guard (ctx == ListItemState) + isNull :: F Inlines -> Bool isNull ils = B.isNull $ runF ils def @@ -161,19 +165,23 @@ litChar = escapedChar' -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. inlinesInBalancedBrackets :: MarkdownParser (F Inlines) -inlinesInBalancedBrackets = charsInBalancedBrackets >>= - parseFromString (trimInlinesF . mconcat <$> many inline) - -charsInBalancedBrackets :: MarkdownParser [Char] -charsInBalancedBrackets = do +inlinesInBalancedBrackets = do char '[' - result <- manyTill ( many1 (noneOf "`[]\n") - <|> (snd <$> withRaw code) - <|> ((\xs -> '[' : xs ++ "]") <$> charsInBalancedBrackets) - <|> count 1 (satisfy (/='\n')) - <|> (newline >> notFollowedBy blankline >> return "\n") - ) (char ']') - return $ concat result + (_, raw) <- withRaw $ charsInBalancedBrackets 1 + guard $ not $ null raw + parseFromString (trimInlinesF . mconcat <$> many inline) (init raw) + +charsInBalancedBrackets :: Int -> MarkdownParser () +charsInBalancedBrackets 0 = return () +charsInBalancedBrackets openBrackets = + (char '[' >> charsInBalancedBrackets (openBrackets + 1)) + <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1)) + <|> (( (() <$ code) + <|> (() <$ (escapedChar')) + <|> (newline >> notFollowedBy blankline) + <|> skipMany1 (noneOf "[]`\n\\") + <|> (() <$ count 1 (oneOf "`\\")) + ) >> charsInBalancedBrackets openBrackets) -- -- document structure @@ -243,8 +251,9 @@ yamlMetaBlock = try $ do H.foldrWithKey (\k v m -> if ignorable k then m - else B.setMeta (T.unpack k) - (yamlToMeta opts v) m) + else case yamlToMeta opts v of + Left _ -> m + Right v' -> B.setMeta (T.unpack k) v' m) nullMeta hashmap Right Yaml.Null -> return $ return nullMeta Right _ -> do @@ -276,33 +285,42 @@ yamlMetaBlock = try $ do ignorable :: Text -> Bool ignorable t = (T.pack "_") `T.isSuffixOf` t -toMetaValue :: ReaderOptions -> Text -> MetaValue -toMetaValue opts x = - case readMarkdown opts (T.unpack x) of - Pandoc _ [Plain xs] -> MetaInlines xs - Pandoc _ [Para xs] +toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue +toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) + where + toMeta p = + case p of + Pandoc _ [Plain xs] -> MetaInlines xs + Pandoc _ [Para xs] | endsWithNewline x -> MetaBlocks [Para xs] | otherwise -> MetaInlines xs - Pandoc _ bs -> MetaBlocks bs - where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t - -yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue + Pandoc _ bs -> MetaBlocks bs + endsWithNewline t = T.pack "\n" `T.isSuffixOf` t + opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts} + meta_exts = Set.fromList [ Ext_pandoc_title_block + , Ext_mmd_title_block + , Ext_yaml_metadata_block + ] + +yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t yamlToMeta _ (Yaml.Number n) -- avoid decimal points for numbers that don't need them: - | base10Exponent n >= 0 = MetaString $ show + | base10Exponent n >= 0 = return $ MetaString $ show $ coefficient n * (10 ^ base10Exponent n) - | otherwise = MetaString $ show n -yamlToMeta _ (Yaml.Bool b) = MetaBool b -yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts) - $ V.toList xs -yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m -> + | otherwise = return $ MetaString $ show n +yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b +yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts) + (V.toList xs) +yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m -> if ignorable k then m - else M.insert (T.unpack k) - (yamlToMeta opts v) m) - M.empty o -yamlToMeta _ _ = MetaString "" + else (do + v' <- yamlToMeta opts v + m' <- m + return (M.insert (T.unpack k) v' m'))) + (return M.empty) o +yamlToMeta _ _ = return $ MetaString "" stopLine :: MarkdownParser () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () @@ -318,11 +336,15 @@ mmdTitleBlock = try $ do kvPair :: MarkdownParser (String, MetaValue) kvPair = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') + skipMany1 spaceNoNewline val <- manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) + guard $ not . null . trim $ val let key' = concat $ words $ map toLower key let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val return (key',val') + where + spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r')) parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do @@ -337,12 +359,6 @@ parseMarkdown = do let Pandoc _ bs = B.doc $ runF blocks st return $ Pandoc meta bs -addWarning :: Maybe SourcePos -> String -> MarkdownParser () -addWarning mbpos msg = - updateState $ \st -> st{ - stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : - stateWarnings st } - referenceKey :: MarkdownParser (F Blocks) referenceKey = try $ do pos <- getPosition @@ -458,12 +474,12 @@ block = do , bulletList , header , lhsCodeBlock - , rawTeXBlock , divHtml , htmlBlock , table - , lineBlock , codeBlockIndented + , rawTeXBlock + , lineBlock , blockQuote , hrule , orderedList @@ -499,9 +515,12 @@ atxHeader = try $ do notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces - text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) + (text, raw) <- withRaw $ + trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr' <- registerHeader attr (runF text defaultParserState) + attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + guardDisabled Ext_implicit_header_references + <|> registerImplicitHeader raw ident return $ B.headerWith attr' level <$> text atxClosing :: MarkdownParser Attr @@ -534,15 +553,25 @@ setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) + skipSpaces + (text, raw) <- withRaw $ + trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) attr <- setextHeaderEnd underlineChar <- oneOf setextHChars many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr' <- registerHeader attr (runF text defaultParserState) + attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + guardDisabled Ext_implicit_header_references + <|> registerImplicitHeader raw ident return $ B.headerWith attr' level <$> text +registerImplicitHeader :: String -> String -> MarkdownParser () +registerImplicitHeader raw ident = do + let key = toKey $ "[" ++ raw ++ "]" + updateState (\s -> s { stateHeaderKeys = + M.insert key ('#':ident,"") (stateHeaderKeys s) }) + -- -- hrule block -- @@ -741,9 +770,9 @@ anyOrderedListStart = try $ do skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number res <- do guardDisabled Ext_fancy_lists - many1 digit + start <- many1 digit >>= safeRead char '.' - return (1, DefaultStyle, DefaultDelim) + return (start, DefaultStyle, DefaultDelim) <|> do (num, style, delim) <- anyOrderedListMarker -- if it could be an abbreviated first name, -- insist on more than one space @@ -865,7 +894,7 @@ defListMarker = do tabStop <- getOption readerTabStop let remaining = tabStop - (length sps + 1) if remaining > 0 - then count remaining (char ' ') <|> string "\t" + then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar else mzero return () @@ -874,7 +903,7 @@ definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine' - contents <- mapM (parseFromString parseBlocks) raw + contents <- mapM (parseFromString parseBlocks . (++"\n")) raw optional blanklines return $ liftM2 (,) term (sequence contents) @@ -885,6 +914,7 @@ defRawBlock compact = try $ do firstline <- anyLine let dline = try ( do notFollowedBy blankline + notFollowedByHtmlCloser if compact -- laziness not compatible with compact then () <$ indentSpaces else (() <$ indentSpaces) @@ -901,7 +931,10 @@ defRawBlock compact = try $ do definitionList :: MarkdownParser (F Blocks) definitionList = try $ do - lookAhead (anyLine >> optional blankline >> defListMarker) + lookAhead (anyLine >> + optional (blankline >> notFollowedBy (table >> return ())) >> + -- don't capture table caption as def list! + defListMarker) compactDefinitionList <|> normalDefinitionList compactDefinitionList :: MarkdownParser (F Blocks) @@ -932,6 +965,8 @@ para = try $ do <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced) <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header) <|> (guardEnabled Ext_lists_without_preceding_blankline >> + -- Avoid creating a paragraph in a nested list. + notFollowedBy' inList >> () <$ lookAhead listStart) <|> do guardEnabled Ext_native_divs inHtmlBlock <- stateInHtmlBlock <$> getState @@ -1062,7 +1097,9 @@ dashedLine :: Char dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar - return $ (length dashes, length $ dashes ++ sp) + let lengthDashes = length dashes + lengthSp = length sp + return (lengthDashes, lengthDashes + lengthSp) -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. @@ -1216,7 +1253,8 @@ gridPart :: Char -> Parser [Char] st (Int, Int) gridPart ch = do dashes <- many1 (char ch) char '+' - return (length dashes, length dashes + 1) + let lengthDashes = length dashes + return (lengthDashes, lengthDashes + 1) gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline @@ -1295,12 +1333,8 @@ pipeBreak = try $ do pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do - (heads,aligns) <- try ( pipeBreak >>= \als -> - return (return $ replicate (length als) mempty, als)) - <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> - - return (row, als) ) - lines' <- sequence <$> many1 pipeTableRow + (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak + lines' <- sequence <$> many pipeTableRow let widths = replicate (length aligns) 0.0 return $ (aligns, widths, heads, lines') @@ -1482,7 +1516,9 @@ code = try $ do math :: MarkdownParser (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) + <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> + ((getOption readerSmart >>= guard) *> (return <$> apostrophe) + <* notFollowedBy space) -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. @@ -1616,8 +1652,7 @@ endline = try $ do newline notFollowedBy blankline -- parse potential list-starts differently if in a list: - st <- getState - when (stateParserContext st == ListItemState) $ notFollowedBy listStart + notFollowedBy (inList >> listStart) guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header @@ -1684,9 +1719,11 @@ referenceLink :: (String -> String -> Inlines -> Inlines) -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False - (ref,raw') <- try - (skipSpaces >> optional (newline >> skipSpaces) >> reference) - <|> return (mempty, "") + (_,raw') <- option (mempty, "") $ + lookAhead (try (spnl >> normalCite >> return (mempty, ""))) + <|> + try (spnl >> reference) + when (raw' == "") $ guardEnabled Ext_shortcut_reference_links let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' parsedRaw <- parseFromString (mconcat <$> many inline) raw' @@ -1702,13 +1739,13 @@ referenceLink constructor (lab, raw) = do return $ do keys <- asksF stateKeys case M.lookup key keys of - Nothing -> do - headers <- asksF stateHeaders - ref' <- if labIsRef then lab else ref + Nothing -> if implicitHeaderRefs - then case M.lookup ref' headers of - Just ident -> constructor ('#':ident) "" <$> lab - Nothing -> makeFallback + then do + headerKeys <- asksF stateHeaderKeys + case M.lookup key headerKeys of + Just (src, tit) -> constructor src tit <$> lab + Nothing -> makeFallback else makeFallback Just (src,tit) -> constructor src tit <$> lab @@ -1722,12 +1759,14 @@ dropBrackets = reverse . dropRB . reverse . dropLB bareURL :: MarkdownParser (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris + getState >>= guard . stateAllowLinks (orig, src) <- uri <|> emailAddress notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") return $ return $ B.link src "" (B.str orig) autoLink :: MarkdownParser (F Inlines) autoLink = try $ do + getState >>= guard . stateAllowLinks char '<' (orig, src) <- uri <|> emailAddress -- in rare cases, something may remain after the uri parser @@ -1874,8 +1913,20 @@ textualCite = try $ do return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) <$> rest Nothing -> - (do (cs, raw) <- withRaw $ bareloc first - return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs) + (do + (cs, raw) <- withRaw $ bareloc first + let (spaces',raw') = span isSpace raw + spc | null spaces' = mempty + | otherwise = B.space + lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw' + fallback <- referenceLink B.link (lab,raw') + return $ do + fallback' <- fallback + cs' <- cs + return $ + case B.toList fallback' of + Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback' + _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw)) <|> return (do st <- askF return $ case M.lookup key (stateExamples st) of Just n -> B.str (show n) @@ -1885,10 +1936,12 @@ bareloc :: Citation -> MarkdownParser (F [Citation]) bareloc c = try $ do spnl char '[' + notFollowedBy $ char '^' suff <- suffix rest <- option (return []) $ try $ char ';' >> citeList spnl char ']' + notFollowedBy $ oneOf "[(" return $ do suff' <- suff rest' <- rest diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index e43b8a86c..2a5adab22 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,7 +1,7 @@ {-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- - Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> + Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.MediaWiki - Copyright : Copyright (C) 2012-2014 John MacFarlane + Copyright : Copyright (C) 2012-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -58,21 +58,21 @@ import Data.Maybe (fromMaybe) import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Error + -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readMediaWiki opts s = - case runParser parseMediaWiki MWState{ mwOptions = opts + readWith parseMediaWiki MWState{ mwOptions = opts , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 , mwCategoryLinks = [] , mwHeaderMap = M.empty , mwIdentifierList = [] } - "source" (s ++ "\n") of - Left err' -> error $ "\nError:\n" ++ show err' - Right result -> result + (s ++ "\n") data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int @@ -593,11 +593,17 @@ imageOption = <|> try (many1 (oneOf "x0123456789") <* string "px") <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) +collapseUnderscores :: String -> String +collapseUnderscores [] = [] +collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) +collapseUnderscores (x:xs) = x : collapseUnderscores xs + +addUnderscores :: String -> String +addUnderscores = collapseUnderscores . intercalate "_" . words + internalLink :: MWParser Inlines internalLink = try $ do sym "[[" - let addUnderscores x = let (pref,suff) = break (=='#') x - in pref ++ intercalate "_" (words suff) pagename <- unwords . words <$> many (noneOf "|]") label <- option (B.text pagename) $ char '|' *> ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index f4dfa62c1..94ea9e3a2 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,9 +1,9 @@ {- -Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; Either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Native - Copyright : Copyright (C) 2011-2014 John MacFarlane + Copyright : Copyright (C) 2011-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -33,6 +33,9 @@ module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Error +import Control.Applicative + -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, -- an inline list, or an inline. Thus, for example, @@ -44,33 +47,18 @@ import Text.Pandoc.Shared (safeRead) -- > Pandoc nullMeta [Plain [Str "hi"]] -- readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readNative s = - case safeRead s of - Just d -> d - Nothing -> Pandoc nullMeta $ readBlocks s + -> Either PandocError Pandoc +readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) -readBlocks :: String -> [Block] -readBlocks s = - case safeRead s of - Just d -> d - Nothing -> [readBlock s] +readBlocks :: String -> Either PandocError [Block] +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) -readBlock :: String -> Block -readBlock s = - case safeRead s of - Just d -> d - Nothing -> Plain $ readInlines s +readBlock :: String -> Either PandocError Block +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) -readInlines :: String -> [Inline] -readInlines s = - case safeRead s of - Just d -> d - Nothing -> [readInline s] +readInlines :: String -> Either PandocError [Inline] +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) -readInline :: String -> Inline -readInline s = - case safeRead s of - Just d -> d - Nothing -> error "Cannot parse document" +readInline :: String -> Either PandocError Inline +readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 35d01e877..19ddba36b 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE FlexibleContexts #-} module Text.Pandoc.Readers.OPML ( readOPML ) where import Data.Char (toUpper) import Text.Pandoc.Options @@ -11,8 +12,11 @@ import Data.Generics import Data.Monoid import Control.Monad.State import Control.Applicative ((<$>), (<$)) +import Data.Default +import Text.Pandoc.Compat.Except +import Text.Pandoc.Error -type OPML = State OPMLState +type OPML = ExceptT PandocError (State OPMLState) data OPMLState = OPMLState{ opmlSectionLevel :: Int @@ -21,17 +25,19 @@ data OPMLState = OPMLState{ , opmlDocDate :: Inlines } deriving Show -readOPML :: ReaderOptions -> String -> Pandoc +instance Default OPMLState where + def = OPMLState{ opmlSectionLevel = 0 + , opmlDocTitle = mempty + , opmlDocAuthors = [] + , opmlDocDate = mempty + } + +readOPML :: ReaderOptions -> String -> Either PandocError Pandoc readOPML _ inp = setTitle (opmlDocTitle st') - $ setAuthors (opmlDocAuthors st') - $ setDate (opmlDocDate st') - $ doc $ mconcat bs - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp) - OPMLState{ opmlSectionLevel = 0 - , opmlDocTitle = mempty - , opmlDocAuthors = [] - , opmlDocDate = mempty - } + . setAuthors (opmlDocAuthors st') + . setDate (opmlDocDate st') + . doc . mconcat <$> bs + where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp) -- normalize input, consolidating adjacent Text and CRef elements normalizeTree :: [Content] -> [Content] @@ -58,14 +64,16 @@ attrValue attr elt = Just z -> z Nothing -> "" -asHtml :: String -> Inlines -asHtml s = case readHtml def s of - Pandoc _ [Plain ils] -> fromList ils - _ -> mempty +exceptT :: Either PandocError a -> OPML a +exceptT = either throwError return + +asHtml :: String -> OPML Inlines +asHtml s = (\(Pandoc _ bs) -> case bs of + [Plain ils] -> fromList ils + _ -> mempty) <$> exceptT (readHtml def s) -asMarkdown :: String -> Blocks -asMarkdown s = fromList bs - where Pandoc _ bs = readMarkdown def s +asMarkdown :: String -> OPML Blocks +asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s) getBlocks :: Element -> OPML Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -82,8 +90,8 @@ parseBlock (Elem e) = "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e - where sect n = do let headerText = asHtml $ attrValue "text" e - let noteBlocks = asMarkdown $ attrValue "_note" e + where sect n = do headerText <- asHtml $ attrValue "text" e + noteBlocks <- asMarkdown $ attrValue "_note" e modify $ \st -> st{ opmlSectionLevel = n } bs <- getBlocks e modify $ \st -> st{ opmlSectionLevel = n - 1 } diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs new file mode 100644 index 000000000..1c8ec51bc --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -0,0 +1,86 @@ +{-# LANGUAGE PatternGuards #-} + +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Reader.Odt + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +Entry point to the odt reader. +-} + +module Text.Pandoc.Readers.Odt ( readOdt ) where + +import Codec.Archive.Zip +import qualified Text.XML.Light as XML + +import qualified Data.ByteString.Lazy as B +import Data.Monoid ( mempty ) + +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Options +import Text.Pandoc.MediaBag +import qualified Text.Pandoc.UTF8 as UTF8 + +import Text.Pandoc.Readers.Odt.ContentReader +import Text.Pandoc.Readers.Odt.StyleReader + +import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.Odt.Generic.Fallible + +-- +readOdt :: ReaderOptions + -> B.ByteString + -> Either PandocError (Pandoc, MediaBag) +readOdt _ bytes = case bytesToOdt bytes of + Right pandoc -> Right (pandoc , mempty) + Left err -> Left err + +-- +bytesToOdt :: B.ByteString -> Either PandocError Pandoc +bytesToOdt bytes = archiveToOdt $ toArchive bytes + +-- +archiveToOdt :: Archive -> Either PandocError Pandoc +archiveToOdt archive + | Just contentEntry <- findEntryByPath "content.xml" archive + , Just stylesEntry <- findEntryByPath "styles.xml" archive + , Just contentElem <- entryToXmlElem contentEntry + , Just stylesElem <- entryToXmlElem stylesEntry + , Right styles <- chooseMax (readStylesAt stylesElem ) + (readStylesAt contentElem) + , startState <- readerState styles + , Right pandoc <- runConverter' read_body + startState + contentElem + = Right pandoc + + | otherwise + -- Not very detailed, but I don't think more information would be helpful + = Left $ ParseFailure "Couldn't parse odt file." + +-- +entryToXmlElem :: Entry -> Maybe XML.Element +entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs new file mode 100644 index 000000000..310ca028e --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -0,0 +1,253 @@ +{-# LANGUAGE Arrows #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleInstances #-} +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Arrows.State + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +An arrow that transports a state. It is in essence a more powerful version of +the standard state monad. As it is such a simple extension, there are +other version out there that do exactly the same. +The implementation is duplicated, though, to add some useful features. +Most of these might be implemented without access to innards, but it's much +faster and easier to implement this way. +-} + +module Text.Pandoc.Readers.Odt.Arrows.State where + +import Prelude hiding ( foldr, foldl ) + +import qualified Control.Category as Cat +import Control.Arrow +import Control.Monad + +import Data.Monoid +import Data.Foldable + +import Text.Pandoc.Readers.Odt.Arrows.Utils +import Text.Pandoc.Readers.Odt.Generic.Fallible + + +newtype ArrowState state a b = ArrowState + { runArrowState :: (state, a) -> (state, b) } + +-- | Constructor +withState :: (state -> a -> (state, b)) -> ArrowState state a b +withState = ArrowState . uncurry + +-- | Constructor +withState' :: ((state, a) -> (state, b)) -> ArrowState state a b +withState' = ArrowState + +-- | Constructor +modifyState :: (state -> state ) -> ArrowState state a a +modifyState = ArrowState . first + +-- | Constructor +ignoringState :: ( a -> b ) -> ArrowState state a b +ignoringState = ArrowState . second + +-- | Constructor +fromState :: (state -> (state, b)) -> ArrowState state a b +fromState = ArrowState . (.fst) + +-- | Constructor +extractFromState :: (state -> b ) -> ArrowState state x b +extractFromState f = ArrowState $ \(state,_) -> (state, f state) + +-- | Constructor +withUnchangedState :: (state -> a -> b ) -> ArrowState state a b +withUnchangedState f = ArrowState $ \(state,a) -> (state, f state a) + +-- | Constructor +tryModifyState :: (state -> Either f state) + -> ArrowState state a (Either f a) +tryModifyState f = ArrowState $ \(state,a) + -> (state,).Left ||| (,Right a) $ f state + +instance Cat.Category (ArrowState s) where + id = ArrowState id + arrow2 . arrow1 = ArrowState $ (runArrowState arrow2).(runArrowState arrow1) + +instance Arrow (ArrowState state) where + arr = ignoringState + first a = ArrowState $ \(s,(aF,aS)) + -> second (,aS) $ runArrowState a (s,aF) + second a = ArrowState $ \(s,(aF,aS)) + -> second (aF,) $ runArrowState a (s,aS) + +instance ArrowChoice (ArrowState state) where + left a = ArrowState $ \(s,e) -> case e of + Left l -> second Left $ runArrowState a (s,l) + Right r -> (s, Right r) + right a = ArrowState $ \(s,e) -> case e of + Left l -> (s, Left l) + Right r -> second Right $ runArrowState a (s,r) + +instance ArrowLoop (ArrowState state) where + loop a = ArrowState $ \(s, x) + -> let (s', (x', _d)) = runArrowState a (s, (x, _d)) + in (s', x') + +instance ArrowApply (ArrowState state) where + app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b) + + +-- | Embedding of a state arrow in a state arrow with a different state type. +switchState :: (s -> s') -> (s' -> s) -> ArrowState s' x y -> ArrowState s x y +switchState there back a = ArrowState $ first there + >>> runArrowState a + >>> first back + +-- | Lift a state arrow to modify the state of an arrow +-- with a different state type. +liftToState :: (s -> s') -> ArrowState s' s s -> ArrowState s x x +liftToState unlift a = modifyState $ unlift &&& id + >>> runArrowState a + >>> snd + +-- | Switches the type of the state temporarily. +-- Drops the intermediate result state, behaving like the identity arrow, +-- save for side effects in the state. +withSubState :: ArrowState s x s2 -> ArrowState s2 s s -> ArrowState s x x +withSubState unlift a = keepingTheValue (withSubState unlift a) >>^ fst + +-- | Switches the type of the state temporarily. +-- Returns the resulting sub-state. +withSubState' :: ArrowState s x s' -> ArrowState s' s s -> ArrowState s x s' +withSubState' unlift a = ArrowState $ runArrowState unlift + >>> switch + >>> runArrowState a + >>> switch + where switch (x,y) = (y,x) + +-- | Switches the type of the state temporarily. +-- Drops the intermediate result state, behaving like a fallible +-- identity arrow, save for side effects in the state. +withSubStateF :: ArrowState s x (Either f s') + -> ArrowState s' s (Either f s ) + -> ArrowState s x (Either f x ) +withSubStateF unlift a = keepingTheValue (withSubStateF' unlift a) + >>^ spreadChoice + >>^ fmap fst + +-- | Switches the type of the state temporarily. +-- Returns the resulting sub-state. +withSubStateF' :: ArrowState s x (Either f s') + -> ArrowState s' s (Either f s ) + -> ArrowState s x (Either f s') +withSubStateF' unlift a = ArrowState go + where go p@(s,_) = tryRunning unlift + ( tryRunning a (second Right) ) + p + where tryRunning a' b v = case runArrowState a' v of + (_ , Left f) -> (s, Left f) + (x , Right y) -> b (y,x) + +-- | Fold a state arrow through something 'Foldable'. Collect the results +-- in a 'Monoid'. +-- Intermediate form of a fold between one with "only" a 'Monoid' +-- and one with any function. +foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m +foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f + where a' x (s',m) = second (m <>) $ runArrowState a (s',x) + +-- | Fold a state arrow through something 'Foldable'. Collect the results +-- in a 'Monoid'. +-- Intermediate form of a fold between one with "only" a 'Monoid' +-- and one with any function. +foldSL :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m +foldSL a = ArrowState $ \(s,f) -> foldl a' (s,mempty) f + where a' (s',m) x = second (m <>) $ runArrowState a (s',x) + +-- | Fold a fallible state arrow through something 'Foldable'. Collect the +-- results in a 'Monoid'. +-- Intermediate form of a fold between one with "only" a 'Monoid' +-- and one with any function. +-- If the iteration fails, the state will be reset to the initial one. +foldS' :: (Foldable f, Monoid m) + => ArrowState s x (Either e m) + -> ArrowState s (f x) (Either e m) +foldS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mempty) f + where a' s x (s',Right m) = case runArrowState a (s',x) of + (s'',Right m') -> (s'', Right (m <> m')) + (_ ,Left e ) -> (s , Left e) + a' _ _ e = e + +-- | Fold a fallible state arrow through something 'Foldable'. Collect the +-- results in a 'Monoid'. +-- Intermediate form of a fold between one with "only" a 'Monoid' +-- and one with any function. +-- If the iteration fails, the state will be reset to the initial one. +foldSL' :: (Foldable f, Monoid m) + => ArrowState s x (Either e m) + -> ArrowState s (f x) (Either e m) +foldSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mempty) f + where a' s (s',Right m) x = case runArrowState a (s',x) of + (s'',Right m') -> (s'', Right (m <> m')) + (_ ,Left e ) -> (s , Left e) + a' _ e _ = e + +-- | Fold a state arrow through something 'Foldable'. Collect the results in a +-- 'MonadPlus'. +iterateS :: (Foldable f, MonadPlus m) + => ArrowState s x y + -> ArrowState s (f x) (m y) +iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f + where a' x (s',m) = second ((mplus m).return) $ runArrowState a (s',x) + +-- | Fold a state arrow through something 'Foldable'. Collect the results in a +-- 'MonadPlus'. +iterateSL :: (Foldable f, MonadPlus m) + => ArrowState s x y + -> ArrowState s (f x) (m y) +iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f + where a' (s',m) x = second ((mplus m).return) $ runArrowState a (s',x) + + +-- | Fold a fallible state arrow through something 'Foldable'. +-- Collect the results in a 'MonadPlus'. +-- If the iteration fails, the state will be reset to the initial one. +iterateS' :: (Foldable f, MonadPlus m) + => ArrowState s x (Either e y ) + -> ArrowState s (f x) (Either e (m y)) +iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f + where a' s x (s',Right m) = case runArrowState a (s',x) of + (s'',Right m') -> (s'',Right $ mplus m $ return m') + (_ ,Left e ) -> (s ,Left e ) + a' _ _ e = e + +-- | Fold a fallible state arrow through something 'Foldable'. +-- Collect the results in a 'MonadPlus'. +-- If the iteration fails, the state will be reset to the initial one. +iterateSL' :: (Foldable f, MonadPlus m) + => ArrowState s x (Either e y ) + -> ArrowState s (f x) (Either e (m y)) +iterateSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mzero) f + where a' s (s',Right m) x = case runArrowState a (s',x) of + (s'',Right m') -> (s'',Right $ mplus m $ return m') + (_ ,Left e ) -> (s ,Left e ) + a' _ e _ = e diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs new file mode 100644 index 000000000..9710973b3 --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs @@ -0,0 +1,497 @@ +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Arrows.Utils + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +Utility functions for Arrows (Kleisli monads). + +Some general notes on notation: + +* "^" is meant to stand for a pure function that is lifted into an arrow +based on its usage for that purpose in "Control.Arrow". +* "?" is meant to stand for the usage of a 'FallibleArrow' or a pure function +with an equivalent return value. +* "_" stands for the dropping of a value. +-} + +-- We export everything +module Text.Pandoc.Readers.Odt.Arrows.Utils where + +import Control.Arrow +import Control.Monad ( join, MonadPlus(..) ) + +import Data.Monoid +import qualified Data.Foldable as F + +import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.Odt.Generic.Utils + + +and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c') +and2 = (&&&) + +and3 :: (Arrow a) + => a b c0->a b c1->a b c2 + -> a b (c0,c1,c2 ) +and4 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3 + -> a b (c0,c1,c2,c3 ) +and5 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3->a b c4 + -> a b (c0,c1,c2,c3,c4 ) +and6 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5 + -> a b (c0,c1,c2,c3,c4,c5 ) +and7 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6 + -> a b (c0,c1,c2,c3,c4,c5,c6 ) +and8 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6->a b c7 + -> a b (c0,c1,c2,c3,c4,c5,c6,c7) + +and3 a b c = (and2 a b ) &&& c + >>^ \((z,y ) , x) -> (z,y,x ) +and4 a b c d = (and3 a b c ) &&& d + >>^ \((z,y,x ) , w) -> (z,y,x,w ) +and5 a b c d e = (and4 a b c d ) &&& e + >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v ) +and6 a b c d e f = (and5 a b c d e ) &&& f + >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u ) +and7 a b c d e f g = (and6 a b c d e f ) &&& g + >>^ \((z,y,x,w,v,u ) , t) -> (z,y,x,w,v,u,t ) +and8 a b c d e f g h = (and7 a b c d e f g) &&& h + >>^ \((z,y,x,w,v,u,t) , s) -> (z,y,x,w,v,u,t,s) + +liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z +liftA2 f a b = a &&& b >>^ uncurry f + +liftA3 :: (Arrow a) => (z->y->x -> r) + -> a b z->a b y->a b x + -> a b r +liftA4 :: (Arrow a) => (z->y->x->w -> r) + -> a b z->a b y->a b x->a b w + -> a b r +liftA5 :: (Arrow a) => (z->y->x->w->v -> r) + -> a b z->a b y->a b x->a b w->a b v + -> a b r +liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r) + -> a b z->a b y->a b x->a b w->a b v->a b u + -> a b r +liftA7 :: (Arrow a) => (z->y->x->w->v->u->t -> r) + -> a b z->a b y->a b x->a b w->a b v->a b u->a b t + -> a b r +liftA8 :: (Arrow a) => (z->y->x->w->v->u->t->s -> r) + -> a b z->a b y->a b x->a b w->a b v->a b u->a b t->a b s + -> a b r + +liftA3 fun a b c = and3 a b c >>^ uncurry3 fun +liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun +liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun +liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun +liftA7 fun a b c d e f g = and7 a b c d e f g >>^ uncurry7 fun +liftA8 fun a b c d e f g h = and8 a b c d e f g h >>^ uncurry8 fun + +liftA :: (Arrow a) => (y -> z) -> a b y -> a b z +liftA fun a = a >>^ fun + + +-- | Duplicate a value to subsequently feed it into different arrows. +-- Can almost always be replaced with '(&&&)', 'keepingTheValue', +-- or even '(|||)'. +-- Aequivalent to +-- > returnA &&& returnA +duplicate :: (Arrow a) => a b (b,b) +duplicate = arr $ join (,) + +-- | Lifts the combination of two values into an arrow. +joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z +joinOn = arr.uncurry + +-- | Applies a function to the uncurried result-pair of an arrow-application. +-- (The §-symbol was chosen to evoke an association with pairs through the +-- shared first character) +(>>§) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d +a >>§ f = a >>^ uncurry f + +-- | '(>>§)' with its arguments flipped +(§<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d +(§<<) = flip (>>§) + +-- | Precomposition with an uncurried function +(§>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r +f §>> a = uncurry f ^>> a + +-- | Precomposition with an uncurried function (right to left variant) +(<<§) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r +(<<§) = flip (§>>) + +infixr 2 >>§, §<<, §>>, <<§ + + +-- | Duplicate a value and apply an arrow to the second instance. +-- Aequivalent to +-- > \a -> duplicate >>> second a +-- or +-- > \a -> returnA &&& a +keepingTheValue :: (Arrow a) => a b c -> a b (b,c) +keepingTheValue a = returnA &&& a + +-- | Duplicate a value and apply an arrow to the first instance. +-- Aequivalent to +-- > \a -> duplicate >>> first a +-- or +-- > \a -> a &&& returnA +keepingTheValue' :: (Arrow a) => a b c -> a b (c,b) +keepingTheValue' a = a &&& returnA + +-- | 'bind' from the "Maybe"-Monad lifted into an 'ArrowChoice'. +-- Actually, it's the more complex '(>=>)', because 'bind' alone does not +-- combine as nicely in arrow form. +-- The current implementation is not the most efficient one, because it can +-- not return directly if a 'Nothing' is encountered. That in turn follows +-- from the type system, as 'Nothing' has an "invisible" type parameter that +-- can not be dropped early. +-- +-- Also, there probably is a way to generalize this to other monads +-- or applicatives, but I'm leaving that as an exercise to the reader. +-- I have a feeling there is a new Arrow-typeclass to be found that is less +-- restrictive than 'ArrowApply'. If it is already out there, +-- I have not seen it yet. ('ArrowPlus' for example is not general enough.) +(>>>=) :: (ArrowChoice a) => a x (Maybe b) -> a b (Maybe c) -> a x (Maybe c) +a1 >>>= a2 = a1 >>> maybeToChoice >>> right a2 >>> choiceToMaybe >>^ join + +infixr 2 >>>= + +-- | 'mplus' Lifted into an arrow. No 'ArrowPlus' required. +-- (But still different from a true bind) +(>++<) :: (Arrow a, MonadPlus m) => a x (m b) -> a x (m b) -> a x (m b) +(>++<) = liftA2 mplus + +-- | Left-compose with a pure function +leftLift :: (ArrowChoice a) => (l -> l') -> a (Either l r) (Either l' r) +leftLift = left.arr + +-- | Right-compose with a pure function +rightLift :: (ArrowChoice a) => (r -> r') -> a (Either l r) (Either l r') +rightLift = right.arr + + +( ^+++ ) :: (ArrowChoice a) => (b -> c) -> a b' c' -> a (Either b b') (Either c c') +( +++^ ) :: (ArrowChoice a) => a b c -> (b' -> c') -> a (Either b b') (Either c c') +( ^+++^ ) :: (ArrowChoice a) => (b -> c) -> (b' -> c') -> a (Either b b') (Either c c') + +l ^+++ r = leftLift l >>> right r +l +++^ r = left l >>> rightLift r +l ^+++^ r = leftLift l >>> rightLift r + +infixr 2 ^+++, +++^, ^+++^ + +( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d +( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d +( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d + +l ^||| r = arr l ||| r +l |||^ r = l ||| arr r +l ^|||^ r = arr l ||| arr r + +infixr 2 ^||| , |||^, ^|||^ + +( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c') +( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c') +( ^&&&^ ) :: (Arrow a) => (b -> c) -> (b -> c') -> a b (c,c') + +l ^&&& r = arr l &&& r +l &&&^ r = l &&& arr r +l ^&&&^ r = arr l &&& arr r + +infixr 3 ^&&&, &&&^, ^&&&^ + +( ^*** ) :: (Arrow a) => (b -> c) -> a b' c' -> a (b,b') (c,c') +( ***^ ) :: (Arrow a) => a b c -> (b' -> c') -> a (b,b') (c,c') +( ^***^ ) :: (Arrow a) => (b -> c) -> (b' -> c') -> a (b,b') (c,c') + +l ^*** r = arr l *** r +l ***^ r = l *** arr r +l ^***^ r = arr l *** arr r + +infixr 3 ^***, ***^, ^***^ + +-- | A version of +-- +-- >>> \p -> arr (\x -> if p x the Right x else Left x) +-- +-- but with p being an arrow +choose :: (ArrowChoice a) => a b Bool -> a b (Either b b) +choose checkValue = keepingTheValue checkValue >>^ select + where select (x,True ) = Right x + select (x,False ) = Left x + +-- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@. +choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r) +choiceToMaybe = arr eitherToMaybe + +-- | Converts @Nothing@ into @Left ()@ and @Just a@ into @Right a@. +maybeToChoice :: (ArrowChoice a) => a (Maybe b) (Fallible b) +maybeToChoice = arr maybeToEither + +-- | Lifts a constant value into an arrow +returnV :: (Arrow a) => c -> a x c +returnV = arr.const + +-- | 'returnA' dropping everything +returnA_ :: (Arrow a) => a _b () +returnA_ = returnV () + +-- | Wrapper for an arrow that can be evaluated im parallel. All +-- Arrows can be evaluated in parallel, as long as they return a +-- monoid. +newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c } + deriving (Eq, Ord, Show) + +instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where + mempty = CoEval $ returnV mempty + (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>§ mappend + +-- | Evaluates a collection of arrows in a parallel fashion. +-- +-- This is in essence a fold of '(&&&)' over the collection, +-- so the actual execution order and parallelity depends on the +-- implementation of '(&&&)' in the arrow in question. +-- The default implementation of '(&&&)' for example keeps the +-- order as given in the collection. +-- +-- This function can be seen as a generalization of +-- 'Control.Applicative.sequenceA' to arrows or as an alternative to +-- a fold with 'Control.Applicative.WrappedArrow', which +-- substitutes the monoid with function application. +-- +coEval :: (Arrow a, F.Foldable f, Monoid m) => f (a b m) -> a b m +coEval = evalParallelArrow . (F.foldMap CoEval) + +-- | Defines Left as failure, Right as success +type FallibleArrow a input failure success = a input (Either failure success) + +type ReFallibleArrow a failure success success' + = FallibleArrow a (Either failure success) failure success' + +-- | Wrapper for fallible arrows. Fallible arrows are all arrows that return +-- an Either value where left is a faliure and right is a success value. +newtype AlternativeArrow a input failure success + = TryArrow { evalAlternativeArrow :: FallibleArrow a input failure success } + + +instance (ArrowChoice a, Monoid failure) + => Monoid (AlternativeArrow a input failure success) where + mempty = TryArrow $ returnV $ Left mempty + (TryArrow a) `mappend` (TryArrow b) + = TryArrow $ a &&& b + >>^ \(a',~b') + -> ( (\a'' -> left (mappend a'') b') ||| Right ) + a' + +-- | Evaluates a collection of fallible arrows, trying each one in succession. +-- Left values are interpreted as failures, right values as successes. +-- +-- The evaluation is stopped once an arrow succeeds. +-- Up to that point, all failures are collected in the failure-monoid. +-- Note that '()' is a monoid, and thus can serve as a failure-collector if +-- you are uninterested in the exact failures. +-- +-- This is in essence a fold of '(&&&)' over the collection, enhanced with a +-- little bit of repackaging, so the actual execution order depends on the +-- implementation of '(&&&)' in the arrow in question. +-- The default implementation of '(&&&)' for example keeps the +-- order as given in the collection. +-- +tryArrows :: (ArrowChoice a, F.Foldable f, Monoid failure) + => f (FallibleArrow a b failure success) + -> FallibleArrow a b failure success +tryArrows = evalAlternativeArrow . (F.foldMap TryArrow) + +-- +liftSuccess :: (ArrowChoice a) + => (success -> success') + -> ReFallibleArrow a failure success success' +liftSuccess = rightLift + +-- +liftAsSuccess :: (ArrowChoice a) + => a x success + -> FallibleArrow a x failure success +liftAsSuccess a = a >>^ Right + +-- +asFallibleArrow :: (ArrowChoice a) + => a x success + -> FallibleArrow a x failure success +asFallibleArrow a = a >>^ Right + +-- | Raises an error into a 'ReFallibleArrow' if the arrow is already in +-- "error mode" +liftError :: (ArrowChoice a, Monoid failure) + => failure + -> ReFallibleArrow a failure success success +liftError e = leftLift (e <>) + +-- | Raises an error into a 'FallibleArrow', droping both the arrow input +-- and any previously stored error value. +_raiseA :: (ArrowChoice a) + => failure + -> FallibleArrow a x failure success +_raiseA e = returnV (Left e) + +-- | Raises an empty error into a 'FallibleArrow', droping both the arrow input +-- and any previously stored error value. +_raiseAEmpty :: (ArrowChoice a, Monoid failure) + => FallibleArrow a x failure success +_raiseAEmpty = _raiseA mempty + +-- | Raises an error into a 'ReFallibleArrow', possibly appending the new error +-- to an existing one +raiseA :: (ArrowChoice a, Monoid failure) + => failure + -> ReFallibleArrow a failure success success +raiseA e = arr $ Left.(either (<> e) (const e)) + +-- | Raises an empty error into a 'ReFallibleArrow'. If there already is an +-- error, nothing changes. +-- (Note that this function is only aequivalent to @raiseA mempty@ iff the +-- failure monoid follows the monoid laws.) +raiseAEmpty :: (ArrowChoice a, Monoid failure) + => ReFallibleArrow a failure success success +raiseAEmpty = arr (fromRight (const mempty) >>> Left) + + +-- | Execute the second arrow if the first succeeds +(>>?) :: (ArrowChoice a, Monoid failure) + => FallibleArrow a x failure success + -> FallibleArrow a success failure success' + -> FallibleArrow a x failure success' +a >>? b = a >>> Left ^||| b + +-- | Execute the lifted second arrow if the first succeeds +(>>?^) :: (ArrowChoice a, Monoid failure) + => FallibleArrow a x failure success + -> (success -> success') + -> FallibleArrow a x failure success' +a >>?^ f = a >>^ Left ^|||^ Right . f + +-- | Execute the lifted second arrow if the first succeeds +(>>?^?) :: (ArrowChoice a, Monoid failure) + => FallibleArrow a x failure success + -> (success -> Either failure success') + -> FallibleArrow a x failure success' +a >>?^? b = a >>> Left ^|||^ b + +-- | Execute the second arrow if the lifted first arrow succeeds +(^>>?) :: (ArrowChoice a, Monoid failure) + => (x -> Either failure success) + -> FallibleArrow a success failure success' + -> FallibleArrow a x failure success' +a ^>>? b = a ^>> Left ^||| b + +-- | Execute the lifted second arrow if the lifted first arrow succeeds +(^>>?^) :: (ArrowChoice a, Monoid failure) + => (x -> Either failure success) + -> (success -> success') + -> FallibleArrow a x failure success' +a ^>>?^ f = arr $ a >>> right f + +-- | Execute the lifted second arrow if the lifted first arrow succeeds +(^>>?^?) :: (ArrowChoice a, Monoid failure) + => (x -> Either failure success) + -> (success -> Either failure success') + -> FallibleArrow a x failure success' +a ^>>?^? f = a ^>> Left ^|||^ f + +-- | Execute the second, non-fallible arrow if the first arrow succeeds +(>>?!) :: (ArrowChoice a, Monoid failure) + => FallibleArrow a x failure success + -> a success success' + -> FallibleArrow a x failure success' +a >>?! f = a >>> right f + +--- +(>>?§) :: (ArrowChoice a, Monoid f) + => FallibleArrow a x f (b,b') + -> (b -> b' -> c) + -> FallibleArrow a x f c +a >>?§ f = a >>?^ (uncurry f) + +--- +(^>>?§) :: (ArrowChoice a, Monoid f) + => (x -> Either f (b,b')) + -> (b -> b' -> c) + -> FallibleArrow a x f c +a ^>>?§ f = arr a >>?^ (uncurry f) + +--- +(>>?§?) :: (ArrowChoice a, Monoid f) + => FallibleArrow a x f (b,b') + -> (b -> b' -> (Either f c)) + -> FallibleArrow a x f c +a >>?§? f = a >>?^? (uncurry f) + +infixr 1 >>?, >>?^, >>?^? +infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?! +infixr 1 >>?§, ^>>?§, >>?§? + +-- | Keep values that are Right, replace Left values by a constant. +ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v +ifFailedUse v = arr $ either (const v) id + +-- | '(&&)' lifted into an arrow +(<&&>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool +(<&&>) = liftA2 (&&) + +-- | '(||)' lifted into an arrow +(<||>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool +(<||>) = liftA2 (||) + +-- | An equivalent of '(&&)' in a fallible arrow +(>&&<) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f s + -> FallibleArrow a x f s' + -> FallibleArrow a x f (s,s') +(>&&<) = liftA2 chooseMin + +-- | An equivalent of '(||)' in some forms of fallible arrows +(>||<) :: (ArrowChoice a, Monoid f, Monoid s) => FallibleArrow a x f s + -> FallibleArrow a x f s + -> FallibleArrow a x f s +(>||<) = liftA2 chooseMax + +-- | An arrow version of a short-circuit (<|>) +ifFailedDo :: (ArrowChoice a) + => FallibleArrow a x f y + -> FallibleArrow a x f y + -> FallibleArrow a x f y +ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right) + where repackage (x , Left _) = Left x + repackage (_ , Right y) = Right y + +infixr 4 <&&>, <||>, >&&<, >||< +infixr 1 `ifFailedDo` + + diff --git a/src/Text/Pandoc/Readers/Odt/Base.hs b/src/Text/Pandoc/Readers/Odt/Base.hs new file mode 100644 index 000000000..1f095bade --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Base.hs @@ -0,0 +1,43 @@ +{-# LANGUAGE PatternGuards #-} + +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Base + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +Core types of the odt reader. +-} + +module Text.Pandoc.Readers.Odt.Base where + +import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.Odt.Namespaces + +type OdtConverterState s = XMLConverterState Namespace s + +type XMLReader s a b = FallibleXMLConverter Namespace s a b + +type XMLReaderSafe s a b = XMLConverter Namespace s a b + diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs new file mode 100644 index 000000000..9bb585b8e --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -0,0 +1,790 @@ +{-# LANGUAGE Arrows #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE RecordWildCards #-} + +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.ContentReader + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +The core of the odt reader that converts odt features into Pandoc types. +-} + +module Text.Pandoc.Readers.Odt.ContentReader +( readerState +, read_body +) where + +import Control.Arrow +import Control.Applicative hiding ( liftA, liftA2, liftA3 ) + +import qualified Data.Map as M +import Data.List ( find ) +import Data.Monoid +import Data.Maybe + +import qualified Text.XML.Light as XML + +import Text.Pandoc.Definition +import Text.Pandoc.Builder +import Text.Pandoc.Shared + +import Text.Pandoc.Readers.Odt.Base +import Text.Pandoc.Readers.Odt.Namespaces +import Text.Pandoc.Readers.Odt.StyleReader + +import Text.Pandoc.Readers.Odt.Arrows.Utils +import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.Odt.Generic.Utils + + +-------------------------------------------------------------------------------- +-- State +-------------------------------------------------------------------------------- + +type Anchor = String + +data ReaderState + = ReaderState { -- | A collection of styles read somewhere else. + -- It is only queried here, not modified. + styleSet :: Styles + -- | A stack of the styles of parent elements. + -- Used to look up inherited style properties. + , styleTrace :: [Style] + -- | Keeps track of the current depth in nested lists + , currentListLevel :: ListLevel + -- | Lists may provide their own style, but they don't have + -- to. If they do not, the style of a parent list may be used + -- or even a default list style from the paragraph style. + -- This value keeps track of the closest list style there + -- currently is. + , currentListStyle :: Maybe ListStyle + -- | A map from internal anchor names to "pretty" ones. + -- The mapping is a purely cosmetic one. + , bookmarkAnchors :: M.Map Anchor Anchor + +-- , sequences +-- , trackedChangeIDs + } + deriving ( Show ) + +readerState :: Styles -> ReaderState +readerState styles = ReaderState styles [] 0 Nothing M.empty + +-- +pushStyle' :: Style -> ReaderState -> ReaderState +pushStyle' style state = state { styleTrace = style : styleTrace state } + +-- +popStyle' :: ReaderState -> ReaderState +popStyle' state = case styleTrace state of + _:trace -> state { styleTrace = trace } + _ -> state + +-- +modifyListLevel :: (ListLevel -> ListLevel) -> (ReaderState -> ReaderState) +modifyListLevel f state = state { currentListLevel = f (currentListLevel state) } + +-- +shiftListLevel :: ListLevel -> (ReaderState -> ReaderState) +shiftListLevel diff = modifyListLevel (+ diff) + +-- +swapCurrentListStyle :: Maybe ListStyle -> ReaderState + -> (ReaderState, Maybe ListStyle) +swapCurrentListStyle mListStyle state = ( state { currentListStyle = mListStyle } + , currentListStyle state + ) + +-- +lookupPrettyAnchor :: Anchor -> ReaderState -> Maybe Anchor +lookupPrettyAnchor anchor ReaderState{..} = M.lookup anchor bookmarkAnchors + +-- +putPrettyAnchor :: Anchor -> Anchor -> ReaderState -> ReaderState +putPrettyAnchor ugly pretty state@ReaderState{..} + = state { bookmarkAnchors = M.insert ugly pretty bookmarkAnchors } + +-- +usedAnchors :: ReaderState -> [Anchor] +usedAnchors ReaderState{..} = M.elems bookmarkAnchors + +-------------------------------------------------------------------------------- +-- Reader type and associated tools +-------------------------------------------------------------------------------- + +type OdtReader a b = XMLReader ReaderState a b + +type OdtReaderSafe a b = XMLReaderSafe ReaderState a b + +-- | Extract something from the styles +fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b +fromStyles f = keepingTheValue + (getExtraState >>^ styleSet) + >>§ f + +-- +getStyleByName :: OdtReader StyleName Style +getStyleByName = fromStyles lookupStyle >>^ maybeToChoice + +-- +findStyleFamily :: OdtReader Style StyleFamily +findStyleFamily = fromStyles getStyleFamily >>^ maybeToChoice + +-- +lookupListStyle :: OdtReader StyleName ListStyle +lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice + +-- +switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle) +switchCurrentListStyle = keepingTheValue getExtraState + >>§ swapCurrentListStyle + >>> first setExtraState + >>^ snd + +-- +pushStyle :: OdtReaderSafe Style Style +pushStyle = keepingTheValue ( + ( keepingTheValue getExtraState + >>§ pushStyle' + ) + >>> setExtraState + ) + >>^ fst + +-- +popStyle :: OdtReaderSafe x x +popStyle = keepingTheValue ( + getExtraState + >>> arr popStyle' + >>> setExtraState + ) + >>^ fst + +-- +getCurrentListLevel :: OdtReaderSafe _x ListLevel +getCurrentListLevel = getExtraState >>^ currentListLevel + + +type AnchorPrefix = String + +-- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a +-- unique identifier but without assuming that the id should be for a header. +-- Second argument is a list of already used identifiers. +uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor +uniqueIdentFrom baseIdent usedIdents = + let numIdent n = baseIdent ++ "-" ++ show n + in if baseIdent `elem` usedIdents + then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of + Just x -> numIdent x + Nothing -> baseIdent -- if we have more than 60,000, allow repeats + else baseIdent + +-- | First argument: basis for a new "pretty" anchor if none exists yet +-- Second argument: a key ("ugly" anchor) +-- Returns: saved "pretty" anchor or created new one +getPrettyAnchor :: OdtReaderSafe (AnchorPrefix, Anchor) Anchor +getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do + state <- getExtraState -< () + case lookupPrettyAnchor uglyAnchor state of + Just prettyAnchor -> returnA -< prettyAnchor + Nothing -> do + let newPretty = uniqueIdentFrom baseIdent (usedAnchors state) + modifyExtraState (putPrettyAnchor uglyAnchor newPretty) -<< newPretty + +-- | Input: basis for a new header anchor +-- Ouput: saved new anchor +getHeaderAnchor :: OdtReaderSafe Inlines Anchor +getHeaderAnchor = proc title -> do + state <- getExtraState -< () + let anchor = uniqueIdent (toList title) (usedAnchors state) + modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor + + +-------------------------------------------------------------------------------- +-- Working with styles +-------------------------------------------------------------------------------- + +-- +readStyleByName :: OdtReader _x Style +readStyleByName = findAttr NsText "style-name" >>? getStyleByName + +-- +isStyleToTrace :: OdtReader Style Bool +isStyleToTrace = findStyleFamily >>?^ (==FaText) + +-- +withNewStyle :: OdtReaderSafe x Inlines -> OdtReaderSafe x Inlines +withNewStyle a = proc x -> do + fStyle <- readStyleByName -< () + case fStyle of + Right style -> do + mFamily <- arr styleFamily -< style + fTextProps <- arr ( maybeToChoice + . textProperties + . styleProperties + ) -< style + case fTextProps of + Right textProps -> do + state <- getExtraState -< () + let triple = (state, textProps, mFamily) + modifier <- arr modifierFromStyleDiff -< triple + fShouldTrace <- isStyleToTrace -< style + case fShouldTrace of + Right shouldTrace -> do + if shouldTrace + then do + pushStyle -< style + inlines <- a -< x + popStyle -< () + arr modifier -<< inlines + else + -- In case anything goes wrong + a -< x + Left _ -> a -< x + Left _ -> a -< x + Left _ -> a -< x + + +type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily) +type InlineModifier = Inlines -> Inlines + +-- | Given data about the local style changes, calculates how to modify +-- an instance of 'Inlines' +modifierFromStyleDiff :: PropertyTriple -> InlineModifier +modifierFromStyleDiff propertyTriple = + composition $ + (getVPosModifier propertyTriple) + : map (first ($ propertyTriple) >>> ifThen_else ignore) + [ (hasEmphChanged , emph ) + , (hasChanged isStrong , strong ) + , (hasChanged strikethrough , strikeout ) + ] + where + ifThen_else else' (if',then') = if if' then then' else else' + + ignore = id :: InlineModifier + + getVPosModifier :: PropertyTriple -> InlineModifier + getVPosModifier triple@(_,textProps,_) = + let getVPos = Just . verticalPosition + in case lookupPreviousValueM getVPos triple of + Nothing -> ignore + Just oldVPos -> getVPosModifier' (oldVPos,verticalPosition textProps) + + getVPosModifier' (oldVPos , newVPos ) | oldVPos == newVPos = ignore + getVPosModifier' ( _ , VPosSub ) = subscript + getVPosModifier' ( _ , VPosSuper ) = superscript + getVPosModifier' ( _ , _ ) = ignore + + hasEmphChanged :: PropertyTriple -> Bool + hasEmphChanged = swing any [ hasChanged isEmphasised + , hasChangedM pitch + , hasChanged underline + ] + + hasChanged property triple@(_, property -> newProperty, _) = + maybe True (/=newProperty) (lookupPreviousValue property triple) + + hasChangedM property triple@(_, textProps,_) = + fromMaybe False $ (/=) <$> property textProps <*> lookupPreviousValueM property triple + + lookupPreviousValue f = lookupPreviousStyleValue ((fmap f).textProperties) + + lookupPreviousValueM f = lookupPreviousStyleValue ((f =<<).textProperties) + + lookupPreviousStyleValue f (ReaderState{..},_,mFamily) + = ( findBy f $ extendedStylePropertyChain styleTrace styleSet ) + <|> ( f =<< fmap (lookupDefaultStyle' styleSet) mFamily ) + + +type ParaModifier = Blocks -> Blocks + +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ :: Int +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ :: Int +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ = 5 +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ = 5 + +-- | Returns either 'id' or 'blockQuote' depending on the current indentation +getParaModifier :: Style -> ParaModifier +getParaModifier Style{..} | Just props <- paraProperties styleProperties + , isBlockQuote (indentation props) + (margin_left props) + = blockQuote + | otherwise + = id + where + isBlockQuote mIndent mMargin + | LengthValueMM indent <- mIndent + , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ + = True + | LengthValueMM margin <- mMargin + , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ + = True + | LengthValueMM indent <- mIndent + , LengthValueMM margin <- mMargin + = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ + + | PercentValue indent <- mIndent + , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ + = True + | PercentValue margin <- mMargin + , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ + = True + | PercentValue indent <- mIndent + , PercentValue margin <- mMargin + = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ + + | otherwise + = False + +-- +constructPara :: OdtReaderSafe Blocks Blocks -> OdtReaderSafe Blocks Blocks +constructPara reader = proc blocks -> do + fStyle <- readStyleByName -< blocks + case fStyle of + Left _ -> reader -< blocks + Right style -> do + let modifier = getParaModifier style + blocks' <- reader -< blocks + arr modifier -<< blocks' + + + +type ListConstructor = [Blocks] -> Blocks + +getListConstructor :: ListLevelStyle -> ListConstructor +getListConstructor ListLevelStyle{..} = + case listLevelType of + LltBullet -> bulletList + LltImage -> bulletList + LltNumbered -> let listNumberStyle = toListNumberStyle listItemFormat + listNumberDelim = toListNumberDelim listItemPrefix + listItemSuffix + in orderedListWith (1, listNumberStyle, listNumberDelim) + where + toListNumberStyle LinfNone = DefaultStyle + toListNumberStyle LinfNumber = Decimal + toListNumberStyle LinfRomanLC = LowerRoman + toListNumberStyle LinfRomanUC = UpperRoman + toListNumberStyle LinfAlphaLC = LowerAlpha + toListNumberStyle LinfAlphaUC = UpperAlpha + toListNumberStyle (LinfString _) = Example + + toListNumberDelim Nothing (Just ".") = Period + toListNumberDelim (Just "" ) (Just ".") = Period + toListNumberDelim Nothing (Just ")") = OneParen + toListNumberDelim (Just "" ) (Just ")") = OneParen + toListNumberDelim (Just "(") (Just ")") = TwoParens + toListNumberDelim _ _ = DefaultDelim + + +-- | Determines which style to use for a list, which level to use of that +-- style, and which type of list to create as a result of this information. +-- Then prepares the state for eventual child lists and constructs the list from +-- the results. +-- Two main cases are handled: The list may provide its own style or it may +-- rely on a parent list's style. I the former case the current style in the +-- state must be switched before and after the call to the child converter +-- while in the latter the child converter can be called directly. +-- If anything goes wrong, a default ordered-list-constructor is used. +constructList :: OdtReaderSafe x [Blocks] -> OdtReaderSafe x Blocks +constructList reader = proc x -> do + modifyExtraState (shiftListLevel 1) -< () + listLevel <- getCurrentListLevel -< () + fStyleName <- findAttr NsText "style-name" -< () + case fStyleName of + Right styleName -> do + fListStyle <- lookupListStyle -< styleName + case fListStyle of + Right listStyle -> do + fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle) + case fLLS of + Just listLevelStyle -> do + oldListStyle <- switchCurrentListStyle -< Just listStyle + blocks <- constructListWith listLevelStyle -<< x + switchCurrentListStyle -< oldListStyle + returnA -< blocks + Nothing -> constructOrderedList -< x + Left _ -> constructOrderedList -< x + Left _ -> do + state <- getExtraState -< () + mListStyle <- arr currentListStyle -< state + case mListStyle of + Just listStyle -> do + fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle) + case fLLS of + Just listLevelStyle -> constructListWith listLevelStyle -<< x + Nothing -> constructOrderedList -< x + Nothing -> constructOrderedList -< x + where + constructOrderedList = + reader + >>> modifyExtraState (shiftListLevel (-1)) + >>^ orderedList + constructListWith listLevelStyle = + reader + >>> getListConstructor listLevelStyle + ^>> modifyExtraState (shiftListLevel (-1)) + +-------------------------------------------------------------------------------- +-- Readers +-------------------------------------------------------------------------------- + +type ElementMatcher result = (Namespace, ElementName, OdtReader result result) + +type InlineMatcher = ElementMatcher Inlines + +type BlockMatcher = ElementMatcher Blocks + + +-- +matchingElement :: (Monoid e) + => Namespace -> ElementName + -> OdtReaderSafe e e + -> ElementMatcher e +matchingElement ns name reader = (ns, name, asResultAccumulator reader) + where + asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m) + asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>§ (<>) + +-- +matchChildContent' :: (Monoid result) + => [ElementMatcher result] + -> OdtReaderSafe _x result +matchChildContent' ls = returnV mempty >>> matchContent' ls + +-- +matchChildContent :: (Monoid result) + => [ElementMatcher result] + -> OdtReaderSafe (result, XML.Content) result + -> OdtReaderSafe _x result +matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback + + +-------------------------------------------- +-- Matchers +-------------------------------------------- + +---------------------- +-- Basics +---------------------- + +-- +-- | Open Document allows several consecutive spaces if they are marked up +read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines +read_plain_text = fst ^&&& read_plain_text' >>§ recover + where + -- fallible version + read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines + read_plain_text' = ( second ( arr extractText ) + >>^ spreadChoice >>?! second text + ) + >>?§ (<>) + -- + extractText :: XML.Content -> Fallible String + extractText (XML.Text cData) = succeedWith (XML.cdData cData) + extractText _ = failEmpty + + +-- specifically. I honor that, although the current implementation of '(<>)' +-- for 'Inlines' in "Text.Pandoc.Builder" will collaps them agein. +-- The rational is to be prepared for future modifications. +read_spaces :: InlineMatcher +read_spaces = matchingElement NsText "s" ( + readAttrWithDefault NsText "c" 1 -- how many spaces? + >>^ fromList.(`replicate` Space) + ) +-- +read_line_break :: InlineMatcher +read_line_break = matchingElement NsText "line-break" + $ returnV linebreak + +-- +read_span :: InlineMatcher +read_span = matchingElement NsText "span" + $ withNewStyle + $ matchChildContent [ read_span + , read_spaces + , read_line_break + , read_link + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + ] read_plain_text + +-- +read_paragraph :: BlockMatcher +read_paragraph = matchingElement NsText "p" + $ constructPara + $ liftA para + $ withNewStyle + $ matchChildContent [ read_span + , read_spaces + , read_line_break + , read_link + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + ] read_plain_text + + +---------------------- +-- Headers +---------------------- + +-- +read_header :: BlockMatcher +read_header = matchingElement NsText "h" + $ proc blocks -> do + level <- ( readAttrWithDefault NsText "outline-level" 1 + ) -< blocks + children <- ( matchChildContent [ read_span + , read_spaces + , read_line_break + , read_link + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + ] read_plain_text + ) -< blocks + anchor <- getHeaderAnchor -< children + let idAttr = (anchor, [], []) -- no classes, no key-value pairs + arr (uncurry3 headerWith) -< (idAttr, level, children) + +---------------------- +-- Lists +---------------------- + +-- +read_list :: BlockMatcher +read_list = matchingElement NsText "list" +-- $ withIncreasedListLevel + $ constructList +-- $ liftA bulletList + $ matchChildContent' [ read_list_item + ] +-- +read_list_item :: ElementMatcher [Blocks] +read_list_item = matchingElement NsText "list-item" + $ liftA (compactify'.(:[])) + ( matchChildContent' [ read_paragraph + , read_header + , read_list + ] + ) + + +---------------------- +-- Links +---------------------- + +read_link :: InlineMatcher +read_link = matchingElement NsText "a" + $ liftA3 link + ( findAttrWithDefault NsXLink "href" "" ) + ( findAttrWithDefault NsOffice "title" "" ) + ( matchChildContent [ read_span + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + ] read_plain_text ) + + +------------------------- +-- Footnotes +------------------------- + +read_note :: InlineMatcher +read_note = matchingElement NsText "note" + $ liftA note + $ matchChildContent' [ read_note_body ] + +read_note_body :: BlockMatcher +read_note_body = matchingElement NsText "note-body" + $ matchChildContent' [ read_paragraph ] + +------------------------- +-- Citations +------------------------- + +read_citation :: InlineMatcher +read_citation = matchingElement NsText "bibliography-mark" + $ liftA2 cite + ( liftA2 makeCitation + ( findAttrWithDefault NsText "identifier" "" ) + ( readAttrWithDefault NsText "number" 0 ) + ) + ( matchChildContent [] read_plain_text ) + where + makeCitation :: String -> Int -> [Citation] + makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0] + + +---------------------- +-- Tables +---------------------- + +-- +read_table :: BlockMatcher +read_table = matchingElement NsTable "table" + $ liftA (simpleTable []) + $ matchChildContent' [ read_table_row + ] + +-- +read_table_row :: ElementMatcher [[Blocks]] +read_table_row = matchingElement NsTable "table-row" + $ liftA (:[]) + $ matchChildContent' [ read_table_cell + ] + +-- +read_table_cell :: ElementMatcher [Blocks] +read_table_cell = matchingElement NsTable "table-cell" + $ liftA (compactify'.(:[])) + $ matchChildContent' [ read_paragraph + ] + +---------------------- +-- Internal links +---------------------- + +_ANCHOR_PREFIX_ :: String +_ANCHOR_PREFIX_ = "anchor" + +-- +readAnchorAttr :: OdtReader _x Anchor +readAnchorAttr = findAttr NsText "name" + +-- | Beware: may fail +findAnchorName :: OdtReader AnchorPrefix Anchor +findAnchorName = ( keepingTheValue readAnchorAttr + >>^ spreadChoice + ) >>?! getPrettyAnchor + + +-- +maybeAddAnchorFrom :: OdtReader Inlines AnchorPrefix + -> OdtReaderSafe Inlines Inlines +maybeAddAnchorFrom anchorReader = + keepingTheValue (anchorReader >>? findAnchorName >>?^ toAnchorElem) + >>> + proc (inlines, fAnchorElem) -> do + case fAnchorElem of + Right anchorElem -> + arr (anchorElem <>) -<< inlines + Left _ -> returnA -< inlines + where + toAnchorElem :: Anchor -> Inlines + toAnchorElem anchorID = spanWith (anchorID, [], []) mempty + -- no classes, no key-value pairs + +-- +read_bookmark :: InlineMatcher +read_bookmark = matchingElement NsText "bookmark" + $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_) + +-- +read_bookmark_start :: InlineMatcher +read_bookmark_start = matchingElement NsText "bookmark-start" + $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_) + +-- +read_reference_start :: InlineMatcher +read_reference_start = matchingElement NsText "reference-mark-start" + $ maybeAddAnchorFrom readAnchorAttr + +-- | Beware: may fail +findAnchorRef :: OdtReader _x Anchor +findAnchorRef = ( findAttr NsText "ref-name" + >>?^ (_ANCHOR_PREFIX_,) + ) >>?! getPrettyAnchor + + +-- +maybeInAnchorRef :: OdtReaderSafe Inlines Inlines +maybeInAnchorRef = proc inlines -> do + fRef <- findAnchorRef -< () + case fRef of + Right anchor -> + arr (toAnchorRef anchor) -<< inlines + Left _ -> returnA -< inlines + where + toAnchorRef :: Anchor -> Inlines -> Inlines + toAnchorRef anchor = link ('#':anchor) "" -- no title + +-- +read_bookmark_ref :: InlineMatcher +read_bookmark_ref = matchingElement NsText "bookmark-ref" + $ maybeInAnchorRef + <<< matchChildContent [] read_plain_text + +-- +read_reference_ref :: InlineMatcher +read_reference_ref = matchingElement NsText "reference-ref" + $ maybeInAnchorRef + <<< matchChildContent [] read_plain_text + + +---------------------- +-- Entry point +---------------------- + +--read_plain_content :: OdtReaderSafe _x Inlines +--read_plain_content = strContent >>^ text + +read_text :: OdtReaderSafe _x Pandoc +read_text = matchChildContent' [ read_header + , read_paragraph + , read_list + , read_table + ] + >>^ doc + +read_body :: OdtReader _x Pandoc +read_body = executeIn NsOffice "body" + $ executeIn NsOffice "text" + $ liftAsSuccess read_text + diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs new file mode 100644 index 000000000..5922164c9 --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs @@ -0,0 +1,260 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Generic.Fallible + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +Data types and utilities representing failure. Most of it is based on the +"Either" type in its usual configuration (left represents failure). + +In most cases, the failure type is implied or required to be a "Monoid". + +The choice of "Either" instead of a custom type makes it easier to write +compatible instances of "ArrowChoice". +-} + +-- We export everything +module Text.Pandoc.Readers.Odt.Generic.Fallible where + +import Control.Applicative +import Control.Monad + +import qualified Data.Foldable as F +import Data.Monoid + +-- | Default for now. Will probably become a class at some point. +type Failure = () + +type Fallible a = Either Failure a + + +-- | False -> Left (), True -> Right () +boolToEither :: Bool -> Fallible () +boolToEither False = Left () +boolToEither True = Right () + +-- | False -> Left (), True -> Right () +boolToChoice :: Bool -> Fallible () +boolToChoice False = Left () +boolToChoice True = Right () + +-- +maybeToEither :: Maybe a -> Fallible a +maybeToEither (Just a) = Right a +maybeToEither Nothing = Left () + +-- +eitherToMaybe :: Either _l a -> Maybe a +eitherToMaybe (Left _) = Nothing +eitherToMaybe (Right a) = Just a + +-- | > untagEither === either id id +untagEither :: Either a a -> a +untagEither (Left a) = a +untagEither (Right a) = a + +-- | > fromLeft f === either f id +fromLeft :: (a -> b) -> Either a b -> b +fromLeft f (Left a) = f a +fromLeft _ (Right b) = b + +-- | > fromRight f === either id f +fromRight :: (a -> b) -> Either b a -> b +fromRight _ (Left b) = b +fromRight f (Right a) = f a + +-- | > recover a === fromLeft (const a) === either (const a) id +recover :: a -> Either _f a -> a +recover a (Left _) = a +recover _ (Right a) = a + +-- | I would love to use 'fail'. Alas, 'Monad.fail'... +failWith :: failure -> Either failure _x +failWith f = Left f + +-- +failEmpty :: (Monoid failure) => Either failure _x +failEmpty = failWith mempty + +-- +succeedWith :: a -> Either _x a +succeedWith = Right + +-- +collapseEither :: Either failure (Either failure x) + -> Either failure x +collapseEither (Left f ) = Left f +collapseEither (Right (Left f)) = Left f +collapseEither (Right (Right x)) = Right x + +-- | If either of the values represents an error, the result is a +-- (possibly combined) error. If both values represent a success, +-- both are returned. +chooseMin :: (Monoid a) => Either a b -> Either a b' -> Either a (b,b') +chooseMin = chooseMinWith (,) + +-- | If either of the values represents an error, the result is a +-- (possibly combined) error. If both values represent a success, +-- a combination is returned. +chooseMinWith :: (Monoid a) => (b -> b' -> c) + -> Either a b + -> Either a b' + -> Either a c +chooseMinWith (><) (Right a) (Right b) = Right $ a >< b +chooseMinWith _ (Left a) (Left b) = Left $ a <> b +chooseMinWith _ (Left a) _ = Left a +chooseMinWith _ _ (Left b) = Left b + +-- | If either of the values represents a non-error, the result is a +-- (possibly combined) non-error. If both values represent an error, an error +-- is returned. +chooseMax :: (Monoid a, Monoid b) => Either a b -> Either a b -> Either a b +chooseMax = chooseMaxWith (<>) + +-- | If either of the values represents a non-error, the result is a +-- (possibly combined) non-error. If both values represent an error, an error +-- is returned. +chooseMaxWith :: (Monoid a) => (b -> b -> b) + -> Either a b + -> Either a b + -> Either a b +chooseMaxWith (><) (Right a) (Right b) = Right $ a >< b +chooseMaxWith _ (Left a) (Left b) = Left $ a <> b +chooseMaxWith _ (Right a) _ = Right a +chooseMaxWith _ _ (Right b) = Right b + + +-- | Class of containers that can escalate contained 'Either's. +-- The word "Vector" is meant in the sense of a disease transmitter. +class ChoiceVector v where + spreadChoice :: v (Either f a) -> Either f (v a) + +-- Let's do a few examples first + +instance ChoiceVector Maybe where + spreadChoice (Just (Left f)) = Left f + spreadChoice (Just (Right x)) = Right (Just x) + spreadChoice Nothing = Right Nothing + +instance ChoiceVector (Either l) where + spreadChoice (Right (Left f)) = Left f + spreadChoice (Right (Right x)) = Right (Right x) + spreadChoice (Left x ) = Right (Left x) + +instance ChoiceVector ((,) a) where + spreadChoice (_, Left f) = Left f + spreadChoice (x, Right y) = Right (x,y) + -- Wasn't there a newtype somewhere with the elements flipped? + +-- +-- More instances later, first some discussion. +-- +-- I'll have to freshen up on type system details to see how (or if) to do +-- something like +-- +-- > instance (ChoiceVector a, ChoiceVector b) => ChoiceVector (a b) where +-- > : +-- +-- But maybe it would be even better to use something like +-- +-- > class ChoiceVector v v' f | v -> v' f where +-- > spreadChoice :: v -> Either f v' +-- +-- That way, more places in @v@ could spread the cheer, e.g.: +-- +-- As before: +-- -- ( a , Either f b) (a , b) f +-- > instance ChoiceVector ((,) a (Either f b)) ((,) a b) f where +-- > spreadChoice (_, Left f) = Left f +-- > spreadChoice (a, Right b) = Right (a,b) +-- +-- But also: +-- -- ( Either f a , b) (a , b) f +-- > instance ChoiceVector ((,) (Either f a) b) ((,) a b) f where +-- > spreadChoice (Right a,b) = Right (a,b) +-- > spreadChoice (Left f,_) = Left f +-- +-- And maybe even: +-- -- ( Either f a , Either f b) (a , b) f +-- > instance ChoiceVector ((,) (Either f a) (Either f b)) ((,) a b) f where +-- > spreadChoice (Right a , Right b) = Right (a,b) +-- > spreadChoice (Left f , _ ) = Left f +-- > spreadChoice ( _ , Left f) = Left f +-- +-- Of course that would lead to a lot of overlapping instances... +-- But I can't think of a different way. A selector function might help, +-- but not even a "Data.Traversable" is powerful enough for that. +-- But maybe someone has already solved all this with a lens library. +-- +-- Well, it's an interesting academic question. But for practical purposes, +-- I have more than enough right now. + +instance ChoiceVector ((,,) a b) where + spreadChoice (_,_, Left f) = Left f + spreadChoice (a,b, Right x) = Right (a,b,x) + +instance ChoiceVector ((,,,) a b c) where + spreadChoice (_,_,_, Left f) = Left f + spreadChoice (a,b,c, Right x) = Right (a,b,c,x) + +instance ChoiceVector ((,,,,) a b c d) where + spreadChoice (_,_,_,_, Left f) = Left f + spreadChoice (a,b,c,d, Right x) = Right (a,b,c,d,x) + +instance ChoiceVector (Const a) where + spreadChoice (Const c) = Right (Const c) -- need to repackage because of implicit types + +-- | Fails on the first error +instance ChoiceVector [] where + spreadChoice = sequence -- using the monad instance of Either. + -- Could be generalized to "Data.Traversable" - but why play + -- with UndecidableInstances unless this is really needed. + +-- | Wrapper for a list. While the normal list instance of 'ChoiceVector' +-- fails whenever it can, this type will never fail. +newtype SuccessList a = SuccessList { collectNonFailing :: [a] } + deriving ( Eq, Ord, Show ) + +instance ChoiceVector SuccessList where + spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing + where unTagRight (Right x) = (x:) + unTagRight _ = id + +-- | Like 'catMaybes', but for 'Either'. +collectRights :: [Either _l r] -> [r] +collectRights = collectNonFailing . untag . spreadChoice . SuccessList + where untag = fromLeft (error "Unexpected Left") + +-- | A version of 'collectRights' generalized to other containers. The +-- container must be both "reducible" and "buildable". Most general containers +-- should fullfill these requirements, but there is no single typeclass +-- (that I know of) for that. +-- Therefore, they are split between 'Foldable' and 'MonadPlus'. +-- (Note that 'Data.Traversable.Traversable' alone would not be enough, either.) +collectRightsF :: (F.Foldable c, MonadPlus c) => c (Either _l r) -> c r +collectRightsF = F.foldr unTagRight mzero + where unTagRight (Right x) = mplus $ return x + unTagRight _ = id diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs new file mode 100644 index 000000000..82ae3e20e --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs @@ -0,0 +1,62 @@ +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Generic.Namespaces + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +A class containing a set of namespace identifiers. Used to convert between +typesafe Haskell namespace identifiers and unsafe "real world" namespaces. +-} + +module Text.Pandoc.Readers.Odt.Generic.Namespaces where + +import qualified Data.Map as M + +-- +type NameSpaceIRI = String + +-- +type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI + +-- +class (Eq nsID, Ord nsID) => NameSpaceID nsID where + + -- | Given a IRI, possibly update the map and return the id of the namespace. + -- May fail if the namespace is unknown and the application does not + -- allow unknown namespaces. + getNamespaceID :: NameSpaceIRI + -> NameSpaceIRIs nsID + -> Maybe (NameSpaceIRIs nsID, nsID) + -- | Given a namespace id, lookup its IRI. May be overriden for performance. + getIRI :: nsID + -> NameSpaceIRIs nsID + -> Maybe NameSpaceIRI + -- | The root element of an XML document has a namespace, too, and the + -- "XML.Light-parser" is eager to remove the corresponding namespace + -- attribute. + -- As a result, at least this root namespace must be provided. + getInitialIRImap :: NameSpaceIRIs nsID + + getIRI = M.lookup + getInitialIRImap = M.empty diff --git a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs new file mode 100644 index 000000000..afd7d616c --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs @@ -0,0 +1,48 @@ +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Generic.SetMap + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +A map of values to sets of values. +-} + +module Text.Pandoc.Readers.Odt.Generic.SetMap where + +import qualified Data.Map as M +import qualified Data.Set as S + +type SetMap k v = M.Map k (S.Set v) + +empty :: SetMap k v +empty = M.empty + +fromList :: (Ord k, Ord v) => [(k,v)] -> SetMap k v +fromList = foldr (uncurry insert) empty + +insert :: (Ord k, Ord v) => k -> v -> SetMap k v -> SetMap k v +insert key value setMap = M.insertWith S.union key (S.singleton value) setMap + +union3 :: (Ord k) => SetMap k v -> SetMap k v -> SetMap k v -> SetMap k v +union3 sm1 sm2 sm3 = sm1 `M.union` sm2 `M.union` sm3 diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs new file mode 100644 index 000000000..6c10ed61d --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -0,0 +1,171 @@ +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} + +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Reader.Odt.Generic.Utils + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +General utility functions for the odt reader. +-} + +module Text.Pandoc.Readers.Odt.Generic.Utils +( uncurry3 +, uncurry4 +, uncurry5 +, uncurry6 +, uncurry7 +, uncurry8 +, swap +, reverseComposition +, bool +, tryToRead +, Lookupable(..) +, readLookupables +, readLookupable +, readPercent +, findBy +, swing +, composition +) where + +import Control.Category ( Category, (>>>), (<<<) ) +import qualified Control.Category as Cat ( id ) +import Control.Monad ( msum ) + +import qualified Data.Foldable as F ( Foldable, foldr ) +import Data.Maybe + + +-- | Aequivalent to +-- > foldr (.) id +-- where '(.)' are 'id' are the ones from "Control.Category" +-- and 'foldr' is the one from "Data.Foldable". +-- The noun-form was chosen to be consistend with 'sum', 'product' etc +-- based on the discussion at +-- <https://groups.google.com/forum/#!topic/haskell-cafe/VkOZM1zaHOI> +-- (that I was not part of) +composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a +composition = F.foldr (<<<) Cat.id + +-- | Aequivalent to +-- > foldr (flip (.)) id +-- where '(.)' are 'id' are the ones from "Control.Category" +-- and 'foldr' is the one from "Data.Foldable". +-- A reversed version of 'composition'. +reverseComposition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a +reverseComposition = F.foldr (>>>) Cat.id + +-- | 'Either' has 'either', 'Maybe' has 'maybe'. 'Bool' should have 'bool'. +-- Note that the first value is selected if the boolean value is 'False'. +-- That makes 'bool' consistent with the other two. Also, 'bool' now takes its +-- arguments in the exact opposite order compared to the normal if construct. +bool :: a -> a -> Bool -> a +bool x _ False = x +bool _ x True = x + +-- | This function often makes it possible to switch values with the functions +-- that are applied to them. +-- +-- Examples: +-- > swing map :: [a -> b] -> a -> [b] +-- > swing any :: [a -> Bool] -> a -> Bool +-- > swing foldr :: b -> a -> [a -> b -> b] -> b +-- > swing scanr :: c -> a -> [a -> c -> c] -> c +-- > swing zipWith :: [a -> b -> c] -> a -> [b] -> [c] +-- > swing find :: [a -> Bool] -> a -> Maybe (a -> Bool) +-- +-- Stolen from <https://wiki.haskell.org/Pointfree> +swing :: (((a -> b) -> b) -> c -> d) -> c -> a -> d +swing = flip.(.flip id) +-- swing f c a = f ($ a) c + + +-- | Alternative to 'read'/'reads'. The former of these throws errors +-- (nobody wants that) while the latter returns "to much" for simple purposes. +-- This function instead applies 'reads' and returns the first match (if any) +-- in a 'Maybe'. +tryToRead :: (Read r) => String -> Maybe r +tryToRead = reads >>> listToMaybe >>> fmap fst + +-- | A version of 'reads' that requires a '%' sign after the number +readPercent :: ReadS Int +readPercent s = [ (i,s') | (i , r ) <- reads s + , ("%" , s') <- lex r + ] + +-- | Data that can be looked up. +-- This is mostly a utility to read data with kind *. +class Lookupable a where + lookupTable :: [(String, a)] + +-- | The idea is to use this function as if there was a declaration like +-- +-- > instance (Lookupable a) => (Read a) where +-- > readsPrec _ = readLookupables +-- . +-- But including this code in this form would need UndecideableInstances. +-- That is a bad idea. Luckily 'readLookupable' (without the s at the end) +-- can be used directly in almost any case. +readLookupables :: (Lookupable a) => String -> [(a,String)] +readLookupables s = [ (a,rest) | (word,rest) <- lex s, + let result = lookup word lookupTable, + isJust result, + let Just a = result + ] + +-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. +readLookupable :: (Lookupable a) => String -> Maybe a +readLookupable s = msum + $ map ((`lookup` lookupTable).fst) + $ lex s + +uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z +uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z +uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z +uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z +uncurry7 :: (a->b->c->d->e->f->g -> z) -> (a,b,c,d,e,f,g ) -> z +uncurry8 :: (a->b->c->d->e->f->g->h -> z) -> (a,b,c,d,e,f,g,h) -> z + +uncurry3 fun (a,b,c ) = fun a b c +uncurry4 fun (a,b,c,d ) = fun a b c d +uncurry5 fun (a,b,c,d,e ) = fun a b c d e +uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f +uncurry7 fun (a,b,c,d,e,f,g ) = fun a b c d e f g +uncurry8 fun (a,b,c,d,e,f,g,h) = fun a b c d e f g h + +swap :: (a,b) -> (b,a) +swap (a,b) = (b,a) + +-- | A version of "Data.List.find" that uses a converter to a Maybe instance. +-- The returned value is the first which the converter returns in a 'Just' +-- wrapper. +findBy :: (a -> Maybe b) -> [a] -> Maybe b +findBy _ [] = Nothing +findBy f ((f -> Just x):_ ) = Just x +findBy f ( _:xs) = findBy f xs + diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs new file mode 100644 index 000000000..ec7e0ea5e --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -0,0 +1,1064 @@ +{-# LANGUAGE Arrows #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RecordWildCards #-} + +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Generic.XMLConverter + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +A generalized XML parser based on stateful arrows. +It might be sufficient to define this reader as a comonad, but there is +not a lot of use in trying. +-} + +module Text.Pandoc.Readers.Odt.Generic.XMLConverter +( ElementName +, XMLConverterState +, XMLConverter +, FallibleXMLConverter +, swapPosition +, runConverter +, runConverter'' +, runConverter' +, runConverterF' +, runConverterF +, getCurrentElement +, getExtraState +, setExtraState +, modifyExtraState +, convertingExtraState +, producingExtraState +, lookupNSiri +, lookupNSprefix +, readNSattributes +, elemName +, elemNameIs +, strContent +, elContent +, currentElem +, currentElemIs +, expectElement +, elChildren +, findChildren +, filterChildren +, filterChildrenName +, findChild' +, findChild +, filterChild' +, filterChild +, filterChildName' +, filterChildName +, isSet +, isSet' +, isSetWithDefault +, hasAttrValueOf' +, failIfNotAttrValueOf +, isThatTheAttrValue +, searchAttrIn +, searchAttrWith +, searchAttr +, lookupAttr +, lookupAttr' +, lookupAttrWithDefault +, lookupDefaultingAttr +, findAttr' +, findAttr +, findAttrWithDefault +, readAttr +, readAttr' +, readAttrWithDefault +, getAttr +-- , (>/<) +-- , (?>/<) +, executeIn +, collectEvery +, withEveryL +, withEvery +, tryAll +, tryAll' +, IdXMLConverter +, MaybeEConverter +, ElementMatchConverter +, MaybeCConverter +, ContentMatchConverter +, makeMatcherE +, makeMatcherC +, prepareMatchersE +, prepareMatchersC +, matchChildren +, matchContent'' +, matchContent' +, matchContent +) where + +import Control.Applicative hiding ( liftA, liftA2 ) +import Control.Monad ( MonadPlus ) +import Control.Arrow + +import qualified Data.Map as M +import qualified Data.Foldable as F +import Data.Default +import Data.Monoid ( Monoid ) +import Data.Maybe + +import qualified Text.XML.Light as XML + +import Text.Pandoc.Readers.Odt.Arrows.State +import Text.Pandoc.Readers.Odt.Arrows.Utils + +import Text.Pandoc.Readers.Odt.Generic.Namespaces +import Text.Pandoc.Readers.Odt.Generic.Utils +import Text.Pandoc.Readers.Odt.Generic.Fallible + +-------------------------------------------------------------------------------- +-- Basis types for readability +-------------------------------------------------------------------------------- + +-- +type ElementName = String +type AttributeName = String +type AttributeValue = String + +-- +type NameSpacePrefix = String + +-- +type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix + +-------------------------------------------------------------------------------- +-- Main converter state +-------------------------------------------------------------------------------- + +-- GADT so some of the NameSpaceID restrictions can be deduced +data XMLConverterState nsID extraState where + XMLConverterState :: NameSpaceID nsID + => { -- | A stack of parent elements. The top element is the current one. + -- Arguably, a real Zipper would be better. But that is an + -- optimization that can be made at a later time, e.g. when + -- replacing Text.XML.Light. + parentElements :: [XML.Element] + -- | A map from internal namespace IDs to the namespace prefixes + -- used in XML elements + , namespacePrefixes :: NameSpacePrefixes nsID + -- | A map from internal namespace IDs to namespace IRIs + -- (Only necessary for matching namespace IDs and prefixes) + , namespaceIRIs :: NameSpaceIRIs nsID + -- | A place to put "something else". This feature is used heavily + -- to keep the main code cleaner. More specifically, the main reader + -- is divided into different stages. Each stage lifts something up + -- here, which the next stage can then use. This could of course be + -- generalized to a state-tree or used for the namespace IRIs. The + -- border between states and values is an imaginary one, after all. + -- But the separation as it is seems to be enough for now. + , moreState :: extraState + } + -> XMLConverterState nsID extraState + +-- +createStartState :: (NameSpaceID nsID) + => XML.Element + -> extraState + -> XMLConverterState nsID extraState +createStartState element extraState = + XMLConverterState + { parentElements = [element] + , namespacePrefixes = M.empty + , namespaceIRIs = getInitialIRImap + , moreState = extraState + } + +-- | Functor over extra state +instance Functor (XMLConverterState nsID) where + fmap f ( XMLConverterState parents prefixes iRIs extraState ) + = XMLConverterState parents prefixes iRIs (f extraState) + +-- +replaceExtraState :: extraState + -> XMLConverterState nsID _x + -> XMLConverterState nsID extraState +replaceExtraState x s + = fmap (const x) s + +-- +currentElement :: XMLConverterState nsID extraState + -> XML.Element +currentElement state = head (parentElements state) + +-- | Replace the current position by another, modifying the extra state +-- in the process +swapPosition :: (extraState -> extraState') + -> [XML.Element] + -> XMLConverterState nsID extraState + -> XMLConverterState nsID extraState' +swapPosition f stack state + = state { parentElements = stack + , moreState = f (moreState state) + } + +-- | Replace the current position by another, modifying the extra state +-- in the process +swapStack' :: XMLConverterState nsID extraState + -> [XML.Element] + -> ( XMLConverterState nsID extraState , [XML.Element] ) +swapStack' state stack + = ( state { parentElements = stack } + , parentElements state + ) + +-- +pushElement :: XML.Element + -> XMLConverterState nsID extraState + -> XMLConverterState nsID extraState +pushElement e state = state { parentElements = e:(parentElements state) } + +-- | Pop the top element from the call stack, unless it is the last one. +popElement :: XMLConverterState nsID extraState + -> Maybe (XMLConverterState nsID extraState) +popElement state + | _:es@(_:_) <- parentElements state = Just $ state { parentElements = es } + | otherwise = Nothing + +-------------------------------------------------------------------------------- +-- Main type +-------------------------------------------------------------------------------- + +-- It might be a good idea to pack the converters in a GADT +-- Downside: data instead of type +-- Upside: 'Failure' could be made a parameter as well. + +-- +type XMLConverter nsID extraState input output + = ArrowState (XMLConverterState nsID extraState ) input output + +type FallibleXMLConverter nsID extraState input output + = XMLConverter nsID extraState input (Fallible output) + +-- +runConverter :: XMLConverter nsID extraState input output + -> XMLConverterState nsID extraState + -> input + -> output +runConverter converter state input = snd $ runArrowState converter (state,input) + +-- +runConverter'' :: (NameSpaceID nsID) + => XMLConverter nsID extraState (Fallible ()) output + -> extraState + -> XML.Element + -> output +runConverter'' converter extraState element = runConverter (readNSattributes >>> converter) (createStartState element extraState) () + +runConverter' :: (NameSpaceID nsID) + => FallibleXMLConverter nsID extraState () success + -> extraState + -> XML.Element + -> Fallible success +runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) () + +-- +runConverterF' :: FallibleXMLConverter nsID extraState x y + -> XMLConverterState nsID extraState + -> Fallible x -> Fallible y +runConverterF' a s e = runConverter (returnV e >>? a) s e + +-- +runConverterF :: (NameSpaceID nsID) + => FallibleXMLConverter nsID extraState XML.Element x + -> extraState + -> Fallible XML.Element -> Fallible x +runConverterF a s = either failWith + (\e -> runConverter a (createStartState e s) e) + +-- +getCurrentElement :: XMLConverter nsID extraState x XML.Element +getCurrentElement = extractFromState currentElement + +-- +getExtraState :: XMLConverter nsID extraState x extraState +getExtraState = extractFromState moreState + +-- +setExtraState :: XMLConverter nsID extraState extraState extraState +setExtraState = withState $ \state extra + -> (replaceExtraState extra state , extra) + + +-- | Lifts a function to the extra state. +modifyExtraState :: (extraState -> extraState) + -> XMLConverter nsID extraState x x +modifyExtraState = modifyState.fmap + + +-- | First sets the extra state to the new value. Then modifies the original +-- extra state with a converter that uses the new state. Finally, the +-- intermediate state is dropped and the extra state is lifted into the +-- state as it was at the beginning of the function. +-- As a result, exactly the extra state and nothing else is changed. +-- The resulting converter even behaves like an identity converter on the +-- value level. +-- +-- (The -ing form is meant to be mnemonic in a sequence of arrows as in +-- convertingExtraState () converter >>> doOtherStuff) +-- +convertingExtraState :: extraState' + -> FallibleXMLConverter nsID extraState' extraState extraState + -> FallibleXMLConverter nsID extraState x x +convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA + where + setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v + modifyWithA = keepingTheValue (moreState ^>> a) + >>^ spreadChoice >>?§ flip replaceExtraState + +-- | First sets the extra state to the new value. Then produces a new +-- extra state with a converter that uses the new state. Finally, the +-- intermediate state is dropped and the extra state is lifted into the +-- state as it was at the beginning of the function. +-- As a result, exactly the extra state and nothing else is changed. +-- The resulting converter even behaves like an identity converter on the +-- value level. +-- +-- Aequivalent to +-- +-- > \v x a -> convertingExtraState v (returnV x >>> a) +-- +-- (The -ing form is meant to be mnemonic in a sequence of arrows as in +-- producingExtraState () () producer >>> doOtherStuff) +-- +producingExtraState :: extraState' + -> a + -> FallibleXMLConverter nsID extraState' a extraState + -> FallibleXMLConverter nsID extraState x x +producingExtraState v x a = convertingExtraState v (returnV x >>> a) + + +-------------------------------------------------------------------------------- +-- Work in namespaces +-------------------------------------------------------------------------------- + +-- | Arrow version of 'getIRI' +lookupNSiri :: (NameSpaceID nsID) + => nsID + -> XMLConverter nsID extraState x (Maybe NameSpaceIRI) +lookupNSiri nsID = extractFromState + $ \state -> getIRI nsID $ namespaceIRIs state + +-- +lookupNSprefix :: (NameSpaceID nsID) + => nsID + -> XMLConverter nsID extraState x (Maybe NameSpacePrefix) +lookupNSprefix nsID = extractFromState + $ \state -> M.lookup nsID $ namespacePrefixes state + +-- | Extracts namespace attributes from the current element and tries to +-- update the current mapping accordingly +readNSattributes :: (NameSpaceID nsID) + => FallibleXMLConverter nsID extraState x () +readNSattributes = fromState $ \state -> maybe (state, failEmpty ) + ( , succeedWith ()) + (extractNSAttrs state ) + where + extractNSAttrs :: (NameSpaceID nsID) + => XMLConverterState nsID extraState + -> Maybe (XMLConverterState nsID extraState) + extractNSAttrs startState + = foldl (\state d -> state >>= addNS d) + (Just startState) + nsAttribs + where nsAttribs = mapMaybe readNSattr (XML.elAttribs element) + element = currentElement startState + readNSattr (XML.Attr (XML.QName name _ (Just "xmlns")) iri) + = Just (name, iri) + readNSattr _ = Nothing + addNS (prefix, iri) state = fmap updateState + $ getNamespaceID iri + $ namespaceIRIs state + where updateState (iris,nsID) + = state { namespaceIRIs = iris + , namespacePrefixes = M.insert nsID prefix + $ namespacePrefixes state + } + +-------------------------------------------------------------------------------- +-- Common namespace accessors +-------------------------------------------------------------------------------- + +-- | Given a namespace id and an element name, creates a 'XML.QName' for +-- internal use +elemName :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState x XML.QName +elemName nsID name = lookupNSiri nsID + &&& lookupNSprefix nsID + >>§ XML.QName name + +-- | Checks if a given element matches both a specified namespace id +-- and a specified element name +elemNameIs :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState XML.Element Bool +elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>§ hasThatName + where hasThatName e iri = let elName = XML.elName e + in XML.qName elName == name + && XML.qURI elName == iri + +-------------------------------------------------------------------------------- +-- General content +-------------------------------------------------------------------------------- + +-- +strContent :: XMLConverter nsID extraState x String +strContent = getCurrentElement + >>^ XML.strContent + +-- +elContent :: XMLConverter nsID extraState x [XML.Content] +elContent = getCurrentElement + >>^ XML.elContent + +-------------------------------------------------------------------------------- +-- Current element +-------------------------------------------------------------------------------- + +-- +currentElem :: XMLConverter nsID extraState x (XML.QName) +currentElem = getCurrentElement + >>^ XML.elName + +currentElemIs :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState x Bool +currentElemIs nsID name = getCurrentElement + >>> elemNameIs nsID name + + + +{- +currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>> + (XML.qName >>^ (&&).(== name) ) + ^&&&^ + (XML.qIRI >>^ (==) ) + ) >>§ (.) + ) &&& lookupNSiri nsID >>§ ($) +-} + +-- +expectElement :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState x () +expectElement nsID name = currentElemIs nsID name + >>^ boolToChoice + +-------------------------------------------------------------------------------- +-- Chilren +-------------------------------------------------------------------------------- + +-- +elChildren :: XMLConverter nsID extraState x [XML.Element] +elChildren = getCurrentElement + >>^ XML.elChildren + +-- +findChildren :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState x [XML.Element] +findChildren nsID name = elemName nsID name + &&& getCurrentElement + >>§ XML.findChildren + +-- +filterChildren :: (XML.Element -> Bool) + -> XMLConverter nsID extraState x [XML.Element] +filterChildren p = getCurrentElement + >>^ XML.filterChildren p + +-- +filterChildrenName :: (XML.QName -> Bool) + -> XMLConverter nsID extraState x [XML.Element] +filterChildrenName p = getCurrentElement + >>^ XML.filterChildrenName p + +-- +findChild' :: (NameSpaceID nsID) + => nsID + -> ElementName + -> XMLConverter nsID extraState x (Maybe XML.Element) +findChild' nsID name = elemName nsID name + &&& getCurrentElement + >>§ XML.findChild + +-- +findChild :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState x XML.Element +findChild nsID name = findChild' nsID name + >>> maybeToChoice + +-- +filterChild' :: (XML.Element -> Bool) + -> XMLConverter nsID extraState x (Maybe XML.Element) +filterChild' p = getCurrentElement + >>^ XML.filterChild p + +-- +filterChild :: (XML.Element -> Bool) + -> FallibleXMLConverter nsID extraState x XML.Element +filterChild p = filterChild' p + >>> maybeToChoice + +-- +filterChildName' :: (XML.QName -> Bool) + -> XMLConverter nsID extraState x (Maybe XML.Element) +filterChildName' p = getCurrentElement + >>^ XML.filterChildName p + +-- +filterChildName :: (XML.QName -> Bool) + -> FallibleXMLConverter nsID extraState x XML.Element +filterChildName p = filterChildName' p + >>> maybeToChoice + + +-------------------------------------------------------------------------------- +-- Attributes +-------------------------------------------------------------------------------- + +-- +isSet :: (NameSpaceID nsID) + => nsID -> AttributeName + -> (Either Failure Bool) + -> FallibleXMLConverter nsID extraState x Bool +isSet nsID attrName deflt + = findAttr' nsID attrName + >>^ maybe deflt stringToBool + +-- +isSet' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe Bool) +isSet' nsID attrName = findAttr' nsID attrName + >>^ (>>= stringToBool') + +isSetWithDefault :: (NameSpaceID nsID) + => nsID -> AttributeName + -> Bool + -> XMLConverter nsID extraState x Bool +isSetWithDefault nsID attrName def' + = isSet' nsID attrName + >>^ fromMaybe def' + +-- +hasAttrValueOf' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> AttributeValue + -> XMLConverter nsID extraState x Bool +hasAttrValueOf' nsID attrName attrValue + = findAttr nsID attrName + >>> ( const False ^|||^ (==attrValue)) + +-- +failIfNotAttrValueOf :: (NameSpaceID nsID) + => nsID -> AttributeName + -> AttributeValue + -> FallibleXMLConverter nsID extraState x () +failIfNotAttrValueOf nsID attrName attrValue + = hasAttrValueOf' nsID attrName attrValue + >>^ boolToChoice + +-- | Is the value that is currently transported in the arrow the value of +-- the specified attribute? +isThatTheAttrValue :: (NameSpaceID nsID) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState AttributeValue Bool +isThatTheAttrValue nsID attrName + = keepingTheValue + (findAttr nsID attrName) + >>§ right.(==) + +-- | Lookup value in a dictionary, fail if no attribute found or value +-- not in dictionary +searchAttrIn :: (NameSpaceID nsID) + => nsID -> AttributeName + -> [(AttributeValue,a)] + -> FallibleXMLConverter nsID extraState x a +searchAttrIn nsID attrName dict + = findAttr nsID attrName + >>?^? maybeToChoice.(`lookup` dict ) + + +-- | Lookup value in a dictionary. Fail if no attribute found. If value not in +-- dictionary, return default value +searchAttrWith :: (NameSpaceID nsID) + => nsID -> AttributeName + -> a + -> [(AttributeValue,a)] + -> FallibleXMLConverter nsID extraState x a +searchAttrWith nsID attrName defV dict + = findAttr nsID attrName + >>?^ (fromMaybe defV).(`lookup` dict ) + +-- | Lookup value in a dictionary. If attribute or value not found, +-- return default value +searchAttr :: (NameSpaceID nsID) + => nsID -> AttributeName + -> a + -> [(AttributeValue,a)] + -> XMLConverter nsID extraState x a +searchAttr nsID attrName defV dict + = searchAttrIn nsID attrName dict + >>> const defV ^|||^ id + +-- | Read a 'Lookupable' attribute. Fail if no match. +lookupAttr :: (NameSpaceID nsID, Lookupable a) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x a +lookupAttr nsID attrName = lookupAttr' nsID attrName + >>^ maybeToChoice + + +-- | Read a 'Lookupable' attribute. Return the result as a 'Maybe'. +lookupAttr' :: (NameSpaceID nsID, Lookupable a) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe a) +lookupAttr' nsID attrName + = findAttr' nsID attrName + >>^ (>>= readLookupable) + +-- | Read a 'Lookupable' attribute with explicit default +lookupAttrWithDefault :: (NameSpaceID nsID, Lookupable a) + => nsID -> AttributeName + -> a + -> XMLConverter nsID extraState x a +lookupAttrWithDefault nsID attrName deflt + = lookupAttr' nsID attrName + >>^ fromMaybe deflt + +-- | Read a 'Lookupable' attribute with implicit default +lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a) + => nsID -> AttributeName + -> XMLConverter nsID extraState x a +lookupDefaultingAttr nsID attrName + = lookupAttrWithDefault nsID attrName def + +-- | Return value as a (Maybe String) +findAttr' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe AttributeValue) +findAttr' nsID attrName = elemName nsID attrName + &&& getCurrentElement + >>§ XML.findAttr + +-- | Return value as string or fail +findAttr :: (NameSpaceID nsID) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x AttributeValue +findAttr nsID attrName = findAttr' nsID attrName + >>> maybeToChoice + +-- | Return value as string or return provided default value +findAttrWithDefault :: (NameSpaceID nsID) + => nsID -> AttributeName + -> AttributeValue + -> XMLConverter nsID extraState x AttributeValue +findAttrWithDefault nsID attrName deflt + = findAttr' nsID attrName + >>^ fromMaybe deflt + +-- | Read and return value or fail +readAttr :: (NameSpaceID nsID, Read attrValue) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x attrValue +readAttr nsID attrName = readAttr' nsID attrName + >>> maybeToChoice + +-- | Read and return value or return Nothing +readAttr' :: (NameSpaceID nsID, Read attrValue) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe attrValue) +readAttr' nsID attrName = findAttr' nsID attrName + >>^ (>>= tryToRead) + +-- | Read and return value or return provided default value +readAttrWithDefault :: (NameSpaceID nsID, Read attrValue) + => nsID -> AttributeName + -> attrValue + -> XMLConverter nsID extraState x attrValue +readAttrWithDefault nsID attrName deflt + = findAttr' nsID attrName + >>^ (>>= tryToRead) + >>^ fromMaybe deflt + +-- | Read and return value or return default value from 'Default' instance +getAttr :: (NameSpaceID nsID, Read attrValue, Default attrValue) + => nsID -> AttributeName + -> XMLConverter nsID extraState x attrValue +getAttr nsID attrName = readAttrWithDefault nsID attrName def + +-------------------------------------------------------------------------------- +-- Movements +-------------------------------------------------------------------------------- + +-- +jumpThere :: XMLConverter nsID extraState XML.Element XML.Element +jumpThere = withState (\state element + -> ( pushElement element state , element ) + ) + +-- +swapStack :: XMLConverter nsID extraState [XML.Element] [XML.Element] +swapStack = withState swapStack' + +-- +jumpBack :: FallibleXMLConverter nsID extraState _x _x +jumpBack = tryModifyState (popElement >>> maybeToChoice) + +-- | Support function for "procedural" converters: jump to an element, execute +-- a converter, jump back. +-- This version is safer than 'executeThere', because it does not rely on the +-- internal stack. As a result, the converter can not move around in arbitrary +-- ways. The downside is of course that some of the environment is not +-- accessible to the converter. +switchingTheStack :: XMLConverter nsID moreState a b + -> XMLConverter nsID moreState (a, XML.Element) b +switchingTheStack a = second ( (:[]) ^>> swapStack ) + >>> first a + >>> second swapStack + >>^ fst + +-- | Support function for "procedural" converters: jumps to an element, executes +-- a converter, jumps back. +-- Make sure that the converter is well-behaved; that is it should +-- return to the exact position it started from in /every possible path/ of +-- execution, even if it "fails". If it does not, you may encounter +-- strange bugs. If you are not sure about the behaviour or want to use +-- shortcuts, you can often use 'switchingTheStack' instead. +executeThere :: FallibleXMLConverter nsID moreState a b + -> FallibleXMLConverter nsID moreState (a, XML.Element) b +executeThere a = second jumpThere + >>> fst + ^>> a + >>> jumpBack -- >>? jumpBack would not ensure the jump. + >>^ collapseEither + +-- | Do something in a sub-element, tnen come back +executeIn :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState f s + -> FallibleXMLConverter nsID extraState f s +executeIn nsID name a = keepingTheValue + (findChild nsID name) + >>> ignoringState liftFailure + >>? switchingTheStack a + where liftFailure (_, (Left f)) = Left f + liftFailure (x, (Right e)) = Right (x, e) + +-------------------------------------------------------------------------------- +-- Iterating over children +-------------------------------------------------------------------------------- + +-- Helper converter to prepare different types of iterations. +-- It lifts the children (of a certain type) of the current element +-- into the value level and pairs each one with the current input value. +prepareIteration :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState b [(b, XML.Element)] +prepareIteration nsID name = keepingTheValue + (findChildren nsID name) + >>§ distributeValue + +-- | Applies a converter to every child element of a specific type. +-- Collects results in a 'Monoid'. +-- Fails completely if any conversion fails. +collectEvery :: (NameSpaceID nsID, Monoid m) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a m + -> FallibleXMLConverter nsID extraState a m +collectEvery nsID name a = prepareIteration nsID name + >>> foldS' (switchingTheStack a) + +-- +withEveryL :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a b + -> FallibleXMLConverter nsID extraState a [b] +withEveryL = withEvery + +-- | Applies a converter to every child element of a specific type. +-- Collects results in a 'MonadPlus'. +-- Fails completely if any conversion fails. +withEvery :: (NameSpaceID nsID, MonadPlus m) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a b + -> FallibleXMLConverter nsID extraState a (m b) +withEvery nsID name a = prepareIteration nsID name + >>> iterateS' (switchingTheStack a) + +-- | Applies a converter to every child element of a specific type. +-- Collects all successful results in a list. +tryAll :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState b a + -> XMLConverter nsID extraState b [a] +tryAll nsID name a = prepareIteration nsID name + >>> iterateS (switchingTheStack a) + >>^ collectRights + +-- | Applies a converter to every child element of a specific type. +-- Collects all successful results. +tryAll' :: (NameSpaceID nsID, F.Foldable c, MonadPlus c) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState b a + -> XMLConverter nsID extraState b (c a) +tryAll' nsID name a = prepareIteration nsID name + >>> iterateS (switchingTheStack a) + >>^ collectRightsF + +-------------------------------------------------------------------------------- +-- Matching children +-------------------------------------------------------------------------------- + +type IdXMLConverter nsID moreState x + = XMLConverter nsID moreState x x + +type MaybeEConverter nsID moreState x + = Maybe (IdXMLConverter nsID moreState (x, XML.Element)) + +-- Chainable converter that helps deciding which converter to actually use. +type ElementMatchConverter nsID extraState x + = IdXMLConverter nsID + extraState + (MaybeEConverter nsID extraState x, XML.Element) + +type MaybeCConverter nsID moreState x + = Maybe (IdXMLConverter nsID moreState (x, XML.Content)) + +-- Chainable converter that helps deciding which converter to actually use. +type ContentMatchConverter nsID extraState x + = IdXMLConverter nsID + extraState + (MaybeCConverter nsID extraState x, XML.Content) + +-- Helper function: The @c@ is actually a converter that is to be selected by +-- matching XML elements to the first two parameters. +-- The fold used to match elements however is very simple, so to use it, +-- this function wraps the converter in another converter that unifies +-- the accumulator. Think of a lot of converters with the resulting type +-- chained together. The accumulator not only transports the element +-- unchanged to the next matcher, it also does the actual selecting by +-- combining the intermediate results with '(<|>)'. +makeMatcherE :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a a + -> ElementMatchConverter nsID extraState a +makeMatcherE nsID name c = ( second ( + elemNameIs nsID name + >>^ bool Nothing (Just tryC) + ) + >>§ (<|>) + ) &&&^ snd + where tryC = (fst ^&&& executeThere c >>§ recover) &&&^ snd + +-- Helper function: The @c@ is actually a converter that is to be selected by +-- matching XML content to the first two parameters. +-- The fold used to match elements however is very simple, so to use it, +-- this function wraps the converter in another converter that unifies +-- the accumulator. Think of a lot of converters with the resulting type +-- chained together. The accumulator not only transports the element +-- unchanged to the next matcher, it also does the actual selecting by +-- combining the intermediate results with '(<|>)'. +makeMatcherC :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a a + -> ContentMatchConverter nsID extraState a +makeMatcherC nsID name c = ( second ( contentToElem + >>> returnV Nothing + ||| ( elemNameIs nsID name + >>^ bool Nothing (Just cWithJump) + ) + ) + >>§ (<|>) + ) &&&^ snd + where cWithJump = ( fst + ^&&& ( second contentToElem + >>> spreadChoice + ^>>? executeThere c + ) + >>§ recover) + &&&^ snd + contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element + contentToElem = arr $ \e -> case e of + XML.Elem e' -> succeedWith e' + _ -> failEmpty + +-- Creates and chains a bunch of matchers +prepareMatchersE :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] + -> ElementMatchConverter nsID extraState x +--prepareMatchersE = foldSs . (map $ uncurry3 makeMatcherE) +prepareMatchersE = reverseComposition . (map $ uncurry3 makeMatcherE) + +-- Creates and chains a bunch of matchers +prepareMatchersC :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] + -> ContentMatchConverter nsID extraState x +--prepareMatchersC = foldSs . (map $ uncurry3 makeMatcherC) +prepareMatchersC = reverseComposition . (map $ uncurry3 makeMatcherC) + +-- | Takes a list of element-data - converter groups and +-- * Finds all children of the current element +-- * Matches each group to each child in order (at most one group per child) +-- * Filters non-matched children +-- * Chains all found converters in child-order +-- * Applies the chain to the input element +matchChildren :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] + -> XMLConverter nsID extraState a a +matchChildren lookups = let matcher = prepareMatchersE lookups + in keepingTheValue ( + elChildren + >>> map (Nothing,) + ^>> iterateSL matcher + >>^ catMaybes.map (\(m,e) -> fmap (swallowElem e) m) + -- >>> foldSs + >>> reverseComposition + ) + >>> swap + ^>> app + where + -- let the converter swallow the element and drop the element + -- in the return value + swallowElem element converter = (,element) ^>> converter >>^ fst + +-- +matchContent'' :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] + -> XMLConverter nsID extraState a a +matchContent'' lookups = let matcher = prepareMatchersC lookups + in keepingTheValue ( + elContent + >>> map (Nothing,) + ^>> iterateSL matcher + >>^ catMaybes.map (\(m,c) -> fmap (swallowContent c) m) + -- >>> foldSs + >>> reverseComposition + ) + >>> swap + ^>> app + where + -- let the converter swallow the content and drop the content + -- in the return value + swallowContent content converter = (,content) ^>> converter >>^ fst + + +-- | Takes a list of element-data - converter groups and +-- * Finds all content of the current element +-- * Matches each group to each piece of content in order +-- (at most one group per piece of content) +-- * Filters non-matched content +-- * Chains all found converters in content-order +-- * Applies the chain to the input element +matchContent' :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] + -> XMLConverter nsID extraState a a +matchContent' lookups = matchContent lookups (arr fst) + +-- | Takes a list of element-data - converter groups and +-- * Finds all content of the current element +-- * Matches each group to each piece of content in order +-- (at most one group per piece of content) +-- * Adds a default converter for all non-matched content +-- * Chains all found converters in content-order +-- * Applies the chain to the input element +matchContent :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] + -> XMLConverter nsID extraState (a,XML.Content) a + -> XMLConverter nsID extraState a a +matchContent lookups fallback + = let matcher = prepareMatchersC lookups + in keepingTheValue ( + elContent + >>> map (Nothing,) + ^>> iterateSL matcher + >>^ map swallowOrFallback + -- >>> foldSs + >>> reverseComposition + ) + >>> swap + ^>> app + where + -- let the converter swallow the content and drop the content + -- in the return value + swallowOrFallback (Just converter,content) = (,content) ^>> converter >>^ fst + swallowOrFallback (Nothing ,content) = (,content) ^>> fallback + +-------------------------------------------------------------------------------- +-- Internals +-------------------------------------------------------------------------------- + +stringToBool :: (Monoid failure) => String -> Either failure Bool +stringToBool val -- stringToBool' val >>> maybeToChoice + | val `elem` trueValues = succeedWith True + | val `elem` falseValues = succeedWith False + | otherwise = failEmpty + where trueValues = ["true" ,"on" ,"1"] + falseValues = ["false","off","0"] + +stringToBool' :: String -> Maybe Bool +stringToBool' val | val `elem` trueValues = Just True + | val `elem` falseValues = Just False + | otherwise = Nothing + where trueValues = ["true" ,"on" ,"1"] + falseValues = ["false","off","0"] + + +distributeValue :: a -> [b] -> [(a,b)] +distributeValue = map.(,) + +-------------------------------------------------------------------------------- + +{- +NOTES +It might be a good idea to refactor the namespace stuff. +E.g.: if a namespace constructor took a string as a parameter, things like +> a ?>/< (NsText,"body") +would be nicer. +Together with a rename and some trickery, something like +> |< NsText "body" >< NsText "p" ?> a </> </>| +might even be possible. + +Some day, XML.Light should be replaced by something better. +While doing that, it might be useful to replace String as the type of element +names with something else, too. (Of course with OverloadedStrings). +While doing that, maybe the types can be created in a way that something like +> NsText:"body" +could be used. Overloading (:) does not sounds like the best idea, but if the +element name type was a list, this might be possible. +Of course that would be a bit hackish, so the "right" way would probably be +something like +> InNS NsText "body" +but isn't that a bit boring? ;) +-} diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs new file mode 100644 index 000000000..e28056814 --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -0,0 +1,110 @@ +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Reader.Odt.Namespaces + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +Namespaces used in odt files. +-} + +module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..) + ) where + +import Data.List ( isPrefixOf ) +import Data.Maybe ( fromMaybe, listToMaybe ) +import qualified Data.Map as M ( empty, insert ) + +import Text.Pandoc.Readers.Odt.Generic.Namespaces + + +instance NameSpaceID Namespace where + + getInitialIRImap = nsIDmap + + getNamespaceID "" m = Just(m, NsXML) + getNamespaceID iri m = asPair $ fromMaybe (NsOther iri) (findID iri) + where asPair nsID = Just (M.insert nsID iri m, nsID) + + +findID :: NameSpaceIRI -> Maybe Namespace +findID iri = listToMaybe [nsID | (iri',~nsID) <- nsIDs, iri' `isPrefixOf` iri] + +nsIDmap :: NameSpaceIRIs Namespace +nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs + +data Namespace = -- Open Document core + NsOffice | NsStyle | NsText | NsTable | NsForm + | NsDraw | Ns3D | NsAnim | NsChart | NsConfig + | NsDB | NsMeta | NsNumber | NsScript | NsManifest + | NsPresentation + -- Metadata + | NsODF + -- Compatible elements + | NsXSL_FO | NsSVG | NsSmil + -- External standards + | NsMathML | NsXForms | NsXLink | NsXHtml | NsGRDDL + | NsDublinCore + -- Metadata manifest + | NsPKG + -- Others + | NsOpenFormula + -- Core XML (basically only for the 'id'-attribute) + | NsXML + -- Fallback + | NsOther String + deriving ( Eq, Ord, Show ) + +-- | Not the actual iri's, but large prefixes of them - this way there are +-- less versioning problems and the like. +nsIDs :: [(String,Namespace)] +nsIDs = [ + ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ), + ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ), + ("urn:oasis:names:tc:opendocument:xmlns:config" , NsConfig ), + ("urn:oasis:names:tc:opendocument:xmlns:database" , NsDB ), + ("urn:oasis:names:tc:opendocument:xmlns:dr3d" , Ns3D ), + ("urn:oasis:names:tc:opendocument:xmlns:drawing" , NsDraw ), + ("urn:oasis:names:tc:opendocument:xmlns:form" , NsForm ), + ("urn:oasis:names:tc:opendocument:xmlns:manifest" , NsManifest ), + ("urn:oasis:names:tc:opendocument:xmlns:meta" , NsMeta ), + ("urn:oasis:names:tc:opendocument:xmlns:datastyle" , NsNumber ), + ("urn:oasis:names:tc:opendocument:xmlns:of" , NsOpenFormula ), + ("urn:oasis:names:tc:opendocument:xmlns:office:1.0" , NsOffice ), + ("urn:oasis:names:tc:opendocument:xmlns:presentation" , NsPresentation ), + ("urn:oasis:names:tc:opendocument:xmlns:script" , NsScript ), + ("urn:oasis:names:tc:opendocument:xmlns:style" , NsStyle ), + ("urn:oasis:names:tc:opendocument:xmlns:table" , NsTable ), + ("urn:oasis:names:tc:opendocument:xmlns:text" , NsText ), + ("urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible", NsXSL_FO ), + ("urn:oasis:names:tc:opendocument:xmlns:smil-compatible" , NsSmil ), + ("urn:oasis:names:tc:opendocument:xmlns:svg-compatible" , NsSVG ), + ("http://docs.oasis-open.org/ns/office/1.2/meta/odf" , NsODF ), + ("http://docs.oasis-open.org/ns/office/1.2/meta/pkg" , NsPKG ), + ("http://purl.org/dc/elements" , NsDublinCore ), + ("http://www.w3.org/2003/g/data-view" , NsGRDDL ), + ("http://www.w3.org/1998/Math/MathML" , NsMathML ), + ("http://www.w3.org/1999/xhtml" , NsXHtml ), + ("http://www.w3.org/2002/xforms" , NsXForms ), + ("http://www.w3.org/1999/xlink" , NsXLink ) + ]
\ No newline at end of file diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs new file mode 100644 index 000000000..1cf87cc59 --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -0,0 +1,737 @@ +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE Arrows #-} + +{- +Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.StyleReader + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> + Stability : alpha + Portability : portable + +Reader for the style information in an odt document. +-} + +module Text.Pandoc.Readers.Odt.StyleReader +( Style (..) +, StyleName +, StyleFamily (..) +, Styles (..) +, StyleProperties (..) +, TextProperties (..) +, ParaProperties (..) +, VerticalTextPosition (..) +, ListItemNumberFormat (..) +, ListLevel +, ListStyle (..) +, ListLevelStyle (..) +, ListLevelType (..) +, LengthOrPercent (..) +, lookupStyle +, getTextProperty +, getTextProperty' +, getParaProperty +, getListStyle +, getListLevelStyle +, getStyleFamily +, lookupDefaultStyle +, lookupDefaultStyle' +, lookupListStyleByName +, getPropertyChain +, textPropertyChain +, stylePropertyChain +, stylePropertyChain' +, getStylePropertyChain +, extendedStylePropertyChain +, extendedStylePropertyChain' +, liftStyles +, readStylesAt +) where + +import Control.Arrow +import Control.Applicative hiding ( liftA, liftA2, liftA3 ) + +import qualified Data.Foldable as F +import qualified Data.Map as M +import qualified Data.Set as S +import Data.List ( unfoldr ) +import Data.Default +import Data.Monoid +import Data.Maybe + +import qualified Text.XML.Light as XML + +import Text.Pandoc.Readers.Odt.Arrows.State +import Text.Pandoc.Readers.Odt.Arrows.Utils + +import Text.Pandoc.Readers.Odt.Generic.Utils +import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM +import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.Odt.Generic.XMLConverter + +import Text.Pandoc.Readers.Odt.Namespaces +import Text.Pandoc.Readers.Odt.Base + + +readStylesAt :: XML.Element -> Fallible Styles +readStylesAt e = runConverter' readAllStyles mempty e + +-------------------------------------------------------------------------------- +-- Reader for font declarations and font pitches +-------------------------------------------------------------------------------- + +-- Pandoc has no support for different font pitches. Yet knowing them can be +-- very helpful in cases where Pandoc has more semantics than OpenDocument. +-- In these cases, the pitch can help deciding as what to define a block of +-- text. So let's start with a type for font pitches: + +data FontPitch = PitchVariable | PitchFixed + deriving ( Eq, Show ) + +instance Lookupable FontPitch where + lookupTable = [ ("variable" , PitchVariable) + , ("fixed" , PitchFixed ) + ] + +instance Default FontPitch where + def = PitchVariable + +-- The font pitch can be specifed in a style directly. Normally, however, +-- it is defined in the font. That is also the specs' recommendation. +-- +-- Thus, we want + +type FontFaceName = String + +type FontPitches = M.Map FontFaceName FontPitch + +-- To get there, the fonts have to be read and the pitches extracted. +-- But the resulting map are only needed at one later place, so it should not be +-- transported on the value level, especially as we already use a state arrow. +-- So instead, the resulting map is lifted into the state of the reader. +-- (An alternative might be ImplicitParams, but again, we already have a state.) +-- +-- So the main style readers will have the types +type StyleReader a b = XMLReader FontPitches a b +-- and +type StyleReaderSafe a b = XMLReaderSafe FontPitches a b +-- respectively. +-- +-- But before we can work with these, we need to define the reader that reads +-- the fonts: + +-- | A reader for font pitches +fontPitchReader :: XMLReader _s _x FontPitches +fontPitchReader = executeIn NsOffice "font-face-decls" ( + ( withEveryL NsStyle "font-face" $ liftAsSuccess ( + findAttr' NsStyle "name" + &&& + lookupDefaultingAttr NsStyle "font-pitch" + ) + ) + >>?^ ( M.fromList . (foldl accumLegalPitches []) ) + ) + where accumLegalPitches ls (Nothing,_) = ls + accumLegalPitches ls (Just n,p) = (n,p):ls + + +-- | A wrapper around the font pitch reader that lifts the result into the +-- state. +readFontPitches :: StyleReader x x +readFontPitches = producingExtraState () () fontPitchReader + + +-- | Looking up a pitch in the state of the arrow. +-- +-- The function does the following: +-- * Look for the font pitch in an attribute. +-- * If that fails, look for the font name, look up the font in the state +-- and use the pitch from there. +-- * Return the result in a Maybe +-- +findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch) +findPitch = ( lookupAttr NsStyle "font-pitch" + `ifFailedDo` findAttr NsStyle "font-name" + >>? ( keepingTheValue getExtraState + >>§ M.lookup + >>^ maybeToChoice + ) + ) + >>> choiceToMaybe + +-------------------------------------------------------------------------------- +-- Definitions of main data +-------------------------------------------------------------------------------- + +type StyleName = String + +-- | There are two types of styles: named styles with a style family and an +-- optional style parent, and default styles for each style family, +-- defining default style properties +data Styles = Styles + { stylesByName :: M.Map StyleName Style + , listStylesByName :: M.Map StyleName ListStyle + , defaultStyleMap :: M.Map StyleFamily StyleProperties + } + deriving ( Show ) + +-- Styles from a monoid under union +instance Monoid Styles where + mempty = Styles M.empty M.empty M.empty + mappend (Styles sBn1 dSm1 lsBn1) + (Styles sBn2 dSm2 lsBn2) + = Styles (M.union sBn1 sBn2) + (M.union dSm1 dSm2) + (M.union lsBn1 lsBn2) + +-- Not all families from the specifications are implemented, only those we need. +-- But there are none that are not mentioned here. +data StyleFamily = FaText | FaParagraph +-- | FaTable | FaTableCell | FaTableColumn | FaTableRow +-- | FaGraphic | FaDrawing | FaChart +-- | FaPresentation +-- | FaRuby + deriving ( Eq, Ord, Show ) + +instance Lookupable StyleFamily where + lookupTable = [ ( "text" , FaText ) + , ( "paragraph" , FaParagraph ) +-- , ( "table" , FaTable ) +-- , ( "table-cell" , FaTableCell ) +-- , ( "table-column" , FaTableColumn ) +-- , ( "table-row" , FaTableRow ) +-- , ( "graphic" , FaGraphic ) +-- , ( "drawing-page" , FaDrawing ) +-- , ( "chart" , FaChart ) +-- , ( "presentation" , FaPresentation ) +-- , ( "ruby" , FaRuby ) + ] + +-- | A named style +data Style = Style { styleFamily :: Maybe StyleFamily + , styleParentName :: Maybe StyleName + , listStyle :: Maybe StyleName + , styleProperties :: StyleProperties + } + deriving ( Eq, Show ) + +data StyleProperties = SProps { textProperties :: Maybe TextProperties + , paraProperties :: Maybe ParaProperties +-- , tableColProperties :: Maybe TColProperties +-- , tableRowProperties :: Maybe TRowProperties +-- , tableCellProperties :: Maybe TCellProperties +-- , tableProperties :: Maybe TableProperties +-- , graphicProperties :: Maybe GraphProperties + } + deriving ( Eq, Show ) + +instance Default StyleProperties where + def = SProps { textProperties = Just def + , paraProperties = Just def + } + +data TextProperties = PropT { isEmphasised :: Bool + , isStrong :: Bool + , pitch :: Maybe FontPitch + , verticalPosition :: VerticalTextPosition + , underline :: Maybe UnderlineMode + , strikethrough :: Maybe UnderlineMode + } + deriving ( Eq, Show ) + +instance Default TextProperties where + def = PropT { isEmphasised = False + , isStrong = False + , pitch = Just def + , verticalPosition = def + , underline = Nothing + , strikethrough = Nothing + } + +data ParaProperties = PropP { paraNumbering :: ParaNumbering + , indentation :: LengthOrPercent + , margin_left :: LengthOrPercent + } + deriving ( Eq, Show ) + +instance Default ParaProperties where + def = PropP { paraNumbering = NumberingNone + , indentation = def + , margin_left = def + } + +---- +-- All the little data types that make up the properties +---- + +data VerticalTextPosition = VPosNormal | VPosSuper | VPosSub + deriving ( Eq, Show ) + +instance Default VerticalTextPosition where + def = VPosNormal + +instance Read VerticalTextPosition where + readsPrec _ s = [ (VPosSub , s') | ("sub" , s') <- lexS ] + ++ [ (VPosSuper , s') | ("super" , s') <- lexS ] + ++ [ (signumToVPos n , s') | ( n , s') <- readPercent s ] + where + lexS = lex s + signumToVPos n | n < 0 = VPosSub + | n > 0 = VPosSuper + | otherwise = VPosNormal + +data UnderlineMode = UnderlineModeNormal | UnderlineModeSkipWhitespace + deriving ( Eq, Show ) + +instance Lookupable UnderlineMode where + lookupTable = [ ( "continuous" , UnderlineModeNormal ) + , ( "skip-white-space" , UnderlineModeSkipWhitespace ) + ] + + +data ParaNumbering = NumberingNone | NumberingKeep | NumberingRestart Int + deriving ( Eq, Show ) + +data LengthOrPercent = LengthValueMM Int | PercentValue Int + deriving ( Eq, Show ) + +instance Default LengthOrPercent where + def = LengthValueMM 0 + +instance Read LengthOrPercent where + readsPrec _ s = + [ (PercentValue percent , s' ) | (percent , s' ) <- readPercent s] + ++ [ (LengthValueMM lengthMM , s'') | (length' , s' ) <- reads s + , (unit , s'') <- reads s' + , let lengthMM = estimateInMillimeter + length' unit + ] + +data XslUnit = XslUnitMM | XslUnitCM + | XslUnitInch + | XslUnitPoints | XslUnitPica + | XslUnitPixel + | XslUnitEM + +instance Show XslUnit where + show XslUnitMM = "mm" + show XslUnitCM = "cm" + show XslUnitInch = "in" + show XslUnitPoints = "pt" + show XslUnitPica = "pc" + show XslUnitPixel = "px" + show XslUnitEM = "em" + +instance Read XslUnit where + readsPrec _ "mm" = [(XslUnitMM , "")] + readsPrec _ "cm" = [(XslUnitCM , "")] + readsPrec _ "in" = [(XslUnitInch , "")] + readsPrec _ "pt" = [(XslUnitPoints , "")] + readsPrec _ "pc" = [(XslUnitPica , "")] + readsPrec _ "px" = [(XslUnitPixel , "")] + readsPrec _ "em" = [(XslUnitEM , "")] + readsPrec _ _ = [] + +-- | Rough conversion of measures into millimeters. +-- Pixels and em's are actually implemetation dependant/relative measures, +-- so I could not really easily calculate anything exact here even if I wanted. +-- But I do not care about exactness right now, as I only use measures +-- to determine if a paragraph is "indented" or not. +estimateInMillimeter :: Int -> XslUnit -> Int +estimateInMillimeter n XslUnitMM = n +estimateInMillimeter n XslUnitCM = n * 10 +estimateInMillimeter n XslUnitInch = n * 25 -- * 25.4 +estimateInMillimeter n XslUnitPoints = n `div` 3 -- * 1/72 * 25.4 +estimateInMillimeter n XslUnitPica = n * 4 -- * 12 * 1/72 * 25.4 +estimateInMillimeter n XslUnitPixel = n `div`3 -- * 1/72 * 25.4 +estimateInMillimeter n XslUnitEM = n * 7 -- * 16 * 1/72 * 25.4 + + +---- +-- List styles +---- + +type ListLevel = Int + +newtype ListStyle = ListStyle { levelStyles :: M.Map ListLevel ListLevelStyle + } + deriving ( Eq, Show ) + +-- +getListLevelStyle :: ListLevel -> ListStyle -> Maybe ListLevelStyle +getListLevelStyle level ListStyle{..} = + let (lower , exactHit , _) = M.splitLookup level levelStyles + in exactHit <|> fmap fst (M.maxView lower) + -- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1] + -- ^ simpler, but in general less efficient + +data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType + , listItemPrefix :: Maybe String + , listItemSuffix :: Maybe String + , listItemFormat :: ListItemNumberFormat + } + deriving ( Eq, Ord ) + +instance Show ListLevelStyle where + show ListLevelStyle{..} = "<LLS|" + ++ (show listLevelType) + ++ "|" + ++ (maybeToString listItemPrefix) + ++ (show listItemFormat) + ++ (maybeToString listItemSuffix) + ++ ">" + where maybeToString = fromMaybe "" + +data ListLevelType = LltBullet | LltImage | LltNumbered + deriving ( Eq, Ord, Show ) + +data ListItemNumberFormat = LinfNone + | LinfNumber + | LinfRomanLC | LinfRomanUC + | LinfAlphaLC | LinfAlphaUC + | LinfString String + deriving ( Eq, Ord ) + +instance Show ListItemNumberFormat where + show LinfNone = "" + show LinfNumber = "1" + show LinfRomanLC = "i" + show LinfRomanUC = "I" + show LinfAlphaLC = "a" + show LinfAlphaUC = "A" + show (LinfString s) = s + +instance Default ListItemNumberFormat where + def = LinfNone + +instance Read ListItemNumberFormat where + readsPrec _ "" = [(LinfNone , "")] + readsPrec _ "1" = [(LinfNumber , "")] + readsPrec _ "i" = [(LinfRomanLC , "")] + readsPrec _ "I" = [(LinfRomanUC , "")] + readsPrec _ "a" = [(LinfAlphaLC , "")] + readsPrec _ "A" = [(LinfAlphaUC , "")] + readsPrec _ s = [(LinfString s , "")] + +-------------------------------------------------------------------------------- +-- Readers +-- +-- ...it seems like a whole lot of this should be automatically deriveable +-- or at least moveable into a class. Most of this is data concealed in +-- code. +-------------------------------------------------------------------------------- + +-- +readAllStyles :: StyleReader _x Styles +readAllStyles = ( readFontPitches + >>?! ( readAutomaticStyles + &&& readStyles )) + >>?§? chooseMax + -- all top elements are always on the same hierarchy level + +-- +readStyles :: StyleReader _x Styles +readStyles = executeIn NsOffice "styles" $ liftAsSuccess + $ liftA3 Styles + ( tryAll NsStyle "style" readStyle >>^ M.fromList ) + ( tryAll NsText "list-style" readListStyle >>^ M.fromList ) + ( tryAll NsStyle "default-style" readDefaultStyle >>^ M.fromList ) + +-- +readAutomaticStyles :: StyleReader _x Styles +readAutomaticStyles = executeIn NsOffice "automatic-styles" $ liftAsSuccess + $ liftA3 Styles + ( tryAll NsStyle "style" readStyle >>^ M.fromList ) + ( tryAll NsText "list-style" readListStyle >>^ M.fromList ) + ( returnV M.empty ) + +-- +readDefaultStyle :: StyleReader _x (StyleFamily, StyleProperties) +readDefaultStyle = lookupAttr NsStyle "family" + >>?! keepingTheValue readStyleProperties + +-- +readStyle :: StyleReader _x (StyleName,Style) +readStyle = findAttr NsStyle "name" + >>?! keepingTheValue + ( liftA4 Style + ( lookupAttr' NsStyle "family" ) + ( findAttr' NsStyle "parent-style-name" ) + ( findAttr' NsStyle "list-style-name" ) + readStyleProperties + ) + +-- +readStyleProperties :: StyleReaderSafe _x StyleProperties +readStyleProperties = liftA2 SProps + ( readTextProperties >>> choiceToMaybe ) + ( readParaProperties >>> choiceToMaybe ) + +-- +readTextProperties :: StyleReader _x TextProperties +readTextProperties = + executeIn NsStyle "text-properties" $ liftAsSuccess + ( liftA6 PropT + ( searchAttr NsXSL_FO "font-style" False isFontEmphasised ) + ( searchAttr NsXSL_FO "font-weight" False isFontBold ) + ( findPitch ) + ( getAttr NsStyle "text-position" ) + ( readUnderlineMode ) + ( readStrikeThroughMode ) + ) + where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)] + isFontBold = ("normal",False):("bold",True) + :(map ((,True).show) ([100,200..900]::[Int])) + +readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode) +readUnderlineMode = readLineMode "text-underline-mode" + "text-underline-style" + +readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode) +readStrikeThroughMode = readLineMode "text-line-through-mode" + "text-line-through-style" + +readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode) +readLineMode modeAttr styleAttr = proc x -> do + isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x + mode <- lookupAttr' NsStyle modeAttr -< x + if isUL + then case mode of + Just m -> returnA -< Just m + Nothing -> returnA -< Just UnderlineModeNormal + else returnA -< Nothing + where + isLinePresent = [("none",False)] ++ map (,True) + [ "dash" , "dot-dash" , "dot-dot-dash" , "dotted" + , "long-dash" , "solid" , "wave" + ] + +-- +readParaProperties :: StyleReader _x ParaProperties +readParaProperties = + executeIn NsStyle "paragraph-properties" $ liftAsSuccess + ( liftA3 PropP + ( liftA2 readNumbering + ( isSet' NsText "number-lines" ) + ( readAttr' NsText "line-number" ) + ) + ( liftA2 readIndentation + ( isSetWithDefault NsStyle "auto-text-indent" False ) + ( getAttr NsXSL_FO "text-indent" ) + ) + ( getAttr NsXSL_FO "margin-left" ) + ) + where readNumbering (Just True) (Just n) = NumberingRestart n + readNumbering (Just True) _ = NumberingKeep + readNumbering _ _ = NumberingNone + + readIndentation False indent = indent + readIndentation True _ = def + +---- +-- List styles +---- + +-- +readListStyle :: StyleReader _x (StyleName, ListStyle) +readListStyle = + findAttr NsStyle "name" + >>?! keepingTheValue + ( liftA ListStyle + $ ( liftA3 SM.union3 + ( readListLevelStyles NsText "list-level-style-number" LltNumbered ) + ( readListLevelStyles NsText "list-level-style-bullet" LltBullet ) + ( readListLevelStyles NsText "list-level-style-image" LltImage ) + ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle + ) +-- +readListLevelStyles :: Namespace -> ElementName + -> ListLevelType + -> StyleReaderSafe _x (SM.SetMap Int ListLevelStyle) +readListLevelStyles namespace elementName levelType = + ( tryAll namespace elementName (readListLevelStyle levelType) + >>^ SM.fromList + ) + +-- +readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle) +readListLevelStyle levelType = readAttr NsText "level" + >>?! keepingTheValue + ( liftA4 toListLevelStyle + ( returnV levelType ) + ( findAttr' NsStyle "num-prefix" ) + ( findAttr' NsStyle "num-suffix" ) + ( getAttr NsStyle "num-format" ) + ) + where + toListLevelStyle _ p s LinfNone = ListLevelStyle LltBullet p s LinfNone + toListLevelStyle _ p s f@(LinfString _) = ListLevelStyle LltBullet p s f + toListLevelStyle t p s f = ListLevelStyle t p s f + +-- +chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle +chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing + | otherwise = Just ( F.foldr1 select ls ) + where + select ( ListLevelStyle t1 p1 s1 f1 ) + ( ListLevelStyle t2 p2 s2 f2 ) + = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) + select' LltNumbered _ = LltNumbered + select' _ LltNumbered = LltNumbered + select' _ _ = LltBullet + selectLinf LinfNone f2 = f2 + selectLinf f1 LinfNone = f1 + selectLinf (LinfString _) f2 = f2 + selectLinf f1 (LinfString _) = f1 + selectLinf f1 _ = f1 + + +-------------------------------------------------------------------------------- +-- Tools to access style data +-------------------------------------------------------------------------------- + +-- +lookupStyle :: StyleName -> Styles -> Maybe Style +lookupStyle name Styles{..} = M.lookup name stylesByName + +-- +lookupDefaultStyle :: StyleFamily -> Styles -> StyleProperties +lookupDefaultStyle family Styles{..} = fromMaybe def + (M.lookup family defaultStyleMap) + +-- +lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties +lookupDefaultStyle' Styles{..} family = fromMaybe def + (M.lookup family defaultStyleMap) + +-- +getListStyle :: Style -> Styles -> Maybe ListStyle +getListStyle Style{..} styles = listStyle >>= (`lookupListStyleByName` styles) + +-- +lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle +lookupListStyleByName name Styles{..} = M.lookup name listStylesByName + + +-- | Returns a chain of parent of the current style. The direct parent will +-- be the first element of the list, followed by its parent and so on. +-- The current style is not in the list. +parents :: Style -> Styles -> [Style] +parents style styles = unfoldr findNextParent style -- Ha! + where findNextParent Style{..} + = fmap duplicate $ (`lookupStyle` styles) =<< styleParentName + +-- | Looks up the style family of the current style. Normally, every style +-- should have one. But if not, all parents are searched. +getStyleFamily :: Style -> Styles -> Maybe StyleFamily +getStyleFamily style@Style{..} styles + = styleFamily + <|> (F.asum $ map (`getStyleFamily` styles) $ parents style styles) + +-- | Each 'Style' has certain 'StyleProperties'. But sometimes not all property +-- values are specified. Instead, a value might be inherited from a +-- parent style. This function makes this chain of inheritance +-- concrete and easily accessible by encapsulating the necessary lookups. +-- The resulting list contains the direct properties of the style as the first +-- element, the ones of the direct parent element as the next one, and so on. +-- +-- Note: There should also be default properties for each style family. These +-- are @not@ contained in this list because properties inherited from +-- parent elements take precedence over default styles. +-- +-- This function is primarily meant to be used through convenience wrappers. +-- +stylePropertyChain :: Style -> Styles -> [StyleProperties] +stylePropertyChain style styles + = map styleProperties (style : parents style styles) + +-- +extendedStylePropertyChain :: [Style] -> Styles -> [StyleProperties] +extendedStylePropertyChain [] _ = [] +extendedStylePropertyChain [style] styles = (stylePropertyChain style styles) + ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) +extendedStylePropertyChain (style:trace) styles = (stylePropertyChain style styles) + ++ (extendedStylePropertyChain trace styles) +-- Optimizable with Data.Sequence + +-- +extendedStylePropertyChain' :: [Style] -> Styles -> Maybe [StyleProperties] +extendedStylePropertyChain' [] _ = Nothing +extendedStylePropertyChain' [style] styles = Just ( + (stylePropertyChain style styles) + ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) + ) +extendedStylePropertyChain' (style:trace) styles = fmap ((stylePropertyChain style styles) ++) + (extendedStylePropertyChain' trace styles) + +-- +stylePropertyChain' :: Styles -> Style -> [StyleProperties] +stylePropertyChain' = flip stylePropertyChain + +-- +getStylePropertyChain :: StyleName -> Styles -> [StyleProperties] +getStylePropertyChain name styles = maybe [] + (`stylePropertyChain` styles) + (lookupStyle name styles) + +-- +getPropertyChain :: (StyleProperties -> Maybe a) -> Style -> Styles -> [a] +getPropertyChain extract style styles = catMaybes + $ map extract + $ stylePropertyChain style styles + +-- +textPropertyChain :: Style -> Styles -> [TextProperties] +textPropertyChain = getPropertyChain textProperties + +-- +paraPropertyChain :: Style -> Styles -> [ParaProperties] +paraPropertyChain = getPropertyChain paraProperties + +-- +getTextProperty :: (TextProperties -> a) -> Style -> Styles -> Maybe a +getTextProperty extract style styles = fmap extract + $ listToMaybe + $ textPropertyChain style styles + +-- +getTextProperty' :: (TextProperties -> Maybe a) -> Style -> Styles -> Maybe a +getTextProperty' extract style styles = F.asum + $ map extract + $ textPropertyChain style styles + +-- +getParaProperty :: (ParaProperties -> a) -> Style -> Styles -> Maybe a +getParaProperty extract style styles = fmap extract + $ listToMaybe + $ paraPropertyChain style styles + +-- | Lifts the reader into another readers' state. +liftStyles :: (OdtConverterState s -> OdtConverterState Styles) + -> (OdtConverterState Styles -> OdtConverterState s ) + -> XMLReader s x x +liftStyles extract inject = switchState extract inject + $ convertingExtraState M.empty readAllStyles + diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5c00a1b27..980f63504 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,7 +1,8 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-} {- -Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de> +Copyright (C) 2014-2015 Albert Krewinkel <tarleb@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -42,12 +43,13 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) import Text.TeXMath (readTeX, writePandoc, DisplayType(..)) +import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>) ) import Control.Arrow (first) import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) -import Control.Monad.Reader (Reader, runReader, ask, asks) +import Control.Monad.Reader (Reader, runReader, ask, asks, local) import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) @@ -56,20 +58,57 @@ import Data.Maybe (fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) import Network.HTTP (urlEncode) +import Text.Pandoc.Error + -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") + -> Either PandocError Pandoc +readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") + +data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } -type OrgParser = Parser [Char] OrgParserState +type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks st <- getState let meta = runF (orgStateMeta' st) st - return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st) + let removeUnwantedBlocks = dropCommentTrees . filter (/= Null) + return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st) + +-- | Drop COMMENT headers and the document tree below those headers. +dropCommentTrees :: [Block] -> [Block] +dropCommentTrees [] = [] +dropCommentTrees blks@(b:bs) = + maybe blks (flip dropUntilHeaderAboveLevel bs) $ commentHeaderLevel b + +-- | Return the level of a header starting a comment or :noexport: tree and +-- Nothing otherwise. +commentHeaderLevel :: Block -> Maybe Int +commentHeaderLevel blk = + case blk of + (Header level _ ((Str "COMMENT"):_)) -> Just level + (Header level _ title) | hasNoExportTag title -> Just level + _ -> Nothing + where + hasNoExportTag :: [Inline] -> Bool + hasNoExportTag = any isNoExportTag + + isNoExportTag :: Inline -> Bool + isNoExportTag (Span ("", ["tag"], [("data-tag-name", "noexport")]) []) = True + isNoExportTag _ = False + +-- | Drop blocks until a header on or above the given level is seen +dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block] +dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n) + +isHeaderLevelLowerEq :: Int -> Block -> Bool +isHeaderLevelLowerEq n blk = + case blk of + (Header level _ _) -> n >= level + _ -> False -- -- Parser State for Org @@ -98,6 +137,9 @@ data OrgParserState = OrgParserState , orgStateNotes' :: OrgNoteTable } +instance Default OrgParserLocal where + def = OrgParserLocal NoQuote + instance HasReaderOptions OrgParserState where extractReaderOptions = orgStateOptions @@ -111,6 +153,10 @@ instance HasLastStrPosition OrgParserState where getLastStrPos = orgStateLastStrPos setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } +instance HasQuoteContext st (Reader OrgParserLocal) where + getQuoteContext = asks orgLocalQuoteContext + withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) + instance Default OrgParserState where def = defaultOrgParserState @@ -134,19 +180,6 @@ recordAnchorId :: String -> OrgParser () recordAnchorId i = updateState $ \s -> s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } -addBlockAttribute :: String -> String -> OrgParser () -addBlockAttribute key val = updateState $ \s -> - let attrs = orgStateBlockAttributes s - in s{ orgStateBlockAttributes = M.insert key val attrs } - -lookupBlockAttribute :: String -> OrgParser (Maybe String) -lookupBlockAttribute key = - M.lookup key . orgStateBlockAttributes <$> getState - -resetBlockAttributes :: OrgParser () -resetBlockAttributes = updateState $ \s -> - s{ orgStateBlockAttributes = orgStateBlockAttributes def } - updateLastForbiddenCharPos :: OrgParser () updateLastForbiddenCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} @@ -274,9 +307,18 @@ block = choice [ mempty <$ blanklines , paraOrPlain ] <?> "block" +-- +-- Block Attributes +-- + +-- | Parse optional block attributes (like #+TITLE or #+NAME) optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks) optionalAttributes parser = try $ resetBlockAttributes *> parseBlockAttributes *> parser + where + resetBlockAttributes :: OrgParser () + resetBlockAttributes = updateState $ \s -> + s{ orgStateBlockAttributes = orgStateBlockAttributes def } parseBlockAttributes :: OrgParser () parseBlockAttributes = do @@ -301,6 +343,15 @@ lookupInlinesAttr attr = try $ do (fmap Just . parseFromString parseInlines) val +addBlockAttribute :: String -> String -> OrgParser () +addBlockAttribute key val = updateState $ \s -> + let attrs = orgStateBlockAttributes s + in s{ orgStateBlockAttributes = M.insert key val attrs } + +lookupBlockAttribute :: String -> OrgParser (Maybe String) +lookupBlockAttribute key = + M.lookup key . orgStateBlockAttributes <$> getState + -- -- Org Blocks (#+BEGIN_... / #+END_...) @@ -356,11 +407,11 @@ exportsResults :: [(String, String)] -> Bool exportsResults attrs = ("rundoc-exports", "results") `elem` attrs || ("rundoc-exports", "both") `elem` attrs -followingResultsBlock :: OrgParser (Maybe String) +followingResultsBlock :: OrgParser (Maybe (F Blocks)) followingResultsBlock = optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:" *> blankline - *> (unlines <$> many1 exampleLine)) + *> block) codeBlock :: BlockProperties -> OrgParser (F Blocks) codeBlock blkProp = do @@ -375,7 +426,7 @@ codeBlock blkProp = do labelledBlck <- maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption" - let resultBlck = pure $ maybe mempty (exampleCode) resultsContent + let resultBlck = fromMaybe mempty resultsContent return $ (if includeCode then labelledBlck else mempty) <> (if includeResults then resultBlck else mempty) where @@ -614,8 +665,25 @@ parseFormat = try $ do header :: OrgParser (F Blocks) header = try $ do level <- headerStart - title <- inlinesTillNewline - return $ B.header level <$> title + title <- manyTill inline (lookAhead headerEnd) + tags <- headerEnd + let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags + return $ B.header level <$> inlns + where + tagToInlineF :: String -> F Inlines + tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty + +headerEnd :: OrgParser [String] +headerEnd = option [] headerTags <* newline + +headerTags :: OrgParser [String] +headerTags = try $ + skipSpaces + *> char ':' + *> many1 tag + <* skipSpaces + where tag = many1 (alphaNum <|> oneOf "@%#_") + <* char ':' headerStart :: OrgParser Int headerStart = try $ @@ -828,12 +896,14 @@ list :: OrgParser (F Blocks) list = choice [ definitionList, bulletList, orderedList ] <?> "list" definitionList :: OrgParser (F Blocks) -definitionList = fmap B.definitionList . fmap compactify'DL . sequence - <$> many1 (definitionListItem bulletListStart) +definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) + fmap B.definitionList . fmap compactify'DL . sequence + <$> many1 (definitionListItem $ bulletListStart' (Just n)) bulletList :: OrgParser (F Blocks) -bulletList = fmap B.bulletList . fmap compactify' . sequence - <$> many1 (listItem bulletListStart) +bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) + fmap B.bulletList . fmap compactify' . sequence + <$> many1 (listItem (bulletListStart' $ Just n)) orderedList :: OrgParser (F Blocks) orderedList = fmap B.orderedList . fmap compactify' . sequence @@ -845,10 +915,27 @@ genericListStart listMarker = try $ (+) <$> (length <$> many spaceChar) <*> (length <$> listMarker <* many1 spaceChar) --- parses bullet list start and returns its length (excl. following whitespace) +-- parses bullet list marker. maybe we know the indent level bulletListStart :: OrgParser Int -bulletListStart = genericListStart bulletListMarker - where bulletListMarker = pure <$> oneOf "*-+" +bulletListStart = bulletListStart' Nothing + +bulletListStart' :: Maybe Int -> OrgParser Int +-- returns length of bulletList prefix, inclusive of marker +bulletListStart' Nothing = do ind <- length <$> many spaceChar + when (ind == 0) $ notFollowedBy (char '*') + oneOf bullets + many1 spaceChar + return (ind + 1) + -- Unindented lists are legal, but they can't use '*' bullets + -- We return n to maintain compatibility with the generic listItem +bulletListStart' (Just n) = do count (n-1) spaceChar + when (n == 1) $ notFollowedBy (char '*') + oneOf bullets + many1 spaceChar + return n + +bullets :: String +bullets = "*+-" orderedListStart :: OrgParser Int orderedListStart = genericListStart orderedListMarker @@ -918,6 +1005,7 @@ inline = , subscript , superscript , inlineLaTeX + , smart , symbol ] <* (guard =<< newlinesCountWithinLimits) <?> "inline" @@ -927,7 +1015,7 @@ parseInlines = trimInlinesF . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" +specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~" whitespace :: OrgParser (F Inlines) @@ -1054,13 +1142,13 @@ linkOrImage = explicitOrImageLink explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do char '[' - srcF <- applyCustomLinkFormat =<< linkTarget + srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' return $ do src <- srcF - if isImageFilename src && isImageFilename title + if isImageFilename title then pure $ B.link src "" $ B.image title mempty mempty else linkToInlinesF src =<< title' @@ -1087,6 +1175,9 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") +possiblyEmptyLinkTarget :: OrgParser String +possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") + applyCustomLinkFormat :: String -> OrgParser (F String) applyCustomLinkFormat link = do let (linkType, rest) = break (== ':') link @@ -1094,27 +1185,38 @@ applyCustomLinkFormat link = do formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters return $ maybe link ($ drop 1 rest) formatter - +-- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind +-- of parsing. linkToInlinesF :: String -> Inlines -> F Inlines -linkToInlinesF s@('#':_) = pure . B.link s "" -linkToInlinesF s - | isImageFilename s = const . pure $ B.image s "" "" - | isUri s = pure . B.link s "" - | isRelativeUrl s = pure . B.link s "" -linkToInlinesF s = \title -> do - anchorB <- (s `elem`) <$> asksF orgStateAnchorIds - if anchorB - then pure $ B.link ('#':s) "" title - else pure $ B.emph title - -isRelativeUrl :: String -> Bool -isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s) +linkToInlinesF s = + case s of + "" -> pure . B.link "" "" + ('#':_) -> pure . B.link s "" + _ | isImageFilename s -> const . pure $ B.image s "" "" + _ | isFileLink s -> pure . B.link (dropLinkType s) "" + _ | isUri s -> pure . B.link s "" + _ | isAbsoluteFilePath s -> pure . B.link ("file://" ++ s) "" + _ | isRelativeFilePath s -> pure . B.link s "" + _ -> internalLink s + +isFileLink :: String -> Bool +isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s) + +dropLinkType :: String -> String +dropLinkType = tail . snd . break (== ':') + +isRelativeFilePath :: String -> Bool +isRelativeFilePath s = (("./" `isPrefixOf` s) || ("../" `isPrefixOf` s)) && + (':' `notElem` s) isUri :: String -> Bool isUri s = let (scheme, path) = break (== ':') s - in all (\c -> isAlphaNum c || c `elem` ".-") scheme + in all (\c -> isAlphaNum c || c `elem` (".-" :: String)) scheme && not (null path) +isAbsoluteFilePath :: String -> Bool +isAbsoluteFilePath = ('/' ==) . head + isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && @@ -1124,6 +1226,13 @@ isImageFilename filename = imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] +internalLink :: String -> Inlines -> F Inlines +internalLink link title = do + anchorB <- (link `elem`) <$> asksF orgStateAnchorIds + if anchorB + then return $ B.link ('#':link) "" title + else return $ B.emph title + -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with -- @anchor-id@ set as id. Legal anchors in org-mode are defined through -- @org-target-regexp@, which is fairly liberal. Since no link is created if @@ -1148,7 +1257,7 @@ solidify :: String -> String solidify = map replaceSpecialChar where replaceSpecialChar c | isAlphaNum c = c - | c `elem` "_.-:" = c + | c `elem` ("_.-:" :: String) = c | otherwise = '-' -- | Parses an inline code block and marks it as an babel block. @@ -1203,12 +1312,16 @@ displayMath :: OrgParser (F Inlines) displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" , rawMathBetween "$$" "$$" ] + +updatePositions :: Char + -> OrgParser (Char) +updatePositions c = do + when (c `elem` emphasisPreChars) updateLastPreCharPos + when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos + return c + symbol :: OrgParser (F Inlines) symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) - where updatePositions c - | c `elem` emphasisPreChars = c <$ updateLastPreCharPos - | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos - | otherwise = return c emphasisBetween :: Char -> OrgParser (F Inlines) @@ -1387,7 +1500,8 @@ simpleSubOrSuperString = try $ inlineLaTeX :: OrgParser (F Inlines) inlineLaTeX = try $ do cmd <- inlineLaTeXCommand - maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd + maybe mzero returnF $ + parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd where parseAsMath :: String -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs @@ -1395,6 +1509,11 @@ inlineLaTeX = try $ do parseAsInlineLaTeX :: String -> Maybe Inlines parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs + parseAsMathMLSym :: String -> Maybe Inlines + parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) + -- dropWhileEnd would be nice here, but it's not available before base 4.5 + where clean = reverse . dropWhile (`elem` ("{}" :: String)) . reverse . drop 1 + state :: ParserState state = def{ stateOptions = def{ readerParseRaw = True }} @@ -1413,3 +1532,31 @@ inlineLaTeXCommand = try $ do count len anyChar return cs _ -> mzero + +smart :: OrgParser (F Inlines) +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice (map (return <$>) [orgApostrophe, dash, ellipses]) + where orgApostrophe = + (char '\'' <|> char '\8217') <* updateLastPreCharPos + <* updateLastForbiddenCharPos + *> return (B.str "\x2019") + +singleQuoted :: OrgParser (F Inlines) +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + fmap B.singleQuoted . trimInlinesF . mconcat <$> + many1Till inline singleQuoteEnd + +-- doubleQuoted will handle regular double-quoted sections, as well +-- as dialogues with an open double-quote without a close double-quote +-- in the same paragraph. +doubleQuoted :: OrgParser (F Inlines) +doubleQuoted = try $ do + doubleQuoteStart + contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) + (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return + (fmap B.doubleQuoted . trimInlinesF $ contents)) + <|> (return $ return (B.str "\8220") <> contents) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index e5eccb116..678eecc52 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,32 +30,38 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} module Text.Pandoc.Readers.RST ( - readRST + readRST, + readRSTWithWarnings ) where import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, fromList) import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Options -import Control.Monad ( when, liftM, guard, mzero, mplus ) +import Control.Monad ( when, liftM, guard, mzero ) import Data.List ( findIndex, intersperse, intercalate, - transpose, sort, deleteFirstsBy, isSuffixOf ) + transpose, sort, deleteFirstsBy, isSuffixOf , nub, union) import Data.Maybe (fromMaybe) import qualified Data.Map as M import Text.Printf ( printf ) -import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>)) +import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>), pure) import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import qualified Text.Pandoc.Builder as B import Data.Monoid (mconcat, mempty) import Data.Sequence (viewr, ViewR(..)) -import Data.Char (toLower, isHexDigit) +import Data.Char (toLower, isHexDigit, isSpace) + +import Text.Pandoc.Error -- | Parse reStructuredText string and return Pandoc document. readRST :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") +readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String]) +readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n") + type RSTParser = Parser [Char] ParserState -- @@ -202,7 +209,7 @@ rawFieldListItem minIndent = try $ do fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent - let term = B.str name + term <- parseInlineFromString name contents <- parseFromString parseBlocks raw optional blanklines return (term, [contents]) @@ -222,8 +229,7 @@ fieldList = try $ do lineBlock :: RSTParser Blocks lineBlock = try $ do lines' <- lineBlockLines - lines'' <- mapM (parseFromString - (trimInlines . mconcat <$> many inline)) lines' + lines'' <- mapM parseInlineFromString lines' return $ B.para (mconcat $ intersperse B.linebreak lines'') -- @@ -335,6 +341,13 @@ indentedBlock = try $ do optional blanklines return $ unlines lns +quotedBlock :: Parser [Char] st [Char] +quotedBlock = try $ do + quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" + lns <- many1 $ lookAhead (char quote) >> anyLine + optional blanklines + return $ unlines lns + codeBlockStart :: Parser [Char] st Char codeBlockStart = string "::" >> blankline >> blankline @@ -342,7 +355,8 @@ codeBlock :: Parser [Char] st Blocks codeBlock = try $ codeBlockStart >> codeBlockBody codeBlockBody :: Parser [Char] st Blocks -codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> indentedBlock +codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> + (indentedBlock <|> quotedBlock) lhsCodeBlock :: RSTParser Blocks lhsCodeBlock = try $ do @@ -513,7 +527,6 @@ directive = try $ do -- TODO: line-block, parsed-literal, table, csv-table, list-table -- date -- include --- class -- title directive' :: RSTParser Blocks directive' = do @@ -535,39 +548,33 @@ directive' = do "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields "container" -> parseFromString parseBlocks body' "replace" -> B.para <$> -- consumed by substKey - parseFromString (trimInlines . mconcat <$> many inline) - (trim top) + parseInlineFromString (trim top) "unicode" -> B.para <$> -- consumed by substKey - parseFromString (trimInlines . mconcat <$> many inline) - (trim $ unicodeTransform top) + parseInlineFromString (trim $ unicodeTransform top) "compound" -> parseFromString parseBlocks body' "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body' "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body' "highlights" -> B.blockQuote <$> parseFromString parseBlocks body' - "rubric" -> B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) top + "rubric" -> B.para . B.strong <$> parseInlineFromString top _ | label `elem` ["attention","caution","danger","error","hint", "important","note","tip","warning"] -> do let tit = B.para $ B.strong $ B.str label bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' return $ B.blockQuote $ tit <> bod "admonition" -> - do tit <- B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) top + do tit <- B.para . B.strong <$> parseInlineFromString top bod <- parseFromString parseBlocks body' return $ B.blockQuote $ tit <> bod "sidebar" -> do let subtit = maybe "" trim $ lookup "subtitle" fields - tit <- B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) + tit <- B.para . B.strong <$> parseInlineFromString (trim top ++ if null subtit then "" else (": " ++ subtit)) bod <- parseFromString parseBlocks body' return $ B.blockQuote $ tit <> bod "topic" -> - do tit <- B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) top + do tit <- B.para . B.strong <$> parseInlineFromString top bod <- parseFromString parseBlocks body' return $ tit <> bod "default-role" -> mempty <$ updateState (\s -> @@ -594,38 +601,69 @@ directive' = do Just t -> B.link (escapeURI $ trim t) "" $ B.image src "" alt Nothing -> B.image src "" alt - _ -> return mempty + "class" -> do + let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) + -- directive content or the first immediately following element + children <- case body of + "" -> block + _ -> parseFromString parseBlocks body' + return $ B.divWith attrs children + other -> do + pos <- getPosition + addWarning (Just pos) $ "ignoring unknown directive: " ++ other + return mempty -- TODO: -- - Silently ignores illegal fields --- - Silently drops classes -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: String -> [(String, String)] -> RSTParser Blocks addNewRole roleString fields = do (role, parentRole) <- parseFromString inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState - baseRole <- case M.lookup parentRole customRoles of - Just (base, _, _) -> return base - Nothing -> return parentRole - - let fmt = if baseRole == "raw" then lookup "format" fields else Nothing - annotate = maybe id addLanguage $ - if baseRole == "code" + let (baseRole, baseFmt, baseAttr) = + maybe (parentRole, Nothing, nullAttr) id $ + M.lookup parentRole customRoles + fmt = if parentRole == "raw" then lookup "format" fields else baseFmt + annotate :: [String] -> [String] + annotate = maybe id (:) $ + if parentRole == "code" then lookup "language" fields else Nothing + attr = let (ident, classes, keyValues) = baseAttr + -- nub in case role name & language class are the same + in (ident, nub . (role :) . annotate $ classes, keyValues) + + -- warn about syntax we ignore + flip mapM_ fields $ \(key, _) -> case key of + "language" -> when (parentRole /= "code") $ addWarning Nothing $ + "ignoring :language: field because the parent of role :" ++ + role ++ ": is :" ++ parentRole ++ ": not :code:" + "format" -> when (parentRole /= "raw") $ addWarning Nothing $ + "ignoring :format: field because the parent of role :" ++ + role ++ ": is :" ++ parentRole ++ ": not :raw:" + _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++ + ": in definition of role :" ++ role ++ ": in" + when (parentRole == "raw" && countKeys "format" > 1) $ + addWarning Nothing $ + "ignoring :format: fields after the first in the definition of role :" + ++ role ++": in" + when (parentRole == "code" && countKeys "language" > 1) $ + addWarning Nothing $ + "ignoring :language: fields after the first in the definition of role :" + ++ role ++": in" updateState $ \s -> s { stateRstCustomRoles = - M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles + M.insert role (baseRole, fmt, attr) customRoles } return $ B.singleton Null where - addLanguage lang (ident, classes, keyValues) = - (ident, "sourceCode" : lang : classes, keyValues) + countKeys k = length . filter (== k) . map fst $ fields inheritedRole = - (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')') + (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span") + -- Can contain character codes as decimal numbers or -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u @@ -666,7 +704,7 @@ extractCaption = do toChunks :: String -> [String] toChunks = dropWhile null . map (trim . unlines) - . splitBy (all (`elem` " \t")) . lines + . splitBy (all (`elem` (" \t" :: String))) . lines codeblock :: Maybe String -> String -> String -> RSTParser Blocks codeblock numberLines lang body = @@ -734,7 +772,7 @@ simpleReferenceName' :: Parser [Char] st String simpleReferenceName' = do x <- alphaNum xs <- many $ alphaNum - <|> (try $ oneOf "-_:+." >> lookAhead alphaNum) + <|> (try $ oneOf "-_:+." <* lookAhead alphaNum) return (x:xs) simpleReferenceName :: Parser [Char] st Inlines @@ -917,6 +955,9 @@ inline = choice [ whitespace , escapedChar , symbol ] <?> "inline" +parseInlineFromString :: String -> RSTParser Inlines +parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline) + hyphens :: RSTParser Inlines hyphens = do result <- many1 (char '-') @@ -985,21 +1026,23 @@ renderRole contents fmt role attr = case role of "RFC" -> return $ rfcLink contents "pep-reference" -> return $ pepLink contents "PEP" -> return $ pepLink contents - "literal" -> return $ B.str contents + "literal" -> return $ B.codeWith attr contents "math" -> return $ B.math contents "title-reference" -> titleRef contents "title" -> titleRef contents "t" -> titleRef contents - "code" -> return $ B.codeWith attr contents + "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents + "span" -> return $ B.spanWith attr $ B.str contents "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents custom -> do - customRole <- stateRstCustomRoles <$> getState - case M.lookup custom customRole of - Just (_, newFmt, inherit) -> let - fmtStr = fmt `mplus` newFmt - (newRole, newAttr) = inherit attr - in renderRole contents fmtStr newRole newAttr - Nothing -> return $ B.str contents -- Undefined role + customRoles <- stateRstCustomRoles <$> getState + case M.lookup custom customRoles of + Just (newRole, newFmt, newAttr) -> + renderRole contents newFmt newRole newAttr + Nothing -> do + pos <- getPosition + addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in" + return $ B.str contents -- Undefined role where titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) @@ -1008,11 +1051,14 @@ renderRole contents fmt role attr = case role of where padNo = replicate (4 - length pepNo) '0' ++ pepNo pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" -roleNameEndingIn :: RSTParser Char -> RSTParser String -roleNameEndingIn end = many1Till (letter <|> char '-') end +addClass :: String -> Attr -> Attr +addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues) + +roleName :: RSTParser String +roleName = many1 (letter <|> char '-') roleMarker :: RSTParser String -roleMarker = char ':' *> roleNameEndingIn (char ':') +roleMarker = char ':' *> roleName <* char ':' roleBefore :: RSTParser (String,String) roleBefore = try $ do diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs new file mode 100644 index 000000000..07b414431 --- /dev/null +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -0,0 +1,527 @@ +{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-} +-- RelaxedPolyRec needed for inlinesBetween on GHC < 7 +{- + Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.TWiki + Copyright : Copyright (C) 2014 Alexander Sulfrian + License : GNU GPL, version 2 or above + + Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> + Stability : alpha + Portability : portable + +Conversion of twiki text to 'Pandoc' document. +-} +module Text.Pandoc.Readers.TWiki ( readTWiki + , readTWikiWithWarnings + ) where + +import Text.Pandoc.Definition +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (enclosed, macro, nested) +import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) +import Data.Monoid (Monoid, mconcat, mempty) +import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Monad +import Text.Printf (printf) +import Debug.Trace (trace) +import Text.Pandoc.XML (fromEntities) +import Data.Maybe (fromMaybe) +import Text.HTML.TagSoup +import Data.Char (isAlphaNum) +import qualified Data.Foldable as F +import Text.Pandoc.Error + +-- | Read twiki from an input string and return a Pandoc document. +readTWiki :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Either PandocError Pandoc +readTWiki opts s = + (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n") + +readTWikiWithWarnings :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Either PandocError (Pandoc, [String]) +readTWikiWithWarnings opts s = + (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n") + where parseTWikiWithWarnings = do + doc <- parseTWiki + warnings <- stateWarnings <$> getState + return (doc, warnings) + +type TWParser = Parser [Char] ParserState + +-- +-- utility functions +-- + +tryMsg :: String -> TWParser a -> TWParser a +tryMsg msg p = try p <?> msg + +skip :: TWParser a -> TWParser () +skip parser = parser >> return () + +nested :: TWParser a -> TWParser a +nested p = do + nestlevel <- stateMaxNestingLevel <$> getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + +htmlElement :: String -> TWParser (Attr, String) +htmlElement tag = tryMsg tag $ do + (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + content <- manyTill anyChar (endtag <|> endofinput) + return (htmlAttrToPandoc attr, trim content) + where + endtag = skip $ htmlTag (~== TagClose tag) + endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof + trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse + +htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc attrs = (ident, classes, keyvals) + where + ident = fromMaybe "" $ lookup "id" attrs + classes = maybe [] words $ lookup "class" attrs + keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + +parseHtmlContentWithAttrs :: String -> TWParser a -> TWParser (Attr, [a]) +parseHtmlContentWithAttrs tag parser = do + (attr, content) <- htmlElement tag + parsedContent <- try $ parseContent content + return (attr, parsedContent) + where + parseContent = parseFromString $ nested $ manyTill parser endOfContent + endOfContent = try $ skipMany blankline >> skipSpaces >> eof + +parseHtmlContent :: String -> TWParser a -> TWParser [a] +parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd + +-- +-- main parser +-- + +parseTWiki :: TWParser Pandoc +parseTWiki = do + bs <- mconcat <$> many block + spaces + eof + return $ B.doc bs + + +-- +-- block parsers +-- + +block :: TWParser B.Blocks +block = do + tr <- getOption readerTrace + pos <- getPosition + res <- mempty <$ skipMany1 blankline + <|> blockElements + <|> para + skipMany blankline + when tr $ + trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res)) (return ()) + return res + +blockElements :: TWParser B.Blocks +blockElements = choice [ separator + , header + , verbatim + , literal + , list "" + , table + , blockQuote + , noautolink + ] + +separator :: TWParser B.Blocks +separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule + +header :: TWParser B.Blocks +header = tryMsg "header" $ do + string "---" + level <- many1 (char '+') >>= return . length + guard $ level <= 6 + classes <- option [] $ string "!!" >> return ["unnumbered"] + skipSpaces + content <- B.trimInlines . mconcat <$> manyTill inline newline + attr <- registerHeader ("", classes, []) content + return $ B.headerWith attr level $ content + +verbatim :: TWParser B.Blocks +verbatim = (htmlElement "verbatim" <|> htmlElement "pre") + >>= return . (uncurry B.codeBlockWith) + +literal :: TWParser B.Blocks +literal = htmlElement "literal" >>= return . rawBlock + where + format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs + rawBlock (attrs, content) = B.rawBlock (format attrs) content + +list :: String -> TWParser B.Blocks +list prefix = choice [ bulletList prefix + , orderedList prefix + , definitionList prefix] + +definitionList :: String -> TWParser B.Blocks +definitionList prefix = tryMsg "definitionList" $ do + indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ " + elements <- many $ parseDefinitionListItem (prefix ++ concat indent) + return $ B.definitionList elements + where + parseDefinitionListItem :: String -> TWParser (B.Inlines, [B.Blocks]) + parseDefinitionListItem indent = do + string (indent ++ "$ ") >> skipSpaces + term <- many1Till inline $ string ": " + line <- listItemLine indent $ string "$ " + return $ (mconcat term, [line]) + +bulletList :: String -> TWParser B.Blocks +bulletList prefix = tryMsg "bulletList" $ + parseList prefix (char '*') (char ' ') + +orderedList :: String -> TWParser B.Blocks +orderedList prefix = tryMsg "orderedList" $ + parseList prefix (oneOf "1iIaA") (string ". ") + +parseList :: Show a => String -> TWParser Char -> TWParser a -> TWParser B.Blocks +parseList prefix marker delim = do + (indent, style) <- lookAhead $ string prefix *> listStyle <* delim + blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) + return $ case style of + '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks + 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks + 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks + 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks + 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks + _ -> B.bulletList blocks + where + listStyle = do + indent <- many1 $ string " " + style <- marker + return (concat indent, style) + +parseListItem :: Show a => String -> TWParser a -> TWParser B.Blocks +parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker + +listItemLine :: Show a => String -> TWParser a -> TWParser B.Blocks +listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat + where + lineContent = do + content <- anyLine + continuation <- optionMaybe listContinuation + return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation) + filterSpaces = reverse . dropWhile (== ' ') . reverse + listContinuation = notFollowedBy (string prefix >> marker) >> + string " " >> lineContent + parseContent = parseFromString $ many1 $ nestedList <|> parseInline + parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>= + return . B.plain . mconcat + nestedList = list prefix + lastNewline = try $ char '\n' <* eof + newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList + +table :: TWParser B.Blocks +table = try $ do + tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip + rows <- many1 tableParseRow + return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead + where + buildTable caption rows (aligns, heads) + = B.table caption aligns heads rows + align rows = replicate (columCount rows) (AlignDefault, 0) + columns rows = replicate (columCount rows) mempty + columCount rows = length $ head rows + +tableParseHeader :: TWParser ((Alignment, Double), B.Blocks) +tableParseHeader = try $ do + char '|' + leftSpaces <- many spaceChar >>= return . length + char '*' + content <- tableColumnContent (char '*' >> skipSpaces >> char '|') + char '*' + rightSpaces <- many spaceChar >>= return . length + optional tableEndOfRow + return (tableAlign leftSpaces rightSpaces, content) + where + tableAlign left right + | left >= 2 && left == right = (AlignCenter, 0) + | left > right = (AlignRight, 0) + | otherwise = (AlignLeft, 0) + +tableParseRow :: TWParser [B.Blocks] +tableParseRow = many1Till tableParseColumn newline + +tableParseColumn :: TWParser B.Blocks +tableParseColumn = char '|' *> skipSpaces *> + tableColumnContent (skipSpaces >> char '|') + <* skipSpaces <* optional tableEndOfRow + +tableEndOfRow :: TWParser Char +tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|' + +tableColumnContent :: Show a => TWParser a -> TWParser B.Blocks +tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat + where + content = continuation <|> inline + continuation = try $ char '\\' >> newline >> return mempty + +blockQuote :: TWParser B.Blocks +blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat + +noautolink :: TWParser B.Blocks +noautolink = do + (_, content) <- htmlElement "noautolink" + st <- getState + setState $ st{ stateAllowLinks = False } + blocks <- try $ parseContent content + setState $ st{ stateAllowLinks = True } + return $ mconcat blocks + where + parseContent = parseFromString $ many $ block + +para :: TWParser B.Blocks +para = many1Till inline endOfParaElement >>= return . result . mconcat + where + endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement + endOfInput = try $ skipMany blankline >> skipSpaces >> eof + endOfPara = try $ blankline >> skipMany1 blankline + newBlockElement = try $ blankline >> skip blockElements + result content = if F.all (==Space) content + then mempty + else B.para $ B.trimInlines content + + +-- +-- inline parsers +-- + +inline :: TWParser B.Inlines +inline = choice [ whitespace + , br + , macro + , strong + , strongHtml + , strongAndEmph + , emph + , emphHtml + , boldCode + , smart + , link + , htmlComment + , code + , codeHtml + , nop + , autoLink + , str + , symbol + ] <?> "inline" + +whitespace :: TWParser B.Inlines +whitespace = (lb <|> regsp) >>= return + where lb = try $ skipMany spaceChar >> linebreak >> return B.space + regsp = try $ skipMany1 spaceChar >> return B.space + +br :: TWParser B.Inlines +br = try $ string "%BR%" >> return B.linebreak + +linebreak :: TWParser B.Inlines +linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) + where lastNewline = eof >> return mempty + innerNewline = return B.space + +between :: (Show b, Monoid c) => TWParser a -> TWParser b -> (TWParser b -> TWParser c) -> TWParser c +between start end p = + mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) + +enclosed :: (Show a, Monoid b) => TWParser a -> (TWParser a -> TWParser b) -> TWParser b +enclosed sep p = between sep (try $ sep <* endMarker) p + where + endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof + endSpace = (spaceChar <|> newline) >> return B.space + +macro :: TWParser B.Inlines +macro = macroWithParameters <|> withoutParameters + where + withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan + emptySpan name = buildSpan name [] mempty + +macroWithParameters :: TWParser B.Inlines +macroWithParameters = try $ do + char '%' + name <- macroName + (content, kvs) <- attributes + char '%' + return $ buildSpan name kvs $ B.str content + +buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines +buildSpan className kvs = B.spanWith attrs + where + attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses) + additionalClasses = maybe [] words $ lookup "class" kvs + kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] + +macroName :: TWParser String +macroName = do + first <- letter + rest <- many $ alphaNum <|> char '_' + return (first:rest) + +attributes :: TWParser (String, [(String, String)]) +attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>= + return . foldr (either mkContent mkKvs) ([], []) + where + spnl = skipMany (spaceChar <|> newline) + mkContent c ([], kvs) = (c, kvs) + mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) + mkKvs kv (cont, rest) = (cont, (kv : rest)) + +attribute :: TWParser (Either String (String, String)) +attribute = withKey <|> withoutKey + where + withKey = try $ do + key <- macroName + char '=' + parseValue False >>= return . (curry Right key) + withoutKey = try $ parseValue True >>= return . Left + parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities + withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"']) + withoutQuotes allowSpaces + | allowSpaces == True = many1 $ noneOf "}" + | otherwise = many1 $ noneOf " }" + +nestedInlines :: Show a => TWParser a -> TWParser B.Inlines +nestedInlines end = innerSpace <|> nestedInline + where + innerSpace = try $ whitespace <* (notFollowedBy end) + nestedInline = notFollowedBy whitespace >> nested inline + +strong :: TWParser B.Inlines +strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong + +strongHtml :: TWParser B.Inlines +strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline) + >>= return . B.strong . mconcat + +strongAndEmph :: TWParser B.Inlines +strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong + +emph :: TWParser B.Inlines +emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph + +emphHtml :: TWParser B.Inlines +emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) + >>= return . B.emph . mconcat + +nestedString :: Show a => TWParser a -> TWParser String +nestedString end = innerSpace <|> (count 1 nonspaceChar) + where + innerSpace = try $ many1 spaceChar <* notFollowedBy end + +boldCode :: TWParser B.Inlines +boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities + +htmlComment :: TWParser B.Inlines +htmlComment = htmlTag isCommentTag >> return mempty + +code :: TWParser B.Inlines +code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities + +codeHtml :: TWParser B.Inlines +codeHtml = do + (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar + return $ B.codeWith attrs $ fromEntities content + +autoLink :: TWParser B.Inlines +autoLink = try $ do + state <- getState + guard $ stateAllowLinks state + (text, url) <- parseLink + guard $ checkLink (head $ reverse url) + return $ makeLink (text, url) + where + parseLink = notFollowedBy nop >> (uri <|> emailAddress) + makeLink (text, url) = B.link url "" $ B.str text + checkLink c + | c == '/' = True + | otherwise = isAlphaNum c + +str :: TWParser B.Inlines +str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str + +nop :: TWParser B.Inlines +nop = try $ (skip exclamation <|> skip nopTag) >> followContent + where + exclamation = char '!' + nopTag = stringAnyCase "<nop>" + followContent = many1 nonspaceChar >>= return . B.str . fromEntities + +symbol :: TWParser B.Inlines +symbol = count 1 nonspaceChar >>= return . B.str + +smart :: TWParser B.Inlines +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice [ apostrophe + , dash + , ellipses + ] + +singleQuoted :: TWParser B.Inlines +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + many1Till inline singleQuoteEnd >>= + (return . B.singleQuoted . B.trimInlines . mconcat) + +doubleQuoted :: TWParser B.Inlines +doubleQuoted = try $ do + doubleQuoteStart + contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) + (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> + return (B.doubleQuoted $ B.trimInlines contents)) + <|> (return $ (B.str "\8220") B.<> contents) + +link :: TWParser B.Inlines +link = try $ do + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (url, title, content) <- linkText + setState $ st{ stateAllowLinks = True } + return $ B.link url title content + +linkText :: TWParser (String, String, B.Inlines) +linkText = do + string "[[" + url <- many1Till anyChar (char ']') + content <- option [B.str url] linkContent + char ']' + return (url, "", mconcat content) + where + linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent + parseLinkContent = parseFromString $ many1 inline diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 3fee3051e..e5778b123 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.TeXMath - Copyright : Copyright (C) 2007-2014 John MacFarlane + Copyright : Copyright (C) 2007-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index ee64e8f2a..ec1da896d 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010-2014 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' +Copyright (C) 2010-2015 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' and John MacFarlane This program is free software; you can redistribute it and/or modify @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Textile - Copyright : Copyright (C) 2010-2014 Paul Rivier and John MacFarlane + Copyright : Copyright (C) 2010-2015 Paul Rivier and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Paul Rivier <paul*rivier#demotera*com> @@ -57,6 +57,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag ) +import Text.Pandoc.Shared (trim) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match @@ -67,11 +68,12 @@ import Text.Printf import Control.Applicative ((<$>), (*>), (<*), (<$)) import Data.Monoid import Debug.Trace (trace) +import Text.Pandoc.Error -- | Parse a Textile text and return a Pandoc document. readTextile :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readTextile opts s = (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n") @@ -325,33 +327,30 @@ para = B.para . trimInlines . mconcat <$> many1 inline -- Tables -- | A table cell spans until a pipe | -tableCell :: Parser [Char] ParserState Blocks -tableCell = do - c <- many1 (noneOf "|\n") - content <- trimInlines . mconcat <$> parseFromString (many1 inline) c +tableCell :: Bool -> Parser [Char] ParserState Blocks +tableCell headerCell = try $ do + char '|' + when headerCell $ () <$ string "_." + notFollowedBy blankline + raw <- trim <$> + many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline)) + content <- mconcat <$> parseFromString (many inline) raw return $ B.plain content -- | A table row is made of many table cells tableRow :: Parser [Char] ParserState [Blocks] -tableRow = try $ ( char '|' *> - (endBy1 tableCell (optional blankline *> char '|')) <* newline) - --- | Many table rows -tableRows :: Parser [Char] ParserState [[Blocks]] -tableRows = many1 tableRow +tableRow = many1 (tableCell False) <* char '|' <* newline --- | Table headers are made of cells separated by a tag "|_." -tableHeaders :: Parser [Char] ParserState [Blocks] -tableHeaders = let separator = (try $ string "|_.") in - try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline ) +tableHeader :: Parser [Char] ParserState [Blocks] +tableHeader = many1 (tableCell True) <* char '|' <* newline -- | A table with an optional header. Current implementation can -- handle tables with and without header, but will parse cells -- alignment attributes as content. table :: Parser [Char] ParserState Blocks table = try $ do - headers <- option mempty tableHeaders - rows <- tableRows + headers <- option mempty $ tableHeader + rows <- many1 tableRow blanklines let nbOfCols = max (length headers) (length $ head rows) return $ B.table mempty @@ -607,8 +606,8 @@ langAttr = do -- | Parses material surrounded by a parser. surrounded :: Parser [Char] st t -- ^ surrounding parser - -> Parser [Char] st a -- ^ content parser (to be used repeatedly) - -> Parser [Char] st [a] + -> Parser [Char] st a -- ^ content parser (to be used repeatedly) + -> Parser [Char] st [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 6f8c19ac7..304d6d4c5 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -48,11 +48,12 @@ import Data.Monoid (Monoid, mconcat, mempty, mappend) import Control.Monad (void, guard, when) import Data.Default import Control.Monad.Reader (Reader, runReader, asks) +import Text.Pandoc.Error import Data.Time.LocalTime (getZonedTime) import Text.Pandoc.Compat.Directory(getModificationTime) import Data.Time.Format (formatTime) -import System.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Locale (defaultTimeLocale) import System.IO.Error (catchIOError) type T2T = ParserT String ParserState (Reader T2TMeta) @@ -83,12 +84,12 @@ getT2TMeta inps out = do return $ T2TMeta curDate curMtime (intercalate ", " inps) out -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc +readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document -readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc +readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc readTxt2TagsNoMacros = readTxt2Tags def parseT2T :: T2T Pandoc @@ -576,4 +577,3 @@ atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) ignoreSpacesCap :: T2T String -> T2T String ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces) - diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 5b8f7a75a..a77127286 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2011-2014 John MacFarlane + Copyright : Copyright (C) 2011-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -40,15 +40,30 @@ import System.FilePath (takeExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', err, fetchItem') +import Text.Pandoc.Shared (renderTags', err, fetchItem', warn, trim) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.UTF8 (toString, fromString) +import Text.Pandoc.UTF8 (toString) import Text.Pandoc.Options (WriterOptions(..)) +import Data.List (isPrefixOf) +import Control.Applicative +import Text.Parsec (runParserT, ParsecT) +import qualified Text.Parsec as P +import Control.Monad.Trans (lift) isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c +makeDataURI :: String -> ByteString -> String +makeDataURI mime raw = + if textual + then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw) + else "data:" ++ mime' ++ ";base64," ++ toString (encode raw) + where textual = "text/" `Data.List.isPrefixOf` mime + mime' = if textual && ';' `notElem` mime + then mime ++ ";charset=utf-8" + else mime -- mime type already has charset + convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String) convertTag media sourceURL t@(TagOpen tagname as) | tagname `elem` @@ -58,51 +73,75 @@ convertTag media sourceURL t@(TagOpen tagname as) where processAttribute (x,y) = if x == "src" || x == "href" || x == "poster" then do - (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y - let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) + enc <- getDataURI media sourceURL (fromAttrib "type" t) y return (x, enc) else return (x,y) convertTag media sourceURL t@(TagOpen "script" as) = case fromAttrib "src" t of [] -> return t src -> do - (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src - let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) + enc <- getDataURI media sourceURL (fromAttrib "type" t) src return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) convertTag media sourceURL t@(TagOpen "link" as) = case fromAttrib "href" t of [] -> return t src -> do - (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src - let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) + enc <- getDataURI media sourceURL (fromAttrib "type" t) src return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) convertTag _ _ t = return t --- NOTE: This is really crude, it doesn't respect CSS comments. cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString -> IO ByteString -cssURLs media sourceURL d orig = - case B.breakSubstring "url(" orig of - (x,y) | B.null y -> return orig - | otherwise -> do - let (u,v) = B.breakSubstring ")" $ B.drop 4 y - let url = toString - $ case B.take 1 u of - "\"" -> B.takeWhile (/='"') $ B.drop 1 u - "'" -> B.takeWhile (/='\'') $ B.drop 1 u - _ -> u - let url' = if isURI url - then url - else d </> url - (raw, mime) <- getRaw media sourceURL "" url' - rest <- cssURLs media sourceURL d v - let enc = "data:" `B.append` fromString mime `B.append` - ";base64," `B.append` (encode raw) - return $ x `B.append` "url(" `B.append` enc `B.append` rest - -getRaw :: MediaBag -> Maybe String -> MimeType -> String - -> IO (ByteString, MimeType) -getRaw media sourceURL mimetype src = do +cssURLs media sourceURL d orig = do + res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig + case res of + Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig + Right bs -> return bs + +parseCSSUrls :: MediaBag -> Maybe String -> FilePath + -> ParsecT ByteString () IO ByteString +parseCSSUrls media sourceURL d = B.concat <$> P.many + (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther) + +-- Note: some whitespace in CSS is significant, so we can't collapse it! +pCSSWhite :: ParsecT ByteString () IO ByteString +pCSSWhite = B.singleton <$> P.space <* P.spaces + +pCSSComment :: ParsecT ByteString () IO ByteString +pCSSComment = P.try $ do + P.string "/*" + P.manyTill P.anyChar (P.try (P.string "*/")) + return B.empty + +pCSSOther :: ParsecT ByteString () IO ByteString +pCSSOther = do + (B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|> + (B.singleton <$> P.char 'u') <|> + (B.singleton <$> P.char '/') + +pCSSUrl :: MediaBag -> Maybe String -> FilePath + -> ParsecT ByteString () IO ByteString +pCSSUrl media sourceURL d = P.try $ do + P.string "url(" + P.spaces + quote <- P.option Nothing (Just <$> P.oneOf "\"'") + url <- P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote) + P.spaces + P.char ')' + let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++ + maybe "" (:[]) quote ++ ")") + case trim url of + '#':_ -> return fallback + 'd':'a':'t':'a':':':_ -> return fallback + u -> do let url' = if isURI u then u else d </> u + enc <- lift $ getDataURI media sourceURL "" url' + return (B.pack enc) + + +getDataURI :: MediaBag -> Maybe String -> MimeType -> String + -> IO String +getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri +getDataURI media sourceURL mimetype src = do let ext = map toLower $ takeExtension src fetchResult <- fetchItem' media sourceURL src (raw, respMime) <- case fetchResult of @@ -128,7 +167,7 @@ getRaw media sourceURL mimetype src = do result <- if mime == "text/css" then cssURLs media cssSourceURL (takeDirectory src) raw' else return raw' - return (result, mime) + return $ makeDataURI mime result -- | Convert HTML into self-contained HTML, incorporating images, -- scripts, and CSS using data: URIs. diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 9aa70e6f2..c09c2f2a0 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -2,7 +2,7 @@ FlexibleContexts, ScopedTypeVariables, PatternGuards, ViewPatterns #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -76,6 +76,8 @@ module Text.Pandoc.Shared ( renderTags', -- * File handling inDirectory, + getDefaultReferenceDocx, + getDefaultReferenceODT, readDataFile, readDataFileUTF8, fetchItem, @@ -85,6 +87,8 @@ module Text.Pandoc.Shared ( -- * Error handling err, warn, + mapLeft, + hush, -- * Safe read safeRead, -- * Temp directory @@ -113,10 +117,12 @@ import System.FilePath ( (</>), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import qualified Control.Exception as E -import Control.Monad (msum, unless) +import Control.Applicative ((<$>)) +import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) -import System.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Locale (defaultTimeLocale) import Data.Time +import Data.Time.Clock.POSIX import System.IO (stderr) import System.IO.Temp import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), @@ -127,7 +133,8 @@ import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) import qualified Data.Text as T (toUpper, pack, unpack) -import Data.ByteString.Lazy (toChunks) +import Data.ByteString.Lazy (toChunks, fromChunks) +import qualified Data.ByteString.Lazy as BL #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -135,14 +142,20 @@ import Text.Pandoc.Data (dataFiles) import Paths_pandoc (getDataFileName) #endif #ifdef HTTP_CLIENT -import Network.HTTP.Client (httpLbs, parseUrl, withManager, +import Network.HTTP.Client (httpLbs, parseUrl, responseBody, responseHeaders, Request(port,host)) +#if MIN_VERSION_http_client(0,4,18) +import Network.HTTP.Client (newManager) +#else +import Network.HTTP.Client (withManager) +#endif import Network.HTTP.Client.Internal (addProxy) import Network.HTTP.Client.TLS (tlsManagerSettings) import System.Environment (getEnv) import Network.HTTP.Types.Header ( hContentType) import Network (withSocketsDo) +import Codec.Archive.Zip #else import Network.URI (parseURI) import Network.HTTP (findHeader, rspBody, @@ -654,27 +667,32 @@ hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do sectionContents' <- hierarchicalizeWithIds sectionContents rest' <- hierarchicalizeWithIds rest return $ Sec level newnum attr title' sectionContents' : rest' +hierarchicalizeWithIds ((Div ("",["references"],[]) + (Header level (ident,classes,kvs) title' : xs)):ys) = + hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs) + title') : (xs ++ ys)) hierarchicalizeWithIds (x:rest) = do rest' <- hierarchicalizeWithIds rest return $ (Blk x) : rest' headerLtEq :: Int -> Block -> Bool headerLtEq level (Header l _ _) = l <= level +headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level headerLtEq _ _ = False -- | Generate a unique identifier from a list of inlines. -- Second argument is a list of already used identifiers. uniqueIdent :: [Inline] -> [String] -> String -uniqueIdent title' usedIdents = - let baseIdent = case inlineListToIdentifier title' of +uniqueIdent title' usedIdents + = let baseIdent = case inlineListToIdentifier title' of "" -> "section" x -> x - numIdent n = baseIdent ++ "-" ++ show n - in if baseIdent `elem` usedIdents - then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of + numIdent n = baseIdent ++ "-" ++ show n + in if baseIdent `elem` usedIdents + then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of Just x -> numIdent x Nothing -> baseIdent -- if we have more than 60,000, allow repeats - else baseIdent + else baseIdent -- | True if block is a Header block. isHeaderBlock :: Block -> Bool @@ -740,7 +758,73 @@ inDirectory path action = E.bracket setCurrentDirectory (const $ setCurrentDirectory path >> action) +getDefaultReferenceDocx :: Maybe FilePath -> IO Archive +getDefaultReferenceDocx datadir = do + let paths = ["[Content_Types].xml", + "_rels/.rels", + "docProps/app.xml", + "docProps/core.xml", + "word/document.xml", + "word/fontTable.xml", + "word/footnotes.xml", + "word/numbering.xml", + "word/settings.xml", + "word/webSettings.xml", + "word/styles.xml", + "word/_rels/document.xml.rels", + "word/_rels/footnotes.xml.rels", + "word/theme/theme1.xml"] + let toLazy = fromChunks . (:[]) + let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$> + getCurrentTime + contents <- toLazy <$> readDataFile datadir + ("docx/" ++ path) + return $ toEntry path epochtime contents + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- doesFileExist (d </> "reference.docx") + if exists + then return (Just (d </> "reference.docx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> BL.readFile arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +getDefaultReferenceODT :: Maybe FilePath -> IO Archive +getDefaultReferenceODT datadir = do + let paths = ["mimetype", + "manifest.rdf", + "styles.xml", + "content.xml", + "meta.xml", + "settings.xml", + "Configurations2/accelerator/current.xml", + "Thumbnails/thumbnail.png", + "META-INF/manifest.xml"] + let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime + contents <- (fromChunks . (:[])) `fmap` + readDataFile datadir ("odt/" ++ path) + return $ toEntry path epochtime contents + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- doesFileExist (d </> "reference.odt") + if exists + then return (Just (d </> "reference.odt")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> BL.readFile arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + + readDefaultDataFile :: FilePath -> IO BS.ByteString +readDefaultDataFile "reference.docx" = + (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing +readDefaultDataFile "reference.odt" = + (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing readDefaultDataFile fname = #ifdef EMBED_DATA_FILES case lookup (makeCanonical fname) dataFiles of @@ -752,14 +836,17 @@ readDefaultDataFile fname = go (_:as) ".." = as go as x = x : as #else - getDataFileName ("data" </> fname) >>= checkExistence >>= BS.readFile - where checkExistence fn = do - exists <- doesFileExist fn - if exists - then return fn - else err 97 ("Could not find data file " ++ fname) + getDataFileName fname' >>= checkExistence >>= BS.readFile + where fname' = if fname == "README" then fname else "data" </> fname #endif +checkExistence :: FilePath -> IO FilePath +checkExistence fn = do + exists <- doesFileExist fn + if exists + then return fn + else err 97 ("Could not find data file " ++ fn) + -- | Read file from specified user data directory or, if not found there, from -- Cabal data directory. readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString @@ -794,6 +881,7 @@ fetchItem sourceURL s = fp = unEscapeString $ dropFragmentAndQuery s mime = case takeExtension fp of ".gz" -> getMimeType $ dropExtension fp + ".svgz" -> getMimeType $ dropExtension fp ++ ".svg" x -> getMimeType x ensureEscaped x@(_:':':'\\':_) = x -- likely windows path ensureEscaped x = escapeURIString isAllowedInURI x @@ -822,7 +910,11 @@ openURL u Right pr -> case parseUrl pr of Just r -> addProxy (host r) (port r) req Nothing -> req +#if MIN_VERSION_http_client(0,4,18) + resp <- newManager tlsManagerSettings >>= httpLbs req' +#else resp <- withManager tlsManagerSettings $ httpLbs req' +#endif return (BS.concat $ toChunks $ responseBody resp, UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) #else @@ -855,6 +947,14 @@ warn msg = do name <- getProgName UTF8.hPutStrLn stderr $ name ++ ": " ++ msg +mapLeft :: (a -> b) -> Either a c -> Either b c +mapLeft f (Left x) = Left (f x) +mapLeft _ (Right x) = Right x + +hush :: Either a b -> Maybe b +hush (Left _) = Nothing +hush (Right x) = Just x + -- | Remove intermediate "." and ".." directories from a path. -- -- > collapseFilePath "./foo" == "foo" @@ -883,11 +983,11 @@ collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories -- Safe read -- -safeRead :: (Monad m, Read a) => String -> m a +safeRead :: (MonadPlus m, Read a) => String -> m a safeRead s = case reads s of (d,x):_ | all isSpace x -> return d - _ -> fail $ "Could not read `" ++ s ++ "'" + _ -> mzero -- -- Temp directory diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index 2b863c780..878c900f7 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Slides - Copyright : Copyright (C) 2012-2014 John MacFarlane + Copyright : Copyright (C) 2012-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 4ae6a6d8a..b3243d752 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -1,7 +1,7 @@ {-# LANGUAGE TypeSynonymInstances, FlexibleInstances, CPP, OverloadedStrings, GeneralizedNewtypeDeriving #-} {- -Copyright (C) 2009-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2009-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Templates - Copyright : Copyright (C) 2009-2014 John MacFarlane + Copyright : Copyright (C) 2009-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -124,11 +124,12 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first -> String -- ^ Name of writer -> IO (Either E.IOException String) getDefaultTemplate user writer = do - let format = takeWhile (`notElem` "+-") writer -- strip off extensions + let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions case format of "native" -> return $ Right "" "json" -> return $ Right "" "docx" -> return $ Right "" + "fb2" -> return $ Right "" "odt" -> getDefaultTemplate user "opendocument" "markdown_strict" -> getDefaultTemplate user "markdown" "multimarkdown" -> getDefaultTemplate user "markdown" @@ -287,7 +288,7 @@ reservedWords :: [Text] reservedWords = ["else","endif","for","endfor","sep"] skipEndline :: Parser () -skipEndline = P.try $ P.skipMany (P.satisfy (`elem` " \t")) >> P.char '\n' >> return () +skipEndline = P.try $ P.skipMany (P.satisfy (`elem` (" \t" :: String))) >> P.char '\n' >> return () pConditional :: Parser Template pConditional = do diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 543f39ab0..de3314a0d 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,6 +1,6 @@ {-# LANGUAGE CPP #-} {- -Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UTF8 - Copyright : Copyright (C) 2010-2014 John MacFarlane + Copyright : Copyright (C) 2010-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -93,10 +93,16 @@ dropBOM :: String -> String dropBOM ('\xFEFF':xs) = xs dropBOM xs = xs +filterCRs :: String -> String +filterCRs ('\r':'\n':xs) = '\n': filterCRs xs +filterCRs ('\r':xs) = '\n' : filterCRs xs +filterCRs (x:xs) = x : filterCRs xs +filterCRs [] = [] + -- | Convert UTF8-encoded ByteString to String, also -- removing '\r' characters. toString :: B.ByteString -> String -toString = filter (/='\r') . dropBOM . T.unpack . T.decodeUtf8 +toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8 fromString :: String -> B.ByteString fromString = T.encodeUtf8 . T.pack @@ -104,7 +110,7 @@ fromString = T.encodeUtf8 . T.pack -- | Convert UTF8-encoded ByteString to String, also -- removing '\r' characters. toStringLazy :: BL.ByteString -> String -toStringLazy = filter (/='\r') . dropBOM . TL.unpack . TL.decodeUtf8 +toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8 fromStringLazy :: String -> BL.ByteString fromStringLazy = TL.encodeUtf8 . TL.pack diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs index eebfe09d2..463be044c 100644 --- a/src/Text/Pandoc/UUID.hs +++ b/src/Text/Pandoc/UUID.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UUID - Copyright : Copyright (C) 2010-2014 John MacFarlane + Copyright : Copyright (C) 2010-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e5b8c5167..bac28e54f 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.AsciiDoc - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -126,7 +126,7 @@ blockToAsciiDoc :: WriterOptions -- ^ Options blockToAsciiDoc _ Null = return empty blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines - return $ contents <> cr + return $ contents <> blankline blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do blockToAsciiDoc opts (Para [Image alt (src,tit)]) blockToAsciiDoc opts (Para inlines) = do @@ -272,7 +272,7 @@ bulletListItemToAsciiDoc opts blocks = do contents <- foldM addBlock empty blocks modify $ \s -> s{ bulletListLevel = lev } let marker = text (replicate lev '*') - return $ marker <> space <> contents <> cr + return $ marker <> text " " <> contents <> cr -- | Convert ordered list item (a list of blocks) to asciidoc. orderedListItemToAsciiDoc :: WriterOptions -- ^ options @@ -292,7 +292,7 @@ orderedListItemToAsciiDoc opts marker blocks = do modify $ \s -> s{ orderedListLevel = lev + 1 } contents <- foldM addBlock empty blocks modify $ \s -> s{ orderedListLevel = lev } - return $ text marker <> space <> contents <> cr + return $ text marker <> text " " <> contents <> cr -- | Convert definition list item (label, list of blocks) to asciidoc. definitionListItemToAsciiDoc :: WriterOptions diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs new file mode 100644 index 000000000..fee36d454 --- /dev/null +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -0,0 +1,178 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.CommonMark + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to CommonMark. + +CommonMark: <http://commonmark.org> +-} +module Text.Pandoc.Writers.CommonMark (writeCommonMark) where + +import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Definition +import Text.Pandoc.Shared (isTightList) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Options +import CMark +import qualified Data.Text as T +import Control.Monad.Identity (runIdentity, Identity) +import Control.Monad.State (runState, State, modify, get) +import Text.Pandoc.Walk (walkM) + +-- | Convert Pandoc to CommonMark. +writeCommonMark :: WriterOptions -> Pandoc -> String +writeCommonMark opts (Pandoc meta blocks) = rendered + where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes') + (blocks', notes) = runState (walkM processNotes blocks) [] + notes' = if null notes + then [] + else [OrderedList (1, Decimal, Period) $ reverse notes] + metadata = runIdentity $ metaToJSON opts + (blocksToCommonMark opts) + (inlinesToCommonMark opts) + meta + context = defField "body" main $ metadata + rendered = if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main + +processNotes :: Inline -> State [[Block]] Inline +processNotes (Note bs) = do + modify (bs :) + notes <- get + return $ Str $ "[" ++ show (length notes) ++ "]" +processNotes x = return x + +node :: NodeType -> [Node] -> Node +node = Node Nothing + +blocksToCommonMark :: WriterOptions -> [Block] -> Identity String +blocksToCommonMark opts bs = return $ + T.unpack $ nodeToCommonmark cmarkOpts colwidth + $ node DOCUMENT (blocksToNodes bs) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + +inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String +inlinesToCommonMark opts ils = return $ + T.unpack $ nodeToCommonmark cmarkOpts colwidth + $ node PARAGRAPH (inlinesToNodes ils) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + +blocksToNodes :: [Block] -> [Node] +blocksToNodes = foldr blockToNodes [] + +blockToNodes :: Block -> [Node] -> [Node] +blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (CodeBlock (_,classes,_) xs) = + (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :) +blockToNodes (RawBlock fmt xs) + | fmt == Format "html" = (node (HTML (T.pack xs)) [] :) + | otherwise = id +blockToNodes (BlockQuote bs) = + (node BLOCK_QUOTE (blocksToNodes bs) :) +blockToNodes (BulletList items) = + (node (LIST ListAttributes{ + listType = BULLET_LIST, + listDelim = PERIOD_DELIM, + listTight = isTightList items, + listStart = 1 }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes (OrderedList (start, _sty, delim) items) = + (node (LIST ListAttributes{ + listType = ORDERED_LIST, + listDelim = case delim of + OneParen -> PAREN_DELIM + TwoParens -> PAREN_DELIM + _ -> PERIOD_DELIM, + listTight = isTightList items, + listStart = start }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes HorizontalRule = (node HRULE [] :) +blockToNodes (Header lev _ ils) = (node (HEADER lev) (inlinesToNodes ils) :) +blockToNodes (Div _ bs) = (blocksToNodes bs ++) +blockToNodes (DefinitionList items) = blockToNodes (BulletList items') + where items' = map dlToBullet items + dlToBullet (term, ((Para xs : ys) : zs)) = + Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, ((Plain xs : ys) : zs)) = + Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, xs) = + Para term : concat xs +blockToNodes t@(Table _ _ _ _ _) = + (node (HTML (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :) +blockToNodes Null = id + +inlinesToNodes :: [Inline] -> [Node] +inlinesToNodes = foldr inlineToNodes [] + +inlineToNodes :: Inline -> [Node] -> [Node] +inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) +inlineToNodes Space = (node (TEXT (T.pack " ")) [] :) +inlineToNodes LineBreak = (node LINEBREAK [] :) +inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) +inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) +inlineToNodes (Strikeout xs) = + ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</s>")) []]) ++ ) +inlineToNodes (Superscript xs) = + ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sup>")) []]) ++ ) +inlineToNodes (Subscript xs) = + ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sub>")) []]) ++ ) +inlineToNodes (SmallCaps xs) = + ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) [] + : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</span>")) []]) ++ ) +inlineToNodes (Link ils (url,tit)) = + (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (Image ils (url,tit)) = + (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (RawInline fmt xs) + | fmt == Format "html" = (node (INLINE_HTML (T.pack xs)) [] :) + | otherwise = id +inlineToNodes (Quoted qt ils) = + ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++) + where (start, end) = case qt of + SingleQuote -> (T.pack "‘", T.pack "’") + DoubleQuote -> (T.pack "“", T.pack "”") +inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes (Math mt str) = + case mt of + InlineMath -> + (node (INLINE_HTML (T.pack ("\\(" ++ str ++ "\\)"))) [] :) + DisplayMath -> + (node (INLINE_HTML (T.pack ("\\[" ++ str ++ "\\]"))) [] :) +inlineToNodes (Span _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Note _) = id -- should not occur +-- we remove Note elements in preprocessing diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index ebdc4a3d3..1f8bbcdba 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2007-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-2014 John MacFarlane + Copyright : Copyright (C) 2007-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -119,7 +119,7 @@ stringToConTeXt opts = concatMap (escapeCharForConTeXt opts) toLabel :: String -> String toLabel z = concatMap go z where go x - | elem x "\\#[]\",{}%()|=" = "ux" ++ printf "%x" (ord x) + | elem x ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x) | otherwise = [x] -- | Convert Elements to ConTeXt @@ -151,7 +151,13 @@ blockToConTeXt (CodeBlock _ str) = -- blankline because \stoptyping can't have anything after it, inc. '}' blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline blockToConTeXt (RawBlock _ _ ) = return empty -blockToConTeXt (Div _ bs) = blockListToConTeXt bs +blockToConTeXt (Div (ident,_,_) bs) = do + contents <- blockListToConTeXt bs + if null ident + then return contents + else return $ + ("\\reference" <> brackets (text $ toLabel ident) <> braces empty <> + "%") $$ contents blockToConTeXt (BulletList lst) = do contents <- mapM listItemToConTeXt lst return $ ("\\startitemize" <> if isTightList lst @@ -296,13 +302,8 @@ inlineToConTeXt (Link txt (('#' : ref), _)) = do opts <- gets stOptions contents <- inlineListToConTeXt txt let ref' = toLabel $ stringToConTeXt opts ref - return $ text "\\in" - <> braces (if writerNumberSections opts - then contents <+> text "(\\S" - else contents) -- prefix - <> braces (if writerNumberSections opts - then text ")" - else empty) -- suffix + return $ text "\\goto" + <> braces contents <> brackets (text ref') inlineToConTeXt (Link txt (src, _)) = do diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 914d61850..3a9c1954a 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,7 +1,11 @@ -{-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings, - ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -{- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> +{-# LANGUAGE FlexibleInstances, OverloadedStrings, + ScopedTypeVariables, DeriveDataTypeable, CPP #-} +#if MIN_VERSION_base(4,8,0) +#else +{-# LANGUAGE OverlappingInstances #-} +#endif +{- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Custom - Copyright : Copyright (C) 2012-2014 John MacFarlane + Copyright : Copyright (C) 2012-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -35,40 +39,41 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Data.List ( intersperse ) import Data.Char ( toLower ) +import Data.Typeable import Scripting.Lua (LuaState, StackValue, callfunc) import Text.Pandoc.Writers.Shared import qualified Scripting.Lua as Lua -import Text.Pandoc.UTF8 (fromString, toString) -import Data.ByteString (ByteString) -import qualified Data.ByteString.Char8 as C8 +import qualified Text.Pandoc.UTF8 as UTF8 import Data.Monoid +import Control.Monad (when) +import Control.Exception import qualified Data.Map as M import Text.Pandoc.Templates +import GHC.IO.Encoding (getForeignEncoding,setForeignEncoding, utf8) -attrToMap :: Attr -> M.Map ByteString ByteString +attrToMap :: Attr -> M.Map String String attrToMap (id',classes,keyvals) = M.fromList - $ ("id", fromString id') - : ("class", fromString $ unwords classes) - : map (\(x,y) -> (fromString x, fromString y)) keyvals - -getList :: StackValue a => LuaState -> Int -> IO [a] -getList lua i' = do - continue <- Lua.next lua i' - if continue - then do - next <- Lua.peek lua (-1) - Lua.pop lua 1 - x <- maybe (fail "peek returned Nothing") return next - rest <- getList lua i' - return (x : rest) - else return [] - -instance StackValue ByteString where - push l x = Lua.push l $ C8.unpack x - peek l n = (fmap . fmap) C8.pack (Lua.peek l n) - valuetype _ = Lua.TSTRING - + $ ("id", id') + : ("class", unwords classes) + : keyvals + +#if MIN_VERSION_hslua(0,4,0) +#if MIN_VERSION_base(4,8,0) +instance {-# OVERLAPS #-} StackValue [Char] where +#else +instance StackValue [Char] where +#endif + push lua cs = Lua.push lua (UTF8.fromString cs) + peek lua i = do + res <- Lua.peek lua i + return $ UTF8.toString `fmap` res + valuetype _ = Lua.TSTRING +#else +#if MIN_VERSION_base(4,8,0) +instance {-# OVERLAPS #-} StackValue a => StackValue [a] where +#else instance StackValue a => StackValue [a] where +#endif push lua xs = do Lua.createtable lua (length xs + 1) 0 let addValue (i, x) = Lua.push lua x >> Lua.rawseti lua (-2) i @@ -82,6 +87,19 @@ instance StackValue a => StackValue [a] where return (Just lst) valuetype _ = Lua.TTABLE +getList :: StackValue a => LuaState -> Int -> IO [a] +getList lua i' = do + continue <- Lua.next lua i' + if continue + then do + next <- Lua.peek lua (-1) + Lua.pop lua 1 + x <- maybe (fail "peek returned Nothing") return next + rest <- getList lua i' + return (x : rest) + else return [] +#endif + instance StackValue Format where push lua (Format f) = Lua.push lua (map toLower f) peek l n = fmap Format `fmap` Lua.peek l n @@ -106,13 +124,21 @@ instance (StackValue a, StackValue b) => StackValue (a,b) where peek _ _ = undefined -- not needed for our purposes valuetype _ = Lua.TTABLE +#if MIN_VERSION_base(4,8,0) +instance {-# OVERLAPS #-} StackValue [Inline] where +#else instance StackValue [Inline] where - push l ils = Lua.push l . C8.unpack =<< inlineListToCustom l ils +#endif + push l ils = Lua.push l =<< inlineListToCustom l ils peek _ _ = undefined valuetype _ = Lua.TSTRING +#if MIN_VERSION_base(4,8,0) +instance {-# OVERLAPS #-} StackValue [Block] where +#else instance StackValue [Block] where - push l ils = Lua.push l . C8.unpack =<< blockListToCustom l ils +#endif + push l ils = Lua.push l =<< blockListToCustom l ils peek _ _ = undefined valuetype _ = Lua.TSTRING @@ -134,7 +160,7 @@ instance StackValue MetaValue where instance StackValue Citation where push lua cit = do Lua.createtable lua 6 0 - let addValue ((k :: String), v) = Lua.push lua k >> Lua.push lua v >> + let addValue (k :: String, v) = Lua.push lua k >> Lua.push lua v >> Lua.rawset lua (-3) addValue ("citationId", citationId cit) addValue ("citationPrefix", citationPrefix cit) @@ -145,29 +171,45 @@ instance StackValue Citation where peek = undefined valuetype _ = Lua.TTABLE +data PandocLuaException = PandocLuaException String + deriving (Show, Typeable) + +instance Exception PandocLuaException + -- | Convert Pandoc to custom markup. writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String writeCustom luaFile opts doc@(Pandoc meta _) = do - luaScript <- C8.unpack `fmap` C8.readFile luaFile + luaScript <- UTF8.readFile luaFile + enc <- getForeignEncoding + setForeignEncoding utf8 lua <- Lua.newstate Lua.openlibs lua - Lua.loadstring lua luaScript "custom" + status <- Lua.loadstring lua luaScript luaFile + -- check for error in lua script (later we'll change the return type + -- to handle this more gracefully): + when (status /= 0) $ +#if MIN_VERSION_hslua(0,4,0) + Lua.tostring lua 1 >>= throw . PandocLuaException . UTF8.toString +#else + Lua.tostring lua 1 >>= throw . PandocLuaException +#endif Lua.call lua 0 0 -- TODO - call hierarchicalize, so we have that info rendered <- docToCustom lua opts doc context <- metaToJSON opts - (fmap toString . blockListToCustom lua) - (fmap toString . inlineListToCustom lua) + (blockListToCustom lua) + (inlineListToCustom lua) meta Lua.close lua - let body = toString rendered + setForeignEncoding enc + let body = rendered if writerStandalone opts then do let context' = setField "body" body context return $ renderTemplate' (writerTemplate opts) context' else return body -docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO ByteString +docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String docToCustom lua opts (Pandoc (Meta metamap) blocks) = do body <- blockListToCustom lua blocks callfunc lua "Doc" body metamap (writerVariables opts) @@ -175,7 +217,7 @@ docToCustom lua opts (Pandoc (Meta metamap) blocks) = do -- | Convert Pandoc block element to Custom. blockToCustom :: LuaState -- ^ Lua state -> Block -- ^ Block element - -> IO ByteString + -> IO String blockToCustom _ Null = return "" @@ -187,7 +229,7 @@ blockToCustom lua (Para [Image txt (src,tit)]) = blockToCustom lua (Para inlines) = callfunc lua "Para" inlines blockToCustom lua (RawBlock format str) = - callfunc lua "RawBlock" format (fromString str) + callfunc lua "RawBlock" format str blockToCustom lua HorizontalRule = callfunc lua "HorizontalRule" @@ -195,7 +237,7 @@ blockToCustom lua (Header level attr inlines) = callfunc lua "Header" level inlines (attrToMap attr) blockToCustom lua (CodeBlock attr str) = - callfunc lua "CodeBlock" (fromString str) (attrToMap attr) + callfunc lua "CodeBlock" str (attrToMap attr) blockToCustom lua (BlockQuote blocks) = callfunc lua "BlockQuote" blocks @@ -216,22 +258,22 @@ blockToCustom lua (Div attr items) = -- | Convert list of Pandoc block elements to Custom. blockListToCustom :: LuaState -- ^ Options -> [Block] -- ^ List of block elements - -> IO ByteString + -> IO String blockListToCustom lua xs = do blocksep <- callfunc lua "Blocksep" bs <- mapM (blockToCustom lua) xs return $ mconcat $ intersperse blocksep bs -- | Convert list of Pandoc inline elements to Custom. -inlineListToCustom :: LuaState -> [Inline] -> IO ByteString +inlineListToCustom :: LuaState -> [Inline] -> IO String inlineListToCustom lua lst = do xs <- mapM (inlineToCustom lua) lst - return $ C8.concat xs + return $ concat xs -- | Convert Pandoc inline element to Custom. -inlineToCustom :: LuaState -> Inline -> IO ByteString +inlineToCustom :: LuaState -> Inline -> IO String -inlineToCustom lua (Str str) = callfunc lua "Str" $ fromString str +inlineToCustom lua (Str str) = callfunc lua "Str" str inlineToCustom lua Space = callfunc lua "Space" @@ -254,24 +296,24 @@ inlineToCustom lua (Quoted DoubleQuote lst) = callfunc lua "DoubleQuoted" lst inlineToCustom lua (Cite cs lst) = callfunc lua "Cite" lst cs inlineToCustom lua (Code attr str) = - callfunc lua "Code" (fromString str) (attrToMap attr) + callfunc lua "Code" str (attrToMap attr) inlineToCustom lua (Math DisplayMath str) = - callfunc lua "DisplayMath" (fromString str) + callfunc lua "DisplayMath" str inlineToCustom lua (Math InlineMath str) = - callfunc lua "InlineMath" (fromString str) + callfunc lua "InlineMath" str inlineToCustom lua (RawInline format str) = - callfunc lua "RawInline" format (fromString str) + callfunc lua "RawInline" format str inlineToCustom lua (LineBreak) = callfunc lua "LineBreak" inlineToCustom lua (Link txt (src,tit)) = - callfunc lua "Link" txt (fromString src) (fromString tit) + callfunc lua "Link" txt src tit inlineToCustom lua (Image alt (src,tit)) = - callfunc lua "Image" alt (fromString src) (fromString tit) + callfunc lua "Image" alt src tit inlineToCustom lua (Note contents) = callfunc lua "Note" contents diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index b10317506..f3b99e141 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, PatternGuards #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -114,7 +114,8 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = n | n == 0 -> "chapter" | n >= 1 && n <= 5 -> "sect" ++ show n | otherwise -> "simplesect" - in inTags True tag [("id", writerIdentifierPrefix opts ++ id')] $ + in inTags True tag [("id", writerIdentifierPrefix opts ++ id') | + not (null id')] $ inTagsSimple "title" (inlinesToDocbook opts title) $$ vcat (map (elementToDocbook opts (lvl + 1)) elements') @@ -153,6 +154,14 @@ listItemToDocbook opts item = -- | Convert a Pandoc block element to Docbook. blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty +-- Add ids to paragraphs in divs with ids - this is needed for +-- pandoc-citeproc to get link anchors in bibliographies: +blockToDocbook opts (Div (ident,_,_) [Para lst]) = + let attribs = [("id", ident) | not (null ident)] in + if hasLineBreaks lst + then flush $ nowrap $ inTags False "literallayout" attribs + $ inlinesToDocbook opts lst + else inTags True "para" attribs $ inlinesToDocbook opts lst blockToDocbook opts (Div _ bs) = blocksToDocbook opts $ map plainToPara bs blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 5320a2816..da4c78cef 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,6 +1,6 @@ -{-# LANGUAGE ScopedTypeVariables, PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-} {- -Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012-2014 John MacFarlane + Copyright : Copyright (C) 2012-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -41,7 +41,7 @@ import Data.Time.Clock.POSIX import Data.Time.Clock import Data.Time.Format import System.Environment -import System.Locale +import Text.Pandoc.Compat.Locale (defaultTimeLocale) import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.ImageSize @@ -54,6 +54,8 @@ import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () import Text.XML.Light as XML import Text.TeXMath +import Text.Pandoc.Readers.Docx.StyleMap +import Text.Pandoc.Readers.Docx.Util (elemName) import Control.Monad.State import Text.Highlighting.Kate import Data.Unique (hashUnique, newUnique) @@ -62,8 +64,9 @@ import Text.Printf (printf) import qualified Control.Exception as E import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) -import Control.Applicative ((<$>), (<|>)) -import Data.Maybe (fromMaybe, mapMaybe) +import Control.Applicative ((<$>), (<|>), (<*>)) +import Data.Maybe (fromMaybe, mapMaybe, maybeToList) +import Data.Char (ord) data ListMarker = NoMarker | BulletMarker @@ -104,13 +107,17 @@ data WriterState = WriterState{ , stInDel :: Bool , stChangesAuthor :: String , stChangesDate :: String + , stPrintWidth :: Integer + , stStyleMaps :: StyleMaps + , stFirstPara :: Bool + , stTocTitle :: [Inline] } defaultWriterState :: WriterState defaultWriterState = WriterState{ stTextProperties = [] , stParaProperties = [] - , stFootnotes = [] + , stFootnotes = defaultFootnotes , stSectionIds = [] , stExternalLinks = M.empty , stImages = M.empty @@ -122,6 +129,10 @@ defaultWriterState = WriterState{ , stInDel = False , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" + , stPrintWidth = 1 + , stStyleMaps = defaultStyleMaps + , stFirstPara = False + , stTocTitle = normalizeInlines [Str "Table of Contents"] } type WS a = StateT WriterState IO a @@ -168,24 +179,80 @@ renumId f renumMap e renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) +-- | Certain characters are invalid in XML even if escaped. +-- See #1992 +stripInvalidChars :: Pandoc -> Pandoc +stripInvalidChars = bottomUp (filter isValidChar) + +-- | See XML reference +isValidChar :: Char -> Bool +isValidChar (ord -> c) + | c == 0x9 = True + | c == 0xA = True + | c == 0xD = True + | 0x20 <= c && c <= 0xD7FF = True + | 0xE000 <= c && c <= 0xFFFD = True + | 0x10000 <= c && c <= 0x10FFFF = True + | otherwise = False + +metaValueToInlines :: MetaValue -> [Inline] +metaValueToInlines (MetaString s) = normalizeInlines [Str s] +metaValueToInlines (MetaInlines ils) = ils +metaValueToInlines (MetaBlocks bs) = query return bs +metaValueToInlines (MetaBool b) = [Str $ show b] +metaValueToInlines _ = [] + -- | Produce an Docx file from a Pandoc document. writeDocx :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = walk fixDisplayMath doc + let doc' = stripInvalidChars . walk fixDisplayMath $ doc username <- lookup "USERNAME" <$> getEnvironment utctime <- getCurrentTime - refArchive <- liftM (toArchive . toLazy) $ - case writerReferenceDocx opts of - Just f -> B.readFile f - Nothing -> readDataFile datadir "reference.docx" - distArchive <- liftM (toArchive . toLazy) $ readDataFile Nothing "reference.docx" + distArchive <- getDefaultReferenceDocx Nothing + refArchive <- case writerReferenceDocx opts of + Just f -> liftM (toArchive . toLazy) $ B.readFile f + Nothing -> getDefaultReferenceDocx datadir + + parsedDoc <- parseXml refArchive distArchive "word/document.xml" + let wname f qn = qPrefix qn == Just "w" && f (qName qn) + let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + + -- Gets the template size + let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz"))) + let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName)) + + let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar"))) + let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName)) + let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName)) + + -- Get the avaible area (converting the size and the margins to int and + -- doing the difference + let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer) + <*> ( + (+) <$> (read <$> mbAttrMarRight ::Maybe Integer) + <*> (read <$> mbAttrMarLeft ::Maybe Integer) + ) + + -- styles + let stylepath = "word/styles.xml" + styledoc <- parseXml refArchive distArchive stylepath + + -- parse styledoc for heading styles + let styleMaps = getStyleMaps styledoc + + let tocTitle = fromMaybe (stTocTitle defaultWriterState) $ + metaValueToInlines <$> lookupMeta "toc-title" meta ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username - , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime} + , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime + , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) + , stStyleMaps = styleMaps + , stTocTitle = tocTitle + } let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -193,9 +260,6 @@ writeDocx opts doc@(Pandoc meta _) = do let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img let imageEntries = map toImageEntry imgs - - - let stdAttributes = [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") @@ -310,10 +374,7 @@ writeDocx opts doc@(Pandoc meta _) = do $ renderXml reldoc - -- adjust contents to add sectPr from reference.docx - parsedDoc <- parseXml refArchive distArchive "word/document.xml" - let wname f qn = qPrefix qn == Just "w" && f (qName qn) - let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + -- adjust contents to add sectPr from reference.docx let sectpr = case mbsectpr of Just sectpr' -> let cs = renumIds (\q -> qName q == "id" && qPrefix q == Just "r") @@ -323,8 +384,6 @@ writeDocx opts doc@(Pandoc meta _) = do add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs Nothing -> (mknode "w:sectPr" [] ()) - - -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' let contents' = contents ++ [sectpr] let docContents = mknode "w:document" stdAttributes @@ -346,11 +405,18 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml $ writerHighlightStyle opts - let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive distArchive stylepath - let styledoc' = styledoc{ elContent = elContent styledoc ++ - [Elem x | x <- newstyles, writerHighlight opts] } + let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts + let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } + where + modifyContent + | writerHighlight opts = (++ map Elem newstyles) + | otherwise = filter notTokStyle + notTokStyle (Elem el) = notStyle el || notTokId el + notTokStyle _ = True + notStyle = (/= elemName' "style") . elName + notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") + tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) + elemName' = elemName (sNameSpaces styleMaps) "w" let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -395,16 +461,24 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels + -- we use dist archive for settings.xml, because Word sometimes + -- adds references to footnotes or endnotes we don't have... + -- we do, however, copy some settings over from reference + let settingsPath = "word/settings.xml" + settingsList = [ "w:autoHyphenation" + , "w:consecutiveHyphenLimit" + , "w:hyphenationZone" + , "w:doNotHyphenateCap" + ] + settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList + let entryFromArchive arch path = - maybe (fail $ path ++ " corrupt or missing in reference docx") + maybe (fail $ path ++ " missing in reference docx") return (findEntryByPath path arch `mplus` findEntryByPath path distArchive) docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" - -- we use dist archive for settings.xml, because Word sometimes - -- adds references to footnotes or endnotes we don't have... - settingsEntry <- entryFromArchive distArchive "word/settings.xml" webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" headerFooterEntries <- mapM (entryFromArchive refArchive) $ mapMaybe (fmap ("word/" ++) . extractTarget) @@ -427,10 +501,13 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: Style -> [Element] -styleToOpenXml style = parStyle : map toStyle alltoktypes +styleToOpenXml :: StyleMaps -> Style -> [Element] +styleToOpenXml sm style = + maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype = mknode "w:style" [("w:type","character"), + toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] [ mknode "w:name" [("w:val",show toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () @@ -451,17 +528,35 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes tokBg toktype = maybe "auto" (drop 1 . fromColor) $ (tokenBackground =<< lookup toktype tokStyles) `mplus` backgroundColor style - parStyle = mknode "w:style" [("w:type","paragraph"), + parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] [ mknode "w:name" [("w:val","Source Code")] () , mknode "w:basedOn" [("w:val","Normal")] () , mknode "w:link" [("w:val","VerbatimChar")] () , mknode "w:pPr" [] $ mknode "w:wordWrap" [("w:val","off")] () + : mknode "w:noProof" [] () : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) $ backgroundColor style ) ] +copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry +copyChildren refArchive distArchive path timestamp elNames = do + ref <- parseXml refArchive distArchive path + dist <- parseXml distArchive distArchive path + return $ toEntry path timestamp $ renderXml dist{ + elContent = elContent dist ++ copyContent ref + } + where + strName QName{qName=name, qPrefix=prefix} + | Just p <- prefix = p++":"++name + | otherwise = name + shouldCopy = (`elem` elNames) . strName + cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} + copyContent = map cleanElem . filterChildrenName shouldCopy + -- this is the lowest number used for a list numId baseListId :: Int baseListId = 1000 @@ -539,6 +634,34 @@ mkLvl marker lvl = getNumId :: WS Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists +makeTOC :: WriterOptions -> WS [Element] +makeTOC opts | writerTableOfContents opts = do + let depth = "1-"++(show (writerTOCDepth opts)) + let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" + tocTitle <- gets stTocTitle + title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) + return $ + [mknode "w:sdt" [] ([ + mknode "w:sdtPr" [] ( + mknode "w:docPartObj" [] ( + [mknode "w:docPartGallery" [("w:val","Table of Contents")] (), + mknode "w:docPartUnique" [] ()] + ) -- w:docPartObj + ), -- w:sdtPr + mknode "w:sdtContent" [] (title++[ + mknode "w:p" [] ( + mknode "w:r" [] ([ + mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (), + mknode "w:instrText" [("xml:space","preserve")] tocCmd, + mknode "w:fldChar" [("w:fldCharType","separate")] (), + mknode "w:fldChar" [("w:fldCharType","end")] () + ]) -- w:r + ) -- w:p + ]) + ])] -- w:sdt +makeTOC _ = return [] + + -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) @@ -557,32 +680,45 @@ writeOpenXML opts (Pandoc meta blocks) = do Just (MetaBlocks [Para xs]) -> xs Just (MetaInlines xs) -> xs _ -> [] - title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $ + title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] + subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] + authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ map Para auths - date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] abstract <- if null abstract' then return [] - else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract' + else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs let blocks' = bottomUp convertSpace blocks - doc' <- blocksToOpenXML opts blocks' + doc' <- (setFirstPara >> blocksToOpenXML opts blocks') notes' <- reverse `fmap` gets stFootnotes - let meta' = title ++ subtitle ++ authors ++ date ++ abstract + toc <- makeTOC opts + let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pStyle :: String -> Element -pStyle sty = mknode "w:pStyle" [("w:val",sty)] () +pCustomStyle :: String -> Element +pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () + +pStyleM :: String -> WS XML.Element +pStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sParaStyleMap styleMaps + return $ mknode "w:pStyle" [("w:val",sty')] () + +rCustomStyle :: String -> Element +rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () -rStyle :: String -> Element -rStyle sty = mknode "w:rStyle" [("w:val",sty)] () +rStyleM :: String -> WS XML.Element +rStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sCharStyleMap styleMaps + return $ mknode "w:rStyle" [("w:val",sty')] () getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -596,12 +732,13 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do let (hs, bs') = span isHeaderBlock bs header <- blocksToOpenXML opts hs -- We put the Bibliography style on paragraphs after the header - rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs' + rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs' return (header ++ rest) blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do - paraProps <- withParaProp (pStyle $ "Heading" ++ show lev) $ - getParaProps False + setFirstPara + paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ + getParaProps False contents <- inlinesToOpenXML opts lst usedIdents <- gets stSectionIds let bookmarkName = if null ident @@ -613,40 +750,60 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] -blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact") +blockToOpenXML opts (Plain lst) = withParaProp (pCustomStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do + setFirstPara + pushParaProp $ pCustomStyle $ + if null alt + then "Figure" + else "FigureWithCaption" paraProps <- getParaProps False + popParaProp contents <- inlinesToOpenXML opts [Image alt (src,tit)] - captionNode <- withParaProp (pStyle "ImageCaption") + captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -- fixDisplayMath sometimes produces a Para [] as artifact blockToOpenXML _ (Para []) = return [] blockToOpenXML opts (Para lst) = do - paraProps <- getParaProps $ case lst of - [Math DisplayMath _] -> True - _ -> False - contents <- inlinesToOpenXML opts lst - return [mknode "w:p" [] (paraProps ++ contents)] + isFirstPara <- gets stFirstPara + paraProps <- getParaProps $ case lst of + [Math DisplayMath _] -> True + _ -> False + bodyTextStyle <- pStyleM "Body Text" + let paraProps' = case paraProps of + [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] + ps -> ps + modify $ \s -> s { stFirstPara = False } + contents <- inlinesToOpenXML opts lst + return [mknode "w:p" [] (paraProps' ++ contents)] blockToOpenXML _ (RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] -blockToOpenXML opts (BlockQuote blocks) = - withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks -blockToOpenXML opts (CodeBlock attrs str) = - withParaProp (pStyle "SourceCode") $ blockToOpenXML opts $ Para [Code attrs str] -blockToOpenXML _ HorizontalRule = return [ - mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] +blockToOpenXML opts (BlockQuote blocks) = do + p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks + setFirstPara + return p +blockToOpenXML opts (CodeBlock attrs str) = do + p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str]) + setFirstPara + return p +blockToOpenXML _ HorizontalRule = do + setFirstPara + return [ + mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] $ mknode "v:rect" [("style","width:0;height:1.5pt"), ("o:hralign","center"), ("o:hrstd","t"),("o:hr","t")] () ] blockToOpenXML opts (Table caption aligns widths headers rows) = do + setFirstPara let captionStr = stringify caption caption' <- if null caption then return [] - else withParaProp (pStyle "TableCaption") + else withParaProp (pCustomStyle "TableCaption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) @@ -657,51 +814,62 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] - let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] - [mknode "w:pStyle" [("w:val","Compact")] ()]]] + let emptyCell = [mknode "w:p" [] [pCustomStyle "Compact"]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents then emptyCell else contents - let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells + let mkrow border cells = mknode "w:tr" [] $ + [mknode "w:trPr" [] [ + mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border] + ++ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" [("w:w", show (floor (textwidth * w) :: Integer))] () + let hasHeader = not (all null headers) return $ - mknode "w:tbl" [] + caption' ++ + [mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","TableNormal")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : + mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths then [] else map mkgridcol widths) - : [ mkrow True headers' | not (all null headers) ] ++ + : [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' - ) : caption' + )] blockToOpenXML opts (BulletList lst) = do let marker = BulletMarker addList marker numid <- getNumId - asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst + l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst + setFirstPara + return l blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do let marker = NumberMarker numstyle numdelim start addList marker numid <- getNumId - asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst -blockToOpenXML opts (DefinitionList items) = - concat `fmap` mapM (definitionListItemToOpenXML opts) items + l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst + setFirstPara + return l +blockToOpenXML opts (DefinitionList items) = do + l <- concat `fmap` mapM (definitionListItemToOpenXML opts) items + setFirstPara + return l definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaProp (pStyle "DefinitionTerm") + term' <- withParaProp (pCustomStyle "DefinitionTerm") $ blockToOpenXML opts (Para term) - defs' <- withParaProp (pStyle "Definition") + defs' <- withParaProp (pCustomStyle "Definition") $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' @@ -765,6 +933,9 @@ withTextProp d p = do popTextProp return res +withTextPropM :: WS Element -> WS a -> WS a +withTextPropM = (. flip withTextProp) . (>>=) + getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do props <- gets stParaProperties @@ -793,6 +964,9 @@ withParaProp d p = do popParaProp return res +withParaPropM :: WS Element -> WS a -> WS a +withParaPropM = (. flip withParaProp) . (>>=) + formattedString :: String -> WS [Element] formattedString str = do props <- getTextProps @@ -802,6 +976,9 @@ formattedString str = do [ mknode (if inDel then "w:delText" else "w:t") [("xml:space","preserve")] str ] ] +setFirstPara :: WS () +setFirstPara = modify $ \s -> s { stFirstPara = True } + -- | Convert an inline element to OpenXML. inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] inlineToOpenXML _ (Str str) = formattedString str @@ -872,25 +1049,26 @@ inlineToOpenXML opts (Math mathType str) = do Right r -> return [r] Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML opts (Code attrs str) = - withTextProp (rStyle "VerbatimChar") - $ if writerHighlight opts - then case highlight formatOpenXML attrs str of - Nothing -> unhighlighted - Just h -> return h - else unhighlighted - where unhighlighted = intercalate [br] `fmap` - (mapM formattedString $ lines str) - formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) - toHlTok (toktype,tok) = mknode "w:r" [] - [ mknode "w:rPr" [] - [ rStyle $ show toktype ] - , mknode "w:t" [("xml:space","preserve")] tok ] +inlineToOpenXML opts (Code attrs str) = do + let unhighlighted = intercalate [br] `fmap` + (mapM formattedString $ lines str) + formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) + toHlTok (toktype,tok) = mknode "w:r" [] + [ mknode "w:rPr" [] + [ rCustomStyle (show toktype) ] + , mknode "w:t" [("xml:space","preserve")] tok ] + withTextProp (rCustomStyle "VerbatimChar") + $ if writerHighlight opts + then case highlight formatOpenXML attrs str of + Nothing -> unhighlighted + Just h -> return h + else unhighlighted inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId + footnoteStyle <- rStyleM "Footnote Reference" let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -900,22 +1078,22 @@ inlineToOpenXML opts (Note bs) = do oldParaProperties <- gets stParaProperties oldTextProperties <- gets stTextProperties modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] } - contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts + contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts $ insertNoteRef bs modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties, stTextProperties = oldTextProperties } let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i @@ -927,6 +1105,7 @@ inlineToOpenXML opts (Link txt (src,_)) = do return [ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML opts (Image alt (src, tit)) = do -- first, check to see if we've already done this image + pageWidth <- gets stPrintWidth imgs <- gets stImages case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] @@ -940,10 +1119,15 @@ inlineToOpenXML opts (Image alt (src, tit)) = do inlinesToOpenXML opts alt Right (img, mt) -> do ident <- ("rId"++) `fmap` getUniqueId - let size = imageSize img - let (xpt,ypt) = maybe (120,120) sizeInPoints size + (xpt,ypt) <- case imageSize img of + Right size -> return $ sizeInPoints size + Left msg -> do + liftIO $ warn $ + "Could not determine image size in `" ++ + src ++ "': " ++ msg + return (120,120) -- 12700 emu = 1 pt - let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) + let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700) let cNvPicPr = mknode "pic:cNvPicPr" [] $ mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] () let nvPicPr = mknode "pic:nvPicPr" [] @@ -1001,18 +1185,38 @@ inlineToOpenXML opts (Image alt (src, tit)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] +-- Word will insert these footnotes into the settings.xml file +-- (whether or not they're visible in the document). If they're in the +-- file, but not in the footnotes.xml file, it will produce +-- problems. So we want to make sure we insert them into our document. +defaultFootnotes :: [Element] +defaultFootnotes = [ mknode "w:footnote" + [("w:type", "separator"), ("w:id", "-1")] $ + [ mknode "w:p" [] $ + [mknode "w:r" [] $ + [ mknode "w:separator" [] ()]]] + , mknode "w:footnote" + [("w:type", "continuationSeparator"), ("w:id", "0")] $ + [ mknode "w:p" [] $ + [ mknode "w:r" [] $ + [ mknode "w:continuationSeparator" [] ()]]]] + parseXml :: Archive -> Archive -> String -> IO Element parseXml refArchive distArchive relpath = - case ((findEntryByPath relpath refArchive `mplus` - findEntryByPath relpath distArchive) - >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of - Just d -> return d - Nothing -> fail $ relpath ++ " corrupt or missing in reference docx" + case findEntryByPath relpath refArchive `mplus` + findEntryByPath relpath distArchive of + Nothing -> fail $ relpath ++ " missing in reference docx" + Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of + Nothing -> fail $ relpath ++ " corrupt in reference docx" + Just d -> return d -- | Scales the image to fit the page -fitToPage :: (Integer, Integer) -> (Integer, Integer) -fitToPage (x, y) - --5440680 is the emu width size of a letter page in portrait, minus the margins - | x > 5440680 = - (5440680, round $ (5440680 / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y)) +-- sizes are passed in emu +fitToPage :: (Integer, Integer) -> Integer -> (Integer, Integer) +fitToPage (x, y) pageWidth + -- Fixes width to the page width and scales the height + | x > pageWidth = + (pageWidth, round $ + ((fromIntegral pageWidth) / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y)) | otherwise = (x, y) + diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 8c1d360aa..7ebe09db7 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.DokuWiki - Copyright : Copyright (C) 2008-2014 John MacFarlane + Copyright : Copyright (C) 2008-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Clare Macrae <clare.macrae@googlemail.com> @@ -134,7 +134,9 @@ blockToDokuWiki opts (Para [Image txt (src,'f':'i':'g':':':tit)]) = do let opt = if null txt then "" else "|" ++ if null tit then capt else tit ++ capt - return $ "{{:" ++ src ++ opt ++ "}}\n" + -- Relative links fail isURI and receive a colon + prefix = if isURI src then "" else ":" + return $ "{{" ++ prefix ++ src ++ opt ++ "}}\n" blockToDokuWiki opts (Para inlines) = do indent <- stIndent <$> ask @@ -170,15 +172,15 @@ blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", "visualfoxpro", "winbatch", "xml", "xpp", "z80"] - let (beg, end) = if null at - then ("<code" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</code>") - else ("<source lang=\"" ++ head at ++ "\">", "</source>") - return $ beg ++ str ++ end + return $ "<code" ++ + (case at of + [] -> ">\n" + (x:_) -> " " ++ x ++ ">\n") ++ str ++ "\n</code>" blockToDokuWiki opts (BlockQuote blocks) = do contents <- blockListToDokuWiki opts blocks if isSimpleBlockQuote blocks - then return $ "> " ++ contents + then return $ unlines $ map ("> " ++) $ lines contents else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>" blockToDokuWiki opts (Table capt aligns _ headers rows) = do @@ -352,9 +354,7 @@ isPlainOrPara (Para _) = True isPlainOrPara _ = False isSimpleBlockQuote :: [Block] -> Bool -isSimpleBlockQuote [BlockQuote bs] = isSimpleBlockQuote bs -isSimpleBlockQuote [b] = isPlainOrPara b -isSimpleBlockQuote _ = False +isSimpleBlockQuote bs = all isPlainOrPara bs -- | Concatenates strings with line breaks between them. vcat :: [String] -> String @@ -451,7 +451,7 @@ inlineToDokuWiki _ (Code _ str) = inlineToDokuWiki _ (Str str) = return $ escapeString str -inlineToDokuWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>" +inlineToDokuWiki _ (Math _ str) = return $ "$" ++ str ++ "$" -- note: str should NOT be escaped inlineToDokuWiki _ (RawInline f str) @@ -459,7 +459,7 @@ inlineToDokuWiki _ (RawInline f str) | f == Format "html" = return $ "<html>" ++ str ++ "</html>" | otherwise = return "" -inlineToDokuWiki _ (LineBreak) = return "\\\\ " +inlineToDokuWiki _ (LineBreak) = return "\\\\\n" inlineToDokuWiki _ Space = return " " @@ -480,7 +480,9 @@ inlineToDokuWiki opts (Image alt (source, tit)) = do ("", []) -> "" ("", _ ) -> "|" ++ alt' (_ , _ ) -> "|" ++ tit - return $ "{{:" ++ source ++ txt ++ "}}" + -- Relative links fail isURI and receive a colon + prefix = if isURI source then "" else ":" + return $ "{{" ++ prefix ++ source ++ txt ++ "}}" inlineToDokuWiki opts (Note contents) = do contents' <- blockListToDokuWiki opts contents diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 53574711f..8577c0fa2 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,6 +1,6 @@ -{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns #-} +{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-} {- -Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010-2014 John MacFarlane + Copyright : Copyright (C) 2010-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,11 +31,12 @@ Conversion of 'Pandoc' documents to EPUB. module Text.Pandoc.Writers.EPUB ( writeEPUB ) where import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) import qualified Data.Map as M -import Data.Maybe ( fromMaybe ) +import Data.Maybe ( fromMaybe, catMaybes ) import Data.List ( isPrefixOf, isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) import System.FilePath ( takeExtension, takeFileName ) +import System.FilePath.Glob ( namesMatching ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Text.Pandoc.UTF8 as UTF8 @@ -44,7 +45,7 @@ import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromE import Control.Applicative ((<$>)) import Data.Time.Clock.POSIX ( getPOSIXTime ) import Data.Time (getCurrentTime,UTCTime, formatTime) -import System.Locale ( defaultTimeLocale ) +import Text.Pandoc.Compat.Locale ( defaultTimeLocale ) import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim , normalizeDate, readDataFile, stringify, warn , hierarchicalize, fetchItem' ) @@ -55,16 +56,18 @@ import Text.Pandoc.Options ( WriterOptions(..) , EPUBVersion(..) , ObfuscationMethod(NoObfuscation) ) import Text.Pandoc.Definition -import Text.Pandoc.Walk (walk, walkM) -import Control.Monad.State (modify, get, execState, State, put, evalState) -import Control.Monad (foldM, when, mplus, liftM) +import Text.Pandoc.Walk (walk, walkM, query) +import Data.Default +import Text.Pandoc.Writers.Markdown (writePlain) +import Control.Monad.State (modify, get, State, put, evalState) +import Control.Monad (mplus, liftM, when) import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs , strContent, lookupAttr, Node(..), QName(..), parseXML , onlyElems, node, ppElement) import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) -import Text.Pandoc.MIME (MimeType, getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) @@ -225,8 +228,9 @@ addMetadataFromXML _ md = md metaValueToString :: MetaValue -> String metaValueToString (MetaString s) = s -metaValueToString (MetaInlines ils) = stringify ils -metaValueToString (MetaBlocks bs) = stringify bs +metaValueToString (MetaInlines ils) = writePlain def + (Pandoc nullMeta [Plain ils]) +metaValueToString (MetaBlocks bs) = writePlain def (Pandoc nullMeta bs) metaValueToString (MetaBool b) = show b metaValueToString _ = "" @@ -343,7 +347,6 @@ writeEPUB opts doc@(Pandoc meta _) = do , writerStandalone = True , writerSectionDivs = True , writerHtml5 = epub3 - , writerTableOfContents = False -- we always have one in epub , writerVariables = vars , writerHTMLMathMethod = if epub3 @@ -358,8 +361,9 @@ writeEPUB opts doc@(Pandoc meta _) = do Nothing -> return ([],[]) Just img -> do let coverImage = "media/" ++ takeFileName img - let cpContent = renderHtml $ writeHtml opts' - (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) + let cpContent = renderHtml $ writeHtml + opts'{ writerVariables = ("coverpage","true"):vars } + (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) imgContent <- B.readFile img return ( [mkEntry "cover.xhtml" cpContent] , [mkEntry coverImage imgContent] ) @@ -374,21 +378,17 @@ writeEPUB opts doc@(Pandoc meta _) = do mediaRef <- newIORef [] Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= walkM (transformBlock opts' mediaRef) - pics <- readIORef mediaRef - let readPicEntry entries (oldsrc, newsrc) = do - res <- fetchItem' (writerMediaBag opts') - (writerSourceURL opts') oldsrc - case res of - Left _ -> do - warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." - return entries - Right (img,_) -> return $ - (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries - picEntries <- foldM readPicEntry [] pics + picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef -- handle fonts + let matchingGlob f = do + xs <- namesMatching f + when (null xs) $ + warn $ f ++ " did not match any font files." + return xs let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f - fontEntries <- mapM mkFontEntry $ writerEpubFonts opts' + fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') + fontEntries <- mapM mkFontEntry fontFiles -- set page progression direction attribution let progressionDirection = case epubPageDirection metadata of @@ -408,17 +408,16 @@ writeEPUB opts doc@(Pandoc meta _) = do (docTitle' meta) : blocks let chapterHeaderLevel = writerEpubChapterLevel opts - -- internal reference IDs change when we chunk the file, - -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'. - -- the next two lines fix that: - let reftable = correlateRefs chapterHeaderLevel blocks' - let blocks'' = replaceRefs reftable blocks' let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel + isChapterHeader (Div ("",["references"],[]) (Header n _ _:_)) = + n <= chapterHeaderLevel isChapterHeader _ = False let toChapters :: [Block] -> State [Int] [Chapter] toChapters [] = return [] + toChapters (Div ("",["references"],[]) bs@(Header 1 _ _:_) : rest) = + toChapters (bs ++ rest) toChapters (Header n attr@(_,classes,_) ils : bs) = do nums <- get mbnum <- if "unnumbered" `elem` classes @@ -439,7 +438,37 @@ writeEPUB opts doc@(Pandoc meta _) = do let (xs,ys) = break isChapterHeader bs (Chapter Nothing (b:xs) :) `fmap` toChapters ys - let chapters = evalState (toChapters blocks'') [] + let chapters' = evalState (toChapters blocks') [] + + let extractLinkURL' :: Int -> Inline -> [(String, String)] + extractLinkURL' num (Span (ident, _, _) _) + | not (null ident) = [(ident, showChapter num ++ ('#':ident))] + extractLinkURL' _ _ = [] + + let extractLinkURL :: Int -> Block -> [(String, String)] + extractLinkURL num (Div (ident, _, _) _) + | not (null ident) = [(ident, showChapter num ++ ('#':ident))] + extractLinkURL num (Header _ (ident, _, _) _) + | not (null ident) = [(ident, showChapter num ++ ('#':ident))] + extractLinkURL num b = query (extractLinkURL' num) b + + let reftable = concat $ zipWith (\(Chapter _ bs) num -> + query (extractLinkURL num) bs) + chapters' [1..] + + let fixInternalReferences :: Inline -> Inline + fixInternalReferences (Link lab ('#':xs, tit)) = + case lookup xs reftable of + Just ys -> Link lab (ys, tit) + Nothing -> Link lab ('#':xs, tit) + fixInternalReferences x = x + + -- internal reference IDs change when we chunk the file, + -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'. + -- this fixes that: + let chapters = map (\(Chapter mbnum bs) -> + Chapter mbnum $ walk fixInternalReferences bs) + chapters' let chapToEntry :: Int -> Chapter -> Entry chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) @@ -488,6 +517,9 @@ writeEPUB opts doc@(Pandoc meta _) = do [] -> "UNTITLED" (x:_) -> titleText x x -> stringify x + + let tocTitle = fromMaybe plainTitle $ + metaValueToString <$> lookupMeta "toc-title" meta let uuid = case epubIdentifier metadata of (x:_) -> identifierText x -- use first identifier as UUID [] -> error "epubIdentifier is null" -- shouldn't happen @@ -521,7 +553,7 @@ writeEPUB opts doc@(Pandoc meta _) = do case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! - [("idref", "cover_xhtml"),("linear","no")] $ () ] + [("idref", "cover_xhtml")] $ () ] ++ ((unode "itemref" ! [("idref", "title_page_xhtml") ,("linear", case lookupMeta "title" meta of @@ -532,7 +564,7 @@ writeEPUB opts doc@(Pandoc meta _) = do map chapterRefNode chapterEntries) , unode "guide" $ [ unode "reference" ! - [("type","toc"),("title",plainTitle), + [("type","toc"),("title", tocTitle), ("href","nav.xhtml")] $ () ] ++ [ unode "reference" ! @@ -542,7 +574,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let contentsEntry = mkEntry "content.opf" contentsData -- toc.ncx - let secs = hierarchicalize blocks'' + let secs = hierarchicalize blocks' let tocLevel = writerTOCDepth opts @@ -569,8 +601,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let navMapFormatter :: Int -> String -> String -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! - [("id", "navPoint-" ++ show n) - ,("playOrder", show n)] $ + [("id", "navPoint-" ++ show n)] $ [ unode "navLabel" $ unode "text" tit , unode "content" ! [("src", src)] $ () ] ++ subs @@ -611,17 +642,35 @@ writeEPUB opts doc@(Pandoc meta _) = do (_:_) -> [unode "ol" ! [("class","toc")] $ subs] let navtag = if epub3 then "nav" else "div" - let navData = UTF8.fromStringLazy $ ppTopElement $ - unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml") - ,("xmlns:epub","http://www.idpf.org/2007/ops")] $ - [ unode "head" $ - [ unode "title" plainTitle - , unode "link" ! [("rel","stylesheet"),("type","text/css"),("href","stylesheet.css")] $ () ] - , unode "body" $ - unode navtag ! [("epub:type","toc") | epub3] $ - [ unode "h1" ! [("id","toc-title")] $ plainTitle - , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1] - ] + let navBlocks = [RawBlock (Format "html") $ ppElement $ + unode navtag ! ([("epub:type","toc") | epub3] ++ + [("id","toc")]) $ + [ unode "h1" ! [("id","toc-title")] $ tocTitle + , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]] + let landmarks = if epub3 + then [RawBlock (Format "html") $ ppElement $ + unode "nav" ! [("epub:type","landmarks") + ,("hidden","hidden")] $ + [ unode "ol" $ + [ unode "li" + [ unode "a" ! [("href", "cover.xhtml") + ,("epub:type", "cover")] $ + "Cover"] | + epubCoverImage metadata /= Nothing + ] ++ + [ unode "li" + [ unode "a" ! [("href", "#toc") + ,("epub:type", "toc")] $ + "Table of contents" + ] | writerTableOfContents opts + ] + ] + ] + else [] + let navData = renderHtml $ writeHtml opts' + (Pandoc (setMeta "title" + (walk removeNote $ fromList $ docTitle' meta) nullMeta) + (navBlocks ++ landmarks)) let navEntry = mkEntry "nav.xhtml" navData -- mimetype @@ -764,59 +813,75 @@ metadataElement version md currentTime = showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media +transformTag :: WriterOptions + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media -> Tag String -> IO (Tag String) -transformTag mediaRef tag@(TagOpen name attr) +transformTag opts mediaRef tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef mediaRef src - newposter <- modifyMediaRef mediaRef poster + newsrc <- modifyMediaRef opts mediaRef src + newposter <- modifyMediaRef opts mediaRef poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ [("src", newsrc) | not (null newsrc)] ++ [("poster", newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ tag = return tag - -modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath -modifyMediaRef _ "" = return "" -modifyMediaRef mediaRef oldsrc = do +transformTag _ _ tag = return tag + +modifyMediaRef :: WriterOptions + -> IORef [(FilePath, (FilePath, Maybe Entry))] + -> FilePath + -> IO FilePath +modifyMediaRef _ _ "" = return "" +modifyMediaRef opts mediaRef oldsrc = do media <- readIORef mediaRef case lookup oldsrc media of - Just n -> return n - Nothing -> do - let new = "media/file" ++ show (length media) ++ - takeExtension (takeWhile (/='?') oldsrc) -- remove query - modifyIORef mediaRef ( (oldsrc, new): ) + Just (n,_) -> return n + Nothing -> do + res <- fetchItem' (writerMediaBag opts) + (writerSourceURL opts) oldsrc + (new, mbEntry) <- + case res of + Left _ -> do + warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." + return (oldsrc, Nothing) + Right (img,mbMime) -> do + let new = "media/file" ++ show (length media) ++ + fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) + (('.':) <$> (mbMime >>= extensionFromMimeType)) + epochtime <- floor `fmap` getPOSIXTime + let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img + return (new, Just entry) + modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): ) return new transformBlock :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media -> Block -> IO Block -transformBlock _ mediaRef (RawBlock fmt raw) +transformBlock opts mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag mediaRef) tags + tags' <- mapM (transformTag opts mediaRef) tags return $ RawBlock fmt (renderTags' tags') transformBlock _ _ b = return b transformInline :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media -> Inline -> IO Inline -transformInline _ mediaRef (Image lab (src,tit)) = do - newsrc <- modifyMediaRef mediaRef src +transformInline opts mediaRef (Image lab (src,tit)) = do + newsrc <- modifyMediaRef opts mediaRef src return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained opts $ writeHtmlInline opts x return $ RawInline (Format "html") raw -transformInline _ mediaRef (RawInline fmt raw) +transformInline opts mediaRef (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag mediaRef) tags + tags' <- mapM (transformTag opts mediaRef) tags return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x @@ -849,11 +914,6 @@ mediaTypeOf x = Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y _ -> Nothing -data IdentState = IdentState{ - chapterNumber :: Int, - identTable :: [(String,String)] - } deriving (Read, Show) - -- Returns filename for chapter number. showChapter :: Int -> String showChapter = printf "ch%03d.xhtml" @@ -870,38 +930,6 @@ addIdentifiers bs = evalState (mapM go bs) [] return $ Header n (ident',classes,kvs) ils go x = return x --- Go through a block list and construct a table --- correlating the automatically constructed references --- that would be used in a normal pandoc document with --- new URLs to be used in the EPUB. For example, what --- was "header-1" might turn into "ch006.xhtml#header". -correlateRefs :: Int -> [Block] -> [(String,String)] -correlateRefs chapterHeaderLevel bs = - identTable $ execState (mapM_ go bs) - IdentState{ chapterNumber = 0 - , identTable = [] } - where go :: Block -> State IdentState () - go (Header n (ident,_,_) _) = do - when (n <= chapterHeaderLevel) $ - modify $ \s -> s{ chapterNumber = chapterNumber s + 1 } - st <- get - let chapterid = showChapter (chapterNumber st) ++ - if n <= chapterHeaderLevel - then "" - else '#' : ident - modify $ \s -> s{ identTable = (ident, chapterid) : identTable st } - go _ = return () - --- Replace internal link references using the table produced --- by correlateRefs. -replaceRefs :: [(String,String)] -> [Block] -> [Block] -replaceRefs refTable = walk replaceOneRef - where replaceOneRef x@(Link lab ('#':xs,tit)) = - case lookup xs refTable of - Just url -> Link lab (url,tit) - Nothing -> x - replaceOneRef x = x - -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM normalizeDate' :: String -> Maybe String normalizeDate' xs = @@ -1198,4 +1226,3 @@ docTitle' meta = fromMaybe [] $ go <$> lookupMeta "title" meta _ -> [] go (MetaList xs) = concatMap go xs go _ = [] - diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 233b8b32b..31fa4bee8 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -85,7 +85,7 @@ writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ xml_head ++ (showContent fb2_xml) + return $ xml_head ++ (showContent fb2_xml) ++ "\n" where xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" fb2_attrs = diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 1a00c7660..a2778ea97 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,7 +1,7 @@ -{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns #-} +{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns, ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-deprecations #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -46,10 +46,13 @@ import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.String ( fromString ) -import Data.Maybe ( catMaybes, fromMaybe ) +import Data.Maybe ( catMaybes, fromMaybe, isJust ) import Control.Monad.State import Text.Blaze.Html hiding(contents) +#if MIN_VERSION_blaze_markup(0,6,3) +#else import Text.Blaze.Internal(preEscapedString) +#endif #if MIN_VERSION_blaze_html(0,5,1) import qualified Text.Blaze.XHtml5 as H5 #else @@ -60,7 +63,7 @@ import qualified Text.Blaze.XHtml1.Transitional.Attributes as A import Text.Blaze.Renderer.String (renderHtml) import Text.TeXMath import Text.XML.Light.Output -import Text.XML.Light (unode, elChildren, add_attr, unqual) +import Text.XML.Light (unode, elChildren, unqual) import qualified Text.XML.Light as XML import System.FilePath (takeExtension) import Data.Monoid @@ -73,11 +76,13 @@ data WriterState = WriterState , stQuotes :: Bool -- ^ <q> tag is used , stHighlighting :: Bool -- ^ Syntax highlighting is used , stSecNum :: [Int] -- ^ Number of current section + , stElement :: Bool -- ^ Processing an Element } defaultWriterState :: WriterState defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, - stHighlighting = False, stSecNum = []} + stHighlighting = False, stSecNum = [], + stElement = False} -- Helpers to render HTML with the appropriate function. @@ -189,6 +194,9 @@ pandocToHtml opts (Pandoc meta blocks) = do defField "revealjs-url" ("reveal.js" :: String) $ defField "s5-url" ("s5/default" :: String) $ defField "html5" (writerHtml5 opts) $ + defField "center" (case lookupMeta "center" meta of + Just (MetaBool False) -> False + _ -> True) $ metadata return (thebody, context) @@ -280,7 +288,13 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen let titleSlide = slide && level < slideLevel header' <- if title' == [Str "\0"] -- marker for hrule then return mempty - else blockToHtml opts (Header level' (id',classes,keyvals) title') + else do + modify (\st -> st{ stElement = True}) + res <- blockToHtml opts + (Header level' (id',classes,keyvals) title') + modify (\st -> st{ stElement = False}) + return res + let isSec (Sec _ _ _ _ _) = True isSec (Blk _) = False let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."] @@ -361,8 +375,8 @@ obfuscateLink opts (renderHtml -> txt) s = (linkText, altText) = if txt == drop 7 s' -- autolink then ("e", name' ++ " at " ++ domain') - else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++ - domain' ++ ")") + else ("'" ++ obfuscateString txt ++ "'", + txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")") in case meth of ReferenceObfuscation -> -- need to use preEscapedString or &'s are escaped to & in URL @@ -430,24 +444,32 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do then H5.figure $ mconcat [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat - [nl opts, img, capt, nl opts] + [nl opts, img, nl opts, capt, nl opts] blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents blockToHtml opts (Div attr@(_,classes,_) bs) = do - contents <- blockListToHtml opts bs + let speakerNotes = "notes" `elem` classes + -- we don't want incremental output inside speaker notes, see #1394 + let opts' = if speakerNotes then opts{ writerIncremental = False } else opts + contents <- blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts return $ - if "notes" `elem` classes - then let opts' = opts{ writerIncremental = False } in - -- we don't want incremental output inside speaker notes - case writerSlideVariant opts of + if speakerNotes + then case writerSlideVariant opts of RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' + DZSlides -> (addAttrs opts' attr $ H5.div $ contents') + ! (H5.customAttribute "role" "note") NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty else addAttrs opts attr $ H.div $ contents' -blockToHtml _ (RawBlock f str) +blockToHtml opts (RawBlock f str) | f == Format "html" = return $ preEscapedString str + | f == Format "latex" = + case writerHTMLMathMethod opts of + MathJax _ -> do modify (\st -> st{ stMath = True }) + return $ toHtml str + _ -> return mempty | otherwise = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do @@ -491,7 +513,7 @@ blockToHtml opts (BlockQuote blocks) = else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level (_,classes,_) lst) = do +blockToHtml opts (Header level attr@(_,classes,_) lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts && not (null secnum) @@ -499,7 +521,9 @@ blockToHtml opts (Header level (_,classes,_) lst) = do then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >> strToHtml " " >> contents else contents - return $ case level of + inElement <- gets stElement + return $ (if inElement then id else addAttrs opts attr) + $ case level of 1 -> H.h1 contents' 2 -> H.h2 contents' 3 -> H.h3 contents' @@ -512,7 +536,9 @@ blockToHtml opts (BulletList lst) = do return $ unordList opts contents blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (blockListToHtml opts) lst - let numstyle' = camelCaseToHyphenated $ show numstyle + let numstyle' = case numstyle of + Example -> "decimal" + _ -> camelCaseToHyphenated $ show numstyle let attribs = (if startnum /= 1 then [A.start $ toValue startnum] else []) ++ @@ -629,7 +655,9 @@ annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, [] -> unode "mrow" () [x] -> x xs -> unode "mrow" xs - math = add_attr (XML.Attr (unqual "xmlns") "http://www.w3.org/1998/Math/MathML") . unode "math" + math childs = XML.Element q as [XML.Elem childs] l + where + (XML.Element q as _ l) = e annotAttrs = [XML.Attr (unqual "encoding") "application/x-tex"] @@ -639,7 +667,8 @@ inlineToHtml opts inline = case inline of (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " - (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br + (LineBreak) -> return $ (if writerHtml5 opts then H5.br else H.br) + <> strToHtml "\n" (Span (id',classes,kvs) ils) -> inlineListToHtml opts ils >>= return . addAttrs opts attr' . H.span @@ -687,67 +716,72 @@ inlineToHtml opts inline = H.q `fmap` inlineListToHtml opts lst else (\x -> leftQuote >> x >> rightQuote) `fmap` inlineListToHtml opts lst - (Math t str) -> modify (\st -> st {stMath = True}) >> - (case writerHTMLMathMethod opts of - LaTeXMathML _ -> - -- putting LaTeXMathML in container with class "LaTeX" prevents - -- non-math elements on the page from being treated as math by - -- the javascript - return $ H.span ! A.class_ "LaTeX" $ - case t of - InlineMath -> toHtml ("$" ++ str ++ "$") - DisplayMath -> toHtml ("$$" ++ str ++ "$$") - JsMath _ -> do - let m = preEscapedString str - return $ case t of - InlineMath -> H.span ! A.class_ "math" $ m - DisplayMath -> H.div ! A.class_ "math" $ m - WebTeX url -> do - let imtag = if writerHtml5 opts then H5.img else H.img - let m = imtag ! A.style "vertical-align:middle" - ! A.src (toValue $ url ++ urlEncode str) - ! A.alt (toValue str) - ! A.title (toValue str) - let brtag = if writerHtml5 opts then H5.br else H.br - return $ case t of - InlineMath -> m - DisplayMath -> brtag >> m >> brtag - GladTeX -> - return $ case t of - InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" - DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" - MathML _ -> do - let dt = if t == InlineMath - then DisplayInline - else DisplayBlock - let conf = useShortEmptyTags (const False) - defaultConfigPP - case writeMathML dt <$> readTeX str of - Right r -> return $ preEscapedString $ - ppcElement conf (annotateMML r str) - Left _ -> inlineListToHtml opts - (texMathToInlines t str) >>= - return . (H.span ! A.class_ "math") - MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $ - case t of - InlineMath -> "\\(" ++ str ++ "\\)" - DisplayMath -> "\\[" ++ str ++ "\\]" - KaTeX _ _ -> return $ H.span ! A.class_ "math" $ - toHtml (case t of - InlineMath -> str - DisplayMath -> "\\displaystyle " ++ str) - PlainMath -> do - x <- inlineListToHtml opts (texMathToInlines t str) - let m = H.span ! A.class_ "math" $ x - let brtag = if writerHtml5 opts then H5.br else H.br - return $ case t of - InlineMath -> m - DisplayMath -> brtag >> m >> brtag ) + (Math t str) -> do + modify (\st -> st {stMath = True}) + let mathClass = toValue $ ("math " :: String) ++ + if t == InlineMath then "inline" else "display" + case writerHTMLMathMethod opts of + LaTeXMathML _ -> + -- putting LaTeXMathML in container with class "LaTeX" prevents + -- non-math elements on the page from being treated as math by + -- the javascript + return $ H.span ! A.class_ "LaTeX" $ + case t of + InlineMath -> toHtml ("$" ++ str ++ "$") + DisplayMath -> toHtml ("$$" ++ str ++ "$$") + JsMath _ -> do + let m = preEscapedString str + return $ case t of + InlineMath -> H.span ! A.class_ mathClass $ m + DisplayMath -> H.div ! A.class_ mathClass $ m + WebTeX url -> do + let imtag = if writerHtml5 opts then H5.img else H.img + let m = imtag ! A.style "vertical-align:middle" + ! A.src (toValue $ url ++ urlEncode str) + ! A.alt (toValue str) + ! A.title (toValue str) + let brtag = if writerHtml5 opts then H5.br else H.br + return $ case t of + InlineMath -> m + DisplayMath -> brtag >> m >> brtag + GladTeX -> + return $ case t of + InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" + DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" + MathML _ -> do + let dt = if t == InlineMath + then DisplayInline + else DisplayBlock + let conf = useShortEmptyTags (const False) + defaultConfigPP + case writeMathML dt <$> readTeX str of + Right r -> return $ preEscapedString $ + ppcElement conf (annotateMML r str) + Left _ -> inlineListToHtml opts + (texMathToInlines t str) >>= + return . (H.span ! A.class_ mathClass) + MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $ + case t of + InlineMath -> "\\(" ++ str ++ "\\)" + DisplayMath -> "\\[" ++ str ++ "\\]" + KaTeX _ _ -> return $ H.span ! A.class_ mathClass $ + toHtml (case t of + InlineMath -> str + DisplayMath -> "\\displaystyle " ++ str) + PlainMath -> do + x <- inlineListToHtml opts (texMathToInlines t str) + let m = H.span ! A.class_ mathClass $ x + let brtag = if writerHtml5 opts then H5.br else H.br + return $ case t of + InlineMath -> m + DisplayMath -> brtag >> m >> brtag (RawInline f str) | f == Format "latex" -> case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ toHtml str + MathJax _ -> do modify (\st -> st {stMath = True}) + return $ toHtml str _ -> return mempty | f == Format "html" -> return $ preEscapedString str | otherwise -> return mempty @@ -768,22 +802,15 @@ inlineToHtml opts inline = then link' else link' ! A.title (toValue tit) (Image txt (s,tit)) | treatAsImage s -> do - let alternate' = stringify txt let attributes = [A.src $ toValue s] ++ - (if null tit - then [] - else [A.title $ toValue tit]) ++ - if null txt - then [] - else [A.alt $ toValue alternate'] + [A.title $ toValue tit | not $ null tit] ++ + [A.alt $ toValue $ stringify txt] let tag = if writerHtml5 opts then H5.img else H.img return $ foldl (!) tag attributes -- note: null title included, as in Markdown.pl (Image _ (s,tit)) -> do let attributes = [A.src $ toValue s] ++ - (if null tit - then [] - else [A.title $ toValue tit]) + [A.title $ toValue tit | not $ null tit] return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl (Note contents) @@ -803,7 +830,9 @@ inlineToHtml opts inline = writerIdentifierPrefix opts ++ "fn" ++ ref) ! A.class_ "footnoteRef" ! prefixedId opts ("fnref" ++ ref) - $ H.sup + $ (if isJust (writerEpubVersion opts) + then id + else H.sup) $ toHtml ref return $ case writerEpubVersion opts of Just EPUB3 -> link ! customAttribute "epub:type" "noteref" diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index ae20efd4b..08e3e5b63 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings, FlexibleContexts #-} {- | Module : Text.Pandoc.Writers.ICML @@ -25,6 +25,7 @@ import Data.List (isPrefixOf, isInfixOf, stripPrefix) import Data.Text as Text (breakOnAll, pack) import Data.Monoid (mappend) import Control.Monad.State +import Network.URI (isURI) import qualified Data.Set as Set type Style = [String] @@ -70,7 +71,6 @@ linkName = "Link" -- block element names (appear in InDesign's paragraph styles pane) paragraphName :: String codeBlockName :: String -rawBlockName :: String blockQuoteName :: String orderedListName :: String bulletListName :: String @@ -93,7 +93,6 @@ subListParName :: String footnoteName :: String paragraphName = "Paragraph" codeBlockName = "CodeBlock" -rawBlockName = "Rawblock" blockQuoteName = "Blockquote" orderedListName = "NumList" bulletListName = "BulList" @@ -252,6 +251,13 @@ charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st else empty in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props +-- | Escape colon characters as %3a +escapeColons :: String -> String +escapeColons (x:xs) + | x == ':' = "%3a" ++ escapeColons xs + | otherwise = x : escapeColons xs +escapeColons [] = [] + -- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks. hyperlinksToDoc :: Hyperlink -> Doc hyperlinksToDoc [] = empty @@ -260,13 +266,13 @@ hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs hyp (ident, url) = hdest $$ hlink where hdest = selfClosingTag "HyperlinkURLDestination" - [("Self", "HyperlinkURLDestination/"++url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] + [("Self", "HyperlinkURLDestination/"++(escapeColons url)), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6 hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url), ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")] $ inTags True "Properties" [] $ inTags False "BorderColor" [("type","enumeration")] (text "Black") $$ (inTags False "Destination" [("type","object")] - $ text $ "HyperlinkURLDestination/"++(escapeStringForXML url)) + $ text $ "HyperlinkURLDestination/"++(escapeColons (escapeStringForXML url))) -- HyperlinkURLDestination with more than one colon crashes CS6 -- | Convert a list of Pandoc blocks to ICML. @@ -278,7 +284,9 @@ blockToICML :: WriterOptions -> Style -> Block -> WS Doc blockToICML opts style (Plain lst) = parStyle opts style lst blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str] -blockToICML opts style (RawBlock _ str) = parStyle opts (rawBlockName:style) $ [Str str] +blockToICML _ _ (RawBlock f str) + | f == Format "icml" = return $ text str + | otherwise = return empty blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst @@ -399,12 +407,14 @@ inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:styl inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"] inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"] -inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst] +inlineToICML opts style (Cite _ lst) = inlinesToICML opts style lst inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str inlineToICML _ style Space = charStyle style space inlineToICML _ style LineBreak = charStyle style $ text lineSeparator inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math -inlineToICML _ style (RawInline _ str) = charStyle style $ text $ escapeStringForXML str +inlineToICML _ _ (RawInline f str) + | f == Format "icml" = return $ text str + | otherwise = return empty inlineToICML opts style (Link lst (url, title)) = do content <- inlinesToICML opts (linkName:style) lst state $ \st -> @@ -497,6 +507,7 @@ imageICML _ style _ (linkURI, _) = hh = show $ imgHeight `div` 2 qw = show $ imgWidth `div` 4 qh = show $ imgHeight `div` 4 + uriPrefix = if isURI linkURI then "" else "file:" (stlStr, attrs) = styleToStrAttr style props = inTags True "Properties" [] $ inTags True "PathGeometry" [] $ inTags True "GeometryPathType" [("PathOpen","false")] @@ -516,7 +527,7 @@ imageICML _ style _ (linkURI, _) = $ vcat [ inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded" $$ selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0"), ("Right", hw), ("Bottom", hh)] - , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", linkURI)] + , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", uriPrefix++linkURI)] ] doc = inTags True "CharacterStyleRange" attrs $ inTags True "Rectangle" [("Self","uec"), ("ItemTransform", "1 0 0 1 "++qw++" -"++qh)] diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index ae2f4e907..506edd182 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, PatternGuards #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -38,12 +38,13 @@ import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Printf ( printf ) import Network.URI ( isURI, unEscapeString ) -import Data.List ( (\\), isSuffixOf, isInfixOf, stripPrefix, - isPrefixOf, intercalate, intersperse ) +import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) import Data.Maybe ( fromMaybe ) +import Data.Aeson.Types ( (.:), parseMaybe, withObject ) import Control.Applicative ((<|>)) import Control.Monad.State +import qualified Text.Parsec as P import Text.Pandoc.Pretty import Text.Pandoc.Slides import Text.Pandoc.Highlighting (highlight, styleToLaTeX, @@ -102,25 +103,33 @@ pandocToLaTeX options (Pandoc meta blocks) = do modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let template = writerTemplate options -- set stBook depending on documentclass + let colwidth = if writerWrapText options + then Just $ writerColumns options + else Nothing + metadata <- metaToJSON options + (fmap (render colwidth) . blockListToLaTeX) + (fmap (render colwidth) . inlineListToLaTeX) + meta let bookClasses = ["memoir","book","report","scrreprt","scrbook"] - case lookup "documentclass" (writerVariables options) of + let documentClass = case P.parse (do P.skipMany (P.satisfy (/='\\')) + P.string "\\documentclass" + P.skipMany (P.satisfy (/='{')) + P.char '{' + P.manyTill P.letter (P.char '}')) "template" + template of + Right r -> r + Left _ -> "" + case lookup "documentclass" (writerVariables options) `mplus` + parseMaybe (withObject "object" (.: "documentclass")) metadata of Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True} | otherwise -> return () - Nothing | any (\x -> "\\documentclass" `isPrefixOf` x && - (any (`isSuffixOf` x) bookClasses)) - (lines template) -> modify $ \s -> s{stBook = True} + Nothing | documentClass `elem` bookClasses + -> modify $ \s -> s{stBook = True} | otherwise -> return () -- check for \usepackage...{csquotes}; if present, we'll use -- \enquote{...} for smart quotes: when ("{csquotes}" `isInfixOf` template) $ modify $ \s -> s{stCsquotes = True} - let colwidth = if writerWrapText options - then Just $ writerColumns options - else Nothing - metadata <- metaToJSON options - (fmap (render colwidth) . blockListToLaTeX) - (fmap (render colwidth) . inlineListToLaTeX) - meta let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then (blocks', []) else case last blocks' of @@ -135,6 +144,11 @@ pandocToLaTeX options (Pandoc meta blocks) = do st <- get titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta + let (mainlang, otherlang) = + case (reverse . splitBy (==',') . filter (/=' ')) `fmap` + getField "lang" metadata of + Just (m:os) -> (m, reverse os) + _ -> ("", []) let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if stBook st @@ -159,8 +173,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "euro" (stUsesEuro st) $ defField "listings" (writerListings options || stLHS st) $ defField "beamer" (writerBeamer options) $ - defField "mainlang" (maybe "" (reverse . takeWhile (/=',') . reverse) - (lookup "lang" $ writerVariables options)) $ + defField "mainlang" mainlang $ + defField "otherlang" otherlang $ (if stHighlighting st then defField "highlighting-macros" (styleToLaTeX $ writerHighlightStyle options ) @@ -206,7 +220,7 @@ stringToLaTeX ctx (x:xs) = do '€' -> "\\euro{}" ++ rest '{' -> "\\{" ++ rest '}' -> "\\}" ++ rest - '$' -> "\\$" ++ rest + '$' | not isUrl -> "\\$" ++ rest '%' -> "\\%" ++ rest '&' -> "\\&" ++ rest '_' | not isUrl -> "\\_" ++ rest @@ -240,7 +254,7 @@ toLabel z = go `fmap` stringToLaTeX URLString z where go [] = "" go (x:xs) | (isLetter x || isDigit x) && isAscii x = x:go xs - | elem x "-+=:;." = x:go xs + | elem x ("-+=:;." :: String) = x:go xs | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs -- | Puts contents into LaTeX command. @@ -272,10 +286,11 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) let hasCode (Code _ _) = [True] hasCode _ = [] opts <- gets stOptions - let fragile = not $ null $ query hasCodeBlock elts ++ + let fragile = "fragile" `elem` classes || + not (null $ query hasCodeBlock elts ++ if writerListings opts then query hasCode elts - else [] + else []) let allowframebreaks = "allowframebreaks" `elem` classes let optionslist = ["fragile" | fragile] ++ ["allowframebreaks" | allowframebreaks] @@ -311,7 +326,8 @@ blockToLaTeX (Div (identifier,classes,_) bs) = do ref <- toLabel identifier let linkAnchor = if null identifier then empty - else "\\hyperdef{}" <> braces (text ref) <> "{}" + else "\\hyperdef{}" <> braces (text ref) <> + braces ("\\label" <> braces (text ref)) contents <- blockListToLaTeX bs if beamer && "notes" `elem` classes -- speaker notes then return $ "\\note" <> braces contents @@ -414,7 +430,7 @@ blockToLaTeX (BulletList lst) = do let inc = if incremental then "[<+->]" else "" items <- mapM listItemToLaTeX lst let spacing = if isTightList lst - then text "\\itemsep1pt\\parskip0pt\\parsep0pt" + then text "\\tightlist" else empty return $ text ("\\begin{itemize}" ++ inc) $$ spacing $$ vcat items $$ "\\end{itemize}" @@ -449,7 +465,7 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do else "\\setcounter" <> braces enum <> braces (text $ show $ start - 1) let spacing = if isTightList lst - then text "\\itemsep1pt\\parskip0pt\\parsep0pt" + then text "\\tightlist" else empty return $ text ("\\begin{enumerate}" ++ inc) $$ stylecommand @@ -463,7 +479,7 @@ blockToLaTeX (DefinitionList lst) = do let inc = if incremental then "[<+->]" else "" items <- mapM defListItemToLaTeX lst let spacing = if all isTightList (map snd lst) - then text "\\itemsep1pt\\parskip0pt\\parsep0pt" + then text "\\tightlist" else empty return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$ "\\end{description}" @@ -545,10 +561,16 @@ fixLineBreaks' ils = case splitBy (== LineBreak) ils of where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++ [RawInline "tex" "}"] +-- We also change display math to inline math, since display +-- math breaks in simple tables. +displayMathToInline :: Inline -> Inline +displayMathToInline (Math DisplayMath x) = Math InlineMath x +displayMathToInline x = x + tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) -> State WriterState Doc tableCellToLaTeX _ (0, _, blocks) = - blockListToLaTeX $ walk fixLineBreaks blocks + blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks tableCellToLaTeX header (width, align, blocks) = do modify $ \st -> st{ stInMinipage = True, stNotes = [] } cellContents <- blockListToLaTeX blocks @@ -613,6 +635,7 @@ sectionHeader :: Bool -- True for unnumbered sectionHeader unnumbered ref level lst = do txt <- inlineListToLaTeX lst lab <- text `fmap` toLabel ref + plain <- stringToLaTeX TextString $ foldl (++) "" $ map stringify lst let noNote (Note _) = Str "" noNote x = x let lstNoNotes = walk noNote lst @@ -625,7 +648,12 @@ sectionHeader unnumbered ref level lst = do then return empty else do return $ brackets txtNoNotes - let stuffing = star <> optional <> braces txt + let contents = if render Nothing txt == plain + then braces txt + else braces (text "\\texorpdfstring" + <> braces txt + <> braces (text plain)) + let stuffing = star <> optional <> contents book <- gets stBook opts <- gets stOptions let level' = if book || writerChapters opts then level - 1 else level @@ -669,7 +697,7 @@ sectionHeader unnumbered ref level lst = do inlineListToLaTeX :: [Inline] -- ^ Inlines to convert -> State WriterState Doc inlineListToLaTeX lst = - mapM inlineToLaTeX (fixLineInitialSpaces lst) + mapM inlineToLaTeX (fixBreaks $ fixLineInitialSpaces lst) >>= return . hcat -- nonbreaking spaces (~) in LaTeX don't work after line breaks, -- so we turn nbsps after hard breaks to \hspace commands. @@ -681,6 +709,14 @@ inlineListToLaTeX lst = fixNbsps s = let (ys,zs) = span (=='\160') s in replicate (length ys) hspace ++ [Str zs] hspace = RawInline "latex" "\\hspace*{0.333em}" + -- linebreaks after blank lines cause problems: + fixBreaks [] = [] + fixBreaks ys@(LineBreak : LineBreak : _) = + case span (== LineBreak) ys of + (lbs, rest) -> RawInline "latex" + ("\\\\[" ++ show (length lbs) ++ + "\\baselineskip]") : fixBreaks rest + fixBreaks (y:ys) = y : fixBreaks ys isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True @@ -696,7 +732,8 @@ inlineToLaTeX (Span (id',classes,_) ils) = do ref <- toLabel id' let linkAnchor = if null id' then empty - else "\\hyperdef{}" <> braces (text ref) <> "{}" + else "\\hyperdef{}" <> braces (text ref) <> + braces ("\\label" <> braces (text ref)) fmap (linkAnchor <>) ((if noEmph then inCmd "textup" else id) . (if noStrong then inCmd "textnormal" else id) . @@ -730,10 +767,11 @@ inlineToLaTeX (Cite cits lst) = do inlineToLaTeX (Code (_,classes,_) str) = do opts <- gets stOptions + inHeading <- gets stInHeading case () of - _ | writerListings opts -> listingsCode + _ | writerListings opts && not inHeading -> listingsCode | writerHighlight opts && not (null classes) -> highlightCode - | otherwise -> rawCode + | otherwise -> rawCode where listingsCode = do inNote <- gets stInNote when inNote $ modify $ \s -> s{ stVerbInNote = True } @@ -746,8 +784,10 @@ inlineToLaTeX (Code (_,classes,_) str) = do Nothing -> rawCode Just h -> modify (\st -> st{ stHighlighting = True }) >> return (text h) - rawCode = liftM (text . (\s -> "\\texttt{" ++ s ++ "}")) + rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}")) $ stringToLaTeX CodeString str + where + escapeSpaces = concatMap (\c -> if c == ' ' then "\\ " else [c]) inlineToLaTeX (Quoted qt lst) = do contents <- inlineListToLaTeX lst csquotes <- liftM stCsquotes get @@ -780,7 +820,7 @@ inlineToLaTeX (RawInline f str) | f == Format "latex" || f == Format "tex" = return $ text str | otherwise = return empty -inlineToLaTeX (LineBreak) = return "\\\\" +inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 6b2c4c200..f91367eb9 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007-2014 John MacFarlane + Copyright : Copyright (C) 2007-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index f06f1d6cc..804f4101d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -44,7 +44,6 @@ import Data.Char ( isSpace, isPunctuation ) import Data.Ord ( comparing ) import Text.Pandoc.Pretty import Control.Monad.State -import qualified Data.Set as Set import Text.Pandoc.Writers.HTML (writeHtmlString) import Text.Pandoc.Readers.TeXMath (texMathToInlines) import Text.HTML.TagSoup (parseTags, isTagText, Tag(..)) @@ -57,12 +56,15 @@ import qualified Data.Text as T type Notes = [[Block]] type Refs = [([Inline], Target)] -data WriterState = WriterState { stNotes :: Notes - , stRefs :: Refs - , stIds :: [String] - , stPlain :: Bool } +data WriterState = WriterState { stNotes :: Notes + , stRefs :: Refs + , stRefShortcutable :: Bool + , stInList :: Bool + , stIds :: [String] + , stPlain :: Bool } instance Default WriterState - where def = WriterState{ stNotes = [], stRefs = [], stIds = [], stPlain = False } + where def = WriterState{ stNotes = [], stRefs = [], stRefShortcutable = True, + stInList = False, stIds = [], stPlain = False } -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String @@ -76,17 +78,7 @@ writeMarkdown opts document = -- pictures, or inline formatting). writePlain :: WriterOptions -> Pandoc -> String writePlain opts document = - evalState (pandocToMarkdown opts{ - writerExtensions = Set.delete Ext_escaped_line_breaks $ - Set.delete Ext_pipe_tables $ - Set.delete Ext_raw_html $ - Set.delete Ext_markdown_in_html_blocks $ - Set.delete Ext_raw_tex $ - Set.delete Ext_footnotes $ - Set.delete Ext_tex_math_dollars $ - Set.delete Ext_citations $ - writerExtensions opts } - document) def{ stPlain = True } + evalState (pandocToMarkdown opts document) def{ stPlain = True } pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc pandocTitleBlock tit auths dat = @@ -243,7 +235,8 @@ noteToMarkdown opts num blocks = do -- | Escape special characters for Markdown. escapeString :: WriterOptions -> String -> String escapeString opts = escapeStringUsing markdownEscapes - where markdownEscapes = backslashEscapes specialChars + where markdownEscapes = ('<', "<") : ('>', ">") : + backslashEscapes specialChars specialChars = (if isEnabled Ext_superscript opts then ('^':) @@ -254,7 +247,7 @@ escapeString opts = escapeStringUsing markdownEscapes (if isEnabled Ext_tex_math_dollars opts then ('$':) else id) $ - "\\`*_<>#" + "\\`*_[]#" -- | Construct table of contents from list of header blocks. tableOfContents :: WriterOptions -> [Block] -> Doc @@ -323,9 +316,9 @@ blockToMarkdown opts (Plain inlines) = do then Just $ writerColumns opts else Nothing let rendered = render colwidth contents - let escapeDelimiter (x:xs) | x `elem` ".()" = '\\':x:xs - | otherwise = x : escapeDelimiter xs - escapeDelimiter [] = [] + let escapeDelimiter (x:xs) | x `elem` (".()" :: String) = '\\':x:xs + | otherwise = x : escapeDelimiter xs + escapeDelimiter [] = [] let contents' = if isEnabled Ext_all_symbols_escapable opts && not (stPlain st) && beginsWithOrderedListMarker rendered then text $ escapeDelimiter rendered @@ -453,7 +446,7 @@ blockToMarkdown opts t@(Table caption aligns widths headers rows) = do $ Pandoc nullMeta [t] return $ nst $ tbl $$ blankline $$ caption'' $$ blankline blockToMarkdown opts (BulletList items) = do - contents <- mapM (bulletListItemToMarkdown opts) items + contents <- inList $ mapM (bulletListItemToMarkdown opts) items return $ cat contents <> blankline blockToMarkdown opts (OrderedList (start,sty,delim) items) = do let start' = if isEnabled Ext_startnum opts then start else 1 @@ -464,13 +457,22 @@ blockToMarkdown opts (OrderedList (start,sty,delim) items) = do let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' else m) markers - contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ + contents <- inList $ + mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ zip markers' items return $ cat contents <> blankline blockToMarkdown opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMarkdown opts) items + contents <- inList $ mapM (definitionListItemToMarkdown opts) items return $ cat contents <> blankline +inList :: State WriterState a -> State WriterState a +inList p = do + oldInList <- gets stInList + modify $ \st -> st{ stInList = True } + res <- p + modify $ \st -> st{ stInList = oldInList } + return res + addMarkdownAttribute :: String -> String addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of @@ -497,7 +499,12 @@ pipeTable headless aligns rawHeaders rawRows = do AlignCenter -> ':':replicate w '-' ++ ":" AlignRight -> replicate (w + 1) '-' ++ ":" AlignDefault -> replicate (w + 2) '-' - let header = if headless then empty else torow rawHeaders + -- note: pipe tables can't completely lack a + -- header; for a headerless table, we need a header of empty cells. + -- see jgm/pandoc#1996. + let header = if headless + then torow (replicate (length aligns) empty) + else torow rawHeaders let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ map toborder $ zip aligns widths) <> text "|" let body = vcat $ map torow rawRows @@ -677,12 +684,53 @@ getReference label (src, tit) = do -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . cat - where avoidBadWraps [] = [] - avoidBadWraps (Space:Str (c:cs):xs) - | c `elem` "-*+>" = Str (' ':c:cs) : avoidBadWraps xs - avoidBadWraps (x:xs) = x : avoidBadWraps xs +inlineListToMarkdown opts lst = do + inlist <- gets stInList + go (if inlist then avoidBadWrapsInList lst else lst) + where go [] = return empty + go (i:is) = case i of + (Link _ _) -> case is of + -- If a link is followed by another link or '[' we don't shortcut + (Link _ _):_ -> unshortcutable + Space:(Link _ _):_ -> unshortcutable + Space:(Str('[':_)):_ -> unshortcutable + Space:(RawInline _ ('[':_)):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str ('[':_):_ -> unshortcutable + (RawInline _ ('[':_)):_ -> unshortcutable + (RawInline _ (' ':'[':_)):_ -> unshortcutable + _ -> shortcutable + _ -> shortcutable + where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) + unshortcutable = do + iMark <- withState (\s -> s { stRefShortcutable = False }) + (inlineToMarkdown opts i) + modify (\s -> s {stRefShortcutable = True }) + fmap (iMark <>) (go is) + +avoidBadWrapsInList :: [Inline] -> [Inline] +avoidBadWrapsInList [] = [] +avoidBadWrapsInList (Space:Str ('>':cs):xs) = + Str (' ':'>':cs) : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str [c]:[]) + | c `elem` ['-','*','+'] = Str [' ', c] : [] +avoidBadWrapsInList (Space:Str [c]:Space:xs) + | c `elem` ['-','*','+'] = Str [' ', c] : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str cs:Space:xs) + | isOrderedListMarker cs = Str (' ':cs) : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str cs:[]) + | isOrderedListMarker cs = Str (' ':cs) : [] +avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs + +isOrderedListMarker :: String -> Bool +isOrderedListMarker xs = (last xs `elem` ['.',')']) && + isRight (runParserT (anyOrderedListMarker >> eof) + defaultParserState "" xs) + +isRight :: Either a b -> Bool +isRight (Right _) = True +isRight (Left _) = False escapeSpaces :: Inline -> Inline escapeSpaces (Str s) = Str $ substitute " " "\\ " s @@ -692,8 +740,10 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do + plain <- gets stPlain contents <- inlineListToMarkdown opts ils - return $ if isEnabled Ext_raw_html opts + return $ if not plain && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) then tagWithAttrs "span" attrs <> contents <> text "</span>" else contents inlineToMarkdown opts (Emph lst) = do @@ -713,26 +763,33 @@ inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst return $ if isEnabled Ext_strikeout opts then "~~" <> contents <> "~~" - else "<s>" <> contents <> "</s>" + else if isEnabled Ext_raw_html opts + then "<s>" <> contents <> "</s>" + else contents inlineToMarkdown opts (Superscript lst) = do contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_superscript opts then "^" <> contents <> "^" - else "<sup>" <> contents <> "</sup>" + else if isEnabled Ext_raw_html opts + then "<sup>" <> contents <> "</sup>" + else contents inlineToMarkdown opts (Subscript lst) = do contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_subscript opts then "~" <> contents <> "~" - else "<sub>" <> contents <> "</sub>" + else if isEnabled Ext_raw_html opts + then "<sub>" <> contents <> "</sub>" + else contents inlineToMarkdown opts (SmallCaps lst) = do plain <- gets stPlain - if plain - then inlineListToMarkdown opts $ capitalize lst - else do + if not plain && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) + then do contents <- inlineListToMarkdown opts lst return $ tagWithAttrs "span" - ("",[],[("style","font-variant:small-caps;")]) + ("",[],[("style","font-variant:small-caps;")]) <> contents <> text "</span>" + else inlineListToMarkdown opts $ capitalize lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "‘" <> contents <> "’" @@ -821,8 +878,8 @@ inlineToMarkdown opts (Cite (c:cs) lst) sdoc <- inlineListToMarkdown opts sinlines let k' = text (modekey m ++ "@" ++ k) r = case sinlines of - Str (y:_):_ | y `elem` ",;]@" -> k' <> sdoc - _ -> k' <+> sdoc + Str (y:_):_ | y `elem` (",;]@" :: String) -> k' <> sdoc + _ -> k' <+> sdoc return $ pdoc <+> r modekey SuppressAuthor = "-" modekey _ = "" @@ -838,6 +895,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do [Str s] | escapeURI s == srcSuffix -> True _ -> False let useRefLinks = writerReferenceLinks opts && not useAuto + shortcutable <- gets stRefShortcutable + let useShortcutRefLinks = shortcutable && + isEnabled Ext_shortcut_reference_links opts ref <- if useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto @@ -847,7 +907,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do else if useRefLinks then let first = "[" <> linktext <> "]" second = if txt == ref - then "[]" + then if useShortcutRefLinks + then "" + else "[]" else "[" <> reftext <> "]" in first <> second else if plain diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 3f392a5d0..2b7c47e24 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2008-2014 John MacFarlane + Copyright : Copyright (C) 2008-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -107,7 +107,7 @@ blockToMediaWiki (Para [Image txt (src,'f':'i':'g':':':tit)]) = do let opt = if null txt then "" else "|alt=" ++ if null tit then capt else tit ++ capt - return $ "[[Image:" ++ src ++ "|frame|none" ++ opt ++ "]]\n" + return $ "[[File:" ++ src ++ "|frame|none" ++ opt ++ "]]\n" blockToMediaWiki (Para inlines) = do tags <- asks useTags @@ -375,14 +375,14 @@ inlineToMediaWiki (RawInline f str) | f == Format "html" = return str | otherwise = return "" -inlineToMediaWiki (LineBreak) = return "<br />" +inlineToMediaWiki (LineBreak) = return "<br />\n" inlineToMediaWiki Space = return " " inlineToMediaWiki (Link txt (src, _)) = do label <- inlineListToMediaWiki txt case txt of - [Str s] | escapeURI s == src -> return src + [Str s] | isURI src && escapeURI s == src -> return src _ -> return $ if isURI src then "[" ++ src ++ " " ++ label ++ "]" else "[[" ++ src' ++ "|" ++ label ++ "]]" @@ -397,7 +397,7 @@ inlineToMediaWiki (Image alt (source, tit)) = do then "" else '|' : alt' else '|' : tit - return $ "[[Image:" ++ source ++ txt ++ "]]" + return $ "[[File:" ++ source ++ txt ++ "]]" inlineToMediaWiki (Note contents) = do contents' <- blockListToMediaWiki contents diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index cb821e40b..f342dc4f5 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Native - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 03f8e8ba4..b87a391fb 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ODT - Copyright : Copyright (C) 2008-2014 John MacFarlane + Copyright : Copyright (C) 2008-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -39,9 +39,10 @@ import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip import Control.Applicative ((<$>)) import Text.Pandoc.Options ( WriterOptions(..) ) -import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem', warn ) +import Text.Pandoc.Shared ( stringify, fetchItem', warn, + getDefaultReferenceODT ) import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) -import Text.Pandoc.MIME ( getMimeType ) +import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Writers.Shared ( fixDisplayMath ) @@ -51,7 +52,7 @@ import Text.Pandoc.XML import Text.Pandoc.Pretty import qualified Control.Exception as E import Data.Time.Clock.POSIX ( getPOSIXTime ) -import System.FilePath ( takeExtension, takeDirectory ) +import System.FilePath ( takeExtension, takeDirectory, (<.>)) -- | Produce an ODT file from a Pandoc document. writeODT :: WriterOptions -- ^ Writer options @@ -60,11 +61,10 @@ writeODT :: WriterOptions -- ^ Writer options writeODT opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts let title = docTitle meta - refArchive <- liftM toArchive $ + refArchive <- case writerReferenceODT opts of - Just f -> B.readFile f - Nothing -> (B.fromChunks . (:[])) `fmap` - readDataFile datadir "reference.odt" + Just f -> liftM toArchive $ B.readFile f + Nothing -> getDefaultReferenceODT datadir -- handle formulas and pictures picEntriesRef <- newIORef ([] :: [Entry]) doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc @@ -127,23 +127,31 @@ writeODT opts doc@(Pandoc meta _) = do return $ fromArchive archive'' transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline -transformPicMath opts entriesRef (Image lab (src,_)) = do +transformPicMath opts entriesRef (Image lab (src,t)) = do res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do warn $ "Could not find image `" ++ src ++ "', skipping..." return $ Emph lab - Right (img, _) -> do - let size = imageSize img - let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size + Right (img, mbMimeType) -> do + (w,h) <- case imageSize img of + Right size -> return $ sizeInPoints size + Left msg -> do + warn $ "Could not determine image size in `" ++ + src ++ "': " ++ msg + return (0,0) let tit' = show w ++ "x" ++ show h entries <- readIORef entriesRef - let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src + let extension = fromMaybe (takeExtension $ takeWhile (/='?') src) + (mbMimeType >>= extensionFromMimeType) + let newsrc = "Pictures/" ++ show (length entries) <.> extension let toLazy = B.fromChunks . (:[]) epochtime <- floor `fmap` getPOSIXTime let entry = toEntry newsrc epochtime $ toLazy img modifyIORef entriesRef (entry:) - return $ Image lab (newsrc, tit') + let fig | "fig:" `isPrefixOf` t = "fig:" + | otherwise = "" + return $ Image lab (newsrc, fig++tit') transformPicMath _ entriesRef (Math t math) = do entries <- readIORef entriesRef let dt = if t == InlineMath then DisplayInline else DisplayBlock diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index dd359f3f5..c7563d751 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OPML - Copyright : Copyright (C) 2013-2014 John MacFarlane + Copyright : Copyright (C) 2013-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -38,7 +38,7 @@ import Text.Pandoc.Writers.HTML (writeHtmlString) import Text.Pandoc.Writers.Markdown (writeMarkdown) import Text.Pandoc.Pretty import Data.Time -import System.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Locale (defaultTimeLocale) import qualified Text.Pandoc.Builder as B -- | Convert Pandoc document to string in OPML format. @@ -87,4 +87,3 @@ elementToOPML opts (Sec _ _num _ title elements) = | not (null blocks)] in inTags True "outline" attrs $ vcat (map (elementToOPML opts) rest) - diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 773d142f4..83e17c943 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,6 +1,6 @@ -{-# LANGUAGE PatternGuards, OverloadedStrings #-} +{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleContexts #-} {- -Copyright (C) 2008-2014 Andrea Rossato <andrea.rossato@ing.unitn.it> +Copyright (C) 2008-2015 Andrea Rossato <andrea.rossato@ing.unitn.it> and John MacFarlane. This program is free software; you can redistribute it and/or modify @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OpenDocument - Copyright : Copyright (C) 2008-2014 Andrea Rossato and John MacFarlane + Copyright : Copyright (C) 2008-2015 Andrea Rossato and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it> @@ -288,6 +288,8 @@ blockToOpenDocument o bs | Plain b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b + | Para [Image c (s,'f':'i':'g':':':t)] <- bs + = figure c s t | Para b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b @@ -334,7 +336,7 @@ blockToOpenDocument o bs mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles captionDoc <- if null c then return empty - else withParagraphStyle o "Caption" [Para c] + else withParagraphStyle o "TableCaption" [Para c] th <- if all null h then return empty else colHeadsToOpenDocument o name (map fst paraHStyles) h @@ -342,6 +344,12 @@ blockToOpenDocument o bs return $ inTags True "table:table" [ ("table:name" , name) , ("table:style-name", name) ] (vcat columns $$ th $$ vcat tr) $$ captionDoc + figure caption source title | null caption = + withParagraphStyle o "Figure" [Para [Image caption (source,title)]] + | otherwise = do + imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image caption (source,title)]] + captionDoc <- withParagraphStyle o "FigureCaption" [Para caption] + return $ imageDoc $$ captionDoc colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc colHeadsToOpenDocument o tn ns hs = @@ -553,4 +561,3 @@ textStyleAttr s ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] | otherwise = [] - diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 414883b29..90b396cae 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2010-2014 Puneeth Chaganti <punchagan@gmail.com> +Copyright (C) 2010-2015 Puneeth Chaganti <punchagan@gmail.com> and John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Org - Copyright : Copyright (C) 2010-2014 Puneeth Chaganti and John MacFarlane + Copyright : Copyright (C) 2010-2015 Puneeth Chaganti and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Puneeth Chaganti <punchagan@gmail.com> diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index a96670c96..151d3c2ae 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -48,11 +48,13 @@ import Data.Char (isSpace, toLower) type Refs = [([Inline], Target)] data WriterState = - WriterState { stNotes :: [[Block]] - , stLinks :: Refs - , stImages :: [([Inline], (String, String, Maybe String))] - , stHasMath :: Bool - , stOptions :: WriterOptions + WriterState { stNotes :: [[Block]] + , stLinks :: Refs + , stImages :: [([Inline], (String, String, Maybe String))] + , stHasMath :: Bool + , stHasRawTeX :: Bool + , stOptions :: WriterOptions + , stTopLevel :: Bool } -- | Convert Pandoc to RST. @@ -60,7 +62,8 @@ writeRST :: WriterOptions -> Pandoc -> String writeRST opts document = let st = WriterState { stNotes = [], stLinks = [], stImages = [], stHasMath = False, - stOptions = opts } + stHasRawTeX = False, stOptions = opts, + stTopLevel = True} in evalState (pandocToRST document) st -- | Return RST representation of document. @@ -78,23 +81,32 @@ pandocToRST (Pandoc meta blocks) = do (fmap (render colwidth) . blockListToRST) (fmap (trimr . render colwidth) . inlineListToRST) $ deleteMeta "title" $ deleteMeta "subtitle" meta - body <- blockListToRST blocks + body <- blockListToRST' True $ normalizeHeadings 1 blocks notes <- liftM (reverse . stNotes) get >>= notesToRST -- note that the notes may contain refs, so we do them first refs <- liftM (reverse . stLinks) get >>= refsToRST pics <- liftM (reverse . stImages) get >>= pictRefsToRST hasMath <- liftM stHasMath get + rawTeX <- liftM stHasRawTeX get let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics] let context = defField "body" main $ defField "toc" (writerTableOfContents opts) - $ defField "toc-depth" (writerTOCDepth opts) + $ defField "toc-depth" (show $ writerTOCDepth opts) $ defField "math" hasMath $ defField "title" (render Nothing title :: String) $ defField "math" hasMath + $ defField "rawtex" rawTeX $ metadata if writerStandalone opts then return $ renderTemplate' (writerTemplate opts) context else return main + where + normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' + where (cont,bs') = break (headerLtEq l) bs + headerLtEq level (Header l' _ _) = l' <= level + headerLtEq _ _ = False + normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs + normalizeHeadings _ [] = [] -- | Return RST representation of reference key table. refsToRST :: Refs -> State WriterState Doc @@ -105,7 +117,7 @@ keyToRST :: ([Inline], (String, String)) -> State WriterState Doc keyToRST (label, (src, _)) = do label' <- inlineListToRST label - let label'' = if ':' `elem` (render Nothing label') + let label'' = if ':' `elem` ((render Nothing label') :: String) then char '`' <> label' <> char '`' else label' return $ nowrap $ ".. _" <> label'' <> ": " <> text src @@ -173,7 +185,7 @@ blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do capt <- inlineListToRST txt let fig = "figure:: " <> text src let alt = ":alt: " <> if null tit then capt else text tit - return $ hang 3 ".. " $ fig $$ alt $+$ capt $$ blankline + return $ hang 3 ".. " (fig $$ alt $+$ capt) $$ blankline blockToRST (Para inlines) | LineBreak `elem` inlines = do -- use line block if LineBreaks lns <- mapM inlineListToRST $ splitBy (==LineBreak) inlines @@ -188,11 +200,21 @@ blockToRST (RawBlock f@(Format f') str) (nest 3 $ text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline -blockToRST (Header level _ inlines) = do +blockToRST (Header level (name,classes,_) inlines) = do contents <- inlineListToRST inlines - let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) - let border = text $ replicate (offset contents) headerChar - return $ nowrap $ contents $$ border $$ blankline + isTopLevel <- gets stTopLevel + if isTopLevel + then do + let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) + let border = text $ replicate (offset contents) headerChar + return $ nowrap $ contents $$ border $$ blankline + else do + let rub = "rubric:: " <> contents + let name' | null name = empty + | otherwise = ":name: " <> text name + let cls | null classes = empty + | otherwise = ":class: " <> text (unwords classes) + return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline blockToRST (CodeBlock (_,classes,kvs) str) = do opts <- stOptions <$> get let tabstop = writerTabStop opts @@ -239,8 +261,7 @@ blockToRST (Table caption _ widths headers rows) = do middle = hcat $ intersperse sep' blocks let makeRow = hpipeBlocks . zipWith lblock widthsInChars let head' = makeRow headers' - rows' <- mapM (\row -> do cols <- mapM blockListToRST row - return $ makeRow cols) rows + let rows' = map makeRow rawRows let border ch = char '+' <> char ch <> (hcat $ intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> @@ -253,7 +274,7 @@ blockToRST (Table caption _ widths headers rows) = do blockToRST (BulletList items) = do contents <- mapM bulletListItemToRST items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (OrderedList (start, style', delim) items) = do let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim then take (length items) $ repeat "#." @@ -265,11 +286,11 @@ blockToRST (OrderedList (start, style', delim) items) = do contents <- mapM (\(item, num) -> orderedListItemToRST item num) $ zip markers' items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (DefinitionList items) = do contents <- mapM definitionListItemToRST items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline -- | Convert bullet list item (list of blocks) to RST. bulletListItemToRST :: [Block] -> State WriterState Doc @@ -295,9 +316,19 @@ definitionListItemToRST (label, defs) = do return $ label' $$ nest tabstop (nestle contents <> cr) -- | Convert list of Pandoc block elements to RST. +blockListToRST' :: Bool + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToRST' topLevel blocks = do + tl <- gets stTopLevel + modify (\s->s{stTopLevel=topLevel}) + res <- vcat `fmap` mapM blockToRST blocks + modify (\s->s{stTopLevel=tl}) + return res + blockListToRST :: [Block] -- ^ List of block elements -> State WriterState Doc -blockListToRST blocks = mapM blockToRST blocks >>= return . vcat +blockListToRST = blockListToRST' False -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc @@ -334,12 +365,12 @@ inlineListToRST lst = okAfterComplex :: Inline -> Bool okAfterComplex Space = True okAfterComplex LineBreak = True - okAfterComplex (Str (c:_)) = isSpace c || c `elem` "-.,:;!?\\/'\")]}>–—" + okAfterComplex (Str (c:_)) = isSpace c || c `elem` ("-.,:;!?\\/'\")]}>–—" :: String) okAfterComplex _ = False okBeforeComplex :: Inline -> Bool okBeforeComplex Space = True okBeforeComplex LineBreak = True - okBeforeComplex (Str (c:_)) = isSpace c || c `elem` "-:/'\"<([{–—" + okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String) okBeforeComplex _ = False isComplex :: Inline -> Bool isComplex (Emph _) = True @@ -393,6 +424,9 @@ inlineToRST (Math t str) = do else blankline $$ (".. math:: " <> text str) $$ blankline inlineToRST (RawInline f x) | f == "rst" = return $ text x + | f == "latex" || f == "tex" = do + modify $ \st -> st{ stHasRawTeX = True } + return $ ":raw-latex:`" <> text x <> "`" | otherwise = return empty inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space @@ -427,7 +461,7 @@ inlineToRST (Image alternate (source, tit)) = do return $ "|" <> label <> "|" inlineToRST (Note contents) = do -- add to notes in state - notes <- get >>= return . stNotes + notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } let ref = show $ (length notes) + 1 return $ " [" <> text ref <> "]_" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 43405ce3c..9eb02ad02 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -56,9 +56,12 @@ rtfEmbedImage opts x@(Image _ (src,_)) = do "image/jpeg" -> "\\jpegblip" "image/png" -> "\\pngblip" _ -> error "Unknown file type" - let sizeSpec = case imageSize imgdata of - Nothing -> "" - Just sz -> "\\picw" ++ show xpx ++ + sizeSpec <- case imageSize imgdata of + Left msg -> do + warn $ "Could not determine image size in `" ++ + src ++ "': " ++ msg + return "" + Right sz -> return $ "\\picw" ++ show xpx ++ "\\pich" ++ show ypx ++ "\\picwgoal" ++ show (xpt * 20) ++ "\\pichgoal" ++ show (ypt * 20) @@ -106,7 +109,9 @@ writeRTF options (Pandoc meta@(Meta metamap) blocks) = $ metadata in if writerStandalone options then renderTemplate' (writerTemplate options) context - else body + else case reverse body of + ('\n':_) -> body + _ -> body ++ "\n" -- | Construct table of contents from list of header blocks. tableOfContents :: [Block] -> String diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 800e741a4..d94dbac46 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2013-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Shared - Copyright : Copyright (C) 2013-2014 John MacFarlane + Copyright : Copyright (C) 2013-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 8ac717bab..2325d1425 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2008-2014 John MacFarlane and Peter Wang +Copyright (C) 2008-2015 John MacFarlane and Peter Wang This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008-2014 John MacFarlane and Peter Wang + Copyright : Copyright (C) 2008-2015 John MacFarlane and Peter Wang License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -120,7 +120,7 @@ inCmd cmd contents = char '@' <> text cmd <> braces contents -- | Convert Pandoc block element to Texinfo. blockToTexinfo :: Block -- ^ Block to convert - -> State WriterState Doc + -> State WriterState Doc blockToTexinfo Null = return empty @@ -195,9 +195,9 @@ blockToTexinfo HorizontalRule = -- XXX can't get the equivalent from LaTeX.hs to work return $ text "@iftex" $$ text "@bigskip@hrule@bigskip" $$ - text "@end iftex" $$ + text "@end iftex" $$ text "@ifnottex" $$ - text (take 72 $ repeat '-') $$ + text (take 72 $ repeat '-') $$ text "@end ifnottex" blockToTexinfo (Header 0 _ lst) = do @@ -368,7 +368,7 @@ inlineListForNode = return . text . stringToTexinfo . -- periods, commas, colons, and parentheses are disallowed in node names disallowedInNode :: Char -> Bool -disallowedInNode c = c `elem` ".,:()" +disallowedInNode c = c `elem` (".,:()" :: String) -- | Convert inline element to Texinfo inlineToTexinfo :: Inline -- ^ Inline to convert @@ -421,8 +421,8 @@ inlineToTexinfo (RawInline f str) return $ text "@tex" $$ text str $$ text "@end tex" | f == "texinfo" = return $ text str | otherwise = return empty -inlineToTexinfo (LineBreak) = return $ text "@*" -inlineToTexinfo Space = return $ char ' ' +inlineToTexinfo (LineBreak) = return $ text "@*" <> cr +inlineToTexinfo Space = return space inlineToTexinfo (Link txt (src@('#':_), _)) = do contents <- escapeCommas $ inlineListToTexinfo txt diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 05eb50349..126c1e62e 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Textile - Copyright : Copyright (C) 2010-2014 John MacFarlane + Copyright : Copyright (C) 2010-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -85,6 +85,8 @@ escapeCharForTextile x = case x of '*' -> "*" '_' -> "_" '@' -> "@" + '+' -> "+" + '-' -> "-" '|' -> "|" '\x2014' -> " -- " '\x2013' -> " - " diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index 8000368aa..caa13f177 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> |
