diff options
Diffstat (limited to 'src/Text/Pandoc')
24 files changed, 1026 insertions, 1011 deletions
diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs index 8a5ccec5c..66490d5c6 100644 --- a/src/Text/Pandoc/Asciify.hs +++ b/src/Text/Pandoc/Asciify.hs @@ -17,7 +17,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.SelfContained + Module : Text.Pandoc.Asciify Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above diff --git a/src/Text/Pandoc/Compat/Except.hs b/src/Text/Pandoc/Compat/Except.hs new file mode 100644 index 000000000..7f5648e7a --- /dev/null +++ b/src/Text/Pandoc/Compat/Except.hs @@ -0,0 +1,27 @@ +{-# LANGUAGE CPP #-} +module Text.Pandoc.Compat.Except ( ExceptT + , Error(..) + , runExceptT + , throwError + , catchError ) + where + +#if MIN_VERSION_mtl(2,2,1) +import Control.Monad.Except + +class Error a where + noMsg :: a + strMsg :: String -> a + + noMsg = strMsg "" + strMsg _ = noMsg + +#else +import Control.Monad.Error +type ExceptT = ErrorT + +runExceptT :: ExceptT e m a -> m (Either e a) +runExceptT = runErrorT +#endif + + diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index b7a3a4b7b..8580a6914 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -163,7 +163,6 @@ githubMarkdownExtensions = Set.fromList , Ext_raw_html , Ext_tex_math_single_backslash , Ext_fenced_code_blocks - , Ext_fenced_code_attributes , Ext_auto_identifiers , Ext_ascii_identifiers , Ext_backtick_code_blocks diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 391131338..f77ce60d8 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,5 +1,8 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, TypeSynonymInstances, - FlexibleInstances#-} +{-# LANGUAGE + FlexibleContexts +, GeneralizedNewtypeDeriving +, TypeSynonymInstances +, FlexibleInstances #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -29,8 +32,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA A utility library with parsers used in pandoc readers. -} -module Text.Pandoc.Parsing ( (>>~), - anyLine, +module Text.Pandoc.Parsing ( anyLine, many1Till, notFollowedBy', oneOfStrings, @@ -98,6 +100,7 @@ module Text.Pandoc.Parsing ( (>>~), macro, applyMacros', Parser, + ParserT, F(..), runF, askF, @@ -166,7 +169,7 @@ import Text.Pandoc.XML (fromEntities) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.Parsec import Text.Parsec.Pos (newPos) -import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isDigit, +import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isHexDigit, isSpace ) import Data.List ( intercalate, transpose ) import Text.Pandoc.Shared @@ -177,12 +180,15 @@ import Text.Pandoc.Asciify (toAsciiChar) import Data.Default import qualified Data.Set as Set import Control.Monad.Reader -import Control.Applicative ((*>), (<*), (<$), liftA2, Applicative) +import Control.Monad.Identity +import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative) import Data.Monoid import Data.Maybe (catMaybes) type Parser t s = Parsec t s +type ParserT = ParsecT + newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor) runF :: F a -> ParserState -> a @@ -199,13 +205,8 @@ instance Monoid a => Monoid (F a) where mappend = liftM2 mappend mconcat = liftM mconcat . sequence --- | Like >>, but returns the operation on the left. --- (Suggested by Tillmann Rendel on Haskell-cafe list.) -(>>~) :: (Monad m) => m a -> m b -> m a -a >>~ b = a >>= \x -> b >> return x - -- | Parse any line of text -anyLine :: Parser [Char] st [Char] +anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char] anyLine = do -- This is much faster than: -- manyTill anyChar newline @@ -221,9 +222,10 @@ anyLine = do _ -> mzero -- | Like @manyTill@, but reads at least one item. -many1Till :: Parser [tok] st a - -> Parser [tok] st end - -> Parser [tok] st [a] +many1Till :: Stream s m t + => ParserT s st m a + -> ParserT s st m end + -> ParserT s st m [a] many1Till p end = do first <- p rest <- manyTill p end @@ -232,21 +234,21 @@ many1Till p end = do -- | A more general form of @notFollowedBy@. This one allows any -- type of parser to be specified, and succeeds only if that parser fails. -- It does not consume any input. -notFollowedBy' :: Show b => Parser [a] st b -> Parser [a] st () +notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m () notFollowedBy' p = try $ join $ do a <- try p return (unexpected (show a)) <|> return (return ()) -- (This version due to Andrew Pimlott on the Haskell mailing list.) -oneOfStrings' :: (Char -> Char -> Bool) -> [String] -> Parser [Char] st String +oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String oneOfStrings' _ [] = fail "no strings" oneOfStrings' matches strs = try $ do c <- anyChar let strs' = [xs | (x:xs) <- strs, x `matches` c] case strs' of [] -> fail "not found" - _ -> (c:) `fmap` oneOfStrings' matches strs' + _ -> (c:) <$> oneOfStrings' matches strs' <|> if "" `elem` strs' then return [c] else fail "not found" @@ -254,11 +256,11 @@ oneOfStrings' matches strs = try $ do -- | Parses one of a list of strings. If the list contains -- two strings one of which is a prefix of the other, the longer -- string will be matched if possible. -oneOfStrings :: [String] -> Parser [Char] st String +oneOfStrings :: Stream s m Char => [String] -> ParserT s st m String oneOfStrings = oneOfStrings' (==) -- | Parses one of a list of strings (tried in order), case insensitive. -oneOfStringsCI :: [String] -> Parser [Char] st String +oneOfStringsCI :: Stream s m Char => [String] -> ParserT s st m String oneOfStringsCI = oneOfStrings' ciMatch where ciMatch x y = toLower' x == toLower' y -- this optimizes toLower by checking common ASCII case @@ -269,35 +271,35 @@ oneOfStringsCI = oneOfStrings' ciMatch | otherwise = toLower c -- | Parses a space or tab. -spaceChar :: Parser [Char] st Char +spaceChar :: Stream s m Char => ParserT s st m Char spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. -nonspaceChar :: Parser [Char] st Char +nonspaceChar :: Stream s m Char => ParserT s st m Char nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r'] -- | Skips zero or more spaces or tabs. -skipSpaces :: Parser [Char] st () +skipSpaces :: Stream s m Char => ParserT s st m () skipSpaces = skipMany spaceChar -- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: Parser [Char] st Char +blankline :: Stream s m Char => ParserT s st m Char blankline = try $ skipSpaces >> newline -- | Parses one or more blank lines and returns a string of newlines. -blanklines :: Parser [Char] st [Char] +blanklines :: Stream s m Char => ParserT s st m [Char] blanklines = many1 blankline -- | Parses material enclosed between start and end parsers. -enclosed :: Parser [Char] st t -- ^ start parser - -> Parser [Char] st end -- ^ end parser - -> Parser [Char] st a -- ^ content parser (to be used repeatedly) - -> Parser [Char] st [a] +enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser + -> ParserT s st m end -- ^ end parser + -> ParserT s st m a -- ^ content parser (to be used repeatedly) + -> ParserT s st m [a] enclosed start end parser = try $ start >> notFollowedBy space >> many1Till parser end -- | Parse string, case insensitive. -stringAnyCase :: [Char] -> Parser [Char] st String +stringAnyCase :: Stream s m Char => [Char] -> ParserT s st m String stringAnyCase [] = string "" stringAnyCase (x:xs) = do firstChar <- char (toUpper x) <|> char (toLower x) @@ -305,7 +307,7 @@ stringAnyCase (x:xs) = do return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: Parser [tok] st a -> [tok] -> Parser [tok] st a +parseFromString :: Stream s m t => ParserT s st m a -> s -> ParserT s st m a parseFromString parser str = do oldPos <- getPosition oldInput <- getInput @@ -316,7 +318,7 @@ parseFromString parser str = do return result -- | Parse raw line block up to and including blank lines. -lineClump :: Parser [Char] st String +lineClump :: Stream [Char] m Char => ParserT [Char] st m String lineClump = blanklines <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) @@ -325,8 +327,8 @@ lineClump = blanklines -- pairs of open and close, which must be different. For example, -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" -- and return "hello (there)". -charsInBalanced :: Char -> Char -> Parser [Char] st Char - -> Parser [Char] st String +charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char + -> ParserT s st m String charsInBalanced open close parser = try $ do char open let isDelim c = c == open || c == close @@ -350,8 +352,8 @@ uppercaseRomanDigits :: [Char] uppercaseRomanDigits = map toUpper lowercaseRomanDigits -- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: Bool -- ^ Uppercase if true - -> Parser [Char] st Int +romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true + -> ParserT s st m Int romanNumeral upperCase = do let romanDigits = if upperCase then uppercaseRomanDigits @@ -383,12 +385,12 @@ romanNumeral upperCase = do -- | Parses an email address; returns original and corresponding -- escaped mailto: URI. -emailAddress :: Parser [Char] st (String, String) -emailAddress = try $ liftA2 toResult mailbox (char '@' *> domain) +emailAddress :: Stream s m Char => ParserT s st m (String, String) +emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom in (full, escapeURI $ "mailto:" ++ full) - mailbox = intercalate "." `fmap` (emailWord `sepby1` dot) - domain = intercalate "." `fmap` (subdomain `sepby1` dot) + mailbox = intercalate "." <$> (emailWord `sepby1` dot) + domain = intercalate "." <$> (subdomain `sepby1` dot) dot = char '.' subdomain = many1 $ alphaNum <|> innerPunct innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@') <* @@ -398,7 +400,7 @@ emailAddress = try $ liftA2 toResult mailbox (char '@' *> domain) isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;" -- note: sepBy1 from parsec consumes input when sep -- succeeds and p fails, so we use this variant here. - sepby1 p sep = liftA2 (:) p (many (try $ sep >> p)) + sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p)) -- Schemes from http://www.iana.org/assignments/uri-schemes.html plus @@ -426,11 +428,11 @@ schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid", "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri", "ymsgr"] -uriScheme :: Parser [Char] st String +uriScheme :: Stream s m Char => ParserT s st m String uriScheme = oneOfStringsCI schemes -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: Parser [Char] st (String, String) +uri :: Stream [Char] m Char => ParserT [Char] st m (String, String) uri = try $ do scheme <- uriScheme char ':' @@ -451,7 +453,7 @@ uri = try $ do <|> entity <|> (try $ punct >> lookAhead (void (satisfy isWordChar) <|> percentEscaped)) - str <- snd `fmap` withRaw (skipMany1 ( () <$ + str <- snd <$> withRaw (skipMany1 ( () <$ (enclosed (char '(') (char ')') uriChunk <|> enclosed (char '{') (char '}') uriChunk <|> enclosed (char '[') (char ']') uriChunk) @@ -460,7 +462,7 @@ uri = try $ do let uri' = scheme ++ ":" ++ fromEntities str' return (uri', escapeURI uri') -mathInlineWith :: String -> String -> Parser [Char] st String +mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String mathInlineWith op cl = try $ do string op notFollowedBy space @@ -474,12 +476,12 @@ mathInlineWith op cl = try $ do notFollowedBy digit -- to prevent capture of $5 return $ concat words' -mathDisplayWith :: String -> String -> Parser [Char] st String +mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String mathDisplayWith op cl = try $ do string op - many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) + many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl) -mathDisplay :: Parser [Char] ParserState String +mathDisplay :: Stream s m Char => ParserT s ParserState m String mathDisplay = (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -487,7 +489,7 @@ mathDisplay = <|> (guardEnabled Ext_tex_math_double_backslash >> mathDisplayWith "\\\\[" "\\\\]") -mathInline :: Parser [Char] ParserState String +mathInline :: Stream s m Char => ParserT s ParserState m String mathInline = (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -499,8 +501,9 @@ mathInline = -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement -- (source row) is ignored. -withHorizDisplacement :: Parser [Char] st a -- ^ Parser to apply - -> Parser [Char] st (a, Int) -- ^ (result, displacement) +withHorizDisplacement :: Stream s m Char + => ParserT s st m a -- ^ Parser to apply + -> ParserT s st m (a, Int) -- ^ (result, displacement) withHorizDisplacement parser = do pos1 <- getPosition result <- parser @@ -509,7 +512,7 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. -withRaw :: Monad m => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char]) +withRaw :: Stream [Char] m Char => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char]) withRaw parser = do pos1 <- getPosition inp <- getInput @@ -525,12 +528,13 @@ withRaw parser = do return (result, raw) -- | Parses backslash, then applies character parser. -escaped :: Parser [Char] st Char -- ^ Parser for character to escape - -> Parser [Char] st Char +escaped :: Stream s m Char + => ParserT s st m Char -- ^ Parser for character to escape + -> ParserT s st m Char escaped parser = try $ char '\\' >> parser -- | Parse character entity. -characterReference :: Parser [Char] st Char +characterReference :: Stream s m Char => ParserT s st m Char characterReference = try $ do char '&' ent <- many1Till nonspaceChar (char ';') @@ -539,19 +543,19 @@ characterReference = try $ do Nothing -> fail "entity not found" -- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: Parser [Char] st (ListNumberStyle, Int) +upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) upperRoman = do num <- romanNumeral True return (UpperRoman, num) -- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: Parser [Char] st (ListNumberStyle, Int) +lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) lowerRoman = do num <- romanNumeral False return (LowerRoman, num) -- | Parses a decimal numeral and returns (Decimal, number). -decimal :: Parser [Char] st (ListNumberStyle, Int) +decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) decimal = do num <- many1 digit return (Decimal, read num) @@ -560,7 +564,8 @@ decimal = do -- returns (DefaultStyle, [next example number]). The next -- example number is incremented in parser state, and the label -- (if present) is added to the label table. -exampleNum :: Parser [Char] ParserState (ListNumberStyle, Int) +exampleNum :: Stream s m Char + => ParserT s ParserState m (ListNumberStyle, Int) exampleNum = do char '@' lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) @@ -574,38 +579,39 @@ exampleNum = do return (Example, num) -- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: Parser [Char] st (ListNumberStyle, Int) +defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) defaultNum = do char '#' return (DefaultStyle, 1) -- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: Parser [Char] st (ListNumberStyle, Int) +lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) lowerAlpha = do ch <- oneOf ['a'..'z'] return (LowerAlpha, ord ch - ord 'a' + 1) -- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: Parser [Char] st (ListNumberStyle, Int) +upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) upperAlpha = do ch <- oneOf ['A'..'Z'] return (UpperAlpha, ord ch - ord 'A' + 1) -- | Parses a roman numeral i or I -romanOne :: Parser [Char] st (ListNumberStyle, Int) +romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) romanOne = (char 'i' >> return (LowerRoman, 1)) <|> (char 'I' >> return (UpperRoman, 1)) -- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: Parser [Char] ParserState ListAttributes +anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes anyOrderedListMarker = choice $ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], numParser <- [decimal, exampleNum, defaultNum, romanOne, lowerAlpha, lowerRoman, upperAlpha, upperRoman]] -- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: Parser [Char] st (ListNumberStyle, Int) - -> Parser [Char] st ListAttributes +inPeriod :: Stream s m Char + => ParserT s st m (ListNumberStyle, Int) + -> ParserT s st m ListAttributes inPeriod num = try $ do (style, start) <- num char '.' @@ -615,16 +621,18 @@ inPeriod num = try $ do return (start, style, delim) -- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: Parser [Char] st (ListNumberStyle, Int) - -> Parser [Char] st ListAttributes +inOneParen :: Stream s m Char + => ParserT s st m (ListNumberStyle, Int) + -> ParserT s st m ListAttributes inOneParen num = try $ do (style, start) <- num char ')' return (start, style, OneParen) -- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: Parser [Char] st (ListNumberStyle, Int) - -> Parser [Char] st ListAttributes +inTwoParens :: Stream s m Char + => ParserT s st m (ListNumberStyle, Int) + -> ParserT s st m ListAttributes inTwoParens num = try $ do char '(' (style, start) <- num @@ -633,9 +641,10 @@ inTwoParens num = try $ do -- | Parses an ordered list marker with a given style and delimiter, -- returns number. -orderedListMarker :: ListNumberStyle +orderedListMarker :: Stream s m Char + => ListNumberStyle -> ListNumberDelim - -> Parser [Char] ParserState Int + -> ParserT s ParserState m Int orderedListMarker style delim = do let num = defaultNum <|> -- # can continue any kind of list case style of @@ -655,12 +664,12 @@ orderedListMarker style delim = do return start -- | Parses a character reference and returns a Str element. -charRef :: Parser [Char] st Inline +charRef :: Stream s m Char => ParserT s st m Inline charRef = do c <- characterReference return $ Str [c] -lineBlockLine :: Parser [Char] st String +lineBlockLine :: Stream [Char] m Char => ParserT [Char] st m String lineBlockLine = try $ do char '|' char ' ' @@ -671,7 +680,7 @@ lineBlockLine = try $ do return $ white ++ unwords (line : continuations) -- | Parses an RST-style line block and returns a list of strings. -lineBlockLines :: Parser [Char] st [String] +lineBlockLines :: Stream [Char] m Char => ParserT [Char] st m [String] lineBlockLines = try $ do lines' <- many1 lineBlockLine skipMany1 $ blankline <|> try (char '|' >> blankline) @@ -679,11 +688,12 @@ lineBlockLines = try $ do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. -tableWith :: Parser [Char] ParserState ([[Block]], [Alignment], [Int]) - -> ([Int] -> Parser [Char] ParserState [[Block]]) - -> Parser [Char] ParserState sep - -> Parser [Char] ParserState end - -> Parser [Char] ParserState Block +tableWith :: Stream s m Char + => ParserT s ParserState m ([[Block]], [Alignment], [Int]) + -> ([Int] -> ParserT s ParserState m [[Block]]) + -> ParserT s ParserState m sep + -> ParserT s ParserState m end + -> ParserT s ParserState m Block tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser lines' <- rowParser indices `sepEndBy1` lineParser @@ -725,9 +735,10 @@ widthsFromIndices numColumns' indices = -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTableWith :: Parser [Char] ParserState [Block] -- ^ Block list parser +gridTableWith :: Stream [Char] m Char + => ParserT [Char] ParserState m [Block] -- ^ Block list parser -> Bool -- ^ Headerless table - -> Parser [Char] ParserState Block + -> ParserT [Char] ParserState m Block gridTableWith blocks headless = tableWith (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter @@ -736,27 +747,28 @@ gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ trimr line -gridPart :: Char -> Parser [Char] st (Int, Int) +gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int) gridPart ch = do dashes <- many1 (char ch) char '+' return (length dashes, length dashes + 1) -gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline +gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)] +gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: String -> String removeFinalBar = reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse -- | Separator between rows of grid table. -gridTableSep :: Char -> Parser [Char] ParserState Char +gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. -gridTableHeader :: Bool -- ^ Headerless table - -> Parser [Char] ParserState [Block] - -> Parser [Char] ParserState ([[Block]], [Alignment], [Int]) +gridTableHeader :: Stream [Char] m Char + => Bool -- ^ Headerless table + -> ParserT [Char] ParserState m [Block] + -> ParserT [Char] ParserState m ([[Block]], [Alignment], [Int]) gridTableHeader headless blocks = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -779,16 +791,17 @@ gridTableHeader headless blocks = try $ do heads <- mapM (parseFromString blocks) $ map trim rawHeads return (heads, aligns, indices) -gridTableRawLine :: [Int] -> Parser [Char] ParserState [String] +gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline return (gridTableSplitLine indices line) -- | Parse row of grid table. -gridTableRow :: Parser [Char] ParserState [Block] +gridTableRow :: Stream [Char] m Char + => ParserT [Char] ParserState m [Block] -> [Int] - -> Parser [Char] ParserState [[Block]] + -> ParserT [Char] ParserState m [[Block]] gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ @@ -807,15 +820,16 @@ compactifyCell :: [Block] -> [Block] compactifyCell bs = head $ compactify [bs] -- | Parse footer for a grid table. -gridTableFooter :: Parser [Char] ParserState [Char] +gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char] gridTableFooter = blanklines --- -- | Parse a string with a given parser and state. -readWith :: Parser [Char] st a -- ^ parser +readWith :: (Show s, Stream s Identity Char) + => ParserT s st Identity a -- ^ parser -> st -- ^ initial state - -> [Char] -- ^ input + -> s -- ^ input -> a readWith parser state input = case runParser parser state "source" input of @@ -823,15 +837,16 @@ readWith parser state input = let errPos = errorPos err' errLine = sourceLine errPos errColumn = sourceColumn errPos - theline = (lines input ++ [""]) !! (errLine - 1) + theline = (lines (show input) ++ [""]) !! (errLine - 1) in error $ "\nError at " ++ show err' ++ "\n" ++ theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ "^" Right result -> result -- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => Parser [Char] ParserState a - -> String +testStringWith :: (Show s, Show a, Stream s Identity Char) + => ParserT s ParserState Identity a + -> s -> IO () testStringWith parser str = UTF8.putStrLn $ show $ readWith parser defaultParserState str @@ -878,9 +893,9 @@ instance HasMeta ParserState where class HasReaderOptions st where extractReaderOptions :: st -> ReaderOptions - getOption :: (ReaderOptions -> b) -> Parser s st b + getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b -- default - getOption f = (f . extractReaderOptions) `fmap` getState + getOption f = (f . extractReaderOptions) <$> getState instance HasReaderOptions ParserState where extractReaderOptions = stateOptions @@ -946,19 +961,19 @@ defaultParserState = stateWarnings = []} -- | Succeed only if the extension is enabled. -guardEnabled :: HasReaderOptions st => Extension -> Parser s st () +guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext -- | Succeed only if the extension is disabled. -guardDisabled :: HasReaderOptions st => Extension -> Parser s st () +guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext -- | Update the position on which the last string ended. -updateLastStrPos :: HasLastStrPosition st => Parser s st () +updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m () updateLastStrPos = getPosition >>= updateState . setLastStrPos -- | Whether we are right after the end of a string. -notAfterString :: HasLastStrPosition st => Parser s st Bool +notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool notAfterString = do pos <- getPosition st <- getState @@ -998,10 +1013,10 @@ type SubstTable = M.Map Key Inlines -- and the auto_identifers extension is set, generate a new -- unique identifier, and update the list of identifiers -- in state. -registerHeader :: (HasReaderOptions st, HasHeaderMap st, HasIdentifierList st) - => Attr -> Inlines -> Parser s st Attr +registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st) + => Attr -> Inlines -> ParserT s st m Attr registerHeader (ident,classes,kvs) header' = do - ids <- extractIdentifierList `fmap` getState + ids <- extractIdentifierList <$> getState exts <- getOption readerExtensions let insert' = M.insertWith (\_new old -> old) if null ident && Ext_auto_identifiers `Set.member` exts @@ -1020,25 +1035,28 @@ registerHeader (ident,classes,kvs) header' = do return (ident,classes,kvs) -- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: HasReaderOptions st => Parser s st () +failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m () failUnlessSmart = getOption readerSmart >>= guard -smartPunctuation :: Parser [Char] ParserState Inlines - -> Parser [Char] ParserState Inlines +smartPunctuation :: Stream s m Char + => ParserT s ParserState m Inlines + -> ParserT s ParserState m Inlines smartPunctuation inlineParser = do failUnlessSmart choice [ quoted inlineParser, apostrophe, dash, ellipses ] -apostrophe :: Parser [Char] ParserState Inlines +apostrophe :: Stream s m Char => ParserT s st m Inlines apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019") -quoted :: Parser [Char] ParserState Inlines - -> Parser [Char] ParserState Inlines +quoted :: Stream s m Char + => ParserT s ParserState m Inlines + -> ParserT s ParserState m Inlines quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser -withQuoteContext :: QuoteContext - -> Parser [tok] ParserState a - -> Parser [tok] ParserState a +withQuoteContext :: Stream s m t + => QuoteContext + -> ParserT s ParserState m a + -> ParserT s ParserState m a withQuoteContext context parser = do oldState <- getState let oldQuoteContext = stateQuoteContext oldState @@ -1048,108 +1066,95 @@ withQuoteContext context parser = do setState newState { stateQuoteContext = oldQuoteContext } return result -singleQuoted :: Parser [Char] ParserState Inlines - -> Parser [Char] ParserState Inlines +singleQuoted :: Stream s m Char + => ParserT s ParserState m Inlines + -> ParserT s ParserState m Inlines singleQuoted inlineParser = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= return . B.singleQuoted . mconcat -doubleQuoted :: Parser [Char] ParserState Inlines - -> Parser [Char] ParserState Inlines +doubleQuoted :: Stream s m Char + => ParserT s ParserState m Inlines + -> ParserT s ParserState m Inlines doubleQuoted inlineParser = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>= return . B.doubleQuoted . mconcat -failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState () +failIfInQuoteContext :: Stream s m t + => QuoteContext + -> ParserT s ParserState m () failIfInQuoteContext context = do st <- getState if stateQuoteContext st == context then fail "already inside quotes" else return () -charOrRef :: [Char] -> Parser [Char] st Char +charOrRef :: Stream s m Char => String -> ParserT s st m Char charOrRef cs = oneOf cs <|> try (do c <- characterReference guard (c `elem` cs) return c) -singleQuoteStart :: Parser [Char] ParserState () +singleQuoteStart :: Stream s m Char + => ParserT s ParserState m () singleQuoteStart = do failIfInQuoteContext InSingleQuote -- single quote start can't be right after str guard =<< notAfterString () <$ charOrRef "'\8216\145" -singleQuoteEnd :: Parser [Char] st () +singleQuoteEnd :: Stream s m Char + => ParserT s st m () singleQuoteEnd = try $ do charOrRef "'\8217\146" notFollowedBy alphaNum -doubleQuoteStart :: Parser [Char] ParserState () +doubleQuoteStart :: Stream s m Char + => ParserT s ParserState m () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" notFollowedBy . satisfy $ flip elem [' ', '\t', '\n'] -doubleQuoteEnd :: Parser [Char] st () -doubleQuoteEnd = do - charOrRef "\"\8221\148" - return () +doubleQuoteEnd :: Stream s m Char + => ParserT s st m () +doubleQuoteEnd = void (charOrRef "\"\8221\148") -ellipses :: Parser [Char] st Inlines -ellipses = do - try (charOrRef "\8230\133") <|> try (string "..." >> return '…') - return (B.str "\8230") +ellipses :: Stream s m Char + => ParserT s st m Inlines +ellipses = try (string "..." >> return (B.str "\8230")) -dash :: Parser [Char] ParserState Inlines -dash = do +dash :: (HasReaderOptions st, Stream s m Char) + => ParserT s st m Inlines +dash = try $ do oldDashes <- getOption readerOldDashes if oldDashes - then emDashOld <|> enDashOld - else B.str `fmap` (hyphenDash <|> emDash <|> enDash) - --- Two hyphens = en-dash, three = em-dash -hyphenDash :: Parser [Char] st String -hyphenDash = do - try $ string "--" - option "\8211" (char '-' >> return "\8212") - -emDash :: Parser [Char] st String -emDash = do - try (charOrRef "\8212\151") - return "\8212" - -enDash :: Parser [Char] st String -enDash = do - try (charOrRef "\8212\151") - return "\8211" - -enDashOld :: Parser [Char] st Inlines -enDashOld = do - try (charOrRef "\8211\150") <|> - try (char '-' >> lookAhead (satisfy isDigit) >> return '–') - return (B.str "\8211") - -emDashOld :: Parser [Char] st Inlines -emDashOld = do - try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-') - return (B.str "\8212") + then do + char '-' + (char '-' >> return (B.str "\8212")) + <|> (lookAhead digit >> return (B.str "\8211")) + else do + string "--" + (char '-' >> return (B.str "\8212")) + <|> return (B.str "\8211") -- This is used to prevent exponential blowups for things like: -- a**a*a**a*a**a*a**a*a**a*a**a*a** -nested :: Parser s ParserState a - -> Parser s ParserState a +nested :: Stream s m a + => ParserT s ParserState m a + -> ParserT s ParserState m a nested p = do - nestlevel <- stateMaxNestingLevel `fmap` getState + nestlevel <- stateMaxNestingLevel <$> getState guard $ nestlevel > 0 updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } res <- p updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String) +citeKey :: (Stream s m Char, HasLastStrPosition st) + => ParserT s st m (Bool, String) citeKey = try $ do guard =<< notAfterString suppress_author <- option False (char '-' *> return True) @@ -1166,7 +1171,8 @@ citeKey = try $ do -- -- | Parse a \newcommand or \renewcommand macro definition. -macro :: (HasMacros st, HasReaderOptions st) => Parser [Char] st Blocks +macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st) + => ParserT [Char] st m Blocks macro = do apply <- getOption readerApplyMacros inp <- getInput @@ -1181,10 +1187,12 @@ macro = do else return $ rawBlock "latex" def' -- | Apply current macros to string. -applyMacros' :: String -> Parser [Char] ParserState String +applyMacros' :: Stream [Char] m Char + => String + -> ParserT [Char] ParserState m String applyMacros' target = do apply <- getOption readerApplyMacros if apply - then do macros <- extractMacros `fmap` getState + then do macros <- extractMacros <$> getState return $ applyMacros macros target else return target diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 9f73f2e7f..882e8d7d8 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -91,7 +91,6 @@ import Data.List (delete, isPrefixOf, (\\), intercalate) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.ByteString.Base64 (encode) -import System.FilePath (combine) import qualified Data.Map as M import Control.Monad.Reader import Control.Monad.State @@ -102,8 +101,8 @@ readDocx :: ReaderOptions -> Pandoc readDocx opts bytes = case archiveToDocx (toArchive bytes) of - Just docx -> Pandoc nullMeta (docxToBlocks opts docx) - Nothing -> error $ "couldn't parse docx file" + Right docx -> Pandoc nullMeta (docxToBlocks opts docx) + Left _ -> error $ "couldn't parse docx file" data DState = DState { docxAnchorMap :: M.Map String String , docxInTexSubscript :: Bool } @@ -151,7 +150,7 @@ runStyleToContainers rPr = classContainers = case rStyle rPr of Nothing -> [] Just s -> spanClassToContainers s - + formatters = map Container $ mapMaybe id [ if isBold rPr then (Just Strong) else Nothing , if isItalic rPr then (Just Emph) else Nothing @@ -159,7 +158,7 @@ runStyleToContainers rPr = , if isStrike rPr then (Just Strikeout) else Nothing , if isSuperScript rPr then (Just Superscript) else Nothing , if isSubScript rPr then (Just Subscript) else Nothing - , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) + , rUnderline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) ] in classContainers ++ formatters @@ -189,7 +188,7 @@ parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` blockQuoteDivs = parStyleToContainers pPr | (_:cs) <- pStyle pPr = let pPr' = pPr { pStyle = cs} in - parStyleToContainers pPr' + parStyleToContainers pPr' parStyleToContainers pPr | null (pStyle pPr), Just left <- indentation pPr >>= leftParIndent, Just hang <- indentation pPr >>= hangingParIndent = @@ -206,7 +205,7 @@ parStyleToContainers pPr | null (pStyle pPr), True -> (Container BlockQuote) : (parStyleToContainers pPr') False -> parStyleToContainers pPr' parStyleToContainers _ = [] - + strToInlines :: String -> [Inline] strToInlines = toList . text @@ -259,20 +258,17 @@ runToInlines (Run rs runElems) | otherwise = return $ rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems) -runToInlines (Footnote fnId) = do - (Docx _ notes _ _ _ ) <- asks docxDocument - case (getFootNote fnId notes) of - Just bodyParts -> do - blks <- concatMapM bodyPartToBlocks bodyParts - return $ [Note blks] - Nothing -> return [Note []] -runToInlines (Endnote fnId) = do - (Docx _ notes _ _ _ ) <- asks docxDocument - case (getEndNote fnId notes) of - Just bodyParts -> do - blks <- concatMapM bodyPartToBlocks bodyParts - return $ [Note blks] - Nothing -> return [Note []] +runToInlines (Footnote bps) = + concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks]) +runToInlines (Endnote bps) = + concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks]) + +makeDataUrl :: String -> B.ByteString -> Maybe String +makeDataUrl fp bs = + case getMimeType fp of + Just mime -> Just $ "data:" ++ mime ++ ";base64," ++ + toString (encode $ BS.concat $ B.toChunks bs) + Nothing -> Nothing parPartToInlines :: ParPart -> DocxContext [Inline] parPartToInlines (PlainRun r) = runToInlines r @@ -313,22 +309,18 @@ parPartToInlines (BookMark _ anchor) = False -> anchor updateDState $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap} return [Span (anchor, ["anchor"], []) []] -parPartToInlines (Drawing relid) = do - (Docx _ _ _ rels _) <- asks docxDocument - return $ case lookupRelationship relid rels of - Just target -> [Image [] (combine "word" target, "")] - Nothing -> [Image [] ("", "")] +parPartToInlines (Drawing fp bs) = do + return $ case True of -- TODO: add self-contained images + True -> [Image [] (fp, "")] + False -> case makeDataUrl fp bs of + Just d -> [Image [] (d, "")] + Nothing -> [Image [] ("", "")] parPartToInlines (InternalHyperLink anchor runs) = do ils <- concatMapM runToInlines runs return [Link ils ('#' : anchor, "")] -parPartToInlines (ExternalHyperLink relid runs) = do - (Docx _ _ _ rels _) <- asks docxDocument - rs <- concatMapM runToInlines runs - return $ case lookupRelationship relid rels of - Just target -> - [Link rs (target, "")] - Nothing -> - [Link rs ("", "")] +parPartToInlines (ExternalHyperLink target runs) = do + ils <- concatMapM runToInlines runs + return [Link ils (target, "")] parPartToInlines (PlainOMath omath) = do s <- oMathToTexString omath return [Math InlineMath s] @@ -351,7 +343,7 @@ oMathElemToTexString (Bar style base) = do Top -> printf "\\overline{%s}" baseString Bottom -> printf "\\underline{%s}" baseString oMathElemToTexString (Box base) = baseToTexString base -oMathElemToTexString (BorderBox base) = +oMathElemToTexString (BorderBox base) = baseToTexString base >>= (\s -> return $ printf "\\boxed{%s}" s) oMathElemToTexString (Delimiter dPr bases) = do let beg = fromMaybe '(' (delimBegChar dPr) @@ -450,6 +442,9 @@ oMathElemToTexString (NAry _ sub sup base) = do baseString <- baseToTexString base return $ printf "\\int_{%s}^{%s}{%s}" subString supString baseString +oMathElemToTexString (Phantom base) = do + baseString <- baseToTexString base + return $ printf "\\phantom{%s}" baseString oMathElemToTexString (Radical degree base) = do degString <- concatMapM oMathElemToTexString degree baseString <- baseToTexString base @@ -475,12 +470,11 @@ oMathElemToTexString (Super base sup) = do supString <- concatMapM oMathElemToTexString sup return $ printf "%s^{%s}" baseString supString oMathElemToTexString (OMathRun _ run) = return $ stringToTex $ runToString run -oMathElemToTexString _ = return "[NOT IMPLEMENTED]" baseToTexString :: Base -> DocxContext String baseToTexString (Base mathElems) = concatMapM oMathElemToTexString mathElems - + isAnchorSpan :: Inline -> Bool isAnchorSpan (Span (ident, classes, kvs) ils) = @@ -518,9 +512,7 @@ makeHeaderAnchor blk = return blk parPartsToInlines :: [ParPart] -> DocxContext [Inline] parPartsToInlines parparts = do - ils <- concatMapM parPartToInlines parparts >>= - -- TODO: Option for self-containted images - (if False then (walkM makeImagesSelfContained) else return) + ils <- concatMapM parPartToInlines parparts return $ reduceList $ ils cellToBlocks :: Cell -> DocxContext [Block] @@ -543,7 +535,7 @@ bodyPartToBlocks (Paragraph pPr parparts) let otherConts = filter (not . isBlockCodeContainer) (parStyleToContainers pPr) in - return $ + return $ rebuild otherConts [CodeBlock ("", [], []) (concatMap parPartToString parparts)] @@ -563,23 +555,21 @@ bodyPartToBlocks (Paragraph pPr parparts) = do rebuild (parStyleToContainers pPr) [Para ils] -bodyPartToBlocks (ListItem pPr numId lvl parparts) = do - (Docx _ _ numbering _ _) <- asks docxDocument +bodyPartToBlocks (ListItem pPr numId lvl levelInfo parparts) = do let - kvs = case lookupLevel numId lvl numbering of - Just (_, fmt, txt, Just start) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - , ("start", (show start)) - ] - - Just (_, fmt, txt, Nothing) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - ] - Nothing -> [] + kvs = case levelInfo of + (_, fmt, txt, Just start) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + , ("start", (show start)) + ] + + (_, fmt, txt, Nothing) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + ] blks <- bodyPartToBlocks (Paragraph pPr parparts) return $ [Div ("", ["list-item"], kvs) blks] bodyPartToBlocks (Tbl _ _ _ []) = @@ -592,7 +582,7 @@ bodyPartToBlocks (Tbl cap _ look (r:rs)) = do hdrCells <- case hdr of Just r' -> rowToBlocksList r' Nothing -> return [] - + cells <- mapM rowToBlocksList rows let size = case null hdrCells of @@ -622,20 +612,6 @@ rewriteLink l@(Link ils ('#':target, title)) = do Nothing -> l rewriteLink il = return il -makeImagesSelfContained :: Inline -> DocxContext Inline -makeImagesSelfContained i@(Image alt (uri, title)) = do - (Docx _ _ _ _ media) <- asks docxDocument - return $ case lookup uri media of - Just bs -> - case getMimeType uri of - Just mime -> - let data_uri = "data:" ++ mime ++ ";base64," ++ - toString (encode $ BS.concat $ B.toChunks bs) - in - Image alt (data_uri, title) - Nothing -> i - Nothing -> i -makeImagesSelfContained inline = return inline bodyToBlocks :: Body -> DocxContext [Block] bodyToBlocks (Body bps) = do @@ -646,7 +622,7 @@ bodyToBlocks (Body bps) = do blocksToBullets $ blks docxToBlocks :: ReaderOptions -> Docx -> [Block] -docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = +docxToBlocks opts d@(Docx (Document _ body)) = let dState = DState { docxAnchorMap = M.empty , docxInTexSubscript = False} dEnv = DEnv { docxOptions = opts diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 1e37d0076..ea195c14a 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -121,7 +121,7 @@ handleListParagraphs ( in handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) - + separateBlocks' :: Block -> [[Block]] -> [[Block]] separateBlocks' blk ([] : []) = [[blk]] separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] @@ -139,7 +139,7 @@ flatToBullets' :: Integer -> [Block] -> [Block] flatToBullets' _ [] = [] flatToBullets' num xs@(b : elems) | getLevelN b == num = b : (flatToBullets' num elems) - | otherwise = + | otherwise = let bNumId = getNumIdN b bLevel = getLevelN b (children, remaining) = @@ -162,7 +162,7 @@ flatToBullets elems = flatToBullets' (-1) elems blocksToBullets :: [Block] -> [Block] blocksToBullets blks = - bottomUp removeListDivs $ + bottomUp removeListDivs $ flatToBullets $ (handleListParagraphs blks) plainParaInlines :: Block -> [Inline] @@ -216,12 +216,12 @@ removeListDivs' blk = [blk] removeListDivs :: [Block] -> [Block] removeListDivs = concatMap removeListDivs' - + blocksToDefinitions :: [Block] -> [Block] blocksToDefinitions = blocksToDefinitions' [] [] - - - + + + diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 44585b016..8541a1a3a 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -10,59 +10,55 @@ the Free Software Foundation; either version 2 of the License, or This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Docx.Parse - Copyright : Copyright (C) 2014 Jesse Rosenthal - License : GNU GPL, version 2 or above + Module : Text.Pandoc.Readers.Docx.Parse + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above - Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> - Stability : alpha - Portability : portable + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable Conversion of docx archive into Docx haskell type -} +module Text.Pandoc.Readers.Docx.Parse ( Docx(..) + , Document(..) + , Body(..) + , BodyPart(..) + , TblLook(..) + , ParPart(..) + , OMath(..) + , OMathElem(..) + , Base(..) + , TopBottom(..) + , AccentStyle(..) + , BarStyle(..) + , NAryStyle(..) + , DelimStyle(..) + , GroupStyle(..) + , Run(..) + , RunElem(..) + , Notes + , Numbering + , Relationship + , Media + , RunStyle(..) + , ParIndentation(..) + , ParagraphStyle(..) + , Row(..) + , Cell(..) + , archiveToDocx + ) where -module Text.Pandoc.Readers.Docx.Parse ( Docx(..) - , Document(..) - , Body(..) - , BodyPart(..) - , TblLook(..) - , ParPart(..) - , OMath(..) - , OMathElem(..) - , Base(..) - , TopBottom(..) - , AccentStyle(..) - , BarStyle(..) - , NAryStyle(..) - , DelimStyle(..) - , GroupStyle(..) - , Run(..) - , RunElem(..) - , Notes - , Numbering - , Relationship - , Media - , RunStyle(..) - , ParIndentation(..) - , ParagraphStyle(..) - , Row(..) - , Cell(..) - , getFootNote - , getEndNote - , lookupLevel - , lookupRelationship - , archiveToDocx - ) where import Codec.Archive.Zip import Text.XML.Light import Data.Maybe @@ -71,56 +67,53 @@ import System.FilePath import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import qualified Text.Pandoc.UTF8 as UTF8 +import Control.Monad.Reader +import qualified Data.Map as M +import Text.Pandoc.Compat.Except + +data ReaderEnv = ReaderEnv { envNotes :: Notes + , envNumbering :: Numbering + , envRelationships :: [Relationship] + , envMedia :: Media + } + deriving Show -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing +data DocxError = DocxError | WrongElem + deriving Show + +instance Error DocxError where + noMsg = WrongElem + +type D = ExceptT DocxError (Reader ReaderEnv) + +runD :: D a -> ReaderEnv -> Either DocxError a +runD dx re = runReader (runExceptT dx ) re + +maybeToD :: Maybe a -> D a +maybeToD (Just a) = return a +maybeToD Nothing = throwError DocxError + +mapD :: (a -> D b) -> [a] -> D [b] +mapD _ [] = return [] +mapD f (x:xs) = do + y <- (f x >>= (\z -> return [z])) `catchError` (\_ -> return []) + ys <- mapD f xs + return $ y ++ ys type NameSpaces = [(String, String)] -data Docx = Docx Document Notes Numbering [Relationship] Media +data Docx = Docx Document deriving Show -archiveToDocx :: Archive -> Maybe Docx -archiveToDocx archive = do - let notes = archiveToNotes archive - rels = archiveToRelationships archive - media = archiveToMedia archive - doc <- archiveToDocument archive - numbering <- archiveToNumbering archive - return $ Docx doc notes numbering rels media - -data Document = Document NameSpaces Body +data Document = Document NameSpaces Body deriving Show -archiveToDocument :: Archive -> Maybe Document -archiveToDocument zf = do - entry <- findEntryByPath "word/document.xml" zf - docElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) - bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem - body <- elemToBody namespaces bodyElem - return $ Document namespaces body +data Body = Body [BodyPart] + deriving Show type Media = [(FilePath, B.ByteString)] -filePathIsMedia :: FilePath -> Bool -filePathIsMedia fp = - let (dir, _) = splitFileName fp - in - (dir == "word/media/") - -getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString) -getMediaPair zf fp = - case findEntryByPath fp zf of - Just e -> Just (fp, fromEntry e) - Nothing -> Nothing - -archiveToMedia :: Archive -> Media -archiveToMedia zf = - mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf)) - data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] deriving Show @@ -133,240 +126,12 @@ data AbstractNumb = AbstractNumb String [Level] -- (ilvl, format, string, start) type Level = (String, String, String, Maybe Integer) -lookupLevel :: String -> String -> Numbering -> Maybe Level -lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do - absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs - lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs - lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls - return lvl - -numElemToNum :: NameSpaces -> Element -> Maybe Numb -numElemToNum ns element | - qName (elName element) == "num" && - qURI (elName element) == (lookup "w" ns) = do - numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element - absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - return $ Numb numId absNumId -numElemToNum _ _ = Nothing - -absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb -absNumElemToAbsNum ns element | - qName (elName element) == "abstractNum" && - qURI (elName element) == (lookup "w" ns) = do - absNumId <- findAttr - (QName "abstractNumId" (lookup "w" ns) (Just "w")) - element - let levelElems = findChildren - (QName "lvl" (lookup "w" ns) (Just "w")) - element - levels = mapMaybe (levelElemToLevel ns) levelElems - return $ AbstractNumb absNumId levels -absNumElemToAbsNum _ _ = Nothing - -levelElemToLevel :: NameSpaces -> Element -> Maybe Level -levelElemToLevel ns element | - qName (elName element) == "lvl" && - qURI (elName element) == (lookup "w" ns) = do - ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element - fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) - return (ilvl, fmt, txt, start) -levelElemToLevel _ _ = Nothing - -archiveToNumbering :: Archive -> Maybe Numbering -archiveToNumbering zf = - case findEntryByPath "word/numbering.xml" zf of - Nothing -> Just $ Numbering [] [] [] - Just entry -> do - numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) - numElems = findChildren - (QName "num" (lookup "w" namespaces) (Just "w")) - numberingElem - absNumElems = findChildren - (QName "abstractNum" (lookup "w" namespaces) (Just "w")) - numberingElem - nums = mapMaybe (numElemToNum namespaces) numElems - absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems - return $ Numbering namespaces nums absNums - -data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) - deriving Show - -noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart]) -noteElemToNote ns element - | qName (elName element) `elem` ["endnote", "footnote"] && - qURI (elName element) == (lookup "w" ns) = - do - noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - let bps = mapMaybe (elemToBodyPart ns) - $ elChildren element - return $ (noteId, bps) -noteElemToNote _ _ = Nothing - -getFootNote :: String -> Notes -> Maybe [BodyPart] -getFootNote s (Notes _ fns _) = fns >>= (lookup s) - -getEndNote :: String -> Notes -> Maybe [BodyPart] -getEndNote s (Notes _ _ ens) = ens >>= (lookup s) - -elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] -elemToNotes ns notetype element - | qName (elName element) == (notetype ++ "s") && - qURI (elName element) == (lookup "w" ns) = - Just $ mapMaybe (noteElemToNote ns) - $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element -elemToNotes _ _ _ = Nothing - -archiveToNotes :: Archive -> Notes -archiveToNotes zf = - let fnElem = findEntryByPath "word/footnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) - enElem = findEntryByPath "word/endnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) - fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) - Nothing -> [] - en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) - Nothing -> [] - ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces - fn = fnElem >>= (elemToNotes ns "footnote") - en = enElem >>= (elemToNotes ns "endnote") - in - Notes ns fn en - - data Relationship = Relationship (RelId, Target) deriving Show - -lookupRelationship :: RelId -> [Relationship] -> Maybe Target -lookupRelationship relid rels = - lookup relid (map (\(Relationship pair) -> pair) rels) - -filePathIsRel :: FilePath -> Bool -filePathIsRel fp = - let (dir, name) = splitFileName fp - in - (dir == "word/_rels/") && ((takeExtension name) == ".rels") - -relElemToRelationship :: Element -> Maybe Relationship -relElemToRelationship element | qName (elName element) == "Relationship" = - do - relId <- findAttr (QName "Id" Nothing Nothing) element - target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship (relId, target) -relElemToRelationship _ = Nothing - - -archiveToRelationships :: Archive -> [Relationship] -archiveToRelationships archive = - let relPaths = filter filePathIsRel (filesInArchive archive) - entries = mapMaybe (\f -> findEntryByPath f archive) relPaths - relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries - rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems - in - rels - -data Body = Body [BodyPart] - deriving Show - -elemToBody :: NameSpaces -> Element -> Maybe Body -elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = - Just $ Body - $ mapMaybe (elemToBodyPart ns) $ elChildren element -elemToBody _ _ = Nothing - -elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) -elemToNumInfo ns element - | qName (elName element) == "p" && - qURI (elName element) == (lookup "w" ns) = - do - pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element - numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr - lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - return (numId, lvl) -elemToNumInfo _ _ = Nothing - -elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart -elemToBodyPart ns element - | qName (elName element) == "p" && - qURI (elName element) == (lookup "w" ns) - , (c:_) <- findChildren (QName "oMathPara" (lookup "m" ns) (Just "m")) element = - let style = [] -- placeholder - maths = mapMaybe (elemToMath ns) - $ findChildren - (QName "oMath" (lookup "m" ns) (Just "m")) c - in - Just $ OMathPara style maths - | qName (elName element) == "p" && - qURI (elName element) == (lookup "w" ns) - , Just (numId, lvl) <- elemToNumInfo ns element = - let parstyle = elemToParagraphStyle ns element - parparts = mapMaybe (elemToParPart ns) - $ elChildren element - in - Just $ ListItem parstyle numId lvl parparts - | qName (elName element) == "p" && - qURI (elName element) == (lookup "w" ns) = - let parstyle = elemToParagraphStyle ns element - parparts = mapMaybe (elemToParPart ns) - $ elChildren element - in - Just $ Paragraph parstyle parparts - | qName (elName element) == "tbl" && - qURI (elName element) == (lookup "w" ns) = - let - caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element - >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w")) - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - grid = case - findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element - of - Just g -> elemToTblGrid ns g - Nothing -> [] - tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element - >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w")) - >>= elemToTblLook ns - in - Just $ Tbl - (fromMaybe "" caption) - grid - (fromMaybe defaultTblLook tblLook) - (mapMaybe (elemToRow ns) (elChildren element)) - | otherwise = Nothing - -elemToTblLook :: NameSpaces -> Element -> Maybe TblLook -elemToTblLook ns element - | qName (elName element) == "tblLook" && - qURI (elName element) == (lookup "w" ns) = - let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element - val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element - firstRowFmt = - case firstRow of - Just "1" -> True - Just _ -> False - Nothing -> case val of - Just bitMask -> testBitMask bitMask 0x020 - Nothing -> False - in - Just $ TblLook{firstRowFormatting = firstRowFmt} -elemToTblLook _ _ = Nothing - -testBitMask :: String -> Int -> Bool -testBitMask bitMaskS n = - case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of - [] -> False - ((n', _) : _) -> ((n' .|. n) /= 0) +data Notes = Notes NameSpaces + (Maybe (M.Map String Element)) + (Maybe (M.Map String Element)) + deriving Show data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer , rightParIndent :: Maybe Integer @@ -383,40 +148,9 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing } -elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation -elemToParIndentation ns element - | qName (elName element) == "ind" && - qURI (elName element) == (lookup "w" ns) = - Just $ ParIndentation { - leftParIndent = - findAttr (QName "left" (lookup "w" ns) (Just "w")) element >>= - stringToInteger - , rightParIndent = - findAttr (QName "right" (lookup "w" ns) (Just "w")) element >>= - stringToInteger - , hangingParIndent = - findAttr (QName "hanging" (lookup "w" ns) (Just "w")) element >>= - stringToInteger} -elemToParIndentation _ _ = Nothing - -elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle -elemToParagraphStyle ns element = - case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of - Just pPr -> - ParagraphStyle - {pStyle = - mapMaybe - (findAttr (QName "val" (lookup "w" ns) (Just "w"))) - (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) - , indentation = - findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= - elemToParIndentation ns - } - Nothing -> defaultParagraphStyle - data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String [ParPart] + | ListItem ParagraphStyle String String Level [ParPart] | Tbl String TblGrid TblLook [Row] | OMathPara OMathParaStyle [OMath] deriving Show @@ -429,62 +163,22 @@ data TblLook = TblLook {firstRowFormatting::Bool} defaultTblLook :: TblLook defaultTblLook = TblLook{firstRowFormatting = False} -stringToInteger :: String -> Maybe Integer -stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) - -elemToTblGrid :: NameSpaces -> Element -> TblGrid -elemToTblGrid ns element - | qName (elName element) == "tblGrid" && - qURI (elName element) == (lookup "w" ns) = - let - cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element - in - mapMaybe (\e -> - findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e - >>= stringToInteger - ) - cols -elemToTblGrid _ _ = [] - data Row = Row [Cell] deriving Show - -elemToRow :: NameSpaces -> Element -> Maybe Row -elemToRow ns element - | qName (elName element) == "tr" && - qURI (elName element) == (lookup "w" ns) = - let - cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element - in - Just $ Row (mapMaybe (elemToCell ns) cells) -elemToRow _ _ = Nothing - data Cell = Cell [BodyPart] deriving Show -elemToCell :: NameSpaces -> Element -> Maybe Cell -elemToCell ns element - | qName (elName element) == "tc" && - qURI (elName element) == (lookup "w" ns) = - Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element)) -elemToCell _ _ = Nothing - data ParPart = PlainRun Run | Insertion ChangeId Author ChangeDate [Run] | Deletion ChangeId Author ChangeDate [Run] | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] - | ExternalHyperLink RelId [Run] - | Drawing String + | ExternalHyperLink URL [Run] + | Drawing FilePath B.ByteString | PlainOMath OMath deriving Show -data Run = Run RunStyle [RunElem] - | Footnote String - | Endnote String - deriving Show - data OMath = OMath [OMathElem] deriving Show @@ -554,6 +248,12 @@ defaultGroupStyle = GroupStyle {groupChr = Nothing, groupPos = Nothing} type OMathRunStyle = [String] + +data Run = Run RunStyle [RunElem] + | Footnote [BodyPart] + | Endnote [BodyPart] + deriving Show + data RunElem = TextRun String | LnBrk | Tab deriving Show @@ -563,7 +263,7 @@ data RunStyle = RunStyle { isBold :: Bool , isStrike :: Bool , isSuperScript :: Bool , isSubScript :: Bool - , underline :: Maybe String + , rUnderline :: Maybe String , rStyle :: Maybe String } deriving Show @@ -574,104 +274,327 @@ defaultRunStyle = RunStyle { isBold = False , isStrike = False , isSuperScript = False , isSubScript = False - , underline = Nothing + , rUnderline = Nothing , rStyle = Nothing - } + } -elemToRunStyle :: NameSpaces -> Element -> RunStyle -elemToRunStyle ns element = - case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of - Just rPr -> - RunStyle - { - isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr - , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr - , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr - , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr - , isSuperScript = - (Just "superscript" == - (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")))) - , isSubScript = - (Just "subscript" == - (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")))) - , underline = - findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - , rStyle = - findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - } - Nothing -> defaultRunStyle -elemToRun :: NameSpaces -> Element -> Maybe Run -elemToRun ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - case - findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>= - findAttr (QName "id" (lookup "w" ns) (Just "w")) - of - Just s -> Just $ Footnote s - Nothing -> - case - findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>= - findAttr (QName "id" (lookup "w" ns) (Just "w")) - of - Just s -> Just $ Endnote s - Nothing -> Just $ - Run (elemToRunStyle ns element) - (elemToRunElems ns element) -elemToRun _ _ = Nothing - -elemToRunElem :: NameSpaces -> Element -> Maybe RunElem -elemToRunElem ns element - | (qName (elName element) == "t" || qName (elName element) == "delText") && - qURI (elName element) `elem` [(lookup "w" ns), (lookup "m" ns)] = - Just $ TextRun (strContent element) - | qName (elName element) == "br" && - qURI (elName element) `elem` [(lookup "w" ns), (lookup "m" ns)] = - Just $ LnBrk - | qName (elName element) == "tab" && - qURI (elName element) `elem` [(lookup "w" ns), (lookup "m" ns)] = - Just $ Tab - | otherwise = Nothing - - -elemToRunElems :: NameSpaces -> Element -> [RunElem] -elemToRunElems ns element - | qName (elName element) == "r" && - qURI (elName element) `elem` [(lookup "w" ns), (lookup "m" ns)] = - mapMaybe (elemToRunElem ns) (elChildren element) - | otherwise = [] - -elemToDrawing :: NameSpaces -> Element -> Maybe ParPart -elemToDrawing ns element - | qName (elName element) == "drawing" && - qURI (elName element) == (lookup "w" ns) = - let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" +type Target = String +type Anchor = String +type URL = String +type BookMarkId = String +type RelId = String +type ChangeId = String +type Author = String +type ChangeDate = String + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + +archiveToDocx :: Archive -> Either DocxError Docx +archiveToDocx archive = do + let notes = archiveToNotes archive + numbering = archiveToNumbering archive + rels = archiveToRelationships archive + media = archiveToMedia archive + rEnv = ReaderEnv notes numbering rels media + doc <- runD (archiveToDocument archive) rEnv + return $ Docx doc + + +archiveToDocument :: Archive -> D Document +archiveToDocument zf = do + entry <- maybeToD $ findEntryByPath "word/document.xml" zf + docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem + body <- elemToBody namespaces bodyElem + return $ Document namespaces body + +elemToBody :: NameSpaces -> Element -> D Body +elemToBody ns element | isElem ns "w" "body" element = + mapD (elemToBodyPart ns) (elChildren element) >>= + (\bps -> return $ Body bps) +elemToBody _ _ = throwError WrongElem + +archiveToNotes :: Archive -> Notes +archiveToNotes zf = + let fnElem = findEntryByPath "word/footnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + enElem = findEntryByPath "word/endnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + fn_namespaces = case fnElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + en_namespaces = case enElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces + fn = fnElem >>= (elemToNotes ns "footnote") + en = enElem >>= (elemToNotes ns "endnote") + in + Notes ns fn en + +filePathIsRel :: FilePath -> Bool +filePathIsRel fp = + let (dir, name) = splitFileName fp + in + (dir == "word/_rels/") && ((takeExtension name) == ".rels") + +relElemToRelationship :: Element -> Maybe Relationship +relElemToRelationship element | qName (elName element) == "Relationship" = + do + relId <- findAttr (QName "Id" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element + return $ Relationship (relId, target) +relElemToRelationship _ = Nothing + + +archiveToRelationships :: Archive -> [Relationship] +archiveToRelationships archive = + let relPaths = filter filePathIsRel (filesInArchive archive) + entries = mapMaybe (\f -> findEntryByPath f archive) relPaths + relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries + rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems + in + rels + +filePathIsMedia :: FilePath -> Bool +filePathIsMedia fp = + let (dir, _) = splitFileName fp + in + (dir == "word/media/") + +getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString) +getMediaPair zf fp = + case findEntryByPath fp zf of + Just e -> Just (fp, fromEntry e) + Nothing -> Nothing + +archiveToMedia :: Archive -> Media +archiveToMedia zf = + mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf)) + +lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do + absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs + lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs + lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls + return lvl + +numElemToNum :: NameSpaces -> Element -> Maybe Numb +numElemToNum ns element | + qName (elName element) == "num" && + qURI (elName element) == (lookup "w" ns) = do + numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element + absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + return $ Numb numId absNumId +numElemToNum _ _ = Nothing + +absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb +absNumElemToAbsNum ns element | + qName (elName element) == "abstractNum" && + qURI (elName element) == (lookup "w" ns) = do + absNumId <- findAttr + (QName "abstractNumId" (lookup "w" ns) (Just "w")) + element + let levelElems = findChildren + (QName "lvl" (lookup "w" ns) (Just "w")) + element + levels = mapMaybe (levelElemToLevel ns) levelElems + return $ AbstractNumb absNumId levels +absNumElemToAbsNum _ _ = Nothing + +levelElemToLevel :: NameSpaces -> Element -> Maybe Level +levelElemToLevel ns element | + qName (elName element) == "lvl" && + qURI (elName element) == (lookup "w" ns) = do + ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element + fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + return (ilvl, fmt, txt, start) +levelElemToLevel _ _ = Nothing + +archiveToNumbering' :: Archive -> Maybe Numbering +archiveToNumbering' zf = do + case findEntryByPath "word/numbering.xml" zf of + Nothing -> Just $ Numbering [] [] [] + Just entry -> do + numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + numElems = findChildren + (QName "num" (lookup "w" namespaces) (Just "w")) + numberingElem + absNumElems = findChildren + (QName "abstractNum" (lookup "w" namespaces) (Just "w")) + numberingElem + nums = mapMaybe (numElemToNum namespaces) numElems + absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems + return $ Numbering namespaces nums absNums + +archiveToNumbering :: Archive -> Numbering +archiveToNumbering archive = + fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) + +elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element) +elemToNotes ns notetype element + | isElem ns "w" (notetype ++ "s") element = + let pairs = mapMaybe + (\e -> findAttr (elemName ns "w" "id") e >>= + (\a -> Just (a, e))) + (findChildren (elemName ns "w" notetype) element) in - findElement (QName "blip" (Just a_ns) (Just "a")) element - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) - >>= (\s -> Just $ Drawing s) -elemToDrawing _ _ = Nothing + Just $ M.fromList $ pairs +elemToNotes _ _ _ = Nothing + +--------------------------------------------- +--------------------------------------------- + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix)) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + qName (elName element) == name && + qURI (elName element) == (lookup prefix ns) + + +elemToTblGrid :: NameSpaces -> Element -> D TblGrid +elemToTblGrid ns element | isElem ns "w" "tblGrid" element = + let cols = findChildren (elemName ns "w" "gridCol") element + in + mapD (\e -> maybeToD (findAttr (elemName ns "w" "val") e >>= stringToInteger)) + cols +elemToTblGrid _ _ = throwError WrongElem + +elemToTblLook :: NameSpaces -> Element -> D TblLook +elemToTblLook ns element | isElem ns "w" "tblLook" element = + let firstRow = findAttr (elemName ns "w" "firstRow") element + val = findAttr (elemName ns "w" "val") element + firstRowFmt = + case firstRow of + Just "1" -> True + Just _ -> False + Nothing -> case val of + Just bitMask -> testBitMask bitMask 0x020 + Nothing -> False + in + return $ TblLook{firstRowFormatting = firstRowFmt} +elemToTblLook _ _ = throwError WrongElem + +elemToRow :: NameSpaces -> Element -> D Row +elemToRow ns element | isElem ns "w" "tr" element = + do + let cellElems = findChildren (elemName ns "w" "tc") element + cells <- mapD (elemToCell ns) cellElems + return $ Row cells +elemToRow _ _ = throwError WrongElem -elemToMath :: NameSpaces -> Element -> Maybe OMath -elemToMath ns element - | qName (elName element) == "oMath" && - qURI (elName element) == (lookup "m" ns) = - Just $ OMath $ mapMaybe (elemToMathElem ns) (elChildren element) -elemToMath _ _ = Nothing +elemToCell :: NameSpaces -> Element -> D Cell +elemToCell ns element | isElem ns "w" "tc" element = + do + cellContents <- mapD (elemToBodyPart ns) (elChildren element) + return $ Cell cellContents +elemToCell _ _ = throwError WrongElem + +elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation +elemToParIndentation ns element | isElem ns "w" "ind" element = + Just $ ParIndentation { + leftParIndent = + findAttr (QName "left" (lookup "w" ns) (Just "w")) element >>= + stringToInteger + , rightParIndent = + findAttr (QName "right" (lookup "w" ns) (Just "w")) element >>= + stringToInteger + , hangingParIndent = + findAttr (QName "hanging" (lookup "w" ns) (Just "w")) element >>= + stringToInteger} +elemToParIndentation _ _ = Nothing + + +elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) +elemToNumInfo ns element | isElem ns "w" "p" element = do + let pPr = findChild (elemName ns "w" "pPr") element + numPr = pPr >>= findChild (elemName ns "w" "numPr") + lvl <- numPr >>= + findChild (elemName ns "w" "ilvl") >>= + findAttr (elemName ns "w" "val") + numId <- numPr >>= + findChild (elemName ns "w" "numId") >>= + findAttr (elemName ns "w" "val") + return (numId, lvl) +elemToNumInfo _ _ = Nothing +testBitMask :: String -> Int -> Bool +testBitMask bitMaskS n = + case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of + [] -> False + ((n', _) : _) -> ((n' .|. n) /= 0) +stringToInteger :: String -> Maybe Integer +stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) -elemToBase :: NameSpaces -> Element -> Maybe Base -elemToBase ns element - | qName (elName element) == "e" && - qURI (elName element) == (lookup "m" ns) = - Just $ Base $ mapMaybe (elemToMathElem ns) (elChildren element) -elemToBase _ _ = Nothing +elemToBodyPart :: NameSpaces -> Element -> D BodyPart +elemToBodyPart ns element + | isElem ns "w" "p" element + , (c:_) <- findChildren (elemName ns "m" "oMathPara") element = + do + let style = [] -- placeholder + maths <- mapD (elemToMath ns) (elChildren c) + return $ OMathPara style maths +elemToBodyPart ns element + | isElem ns "w" "p" element + , Just (numId, lvl) <- elemToNumInfo ns element = do + let parstyle = elemToParagraphStyle ns element + parparts <- mapD (elemToParPart ns) (elChildren element) + num <- asks envNumbering + case lookupLevel numId lvl num of + Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts + Nothing -> throwError WrongElem +elemToBodyPart ns element + | isElem ns "w" "p" element = do + let parstyle = elemToParagraphStyle ns element + parparts <- mapD (elemToParPart ns) (elChildren element) + return $ Paragraph parstyle parparts +elemToBodyPart ns element + | isElem ns "w" "tbl" element = do + let caption' = findChild (elemName ns "w" "tblPr") element + >>= findChild (elemName ns "w" "tblCaption") + >>= findAttr (elemName ns "w" "val") + caption = (fromMaybe "" caption') + grid' = case findChild (elemName ns "w" "tblGrid") element of + Just g -> elemToTblGrid ns g + Nothing -> return [] + tblLook' = case findChild (elemName ns "w" "tblPr") element >>= + findChild (elemName ns "w" "tblLook") + of + Just l -> elemToTblLook ns l + Nothing -> return defaultTblLook + + grid <- grid' + tblLook <- tblLook' + rows <- mapD (elemToRow ns) (elChildren element) + return $ Tbl caption grid tblLook rows +elemToBodyPart _ _ = throwError WrongElem + +elemToMath :: NameSpaces -> Element -> D OMath +elemToMath ns element | isElem ns "m" "oMath" element = + mapD (elemToMathElem ns) (elChildren element) >>= + (\es -> return $ OMath es) +elemToMath _ _ = throwError WrongElem + +elemToBase :: NameSpaces -> Element -> D Base +elemToBase ns element | isElem ns "m" "e" element = + mapD (elemToMathElem ns) (elChildren element) >>= + (\es -> return $ Base es) +elemToBase _ _ = throwError WrongElem elemToNAryStyle :: NameSpaces -> Element -> NAryStyle elemToNAryStyle ns element @@ -721,225 +644,287 @@ elemToGroupStyle ns element GroupStyle { groupChr = chr, groupPos = pos } elemToGroupStyle _ _ = defaultGroupStyle -elemToMathElem :: NameSpaces -> Element -> Maybe OMathElem -elemToMathElem ns element - | qName (elName element) == "acc" && - qURI (elName element) == (lookup "m" ns) = do - let accChar = - findChild (QName "accPr" (lookup "m" ns) (Just "m")) element >>= - findChild (QName "chr" (lookup "m" ns) (Just "m")) >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - Just . head - accPr = AccentStyle { accentChar = accChar} - base <-findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns - return $ Accent accPr base -elemToMathElem ns element - | qName (elName element) == "bar" && - qURI (elName element) == (lookup "m" ns) = do - barPr <- findChild (QName "barPr" (lookup "m" ns) (Just "m")) element >>= - findChild (QName "pos" (lookup "m" ns) (Just "m")) >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\s -> - Just $ BarStyle { - barPos = (if s == "bot" then Bottom else Top) - }) - base <-findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns - return $ Bar barPr base -elemToMathElem ns element - | qName (elName element) == "box" && - qURI (elName element) == (lookup "m" ns) = - findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns >>= - (\b -> Just $ Box b) -elemToMathElem ns element - | qName (elName element) == "borderBox" && - qURI (elName element) == (lookup "m" ns) = - findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns >>= - (\b -> Just $ BorderBox b) -elemToMathElem ns element - | qName (elName element) == "d" && - qURI (elName element) == (lookup "m" ns) = - let style = elemToDelimStyle ns element - in - Just $ Delimiter style $ mapMaybe (elemToBase ns) (elChildren element) -elemToMathElem ns element - | qName (elName element) == "eqArr" && - qURI (elName element) == (lookup "m" ns) = - Just $ EquationArray - $ mapMaybe (elemToBase ns) (elChildren element) -elemToMathElem ns element - | qName (elName element) == "f" && - qURI (elName element) == (lookup "m" ns) = do - num <- findChild (QName "num" (lookup "m" ns) (Just "m")) element - den <- findChild (QName "den" (lookup "m" ns) (Just "m")) element - let numElems = mapMaybe (elemToMathElem ns) (elChildren num) - denElems = mapMaybe (elemToMathElem ns) (elChildren den) - return $ Fraction numElems denElems -elemToMathElem ns element - | qName (elName element) == "func" && - qURI (elName element) == (lookup "m" ns) = do - fName <- findChild (QName "fName" (lookup "m" ns) (Just "m")) element - base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns - let fnElems = mapMaybe (elemToMathElem ns) (elChildren fName) - return $ Function fnElems base -elemToMathElem ns element - | qName (elName element) == "groupChr" && - qURI (elName element) == (lookup "m" ns) = - let style = elemToGroupStyle ns element - in - findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns >>= - (\b -> Just $ Group style b) -elemToMathElem ns element - | qName (elName element) == "limLow" && - qURI (elName element) == (lookup "m" ns) = do - base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element - >>= elemToBase ns - lim <- findChild (QName "lim" (lookup "m" ns) (Just "m")) element - return $ LowerLimit base (mapMaybe (elemToMathElem ns) (elChildren lim)) -elemToMathElem ns element - | qName (elName element) == "limUpp" && - qURI (elName element) == (lookup "m" ns) = do - base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element - >>= elemToBase ns - lim <- findChild (QName "lim" (lookup "m" ns) (Just "m")) element - return $ UpperLimit base (mapMaybe (elemToMathElem ns) (elChildren lim)) -elemToMathElem ns element - | qName (elName element) == "m" && - qURI (elName element) == (lookup "m" ns) = - let rows = findChildren (QName "mr" (lookup "m" ns) (Just "m")) element - bases = map (\mr -> mapMaybe (elemToBase ns) (elChildren mr)) rows - in - Just $ Matrix bases -elemToMathElem ns element - | qName (elName element) == "nary" && - qURI (elName element) == (lookup "m" ns) = do - let style = elemToNAryStyle ns element - sub <- findChild (QName "sub" (lookup "m" ns) (Just "m")) element >>= - (\e -> Just $ mapMaybe (elemToMathElem ns) (elChildren e)) - sup <- findChild (QName "sup" (lookup "m" ns) (Just "m")) element >>= - (\e -> Just $ mapMaybe (elemToMathElem ns) (elChildren e)) - base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns - return $ NAry style sub sup base -elemToMathElem ns element - | qName (elName element) == "rad" && - qURI (elName element) == (lookup "m" ns) = do - deg <- findChild (QName "deg" (lookup "m" ns) (Just "m")) element >>= - (\e -> Just $ mapMaybe (elemToMathElem ns) (elChildren e)) - base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns - return $ Radical deg base --- skipping for now: --- phant -elemToMathElem ns element - | qName (elName element) == "sPre" && - qURI (elName element) == (lookup "m" ns) = do - sub <- findChild (QName "sub" (lookup "m" ns) (Just "m")) element >>= - (\e -> Just $ mapMaybe (elemToMathElem ns) (elChildren e)) - sup <- findChild (QName "sup" (lookup "m" ns) (Just "m")) element >>= - (\e -> Just $ mapMaybe (elemToMathElem ns) (elChildren e)) - base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns - return $ PreSubSuper sub sup base -elemToMathElem ns element - | qName (elName element) == "sSub" && - qURI (elName element) == (lookup "m" ns) = do - base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns - sub <- findChild (QName "sub" (lookup "m" ns) (Just "m")) element >>= - (\e -> Just $ mapMaybe (elemToMathElem ns) (elChildren e)) - return $ Sub base sub -elemToMathElem ns element - | qName (elName element) == "sSubSup" && - qURI (elName element) == (lookup "m" ns) = do - base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns - sub <- findChild (QName "sub" (lookup "m" ns) (Just "m")) element >>= - (\e -> Just $ mapMaybe (elemToMathElem ns) (elChildren e)) - sup <- findChild (QName "sup" (lookup "m" ns) (Just "m")) element >>= - (\e -> Just $ mapMaybe (elemToMathElem ns) (elChildren e)) - return $ SubSuper base sub sup -elemToMathElem ns element - | qName (elName element) == "sSup" && - qURI (elName element) == (lookup "m" ns) = do - base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= - elemToBase ns - sup <- findChild (QName "sup" (lookup "m" ns) (Just "m")) element >>= - (\e -> Just $ mapMaybe (elemToMathElem ns) (elChildren e)) - return $ Super base sup -elemToMathElem ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "m" ns) = - let style = [] -- placeholder - rstyle = elemToRunStyle ns element - relems = elemToRunElems ns element - in - Just $ OMathRun style $ Run rstyle relems -elemToMathElem _ _ = Nothing +elemToMathElem :: NameSpaces -> Element -> D OMathElem +elemToMathElem ns element | isElem ns "m" "acc" element = do + let accChar = + findChild (QName "accPr" (lookup "m" ns) (Just "m")) element >>= + findChild (QName "chr" (lookup "m" ns) (Just "m")) >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + Just . head + accPr = AccentStyle { accentChar = accChar} + base <-(maybeToD $ findChild (elemName ns "m" "e") element) >>= + elemToBase ns + return $ Accent accPr base +elemToMathElem ns element | isElem ns "m" "bar" element = do + barPr <- maybeToD $ + findChild (QName "barPr" (lookup "m" ns) (Just "m")) element >>= + findChild (QName "pos" (lookup "m" ns) (Just "m")) >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\s -> + Just $ BarStyle { + barPos = (if s == "bot" then Bottom else Top) + }) + base <-maybeToD (findChild (QName "e" (lookup "m" ns) (Just "m")) element) >>= + elemToBase ns + return $ Bar barPr base +elemToMathElem ns element | isElem ns "m" "box" element = + maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns >>= + (\b -> return $ Box b) +elemToMathElem ns element | isElem ns "m" "borderBox" element = + maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns >>= + (\b -> return $ BorderBox b) +elemToMathElem ns element | isElem ns "m" "d" element = + let style = elemToDelimStyle ns element + in + mapD (elemToBase ns) (elChildren element) >>= + (\es -> return $ Delimiter style es) +elemToMathElem ns element | isElem ns "m" "eqArr" element = + mapD (elemToBase ns) (elChildren element) >>= + (\es -> return $ EquationArray es) +elemToMathElem ns element | isElem ns "m" "f" element = do + num <- maybeToD $ findChild (elemName ns "m" "num") element + den <- maybeToD $ findChild (elemName ns "m" "den") element + numElems <- mapD (elemToMathElem ns) (elChildren num) + denElems <- mapD (elemToMathElem ns) (elChildren den) + return $ Fraction numElems denElems +elemToMathElem ns element | isElem ns "m" "func" element = do + fName <- maybeToD $ findChild (elemName ns "m" "fName") element + base <- maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns + fnElems <- mapD (elemToMathElem ns) (elChildren fName) + return $ Function fnElems base +elemToMathElem ns element | isElem ns "m" "groupChr" element = + let style = elemToGroupStyle ns element + in + maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns >>= + (\b -> return $ Group style b) +elemToMathElem ns element | isElem ns "m" "limLow" element = do + base <- maybeToD (findChild (elemName ns "m" "e") element) + >>= elemToBase ns + lim <- maybeToD $ findChild (elemName ns "m" "lim") element + limElems <- mapD (elemToMathElem ns) (elChildren lim) + return $ LowerLimit base limElems +elemToMathElem ns element | isElem ns "m" "limUpp" element = do + base <- maybeToD (findChild (elemName ns "m" "e") element) + >>= elemToBase ns + lim <- maybeToD $ findChild (elemName ns "m" "lim") element + limElems <- mapD (elemToMathElem ns) (elChildren lim) + return $ UpperLimit base limElems +elemToMathElem ns element | isElem ns "m" "m" element = do + let rows = findChildren (elemName ns "m" "mr") element + bases <- mapD (\mr -> mapD (elemToBase ns) (elChildren mr)) rows + return $ Matrix bases +elemToMathElem ns element | isElem ns "m" "nary" element = do + let style = elemToNAryStyle ns element + sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= + (\e -> mapD (elemToMathElem ns) (elChildren e)) + sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= + (\e -> mapD (elemToMathElem ns) (elChildren e)) + base <- maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns + return $ NAry style sub sup base +elemToMathElem ns element | isElem ns "m" "rad" element = do + deg <- maybeToD (findChild (elemName ns "m" "deg") element) >>= + (\e -> mapD (elemToMathElem ns) (elChildren e)) + base <- maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns + return $ Radical deg base +elemToMathElem ns element | isElem ns "m" "phant" element = do + base <- maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns + return $ Phantom base +elemToMathElem ns element | isElem ns "m" "sPre" element = do + sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= + (\e -> mapD (elemToMathElem ns) (elChildren e)) + sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= + (\e -> mapD (elemToMathElem ns) (elChildren e)) + base <- maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns + return $ PreSubSuper sub sup base +elemToMathElem ns element | isElem ns "m" "sSub" element = do + base <- maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns + sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= + (\e -> mapD (elemToMathElem ns) (elChildren e)) + return $ Sub base sub +elemToMathElem ns element | isElem ns "m" "sSubSup" element = do + base <- maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns + sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= + (\e -> mapD (elemToMathElem ns) (elChildren e)) + sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= + (\e -> mapD (elemToMathElem ns) (elChildren e)) + return $ SubSuper base sub sup +elemToMathElem ns element | isElem ns "m" "sSup" element = do + base <- maybeToD (findChild (elemName ns "m" "e") element) >>= + elemToBase ns + sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= + (\e -> mapD (elemToMathElem ns) (elChildren e)) + return $ Sub base sup +elemToMathElem ns element | isElem ns "m" "r" element = do + let style = [] -- placeholder + rstyle = elemToRunStyle ns element + relems <- elemToRunElems ns element + return $ OMathRun style $ Run rstyle relems +elemToMathElem _ _ = throwError WrongElem +lookupRelationship :: RelId -> [Relationship] -> Maybe Target +lookupRelationship relid rels = + lookup relid (map (\(Relationship pair) -> pair) rels) - -elemToParPart :: NameSpaces -> Element -> Maybe ParPart +expandDrawingId :: String -> D ParPart +expandDrawingId s = do + target <- asks (lookupRelationship s . envRelationships) + case target of + Just t -> do let filepath = combine "word" t + bytes <- asks (lookup filepath . envMedia) + case bytes of + Just bs -> return $ Drawing filepath bs + Nothing -> throwError DocxError + Nothing -> throwError DocxError + +elemToParPart :: NameSpaces -> Element -> D ParPart elemToParPart ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - case findChild (QName "drawing" (lookup "w" ns) (Just "w")) element of - Just drawingElem -> elemToDrawing ns drawingElem - Nothing -> do - r <- elemToRun ns element - return $ PlainRun r + | isElem ns "w" "r" element + , Just _ <- findChild (elemName ns "w" "drawing") element = + let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" + drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element + >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + in + case drawing of + Just s -> expandDrawingId s + Nothing -> throwError WrongElem elemToParPart ns element - | qName (elName element) == "ins" && - qURI (elName element) == (lookup "w" ns) = do - cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element - cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element - let runs = mapMaybe (elemToRun ns) (elChildren element) - return $ Insertion cId cAuthor cDate runs + | isElem ns "w" "r" element = + elemToRun ns element >>= (\r -> return $ PlainRun r) elemToParPart ns element - | qName (elName element) == "del" && - qURI (elName element) == (lookup "w" ns) = do - cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element - cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element - let runs = mapMaybe (elemToRun ns) (elChildren element) - return $ Deletion cId cAuthor cDate runs + | isElem ns "w" "ins" element + , Just cId <- findAttr (elemName ns "w" "id") element + , Just cAuthor <- findAttr (elemName ns "w" "author") element + , Just cDate <- findAttr (elemName ns "w" "date") element = do + runs <- mapD (elemToRun ns) (elChildren element) + return $ Insertion cId cAuthor cDate runs elemToParPart ns element - | qName (elName element) == "bookmarkStart" && - qURI (elName element) == (lookup "w" ns) = do - bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element - return $ BookMark bmId bmName + | isElem ns "w" "del" element + , Just cId <- findAttr (elemName ns "w" "id") element + , Just cAuthor <- findAttr (elemName ns "w" "author") element + , Just cDate <- findAttr (elemName ns "w" "date") element = do + runs <- mapD (elemToRun ns) (elChildren element) + return $ Deletion cId cAuthor cDate runs elemToParPart ns element - | qName (elName element) == "hyperlink" && - qURI (elName element) == (lookup "w" ns) = - let runs = mapMaybe (elemToRun ns) - $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element - in - case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of - Just anchor -> - Just $ InternalHyperLink anchor runs - Nothing -> - case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of - Just relId -> Just $ ExternalHyperLink relId runs - Nothing -> Nothing + | isElem ns "w" "bookmarkStart" element + , Just bmId <- findAttr (elemName ns "w" "id") element + , Just bmName <- findAttr (elemName ns "w" "name") element = + return $ BookMark bmId bmName elemToParPart ns element - | qName (elName element) == "oMath" && - qURI (elName element) == (lookup "m" ns) = - elemToMath ns element >>= - (\m -> Just $ PlainOMath m) -elemToParPart _ _ = Nothing + | isElem ns "w" "hyperlink" element + , Just anchor <- findAttr (elemName ns "w" "anchor") element = do + runs <- mapD (elemToRun ns) (elChildren element) + return $ InternalHyperLink anchor runs +elemToParPart ns element + | isElem ns "w" "hyperlink" element + , Just relId <- findAttr (elemName ns "r" "id") element = do + runs <- mapD (elemToRun ns) (elChildren element) + rels <- asks envRelationships + return $ case lookupRelationship relId rels of + Just target -> ExternalHyperLink target runs + Nothing -> ExternalHyperLink "" runs +elemToParPart _ _ = throwError WrongElem + +lookupFootnote :: String -> Notes -> Maybe Element +lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s) + +lookupEndnote :: String -> Notes -> Maybe Element +lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s) + +elemToRun :: NameSpaces -> Element -> D Run +elemToRun ns element + | isElem ns "w" "r" element + , Just ref <- findChild (elemName ns "w" "footnoteReference") element + , Just fnId <- findAttr (elemName ns "w" "id") ref = do + notes <- asks envNotes + case lookupFootnote fnId notes of + Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + return $ Footnote bps + Nothing -> return $ Footnote [] +elemToRun ns element + | isElem ns "w" "r" element + , Just ref <- findChild (elemName ns "w" "endnoteReference") element + , Just enId <- findAttr (elemName ns "w" "id") ref = do + notes <- asks envNotes + case lookupEndnote enId notes of + Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + return $ Footnote bps + Nothing -> return $ Footnote [] +elemToRun ns element + | isElem ns "w" "r" element = do + runElems <- elemToRunElems ns element + return $ Run (elemToRunStyle ns element) runElems +elemToRun _ _ = throwError WrongElem + +elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle +elemToParagraphStyle ns element + | Just pPr <- findChild (elemName ns "w" "pPr") element = + ParagraphStyle + {pStyle = + mapMaybe + (findAttr (elemName ns "w" "val")) + (findChildren (elemName ns "w" "pStyle") pPr) + , indentation = + findChild (elemName ns "w" "ind") pPr >>= + elemToParIndentation ns + } +elemToParagraphStyle _ _ = defaultParagraphStyle + + +elemToRunStyle :: NameSpaces -> Element -> RunStyle +elemToRunStyle ns element + | Just rPr <- findChild (elemName ns "w" "rPr") element = + RunStyle + { + isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr + , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr + , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr + , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr + , isSuperScript = + (Just "superscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , isSubScript = + (Just "subscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , rUnderline = + findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + , rStyle = + findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + } +elemToRunStyle _ _ = defaultRunStyle + +elemToRunElem :: NameSpaces -> Element -> D RunElem +elemToRunElem ns element + | isElem ns "w" "t" element || isElem ns "w" "delText" element = + return $ TextRun $ strContent element + | isElem ns "w" "br" element = return LnBrk + | isElem ns "w" "tab" element = return Tab + | otherwise = throwError WrongElem + +elemToRunElems :: NameSpaces -> Element -> D [RunElem] +elemToRunElems ns element + | isElem ns "w" "r" element = mapD (elemToRunElem ns) (elChildren element) +elemToRunElems _ _ = throwError WrongElem + + + + + + + + -type Target = String -type Anchor = String -type BookMarkId = String -type RelId = String -type ChangeId = String -type Author = String -type ChangeDate = String diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs index 8c105d1f1..e8e407844 100644 --- a/src/Text/Pandoc/Readers/Docx/Reducible.hs +++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs @@ -90,7 +90,7 @@ combineReducibles r s = True -> case (not . null) rs && isSpace (last rs) of True -> rebuild conts (init rs) ++ [last rs, s] False -> [r,s] - False -> rebuild + False -> rebuild shared $ reduceList $ (rebuild remaining rs) ++ (rebuild remaining' ss) @@ -145,7 +145,7 @@ instance Reducible Inline where isSpace _ = False instance Reducible Block where - (Div (ident, classes, kvs) blks) <++> blk | "list-item" `elem` classes = + (Div (ident, classes, kvs) blks) <++> blk | "list-item" `elem` classes = [Div (ident, classes, kvs) (reduceList blks), blk] blk <++> blk' = combineReducibles blk blk' @@ -177,5 +177,5 @@ rebuild :: [Container a] -> [a] -> [a] rebuild [] xs = xs rebuild ((Container f) : cs) xs = rebuild cs $ [f xs] rebuild (NullContainer : cs) xs = rebuild cs $ xs - - + + diff --git a/src/Text/Pandoc/Readers/Docx/TexChar.hs b/src/Text/Pandoc/Readers/Docx/TexChar.hs index 1bef8d7da..eddcabecc 100644 --- a/src/Text/Pandoc/Readers/Docx/TexChar.hs +++ b/src/Text/Pandoc/Readers/Docx/TexChar.hs @@ -4382,5 +4382,5 @@ uniconvMap = M.fromList [ ('\8193', "\\quad") -- , ('\120829', "\\mttseven") -- , ('\120830', "\\mtteight") -- , ('\120831', "\\mttnine") - + -- ] diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 381b67e18..cedbb8c9e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -128,7 +128,7 @@ pBulletList = try $ do -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... skipMany nonItem - items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul") + items <- manyTill (pInTags "li" block <* skipMany nonItem) (pCloses "ul") return $ B.bulletList $ map (fixPlains True) items pOrderedList :: TagParser Blocks @@ -156,7 +156,7 @@ pOrderedList = try $ do -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... skipMany nonItem - items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol") + items <- manyTill (pInTags "li" block <* skipMany nonItem) (pCloses "ol") return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items pDefinitionList :: TagParser Blocks @@ -244,7 +244,7 @@ pTable :: TagParser Blocks pTable = try $ do TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) skipMany pBlank - caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank + caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank -- TODO actually read these and take width information from them widths' <- pColgroup <|> many pCol head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 97bfaa455..339f8e3c9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -104,7 +104,7 @@ dimenarg = try $ do sp :: LP () sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') - <|> (try $ newline >>~ lookAhead anyChar >>~ notFollowedBy blankline) + <|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline) isLowerHex :: Char -> Bool isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 80d6698de..1e74f051c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -571,7 +571,7 @@ attributes :: MarkdownParser Attr attributes = try $ do char '{' spnl - attrs <- many (attribute >>~ spnl) + attrs <- many (attribute <* spnl) char '}' return $ foldl (\x f -> f x) nullAttr attrs @@ -688,7 +688,7 @@ birdTrackLine c = try $ do -- emailBlockQuoteStart :: MarkdownParser Char -emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ') +emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ') emailBlockQuote :: MarkdownParser [String] emailBlockQuote = try $ do @@ -948,7 +948,7 @@ rawVerbatimBlock = try $ do ["pre", "style", "script"]) (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) - return $ open ++ contents ++ renderTags [TagClose tag] + return $ open ++ contents ++ renderTags' [TagClose tag] rawTeXBlock :: MarkdownParser (F Blocks) rawTeXBlock = do @@ -1165,7 +1165,7 @@ gridPart ch = do return (length dashes, length dashes + 1) gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline +gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: String -> String removeFinalBar = @@ -1436,52 +1436,60 @@ math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) enclosure :: Char -> MarkdownParser (F Inlines) enclosure c = do + -- we can't start an enclosure with _ if after a string and + -- the intraword_underscores extension is enabled: + guardDisabled Ext_intraword_underscores + <|> guard (c == '*') + <|> (guard =<< notAfterString) cs <- many1 (char c) (return (B.str cs) <>) <$> whitespace - <|> case length cs of + <|> do + case length cs of 3 -> three c 2 -> two c mempty 1 -> one c mempty _ -> return (return $ B.str cs) +ender :: Char -> Int -> MarkdownParser () +ender c n = try $ do + count n (char c) + guard (c == '*') + <|> guardDisabled Ext_intraword_underscores + <|> notFollowedBy alphaNum + -- Parse inlines til you hit one c or a sequence of two cs. -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. -- Otherwise, emit ccc then the results. three :: Char -> MarkdownParser (F Inlines) three c = do - contents <- mconcat <$> many (notFollowedBy (char c) >> inline) - (try (string [c,c,c]) >> return ((B.strong . B.emph) <$> contents)) - <|> (try (string [c,c]) >> one c (B.strong <$> contents)) - <|> (char c >> two c (B.emph <$> contents)) + contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) + (ender c 3 >> return ((B.strong . B.emph) <$> contents)) + <|> (ender c 2 >> one c (B.strong <$> contents)) + <|> (ender c 1 >> two c (B.emph <$> contents)) <|> return (return (B.str [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. two :: Char -> F Inlines -> MarkdownParser (F Inlines) two c prefix' = do - let ender = try $ string [c,c] - contents <- mconcat <$> many (try $ notFollowedBy ender >> inline) - (ender >> return (B.strong <$> (prefix' <> contents))) + contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) + (ender c 2 >> return (B.strong <$> (prefix' <> contents))) <|> return (return (B.str [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. one :: Char -> F Inlines -> MarkdownParser (F Inlines) one c prefix' = do - contents <- mconcat <$> many ( (notFollowedBy (char c) >> inline) + contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline) <|> try (string [c,c] >> - notFollowedBy (char c) >> + notFollowedBy (ender c 1) >> two c mempty) ) - (char c >> return (B.emph <$> (prefix' <> contents))) + (ender c 1 >> return (B.emph <$> (prefix' <> contents))) <|> return (return (B.str [c]) <> (prefix' <> contents)) strongOrEmph :: MarkdownParser (F Inlines) -strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_') - where checkIntraword = do - exts <- getOption readerExtensions - when (Ext_intraword_underscores `Set.member` exts) $ do - guard =<< notAfterString +strongOrEmph = enclosure '*' <|> enclosure '_' -- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (Show b) @@ -1491,7 +1499,7 @@ inlinesBetween :: (Show b) inlinesBetween start end = (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) - innerSpace = try $ whitespace >>~ notFollowedBy' end + innerSpace = try $ whitespace <* notFollowedBy' end strikeout :: MarkdownParser (F Inlines) strikeout = fmap B.strikeout <$> @@ -1749,12 +1757,17 @@ divHtml :: MarkdownParser (F Blocks) divHtml = try $ do guardEnabled Ext_markdown_in_html_blocks (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) + -- we set stateInHtmlBlock so that closing tags that can be either block or + -- inline will not be parsed as inline tags + oldInHtmlBlock <- stateInHtmlBlock <$> getState + updateState $ \st -> st{ stateInHtmlBlock = Just "div" } bls <- option "" (blankline >> option "" blanklines) contents <- mconcat <$> many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block) closed <- option False (True <$ htmlTag (~== TagClose "div")) if closed then do + updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index f1dcce8f7..719bde160 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -634,7 +634,7 @@ inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines inlinesBetween start end = (trimInlines . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) - innerSpace = try $ whitespace >>~ notFollowedBy' end + innerSpace = try $ whitespace <* notFollowedBy' end emph :: MWParser Inlines emph = B.emph <$> nested (inlinesBetween start end) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index fa8438e70..b7bc83e86 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -460,7 +460,7 @@ listItem :: RSTParser Int listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) - blanks <- choice [ try (many blankline >>~ lookAhead start), + blanks <- choice [ try (many blankline <* lookAhead start), many1 blankline ] -- whole list must end with blank. -- parsing with ListItemState forces markers at beginning of lines to -- count as list item markers, even if not separated by blank space. @@ -480,7 +480,7 @@ listItem start = try $ do orderedList :: RSTParser Blocks orderedList = try $ do - (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) + (start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar) items <- many1 (listItem (orderedListStart style delim)) let items' = compactify' items return $ B.orderedListWith (start, style, delim) items' @@ -747,7 +747,7 @@ simpleReferenceName = do referenceName :: RSTParser Inlines referenceName = quotedReferenceName <|> - (try $ simpleReferenceName >>~ lookAhead (char ':')) <|> + (try $ simpleReferenceName <* lookAhead (char ':')) <|> unquotedReferenceName referenceKey :: RSTParser [Char] @@ -1076,7 +1076,7 @@ explicitLink = try $ do referenceLink :: RSTParser Inlines referenceLink = try $ do - (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) >>~ + (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <* char '_' state <- getState let keyTable = stateKeys state diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index dacd4e104..8504e996c 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -57,6 +57,7 @@ module Text.Pandoc.Shared ( normalize, normalizeInlines, normalizeBlocks, + removeFormatting, stringify, compactify, compactify', @@ -335,10 +336,10 @@ isSpaceOrEmpty (Str "") = True isSpaceOrEmpty _ = False -- | Extract the leading and trailing spaces from inside an inline element --- and place them outside the element. +-- and place them outside the element. extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines -extractSpaces f is = +extractSpaces f is = let contents = B.unMany is left = case viewl contents of (Space :< _) -> B.space @@ -493,6 +494,19 @@ normalizeInlines (Cite cs ils : ys) = normalizeInlines (x : xs) = x : normalizeInlines xs normalizeInlines [] = [] +-- | Remove inline formatting from a list of inlines. +removeFormatting :: [Inline] -> [Inline] +removeFormatting = query go . walk deNote + where go :: Inline -> [Inline] + go (Str xs) = [Str xs] + go Space = [Space] + go (Code _ x) = [Str x] + go (Math _ x) = [Str x] + go LineBreak = [Space] + go _ = [] + deNote (Note _) = Str "" + deNote x = x + -- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link -- labels). diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 19112d8f5..8d36efeee 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -142,10 +142,10 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do let len = offset contents -- ident seem to be empty most of the time and asciidoc will generate them automatically -- so lets make them not show up when null - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") + let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") let setext = writerSetextHeaders opts - return $ - (if setext + return $ + (if setext then identifier $$ contents $$ (case level of @@ -155,7 +155,7 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do 4 -> text $ replicate len '+' _ -> empty) <> blankline else - identifier $$ text (replicate level '=') <> space <> contents <> blankline) + identifier $$ text (replicate level '=') <> space <> contents <> blankline) blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ flush (attrs <> dashes <> space <> attrs <> cr <> text str <> cr <> dashes) <> blankline diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 61f548b0c..f04dab76d 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -44,7 +44,6 @@ import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Walk (query) import Data.List ( intersect, intercalate ) import Network.URI ( isURI ) import Control.Monad.State @@ -86,15 +85,6 @@ pandocToDokuWiki opts (Pandoc meta blocks) = do escapeString :: String -> String escapeString str = substitute "__" "%%__%%" ( substitute "**" "%%**%%" ( substitute "//" "%%//%%" str ) ) --- | Remove unsupported formatting from headings -unfancy :: [Inline] -> [Inline] -unfancy = query plainContent - -plainContent :: Inline -> [Inline] -plainContent (Str x) = [Str x] -plainContent Space = [Space] -plainContent _ = [] - -- | Convert Pandoc block element to DokuWiki. blockToDokuWiki :: WriterOptions -- ^ Options -> Block -- ^ Block element @@ -136,7 +126,9 @@ blockToDokuWiki _ (RawBlock f str) blockToDokuWiki _ HorizontalRule = return "\n----\n" blockToDokuWiki opts (Header level _ inlines) = do - contents <- inlineListToDokuWiki opts ( unfancy inlines ) + -- emphasis, links etc. not allowed in headers, apparently, + -- so we remove formatting: + contents <- inlineListToDokuWiki opts $ removeFormatting inlines let eqs = replicate ( 7 - level ) '=' return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index f7968884e..c53a0c13d 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -784,7 +784,7 @@ transformBlock opts mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw tags' <- mapM (transformTag opts mediaRef) tags - return $ RawBlock fmt (renderTags tags') + return $ RawBlock fmt (renderTags' tags') transformBlock _ _ b = return b transformInline :: WriterOptions @@ -804,7 +804,7 @@ transformInline opts mediaRef (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw tags' <- mapM (transformTag opts mediaRef) tags - return $ RawInline fmt (renderTags tags') + return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x writeHtmlInline :: WriterOptions -> Inline -> String diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 9a26cf2ac..744e88c16 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -429,9 +429,11 @@ blockToHtml opts (Div attr@(_,classes,_) bs) = do let contents' = nl opts >> contents >> nl opts return $ if "notes" `elem` classes - then case writerSlideVariant opts of - RevealJsSlides -> addAttrs opts attr $ H5.aside $ contents' - NoSlides -> addAttrs opts attr $ H.div $ contents' + then let opts' = opts{ writerIncremental = False } in + -- we don't want incremental output inside speaker notes + case writerSlideVariant opts of + RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' + NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty else addAttrs opts attr $ H.div $ contents' blockToHtml _ (RawBlock f str) diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 19d486b25..ae20efd4b 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -42,7 +42,7 @@ type WS a = State WriterState a defaultWriterState :: WriterState defaultWriterState = WriterState{ - blockStyles = Set.empty + blockStyles = Set.empty , inlineStyles = Set.empty , links = [] , listDepth = 1 @@ -267,7 +267,7 @@ hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs $ inTags False "BorderColor" [("type","enumeration")] (text "Black") $$ (inTags False "Destination" [("type","object")] $ text $ "HyperlinkURLDestination/"++(escapeStringForXML url)) - + -- | Convert a list of Pandoc blocks to ICML. blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc @@ -352,7 +352,7 @@ listItemsToICML opts listType style attribs (first:rest) = do -- | Convert a list of blocks to ICML list items. listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc listItemToICML opts style isFirst attribs item = - let makeNumbStart (Just (beginsWith, numbStl, _)) = + let makeNumbStart (Just (beginsWith, numbStl, _)) = let doN DefaultStyle = [] doN LowerRoman = [lowerRomanName] doN UpperRoman = [upperRomanName] @@ -467,7 +467,7 @@ parStyle opts style lst = -- | Wrap a Doc in an ICML Character Style. charStyle :: Style -> Doc -> WS Doc -charStyle style content = +charStyle style content = let (stlStr, attrs) = styleToStrAttr style doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content in do diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 100bf900d..5bbe30fc8 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -471,19 +471,18 @@ blockToLaTeX (Table caption aligns widths heads rows) = do captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "\\addlinespace" - $$ text "\\caption" <> braces captionText + else text "\\caption" <> braces captionText <> "\\\\" rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns modify $ \s -> s{ stTable = True } return $ "\\begin{longtable}[c]" <> braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end + $$ capt $$ "\\toprule\\addlinespace" $$ headers $$ vcat rows' $$ "\\bottomrule" - $$ capt $$ "\\end{longtable}" toColDescriptor :: Alignment -> String diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index a67271a5d..3beba3bdd 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -46,7 +46,7 @@ import Control.Monad.State import qualified Data.Set as Set import Text.Pandoc.Writers.HTML (writeHtmlString) import Text.Pandoc.Readers.TeXMath (readTeXMath') -import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..)) +import Text.HTML.TagSoup (parseTags, isTagText, Tag(..)) import Network.URI (isURI) import Data.Default import Data.Yaml (Value(Object,String,Array,Bool,Number)) @@ -405,8 +405,8 @@ blockToMarkdown opts (CodeBlock attribs str) = return $ attrs = if isEnabled Ext_fenced_code_attributes opts then nowrap $ " " <> attrsToMarkdown attribs else case attribs of - (_,[cls],_) -> " " <> text cls - _ -> empty + (_,(cls:_),_) -> " " <> text cls + _ -> empty blockToMarkdown opts (BlockQuote blocks) = do st <- get -- if we're writing literate haskell, put a space before the bird tracks @@ -471,7 +471,7 @@ addMarkdownAttribute :: String -> String addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of (xs,(TagOpen t attrs:rest)) -> - renderTags $ reverse rest ++ (TagOpen t attrs' : reverse xs) + renderTags' $ reverse rest ++ (TagOpen t attrs' : reverse xs) where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs, x /= "markdown"] _ -> s diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index b6da2694c..e2b9a68f1 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -504,7 +504,7 @@ paraStyle parent attrs = do tight = if t then [ ("fo:margin-top" , "0in" ) , ("fo:margin-bottom" , "0in" )] else [] - indent = if (i /= 0 || b) + indent = if (i /= 0 || b) then [ ("fo:margin-left" , indentVal) , ("fo:margin-right" , "0in" ) , ("fo:text-indent" , "0in" ) @@ -534,7 +534,7 @@ paraTableStyles t s (a:xs) [ ("fo:text-align", x) , ("style:justify-single-word", "false")] -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre +data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 31c97349b..5e97d2ac3 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -174,7 +174,7 @@ blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do let alt = ":alt: " <> if null tit then capt else text tit return $ hang 3 ".. " $ fig $$ alt $+$ capt $$ blankline blockToRST (Para inlines) - | LineBreak `elem` inlines = do -- use line block if LineBreaks + | LineBreak `elem` inlines = do -- use line block if LineBreaks lns <- mapM inlineListToRST $ splitBy (==LineBreak) inlines return $ (vcat $ map (text "| " <>) lns) <> blankline | otherwise = do |