diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 84 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 28 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 127 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 136 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/TeXMath.hs | 24 |
5 files changed, 181 insertions, 218 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 5c188e3d9..5ccbc4fb1 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -44,13 +44,14 @@ module Text.Pandoc.Readers.HTML ( import Text.ParserCombinators.Parsec import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Data.Maybe ( fromMaybe ) import Data.List ( isPrefixOf, isSuffixOf, intercalate ) import Data.Char ( toLower, isAlphaNum ) import Network.URI ( parseURIReference, URI (..) ) -import Control.Monad ( liftM ) +import Control.Monad ( liftM, when ) -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ParserState -- ^ Parser state @@ -198,11 +199,11 @@ inlinesTilEnd tag = manyTill inline (htmlEndTag tag) -- | Parse blocks between open and close tag. blocksIn :: String -> GenParser Char ParserState [Block] -blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag +blocksIn tag = try $ htmlOpenTag tag >> spaces >> blocksTilEnd tag -- | Parse inlines between open and close tag. inlinesIn :: String -> GenParser Char ParserState [Inline] -inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag +inlinesIn tag = try $ htmlOpenTag tag >> spaces >> inlinesTilEnd tag -- | Extract type from a tag: e.g. @br@ from @\<br\>@ extractTagType :: String -> String @@ -258,18 +259,33 @@ anyHtmlEndTag = try $ do then return $ "<!-- unsafe HTML removed -->" else return result -htmlTag :: String -> GenParser Char ParserState (String, [(String, String)]) -htmlTag tag = try $ do +htmlTag :: Bool + -> String + -> GenParser Char ParserState (String, [(String, String)]) +htmlTag selfClosing tag = try $ do char '<' spaces stringAnyCase tag attribs <- many htmlAttribute spaces - optional (string "/") - spaces + -- note: we want to handle both HTML and XHTML, + -- so we don't require the / + when selfClosing $ optional $ char '/' >> spaces char '>' return (tag, (map (\(name, content, _) -> (name, content)) attribs)) +htmlOpenTag :: String + -> GenParser Char ParserState (String, [(String, String)]) +htmlOpenTag = htmlTag False + +htmlCloseTag :: String + -> GenParser Char ParserState (String, [(String, String)]) +htmlCloseTag = htmlTag False . ('/':) + +htmlSelfClosingTag :: String + -> GenParser Char ParserState (String, [(String, String)]) +htmlSelfClosingTag = htmlTag True + -- parses a quoted html attribute value quoted :: Char -> GenParser Char st (String, String) quoted quoteChar = do @@ -344,7 +360,7 @@ anyHtmlInlineTag = try $ do -- Scripts must be treated differently, because they can contain '<>' etc. htmlScript :: GenParser Char ParserState [Char] htmlScript = try $ do - lookAhead $ htmlTag "script" + lookAhead $ htmlOpenTag "script" open <- anyHtmlTag rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script") st <- getState @@ -379,7 +395,7 @@ scriptChunk = jsComment <|> jsString <|> jsChars -- Style tags must be treated differently, because they can contain CSS htmlStyle :: GenParser Char ParserState [Char] htmlStyle = try $ do - lookAhead $ htmlTag "style" + lookAhead $ htmlOpenTag "style" open <- anyHtmlTag rest <- manyTill anyChar (htmlEndTag "style") st <- getState @@ -411,7 +427,8 @@ rawVerbatimBlock = try $ do -- We don't want to parse </body> or </html> as raw HTML, since these -- are handled in parseHtml. rawHtmlBlock' :: GenParser Char ParserState Block -rawHtmlBlock' = do notFollowedBy' (htmlTag "/body" <|> htmlTag "/html") +rawHtmlBlock' = do notFollowedBy' (htmlCloseTag "body" <|> + htmlCloseTag "html") rawHtmlBlock -- | Parses an HTML comment. @@ -441,13 +458,13 @@ definition = try $ do nonTitleNonHead :: GenParser Char ParserState Char nonTitleNonHead = try $ do - notFollowedBy $ (htmlTag "title" >> return ' ') <|> + notFollowedBy $ (htmlOpenTag "title" >> return ' ') <|> (htmlEndTag "head" >> return ' ') (rawHtmlBlock >> return ' ') <|> anyChar parseTitle :: GenParser Char ParserState [Inline] parseTitle = try $ do - (tag, _) <- htmlTag "title" + (tag, _) <- htmlOpenTag "title" contents <- inlinesTilEnd tag spaces return contents @@ -455,7 +472,7 @@ parseTitle = try $ do -- parse header and return meta-information (for now, just title) parseHead :: GenParser Char ParserState Meta parseHead = try $ do - htmlTag "head" + htmlOpenTag "head" spaces skipMany nonTitleNonHead contents <- option [] parseTitle @@ -463,13 +480,10 @@ parseHead = try $ do htmlEndTag "head" return $ Meta contents [] [] -skipHtmlTag :: String -> GenParser Char ParserState () -skipHtmlTag tag = optional (htmlTag tag) - -- h1 class="title" representation of title in body bodyTitle :: GenParser Char ParserState [Inline] bodyTitle = try $ do - (_, attribs) <- htmlTag "h1" + (_, attribs) <- htmlOpenTag "h1" case (extractAttribute "class" attribs) of Just "title" -> return "" _ -> fail "not title" @@ -487,11 +501,11 @@ parseHtml :: GenParser Char ParserState Pandoc parseHtml = do sepEndBy (choice [xmlDec, definition, htmlComment]) spaces spaces - skipHtmlTag "html" + optional $ htmlOpenTag "html" spaces meta <- option (Meta [] [] []) parseHead spaces - skipHtmlTag "body" + optional $ htmlOpenTag "body" spaces optional bodyTitle -- skip title in body, because it's represented in meta blocks <- parseBlocks @@ -527,7 +541,7 @@ header = choice (map headerLevel (enumFromTo 1 5)) <?> "header" headerLevel :: Int -> GenParser Char ParserState Block headerLevel n = try $ do let level = "h" ++ show n - htmlTag level + htmlOpenTag level contents <- inlinesTilEnd level return $ Header n (normalizeSpaces contents) @@ -537,7 +551,7 @@ headerLevel n = try $ do hrule :: GenParser Char ParserState Block hrule = try $ do - (_, attribs) <- htmlTag "hr" + (_, attribs) <- htmlSelfClosingTag "hr" state <- getState if not (null attribs) && stateParseRaw state then unexpected "attributes in hr" -- parse as raw in this case @@ -551,7 +565,7 @@ hrule = try $ do -- skipped, because they are not portable to output formats other than HTML. codeBlock :: GenParser Char ParserState Block codeBlock = try $ do - htmlTag "pre" + htmlOpenTag "pre" result <- manyTill (many1 (satisfy (/= '<')) <|> ((anyHtmlTag <|> anyHtmlEndTag) >> return "")) @@ -572,7 +586,7 @@ codeBlock = try $ do -- blockQuote :: GenParser Char ParserState Block -blockQuote = try $ htmlTag "blockquote" >> spaces >> +blockQuote = try $ htmlOpenTag "blockquote" >> spaces >> blocksTilEnd "blockquote" >>= (return . BlockQuote) -- @@ -584,7 +598,7 @@ list = choice [ bulletList, orderedList, definitionList ] <?> "list" orderedList :: GenParser Char ParserState Block orderedList = try $ do - (_, attribs) <- htmlTag "ol" + (_, attribs) <- htmlOpenTag "ol" (start, style) <- option (1, DefaultStyle) $ do failIfStrict let sta = fromMaybe "1" $ @@ -609,7 +623,7 @@ orderedList = try $ do bulletList :: GenParser Char ParserState Block bulletList = try $ do - htmlTag "ul" + htmlOpenTag "ul" spaces -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... @@ -620,7 +634,7 @@ bulletList = try $ do definitionList :: GenParser Char ParserState Block definitionList = try $ do failIfStrict -- def lists not part of standard markdown - htmlTag "dl" + htmlOpenTag "dl" spaces items <- sepEndBy1 definitionListItem spaces htmlEndTag "dl" @@ -638,7 +652,7 @@ definitionListItem = try $ do -- para :: GenParser Char ParserState Block -para = try $ htmlTag "p" >> inlinesTilEnd "p" >>= +para = try $ htmlOpenTag "p" >> inlinesTilEnd "p" >>= return . Para . normalizeSpaces -- @@ -672,8 +686,8 @@ inline = choice [ charRef code :: GenParser Char ParserState Inline code = try $ do - htmlTag "code" - result <- manyTill anyChar (htmlEndTag "code") + result <- (htmlOpenTag "code" >> manyTill (noneOf "<>") (htmlEndTag "code")) + <|> (htmlOpenTag "tt" >> manyTill (noneOf "<>") (htmlEndTag "tt")) -- remove internal line breaks, leading and trailing space, -- and decode character references return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ @@ -686,7 +700,7 @@ rawHtmlInline = do if stateParseRaw state then return (HtmlInline result) else return (Str "") betweenTags :: [Char] -> GenParser Char ParserState [Inline] -betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= +betweenTags tag = try $ htmlOpenTag tag >> inlinesTilEnd tag >>= return . normalizeSpaces emph :: GenParser Char ParserState Inline @@ -708,7 +722,7 @@ strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>= spanStrikeout :: GenParser Char ParserState Inline spanStrikeout = try $ do failIfStrict -- strict markdown has no strikeout, so treat as raw HTML - (_, attributes) <- htmlTag "span" + (_, attributes) <- htmlOpenTag "span" result <- case (extractAttribute "class" attributes) of Just "strikeout" -> inlinesTilEnd "span" _ -> fail "not a strikeout" @@ -719,7 +733,7 @@ whitespace = many1 space >> return Space -- hard line break linebreak :: GenParser Char ParserState Inline -linebreak = htmlTag "br" >> optional newline >> return LineBreak +linebreak = htmlSelfClosingTag "br" >> optional newline >> return LineBreak str :: GenParser Char st Inline str = many1 (noneOf "< \t\n&") >>= return . Str @@ -740,7 +754,7 @@ extractAttribute name ((attrName, contents):rest) = link :: GenParser Char ParserState Inline link = try $ do - (_, attributes) <- htmlTag "a" + (_, attributes) <- htmlOpenTag "a" url <- case (extractAttribute "href" attributes) of Just url -> return url Nothing -> fail "no href" @@ -750,7 +764,7 @@ link = try $ do image :: GenParser Char ParserState Inline image = try $ do - (_, attributes) <- htmlTag "img" + (_, attributes) <- htmlSelfClosingTag "img" url <- case (extractAttribute "src" attributes) of Just url -> return url Nothing -> fail "no src" diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 01fca9f2b..406809dfc 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -35,7 +35,8 @@ module Text.Pandoc.Readers.LaTeX ( import Text.ParserCombinators.Parsec import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe ) import Data.Char ( chr ) import Data.List ( isPrefixOf, isSuffixOf ) @@ -167,16 +168,37 @@ block = choice [ hrule -- header :: GenParser Char ParserState Block -header = try $ do +header = section <|> chapter + +chapter :: GenParser Char ParserState Block +chapter = try $ do + string "\\chapter" + result <- headerWithLevel 1 + updateState $ \s -> s{ stateHasChapters = True } + return result + +section :: GenParser Char ParserState Block +section = try $ do char '\\' subs <- many (try (string "sub")) base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4) + st <- getState + let lev = if stateHasChapters st + then length subs + base + 1 + else length subs + base + headerWithLevel lev + +headerWithLevel :: Int -> GenParser Char ParserState Block +headerWithLevel lev = try $ do + spaces optional (char '*') + spaces optional $ bracketedText '[' ']' -- alt title + spaces char '{' title' <- manyTill inline (char '}') spaces - return $ Header (length subs + base) (normalizeSpaces title') + return $ Header lev (normalizeSpaces title') -- -- hrule block diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a6d383fca..b655ea1a9 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -37,7 +37,8 @@ import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) import Data.Maybe import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' ) import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, @@ -45,7 +46,8 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, htmlBlockElement, htmlComment, unsanitaryURI ) import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.ParserCombinators.Parsec -import Control.Monad (when, liftM, unless) +import Control.Monad (when, liftM, unless, guard) +import Text.TeXMath.Macros (applyMacros, Macro, pMacroDefinition) -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ParserState -- ^ Parser state, including options for parser @@ -68,7 +70,7 @@ setextHChars = "=-" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\[]*_~`<>$!^-.&'\";" +specialChars = "\\[]*_~`<>$!^-.&@'\";" -- -- auxiliary functions @@ -184,7 +186,18 @@ parseMarkdown = do -- now parse it for real... (title, author, date) <- option ([],[],[]) titleBlock blocks <- parseBlocks - return $ Pandoc (Meta title author date) $ filter (/= Null) blocks + let doc = Pandoc (Meta title author date) $ filter (/= Null) blocks + -- if there are labeled examples, change references into numbers + examples <- liftM stateExamples getState + let handleExampleRef :: Inline -> Inline + handleExampleRef z@(Str ('@':xs)) = + case M.lookup xs examples of + Just n -> Str (show n) + Nothing -> z + handleExampleRef z = z + if M.null examples + then return doc + else return $ processWith handleExampleRef doc -- -- initial pass for references and notes @@ -272,6 +285,7 @@ block = do , plain , nullBlock ] else [ codeBlockDelimited + , macro , header , table , codeBlockIndented @@ -716,7 +730,7 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -735,7 +749,9 @@ simpleTableHeader headless = try $ do let rawHeads' = if headless then replicate (length dashes) "" else rawHeads - return (rawHeads', aligns, indices) + heads <- mapM (parseFromString (many plain)) $ + map removeLeadingTrailingSpace rawHeads' + return (heads, aligns, indices) -- Parse a table footer - dashed lines followed by blank line. tableFooter :: GenParser Char ParserState [Char] @@ -764,65 +780,27 @@ multilineRow :: [Int] -> GenParser Char ParserState [[Block]] multilineRow indices = do colLines <- many1 (rawTableLine indices) - optional blanklines let cols = map unlines $ transpose colLines mapM (parseFromString (many plain)) cols --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths' = zipWith (-) indices (0:indices) - lengths = reverse $ - case reverse lengths' of - [] -> [] - [x] -> [x] - -- compensate for the fact that intercolumn - -- spaces are counted in widths of all columns - -- but the last... - (x:y:zs) -> if x < y && y - x <= 2 - then y:y:zs - else x:y:zs - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. tableCaption :: GenParser Char ParserState [Inline] tableCaption = try $ do skipNonindentSpaces - string "Table:" + string ":" <|> string "Table:" result <- many1 inline blanklines return $ normalizeSpaces result --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState end - -> GenParser Char ParserState Block -tableWith headerParser lineParser footerParser = try $ do - (rawHeads, aligns, indices) <- headerParser - lines' <- many1Till (lineParser indices) footerParser - caption <- option [] tableCaption - heads <- mapM (parseFromString (many plain)) rawHeads - state <- getState - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table caption aligns widths heads lines' - -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table -> GenParser Char ParserState Block simpleTable headless = do Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine - (if headless then tableFooter else tableFooter <|> blanklines) + (return ()) + (if headless then tableFooter else tableFooter <|> blanklines) + tableCaption -- Simple tables get 0s for relative column widths (i.e., use default) return $ Table c a (replicate (length a) 0) h l @@ -833,10 +811,10 @@ simpleTable headless = do multilineTable :: Bool -- ^ Headerless table -> GenParser Char ParserState Block multilineTable headless = - tableWith (multilineTableHeader headless) multilineRow tableFooter + tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption multilineTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([String], [Alignment], [Int]) + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) multilineTableHeader headless = try $ do if headless then return '\n' @@ -860,7 +838,9 @@ multilineTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else map (intercalate " ") rawHeadsList - return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) + heads <- mapM (parseFromString (many plain)) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the @@ -880,9 +860,37 @@ alignType strLst len = (True, True) -> AlignCenter (False, False) -> AlignDefault +gridTable :: Bool -- ^ Headerless table + -> GenParser Char ParserState Block +gridTable = gridTableWith block tableCaption + table :: GenParser Char ParserState Block table = multilineTable False <|> simpleTable True <|> - simpleTable False <|> multilineTable True <?> "table" + simpleTable False <|> multilineTable True <|> + gridTable False <|> gridTable True <?> "table" + +-- +-- Macros +-- + +-- | Parse a \newcommand or \renewcommand macro definition. +macro :: GenParser Char ParserState Block +macro = getState >>= guard . stateApplyMacros >> + pMacroDefinition >>= addMacro >> blanklines >> return Null + +-- | Add a macro to the list of macros in state. +addMacro :: Macro -> GenParser Char ParserState () +addMacro m = do + updateState $ \st -> st{ stateMacros = m : stateMacros st } + +-- | Apply current macros to string. +applyMacros' :: String -> GenParser Char ParserState String +applyMacros' target = do + apply <- liftM stateApplyMacros getState + if apply + then do macros <- liftM stateMacros getState + return $ applyMacros macros target + else return target -- -- inline @@ -916,6 +924,7 @@ inlineParsers = [ str , rawHtmlInline' , rawLaTeXInline' , escapedChar + , exampleRef , symbol , ltSign ] @@ -951,6 +960,14 @@ ltSign = do specialCharsMinusLt :: [Char] specialCharsMinusLt = filter (/= '<') specialChars +exampleRef :: GenParser Char ParserState Inline +exampleRef = try $ do + char '@' + lab <- many1 (alphaNum <|> oneOf "-_") + -- We just return a Str. These are replaced with numbers + -- later. See the end of parseMarkdown. + return $ Str $ '@' : lab + symbol :: GenParser Char ParserState Inline symbol = do result <- oneOf specialCharsMinusLt @@ -977,8 +994,8 @@ mathChunk = do char '\\' <|> many1 (noneOf " \t\n\\$") math :: GenParser Char ParserState Inline -math = (mathDisplay >>= return . Math DisplayMath) - <|> (mathInline >>= return . Math InlineMath) +math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath) + <|> (mathInline >>= applyMacros' >>= return . Math InlineMath) mathDisplay :: GenParser Char ParserState String mathDisplay = try $ do @@ -1285,7 +1302,7 @@ rawHtmlInline' = do st <- getState result <- if stateStrict st then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] - else anyHtmlInlineTag + else choice [htmlComment, anyHtmlInlineTag] return $ HtmlInline result #ifdef _CITEPROC diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7b4b5eee8..13afe5053 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,9 +31,10 @@ module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.ParserCombinators.Parsec -import Control.Monad ( when, unless, liftM ) +import Control.Monad ( when, unless ) import Data.List ( findIndex, intercalate, transpose, sort ) import qualified Data.Map as M import Text.Printf ( printf ) @@ -424,7 +425,7 @@ bulletListStart = try $ do -- parses ordered list start and returns its length (inc following whitespace) orderedListStart :: ListNumberStyle -> ListNumberDelim - -> GenParser Char st Int + -> GenParser Char ParserState Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) white <- many1 spaceChar @@ -607,41 +608,20 @@ dashedLine ch = do simpleDashedLines :: Char -> GenParser Char st [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -gridPart :: Char -> GenParser Char st (Int, Int) -gridPart ch = do - dashes <- many1 (char ch) - char '+' - return (length dashes, length dashes + 1) - -gridDashedLines :: Char -> GenParser Char st [(Int,Int)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline - -- Parse a table row separator simpleTableSep :: Char -> GenParser Char ParserState Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -gridTableSep :: Char -> GenParser Char ParserState Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - -- Parse a table footer simpleTableFooter :: GenParser Char ParserState [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -gridTableFooter :: GenParser Char ParserState [Char] -gridTableFooter = blanklines - -- Parse a raw line and split it into chunks by indices. simpleTableRawLine :: [Int] -> GenParser Char ParserState [String] simpleTableRawLine indices = do line <- many1Till anyChar newline return (simpleTableSplitLine indices line) -gridTableRawLine :: [Int] -> GenParser Char ParserState [String] -gridTableRawLine indices = do - char '|' - line <- many1Till anyChar newline - return (gridTableSplitLine indices $ removeTrailingSpace line) - -- Parse a table row and return a list of blocks (columns). simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]] simpleTableRow indices = do @@ -651,64 +631,13 @@ simpleTableRow indices = do let cols = map unlines . transpose $ firstLine : colLines mapM (parseFromString (many plain)) cols -gridTableRow :: [Int] - -> GenParser Char ParserState [[Block]] -gridTableRow indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - mapM (liftM compactifyCell . parseFromString (many block)) cols - -compactifyCell :: [Block] -> [Block] -compactifyCell bs = head $ compactify [bs] - simpleTableSplitLine :: [Int] -> String -> [String] simpleTableSplitLine indices line = map removeLeadingTrailingSpace $ tail $ splitByIndices (init indices) line -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = - map removeFinalBar $ tail $ splitByIndices (init indices) line - -removeFinalBar :: String -> String -removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") . - reverse - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths' = zipWith (-) indices (0:indices) - lengths = reverse $ - case reverse lengths' of - [] -> [] - [x] -> [x] - -- compensate for the fact that intercolumn - -- spaces are counted in widths of all columns - -- but the last... - (x:y:zs) -> if x < y && y - x <= 2 - then y:y:zs - else x:y:zs - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -722,64 +651,23 @@ simpleTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else simpleTableSplitLine indices rawContent - return (rawHeads, aligns, indices) + heads <- mapM (parseFromString (many plain)) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) -gridTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([String], [Alignment], [Int]) -gridTableHeader headless = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return $ repeat "" - else many1 - (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () - let lines' = map snd dashes - let indices = scanl (+) 0 lines' - let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments - let rawHeads = if headless - then replicate (length dashes) "" - else map (intercalate " ") $ transpose - $ map (gridTableSplitLine indices) rawContent - return (rawHeads, aligns, indices) - --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState sep - -> GenParser Char ParserState end - -> GenParser Char ParserState Block -tableWith headerParser rowParser lineParser footerParser = try $ do - (rawHeads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy` lineParser - footerParser - heads <- mapM (parseFromString (many plain)) rawHeads - state <- getState - let captions = [] -- no notion of captions in RST - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table captions aligns widths heads lines' - --- Parse a simple table with '---' header and one line per row. +-- Parse a simple table. simpleTable :: Bool -- ^ Headerless table -> GenParser Char ParserState Block simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter + Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return []) -- Simple tables get 0s for relative column widths (i.e., use default) return $ Table c a (replicate (length a) 0) h l where sep = return () -- optional (simpleTableSep '-') --- Parse a grid table: starts with row of '-' on top, then header --- (which may be grid), then the rows, --- which may be grid, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTable headless = - tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter + -> GenParser Char ParserState Block +gridTable = gridTableWith block (return []) table :: GenParser Char ParserState Block table = gridTable False <|> simpleTable False <|> diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 40cf39987..ca839dd08 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -86,6 +86,28 @@ expToInlines (ESubsup x y z) = do y' <- expToInlines y z' <- expToInlines z return $ x' ++ [Subscript y'] ++ [Superscript z'] -expToInlines (EText _ x) = Just [Emph [Str x]] +expToInlines (EDown x y) = expToInlines (ESub x y) +expToInlines (EUp x y) = expToInlines (ESuper x y) +expToInlines (EDownup x y z) = expToInlines (ESubsup x y z) +expToInlines (EText "normal" x) = Just [Str x] +expToInlines (EText "bold" x) = Just [Strong [Str x]] +expToInlines (EText "monospace" x) = Just [Code x] +expToInlines (EText "italic" x) = Just [Emph [Str x]] +expToInlines (EText _ x) = Just [Str x] +expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) = + case accent of + '\x203E' -> Just [Emph [Str [c,'\x0304']]] -- bar + '\x00B4' -> Just [Emph [Str [c,'\x0301']]] -- acute + '\x0060' -> Just [Emph [Str [c,'\x0300']]] -- grave + '\x02D8' -> Just [Emph [Str [c,'\x0306']]] -- breve + '\x02C7' -> Just [Emph [Str [c,'\x030C']]] -- check + '.' -> Just [Emph [Str [c,'\x0307']]] -- dot + '\x00B0' -> Just [Emph [Str [c,'\x030A']]] -- ring + '\x20D7' -> Just [Emph [Str [c,'\x20D7']]] -- arrow right + '\x20D6' -> Just [Emph [Str [c,'\x20D6']]] -- arrow left + '\x005E' -> Just [Emph [Str [c,'\x0302']]] -- hat + '\x0302' -> Just [Emph [Str [c,'\x0302']]] -- hat + '~' -> Just [Emph [Str [c,'\x0303']]] -- tilde + _ -> Nothing expToInlines _ = Nothing |
