From 1bfe39e24cb58c361a05f419ef9a4a5263f558f6 Mon Sep 17 00:00:00 2001 From: Emanuel Evans Date: Sun, 24 Apr 2016 21:58:53 -0700 Subject: Ignore leading space in org code blocks Fixes #2862 Also fix up tab handling for leading whitespace in code blocks. --- tests/Tests/Readers/Org.hs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tests/Tests/Readers/Org.hs') diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index b095ac60a..bb9b37d13 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -1054,6 +1054,33 @@ tests = " where greeting = \"moin\"\n" in codeBlockWith attr' code' + , "Source block with indented code" =: + unlines [ " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in codeBlockWith attr' code' + + , "Source block with tab-indented code" =: + unlines [ "\t#+BEGIN_SRC haskell" + , "\tmain = putStrLn greeting" + , "\t where greeting = \"moin\"" + , "\t#+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in codeBlockWith attr' code' + + , "Empty source block" =: + unlines [ " #+BEGIN_SRC haskell" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "" + in codeBlockWith attr' code' + , "Source block between paragraphs" =: unlines [ "Low German greeting" , " #+BEGIN_SRC haskell" -- cgit v1.2.3 From d5e4bc179c0517a5da84ed95d55e1f10f1f16a94 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 3 May 2016 22:52:10 +0200 Subject: Org reader: stop padding short table rows Emacs Org-mode doesn't add any padding to table rows. The first row (header or first body row) is used to determine the column count, no other magic is performed. The org reader was padding rows to the length of the longest table row. This was done due to a misunderstanding of how Org handles tables. This feature reflected how Org-mode handles tables when pressing . The Org exporter however, which is what the reader should implement, doesn't do any of this. So this was a mis-feature that made the reader more complex and reduced comparability. It was hence removed. --- src/Text/Pandoc/Readers/Org.hs | 44 +++++++++++++++++++----------------------- tests/Tests/Readers/Org.hs | 8 ++++---- 2 files changed, 24 insertions(+), 28 deletions(-) (limited to 'tests/Tests/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5e98be31d..0de64c663 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -774,9 +774,13 @@ data OrgTableRow = OrgContentRow (F [Blocks]) | OrgAlignRow [Alignment] | OrgHlineRow +-- OrgTable is strongly related to the pandoc table ADT. Using the same +-- (i.e. pandoc-global) ADT would mean that the reader would break if the +-- global structure was to be changed, which would be bad. The final table +-- should be generated using a builder function. Column widths aren't +-- implemented yet, so they are not tracked here. data OrgTable = OrgTable - { orgTableColumns :: Int - , orgTableAlignments :: [Alignment] + { orgTableAlignments :: [Alignment] , orgTableHeader :: [Blocks] , orgTableRows :: [[Blocks]] } @@ -792,7 +796,7 @@ table = try $ do orgToPandocTable :: OrgTable -> Inlines -> Blocks -orgToPandocTable (OrgTable _ aligns heads lns) caption = +orgToPandocTable (OrgTable aligns heads lns) caption = B.table caption (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char @@ -840,20 +844,18 @@ tableHline = try $ rowsToTable :: [OrgTableRow] -> F OrgTable -rowsToTable = foldM (flip rowToContent) zeroTable - where zeroTable = OrgTable 0 mempty mempty mempty - -normalizeTable :: OrgTable - -> OrgTable -normalizeTable (OrgTable cols aligns heads lns) = - let aligns' = fillColumns aligns AlignDefault - heads' = if heads == mempty - then mempty - else fillColumns heads (B.plain mempty) - lns' = map (`fillColumns` B.plain mempty) lns - fillColumns base padding = take cols $ base ++ repeat padding - in OrgTable cols aligns' heads' lns' +rowsToTable = foldM (flip rowToContent) emptyTable + where emptyTable = OrgTable mempty mempty mempty +normalizeTable :: OrgTable -> OrgTable +normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows + where + refRow = if heads /= mempty + then heads + else if rows == mempty then mempty else head rows + cols = length refRow + fillColumns base padding = take cols $ base ++ repeat padding + aligns' = fillColumns aligns AlignDefault -- One or more horizontal rules after the first content line mark the previous -- line as a header. All other horizontal lines are discarded. @@ -861,16 +863,10 @@ rowToContent :: OrgTableRow -> OrgTable -> F OrgTable rowToContent OrgHlineRow t = maybeBodyToHeader t -rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t +rowToContent (OrgAlignRow as) t = setAligns as t rowToContent (OrgContentRow rf) t = do rs <- rf - setLongestRow rs =<< appendToBody rs t - -setLongestRow :: [a] - -> OrgTable - -> F OrgTable -setLongestRow rs t = - return t{ orgTableColumns = max (length rs) (orgTableColumns t) } + appendToBody rs t maybeBodyToHeader :: OrgTable -> F OrgTable diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index bb9b37d13..98658482c 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -1024,10 +1024,10 @@ tests = , "| 1 | One | foo |" , "| 2" ] =?> - table "" (zip [AlignCenter, AlignRight, AlignDefault] [0, 0, 0]) - [ plain "Numbers", plain "Text" , plain mempty ] - [ [ plain "1" , plain "One" , plain "foo" ] - , [ plain "2" , plain mempty , plain mempty ] + table "" (zip [AlignCenter, AlignRight] [0, 0]) + [ plain "Numbers", plain "Text" ] + [ [ plain "1" , plain "One" , plain "foo" ] + , [ plain "2" ] ] , "Table with caption" =: -- cgit v1.2.3 From 2d825603c684d6c7af6adb08f26ed34a078a5afe Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 4 May 2016 15:33:18 +0200 Subject: Org reader: fix handling of empty table cells, rows This fixes Org mode parsing of some corner cases regarding empty cells and rows. Empty cells weren't parsed correctly, e.g. `|||` should be two empty cells, but would be parsed as a single cell containing a pipe character. Empty rows where parsed as alignment rows and dropped from the output. This fixes #2616. --- src/Text/Pandoc/Readers/Org.hs | 30 +++++++++++++++++------------- tests/Tests/Readers/Org.hs | 13 ++++++++++++- 2 files changed, 29 insertions(+), 14 deletions(-) (limited to 'tests/Tests/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index e41dd5dd8..493e94d2e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -35,6 +35,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), trimInlines ) import Text.Pandoc.Definition import Text.Pandoc.Compat.Monoid ((<>)) +import Text.Pandoc.Error import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF @@ -57,8 +58,6 @@ import qualified Data.Set as Set import Data.Maybe (fromMaybe, isJust) import Network.HTTP (urlEncode) -import Text.Pandoc.Error - -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) @@ -807,18 +806,19 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) tableContentCell :: OrgParser (F Blocks) tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell - -endOfCell :: OrgParser Char -endOfCell = try $ char '|' <|> lookAhead newline + fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell tableAlignRow :: OrgParser OrgTableRow -tableAlignRow = try $ - OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline) +tableAlignRow = try $ do + tableStart + cells <- many1Till tableAlignCell newline + -- Empty rows are regular (i.e. content) rows, not alignment rows. + guard $ any (/= AlignDefault) cells + return $ OrgAlignRow cells tableAlignCell :: OrgParser Alignment tableAlignCell = @@ -833,15 +833,19 @@ tableAlignCell = where emptyCell = try $ skipSpaces *> endOfCell tableAlignFromChar :: OrgParser Alignment -tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft - , char 'c' *> return AlignCenter - , char 'r' *> return AlignRight - ] +tableAlignFromChar = try $ + choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] tableHline :: OrgParser OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) +endOfCell :: OrgParser Char +endOfCell = try $ char '|' <|> lookAhead newline + rowsToTable :: [OrgTableRow] -> F OrgTable rowsToTable = foldM rowToContent emptyTable diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 98658482c..81684d8ef 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -941,7 +941,7 @@ tests = , "Empty table" =: "||" =?> - simpleTable' 1 mempty mempty + simpleTable' 1 mempty [[mempty]] , "Glider Table" =: unlines [ "| 1 | 0 | 0 |" @@ -996,6 +996,17 @@ tests = , [ plain "dynamic", plain "Lisp" ] ] + , "Table with empty cells" =: + "|||c|" =?> + simpleTable' 3 mempty [[mempty, mempty, plain "c"]] + + , "Table with empty rows" =: + unlines [ "| first |" + , "| |" + , "| third |" + ] =?> + simpleTable' 1 mempty [[plain "first"], [mempty], [plain "third"]] + , "Table with alignment row" =: unlines [ "| Numbers | Text | More |" , "| | | |" -- cgit v1.2.3 From 405c3e9c36837226b0f714f241b115c72f0b8861 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 4 May 2016 23:16:23 +0200 Subject: Org reader: fix spacing after LaTeX-style symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The org-reader was droping space after unescaped LaTeX-style symbol commands: `\ForAll \Auml` resulted in `∀Ä` but should give `∀ Ä` instead. This seems to be because the LaTeX-reader treats the command-terminating space as part of the command. Dropping the trailing space from the symbol-command fixes this issue. --- src/Text/Pandoc/Readers/Org.hs | 12 +++++++----- tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'tests/Tests/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5e98be31d..a7987475a 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -49,7 +49,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Control.Arrow (first) import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks, local) -import Data.Char (isAlphaNum, toLower) +import Data.Char (isAlphaNum, isSpace, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M @@ -1587,8 +1587,8 @@ inlineLaTeX = try $ do state :: ParserState state = def{ stateOptions = def{ readerParseRaw = True }} - texMathToPandoc inp = (maybeRight $ readTeX inp) >>= - writePandoc DisplayInline + texMathToPandoc :: String -> Maybe [Inline] + texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just @@ -1598,9 +1598,11 @@ inlineLaTeXCommand = try $ do rest <- getInput case runParser rawLaTeXInline def "source" rest of Right (RawInline _ cs) -> do - let len = length cs + -- drop any trailing whitespace, those should not be part of the command + let cmdNoSpc = takeWhile (not . isSpace) $ cs + let len = length cmdNoSpc count len anyChar - return cs + return cmdNoSpc _ -> mzero smart :: OrgParser (F Inlines) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index bb9b37d13..6112055ba 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -328,6 +328,10 @@ tests = "\\copy" =?> para "©" + , "MathML symbols, space separated" =: + "\\ForAll \\Auml" =?> + para "∀ Ä" + , "LaTeX citation" =: "\\cite{Coffee}" =?> let citation = Citation -- cgit v1.2.3 From 10a809f1260945b61cae6aa8912399ad83051552 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 9 May 2016 17:50:27 +0200 Subject: Org reader: fix inline-LaTeX regression The last fix for whitespace handling of inline LaTeX commands was incorrect, preventing correct recognition of inline LaTeX commands which contain spaces. This fix ensures that only trailing whitespace is cut off. --- src/Text/Pandoc/Readers/Org.hs | 13 +++++++++---- tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'tests/Tests/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index db1e70ea0..5a50a8f34 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1581,8 +1581,8 @@ inlineLaTeX = try $ do parseAsMathMLSym :: String -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) - -- dropWhileEnd would be nice here, but it's not available before base 4.5 - where clean = reverse . dropWhile (`elem` ("{}" :: String)) . reverse . drop 1 + -- drop initial backslash and any trailing "{}" + where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 state :: ParserState state = def{ stateOptions = def{ readerParseRaw = True }} @@ -1598,13 +1598,18 @@ inlineLaTeXCommand = try $ do rest <- getInput case runParser rawLaTeXInline def "source" rest of Right (RawInline _ cs) -> do - -- drop any trailing whitespace, those should not be part of the command - let cmdNoSpc = takeWhile (not . isSpace) $ cs + -- drop any trailing whitespace, those are not be part of the command as + -- far as org mode is concerned. + let cmdNoSpc = dropWhileEnd isSpace cs let len = length cmdNoSpc count len anyChar return cmdNoSpc _ -> mzero +-- Taken from Data.OldList. +dropWhileEnd :: (a -> Bool) -> [a] -> [a] +dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] + smart :: OrgParser (F Inlines) smart = do getOption readerSmart >>= guard diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 3fab92e53..17682fb32 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -308,6 +308,10 @@ tests = "\\textit{Emphasised}" =?> para (emph "Emphasised") + , "Inline LaTeX command with spaces" =: + "\\emph{Emphasis mine}" =?> + para (emph "Emphasis mine") + , "Inline LaTeX math symbol" =: "\\tau" =?> para (emph "τ") -- cgit v1.2.3