diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-03-04 10:12:16 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-03-04 10:12:16 -0800 |
commit | b1e6ea80fde0c3a21336747cd34a3ab0ddfe3cfe (patch) | |
tree | c699699b2e44d6d37bbaee6ebfbd00367e495596 | |
parent | 4d0bf3c5d685cbee3b13f562503a572af803ab95 (diff) | |
parent | 8909229671fb41101bc12b7ed3e1ccbdcdfd631a (diff) | |
download | pandoc-b1e6ea80fde0c3a21336747cd34a3ab0ddfe3cfe.tar.gz |
Merge pull request #1179 from tarleb/org
Add a simple Emacs Org-mode reader
-rw-r--r-- | README | 33 | ||||
-rw-r--r-- | pandoc.cabal | 2 | ||||
-rw-r--r-- | pandoc.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 552 | ||||
-rw-r--r-- | tests/Tests/Readers/Org.hs | 533 | ||||
-rw-r--r-- | tests/test-pandoc.hs | 2 |
7 files changed, 1110 insertions, 16 deletions
@@ -13,14 +13,15 @@ Description Pandoc is a [Haskell] library for converting from one markup format to another, and a command-line tool that uses this library. It can read [markdown] and (subsets of) [Textile], [reStructuredText], [HTML], -[LaTeX], [MediaWiki markup], [Haddock markup], [OPML], and [DocBook]; and -it can write plain text, [markdown], [reStructuredText], [XHTML], [HTML 5], -[LaTeX] (including [beamer] slide shows), [ConTeXt], [RTF], [OPML], [DocBook], -[OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki markup], -[EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, [Emacs -Org-Mode], [AsciiDoc], and [Slidy], [Slideous], [DZSlides], [reveal.js] -or [S5] HTML slide shows. It can also produce [PDF] output on systems -where LaTeX is installed. +[LaTeX], [MediaWiki markup], [Haddock markup], [OPML], [Emacs Org-mode] +and [DocBook]; and it can write plain text, [markdown], +[reStructuredText], [XHTML], [HTML 5], [LaTeX] (including [beamer] slide +shows), [ConTeXt], [RTF], [OPML], [DocBook], [OpenDocument], [ODT], +[Word docx], [GNU Texinfo], [MediaWiki markup], [EPUB] (v2 or v3), +[FictionBook2], [Textile], [groff man] pages, [Emacs Org-Mode], +[AsciiDoc], and [Slidy], [Slideous], [DZSlides], [reveal.js] or [S5] +HTML slide shows. It can also produce [PDF] output on systems where +LaTeX is installed. Pandoc's enhanced version of markdown includes syntax for footnotes, tables, flexible ordered lists, definition lists, fenced code blocks, @@ -143,14 +144,14 @@ General options `markdown_phpextra` (PHP Markdown Extra extended markdown), `markdown_github` (github extended markdown), `textile` (Textile), `rst` (reStructuredText), `html` (HTML), - `docbook` (DocBook), `opml` (OPML), `mediawiki` (MediaWiki markup), - `haddock` (Haddock markup), or `latex` (LaTeX). - If `+lhs` is appended to `markdown`, `rst`, `latex`, or `html`, - the input will be treated as literate Haskell source: - see [Literate Haskell support](#literate-haskell-support), below. - Markdown syntax extensions can be individually enabled or disabled - by appending `+EXTENSION` or `-EXTENSION` to the format name. - So, for example, `markdown_strict+footnotes+definition_lists` + `docbook` (DocBook), `opml` (OPML), `org` (Emacs Org-mode), + `mediawiki` (MediaWiki markup), `haddock` (Haddock markup), or + latex` (LaTeX). If `+lhs` is appended to `markdown`, `rst`, + `latex`, or `html`, the input will be treated as literate Haskell + source: see [Literate Haskell support](#literate-haskell-support), + below. Markdown syntax extensions can be individually enabled or + disabled by appending `+EXTENSION` or `-EXTENSION` to the format + name. So, for example, `markdown_strict+footnotes+definition_lists` is strict markdown with footnotes and definition lists enabled, and `markdown-pipe_tables+hard_line_breaks` is pandoc's markdown without pipe tables and with hard line breaks. See [Pandoc's diff --git a/pandoc.cabal b/pandoc.cabal index bbf963672..ccd23e551 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -263,6 +263,7 @@ Library Text.Pandoc.Readers.Markdown, Text.Pandoc.Readers.MediaWiki, Text.Pandoc.Readers.RST, + Text.Pandoc.Readers.Org, Text.Pandoc.Readers.DocBook, Text.Pandoc.Readers.OPML, Text.Pandoc.Readers.TeXMath, @@ -381,6 +382,7 @@ Test-Suite test-pandoc Tests.Walk Tests.Readers.LaTeX Tests.Readers.Markdown + Tests.Readers.Org Tests.Readers.RST Tests.Writers.Native Tests.Writers.ConTeXt @@ -834,6 +834,7 @@ defaultReaderName fallback (x:xs) = ".latex" -> "latex" ".ltx" -> "latex" ".rst" -> "rst" + ".org" -> "org" ".lhs" -> "markdown+lhs" ".db" -> "docbook" ".opml" -> "opml" diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 3ae81db00..e511ed861 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -65,6 +65,7 @@ module Text.Pandoc , readMarkdown , readMediaWiki , readRST + , readOrg , readLaTeX , readHtml , readTextile @@ -115,6 +116,7 @@ import Text.Pandoc.JSON import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST +import Text.Pandoc.Readers.Org import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.OPML import Text.Pandoc.Readers.LaTeX @@ -201,6 +203,7 @@ readers = [ ("native" , \_ s -> return $ readNative s) ,("mediawiki" , \o s -> return $ readMediaWiki o s) ,("docbook" , \o s -> return $ readDocBook o s) ,("opml" , \o s -> return $ readOPML o s) + ,("org" , \o s -> return $ readOrg o s) ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs ,("html" , \o s -> return $ readHtml o s) ,("latex" , \o s -> return $ readLaTeX o s) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs new file mode 100644 index 000000000..5dc250f04 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org.hs @@ -0,0 +1,552 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org + Copyright : Copyright (C) 2014 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de> + +Conversion of Org-Mode to 'Pandoc' document. +-} +module Text.Pandoc.Readers.Org ( readOrg ) where + +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (orderedListMarker) +import Text.Pandoc.Shared (compactify') + +import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) +import Control.Monad (guard, mzero) +import Data.Char (toLower) +import Data.List (foldl') +import Data.Maybe (listToMaybe, fromMaybe) +import Data.Monoid (mconcat, mempty, mappend) + +-- | Parse org-mode string and return a Pandoc document. +readOrg :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n") + +type OrgParser = Parser [Char] ParserState + +parseOrg:: OrgParser Pandoc +parseOrg = do + blocks' <- B.toList <$> parseBlocks + st <- getState + let meta = stateMeta st + return $ Pandoc meta $ filter (/= Null) blocks' + +-- +-- parsing blocks +-- + +parseBlocks :: OrgParser Blocks +parseBlocks = mconcat <$> manyTill block eof + +block :: OrgParser Blocks +block = choice [ mempty <$ blanklines + , orgBlock + , example + , drawer + , specialLine + , header + , hline + , list + , table + , paraOrPlain + ] <?> "block" + +-- +-- Org Blocks (#+BEGIN_... / #+END_...) +-- + +orgBlock :: OrgParser Blocks +orgBlock = try $ do + (indent, blockType, args) <- blockHeader + blockStr <- rawBlockContent indent blockType + let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] + case blockType of + "comment" -> return mempty + "src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr + _ -> B.divWith ("", [blockType], []) + <$> (parseFromString parseBlocks blockStr) + +blockHeader :: OrgParser (Int, String, [String]) +blockHeader = (,,) <$> blockIndent + <*> blockType + <*> (skipSpaces *> blockArgs) + where blockIndent = length <$> many spaceChar + blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) + blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline + +rawBlockContent :: Int -> String -> OrgParser String +rawBlockContent indent blockType = + unlines . map commaEscaped <$> manyTill indentedLine blockEnder + where + indentedLine = try $ choice [ blankline *> pure "\n" + , indentWith indent *> anyLine + ] + blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + +-- indent by specified number of spaces (or equiv. tabs) +indentWith :: Int -> OrgParser String +indentWith num = do + tabStop <- getOption readerTabStop + if (num < tabStop) + then count num (char ' ') + else choice [ try (count num (char ' ')) + , try (char '\t' >> count (num - tabStop) (char ' ')) ] + +translateLang :: String -> String +translateLang "sh" = "bash" +translateLang cs = cs + +commaEscaped :: String -> String +commaEscaped (',':cs@('*':_)) = cs +commaEscaped (',':cs@('#':'+':_)) = cs +commaEscaped cs = cs + +example :: OrgParser Blocks +example = try $ + B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine + +exampleLine :: OrgParser String +exampleLine = try $ string ": " *> anyLine + +-- Drawers for properties or a logbook +drawer :: OrgParser Blocks +drawer = try $ do + drawerStart + manyTill drawerLine (try drawerEnd) + return mempty + +drawerStart :: OrgParser String +drawerStart = try $ + skipSpaces *> drawerName <* skipSpaces <* newline + where drawerName = try $ char ':' *> validDrawerName <* char ':' + validDrawerName = stringAnyCase "PROPERTIES" + <|> stringAnyCase "LOGBOOK" + +drawerLine :: OrgParser String +drawerLine = try $ anyLine + +drawerEnd :: OrgParser String +drawerEnd = try $ + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + + +-- Comments, Options and Metadata +specialLine :: OrgParser Blocks +specialLine = try $ metaLine <|> commentLine + +metaLine :: OrgParser Blocks +metaLine = try $ metaLineStart *> declarationLine + +commentLine :: OrgParser Blocks +commentLine = try $ commentLineStart *> anyLine *> pure mempty + +-- The order, in which blocks are tried, makes sure that we're not looking at +-- the beginning of a block, so we don't need to check for it +metaLineStart :: OrgParser String +metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" + +commentLineStart :: OrgParser String +commentLineStart = try $ mappend <$> many spaceChar <*> string "# " + +declarationLine :: OrgParser Blocks +declarationLine = try $ do + meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta + updateState $ \st -> st { stateMeta = stateMeta st <> meta' } + return mempty + +metaValue :: OrgParser MetaValue +metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine + +metaKey :: OrgParser [Char] +metaKey = map toLower <$> many1 (noneOf ": \n\r") + <* char ':' + <* skipSpaces + +-- | Headers +header :: OrgParser Blocks +header = try $ + B.header <$> headerStart + <*> (trimInlines <$> restOfLine) + +headerStart :: OrgParser Int +headerStart = try $ + (length <$> many1 (char '*')) <* many1 (char ' ') + +-- Horizontal Line (five dashes or more) +hline :: OrgParser Blocks +hline = try $ do + skipSpaces + string "-----" + many (char '-') + skipSpaces + newline + return B.horizontalRule + +-- +-- Tables +-- + +data OrgTableRow = OrgContentRow [Blocks] + | OrgAlignRow [Alignment] + | OrgHlineRow + deriving (Eq, Show) + +type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]]) + +table :: OrgParser Blocks +table = try $ do + lookAhead tableStart + (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows + return $ B.table "" (zip aligns widths) heads lns + +tableStart :: OrgParser Char +tableStart = try $ skipSpaces *> char '|' + +tableRows :: OrgParser [OrgTableRow] +tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) + +tableContentRow :: OrgParser OrgTableRow +tableContentRow = try $ + OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) + +tableContentCell :: OrgParser Blocks +tableContentCell = try $ + B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell) + +endOfCell :: OrgParser Char +-- endOfCell = char '|' <|> newline +endOfCell = try $ char '|' <|> lookAhead newline + +tableAlignRow :: OrgParser OrgTableRow +tableAlignRow = try $ + OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline) + +tableAlignCell :: OrgParser Alignment +tableAlignCell = + choice [ try $ emptyCell *> return (AlignDefault) + , try $ skipSpaces + *> char '<' + *> tableAlignFromChar + <* many digit + <* char '>' + <* emptyCell + ] <?> "alignment info" + where emptyCell = try $ skipSpaces *> endOfCell + +tableAlignFromChar :: OrgParser Alignment +tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] + +tableHline :: OrgParser OrgTableRow +tableHline = try $ + OrgHlineRow <$ (tableStart *> char '-' *> anyLine) + +tableContent :: [OrgTableRow] + -> OrgTableContent +tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty) + +normalizeTable :: OrgTableContent + -> OrgTableContent +normalizeTable (cols, aligns, widths, heads, lns) = + let aligns' = fillColumns aligns AlignDefault + widths' = fillColumns widths 0.0 + heads' = if heads == mempty + then heads + else fillColumns heads (B.plain mempty) + lns' = map (flip fillColumns (B.plain mempty)) lns + fillColumns base padding = take cols $ base ++ repeat padding + in (cols, aligns', widths', heads', lns') + + +-- One or more horizontal rules after the first content line mark the previous +-- line as a header. All other horizontal lines are discarded. +rowToContent :: OrgTableRow + -> OrgTableContent + -> OrgTableContent +rowToContent OrgHlineRow = maybeBodyToHeader +rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs +rowToContent (OrgAlignRow as) = setLongestRow as . setAligns as + +setLongestRow :: [a] + -> OrgTableContent + -> OrgTableContent +setLongestRow r (cols, aligns, widths, heads, lns) = + (max cols (length r), aligns, widths, heads, lns) + +maybeBodyToHeader :: OrgTableContent + -> OrgTableContent +maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, []) +maybeBodyToHeader content = content + +appendToBody :: [Blocks] + -> OrgTableContent + -> OrgTableContent +appendToBody r (cols, aligns, widths, heads, lns) = + (cols, aligns, widths, heads, lns ++ [r]) + +setAligns :: [Alignment] + -> OrgTableContent + -> OrgTableContent +setAligns aligns (cols, _, widths, heads, lns) = + (cols, aligns, widths, heads, lns) + +-- Paragraphs or Plain text +paraOrPlain :: OrgParser Blocks +paraOrPlain = try $ + trimInlines . mconcat + <$> many1 inline + <**> option B.plain + (try $ newline *> pure B.para) + +restOfLine :: OrgParser Inlines +restOfLine = mconcat <$> manyTill inline newline + + +-- +-- list blocks +-- + +list :: OrgParser Blocks +list = choice [ bulletList, orderedList ] <?> "list" + +bulletList :: OrgParser Blocks +bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) + +orderedList :: OrgParser Blocks +orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) + +genericListStart :: OrgParser String + -> OrgParser Int +genericListStart listMarker = try $ + (+) <$> (length <$> many spaceChar) + <*> (length <$> listMarker <* many1 spaceChar) + +-- parses bullet list start and returns its length (excl. following whitespace) +bulletListStart :: OrgParser Int +bulletListStart = genericListStart bulletListMarker + where bulletListMarker = pure <$> oneOf "*-+" + +orderedListStart :: OrgParser Int +orderedListStart = genericListStart orderedListMarker + -- Ordered list markers allowed in org-mode + where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") + +listItem :: OrgParser Int + -> OrgParser Blocks +listItem start = try $ do + (markerLength, first) <- try (start >>= rawListItem) + rest <- many (listContinuation markerLength) + parseFromString parseBlocks $ concat (first:rest) + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem :: Int + -> OrgParser (Int, String) +rawListItem markerLength = try $ do + firstLine <- anyLine + restLines <- many (listLine markerLength) + return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + +-- continuation of a list item - indented and separated by blankline or endline. +-- Note: nested lists are parsed as continuations. +listContinuation :: Int + -> OrgParser String +listContinuation markerLength = try $ + mappend <$> many blankline + <*> (concat <$> many1 (listLine markerLength)) + +-- parse a line of a list item +listLine :: Int + -> OrgParser String +listLine markerLength = try $ + indentWith markerLength *> anyLine + <**> pure (++ "\n") + + +-- +-- inline +-- + +inline :: OrgParser Inlines +inline = choice inlineParsers <?> "inline" + where inlineParsers = [ whitespace + , link + , str + , endline + , emph + , strong + , strikeout + , underline + , code + , verbatim + , subscript + , superscript + , symbol + ] + +-- treat these as potentially non-text when parsing inline: +specialChars :: [Char] +specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" + + +whitespace :: OrgParser Inlines +whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" + +str :: OrgParser Inlines +str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") + <* updateLastStrPos + +-- an endline character that can be treated as a space, not a structural break +endline :: OrgParser Inlines +endline = try $ do + newline + notFollowedBy blankline + notFollowedBy' exampleLine + notFollowedBy' hline + notFollowedBy' tableStart + notFollowedBy' drawerStart + notFollowedBy' headerStart + notFollowedBy' metaLineStart + notFollowedBy' commentLineStart + notFollowedBy' bulletListStart + notFollowedBy' orderedListStart + return B.space + +link :: OrgParser Inlines +link = explicitLink <|> selfLink <?> "link" + +explicitLink :: OrgParser Inlines +explicitLink = try $ do + char '[' + src <- enclosedRaw (char '[') (char ']') + title <- enclosedInlines (char '[') (char ']') + char ']' + return $ B.link src "" title + +selfLink :: OrgParser Inlines +selfLink = try $ do + src <- enclosedRaw (string "[[") (string "]]") + return $ B.link src "" (B.str src) + +emph :: OrgParser Inlines +emph = B.emph <$> inlinesEnclosedBy '/' + +strong :: OrgParser Inlines +strong = B.strong <$> inlinesEnclosedBy '*' + +strikeout :: OrgParser Inlines +strikeout = B.strikeout <$> inlinesEnclosedBy '+' + +-- There is no underline, so we use strong instead. +underline :: OrgParser Inlines +underline = B.strong <$> inlinesEnclosedBy '_' + +code :: OrgParser Inlines +code = B.code <$> rawEnclosedBy '=' + +verbatim :: OrgParser Inlines +verbatim = B.rawInline "" <$> rawEnclosedBy '~' + +subscript :: OrgParser Inlines +subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) + +superscript :: OrgParser Inlines +superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces) + +maybeGroupedByBraces :: OrgParser Inlines +maybeGroupedByBraces = try $ + choice [ try $ enclosedInlines (char '{') (char '}') + , B.str . (:"") <$> anyChar + ] + +symbol :: OrgParser Inlines +symbol = B.str . (: "") <$> oneOf specialChars + +enclosedInlines :: OrgParser a + -> OrgParser b + -> OrgParser Inlines +enclosedInlines start end = try $ + trimInlines . mconcat <$> enclosed start end inline + +-- FIXME: This is a hack +inlinesEnclosedBy :: Char + -> OrgParser Inlines +inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) + (atEnd $ char c) + +enclosedRaw :: OrgParser a + -> OrgParser b + -> OrgParser String +enclosedRaw start end = try $ + start *> (onSingleLine <|> spanningTwoLines) + where onSingleLine = try $ many1Till (noneOf "\n\r") end + spanningTwoLines = try $ + anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine + +rawEnclosedBy :: Char + -> OrgParser String +rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) + +-- succeeds only if we're not right after a str (ie. in middle of word) +atStart :: OrgParser a -> OrgParser a +atStart p = do + pos <- getPosition + st <- getState + guard $ stateLastStrPos st /= Just pos + p + +-- | succeeds only if we're at the end of a word +atEnd :: OrgParser a -> OrgParser a +atEnd p = try $ p <* lookingAtEndOfWord + where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars + +postWordChars :: [Char] +postWordChars = "\t\n\r !\"'),-.:?}" + +-- FIXME: These functions are hacks and should be replaced +endsOnThisOrNextLine :: Char + -> OrgParser () +endsOnThisOrNextLine c = do + inp <- getInput + let doOtherwise = \rest -> endsOnThisLine rest c (const mzero) + endsOnThisLine inp c doOtherwise + +endsOnThisLine :: [Char] + -> Char + -> ([Char] -> OrgParser ()) + -> OrgParser () +endsOnThisLine input c doOnOtherLines = do + case break (`elem` c:"\n") input of + (_,'\n':rest) -> doOnOtherLines rest + (_,_:rest@(n:_)) -> if n `elem` postWordChars + then return () + else endsOnThisLine rest c doOnOtherLines + _ -> mzero + diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs new file mode 100644 index 000000000..8c5982302 --- /dev/null +++ b/tests/Tests/Readers/Org.hs @@ -0,0 +1,533 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Readers.Org (tests) where + +import Text.Pandoc.Definition +import Test.Framework +import Tests.Helpers +import Tests.Arbitrary() +import Text.Pandoc.Builder +import Text.Pandoc +import Data.List (intersperse) +import Data.Monoid (mempty, mconcat) + +org :: String -> Pandoc +org = readOrg def + +infix 4 =: +(=:) :: ToString c + => String -> (String, c) -> Test +(=:) = test org + +spcSep :: [Inlines] -> Inlines +spcSep = mconcat . intersperse space + +simpleTable' :: Int + -> [Blocks] + -> [[Blocks]] + -> Blocks +simpleTable' n = table "" (take n $ repeat (AlignDefault, 0.0)) + +tests :: [Test] +tests = + [ testGroup "Inlines" $ + [ "Plain String" =: + "Hello, World" =?> + para (spcSep [ "Hello,", "World" ]) + + , "Emphasis" =: + "/Planet Punk/" =?> + para (emph . spcSep $ ["Planet", "Punk"]) + + , "Strong" =: + "*Cider*" =?> + para (strong "Cider") + + , "Strikeout" =: + "+Kill Bill+" =?> + para (strikeout . spcSep $ [ "Kill", "Bill" ]) + + , "Code" =: + "=Robot.rock()=" =?> + para (code "Robot.rock()") + + , "Verbatim" =: + "~word for word~" =?> + para (rawInline "" "word for word") + + , "Symbol" =: + "A * symbol" =?> + para (str "A" <> space <> str "*" <> space <> "symbol") + + , "Superscript single char" =: + "2^n" =?> + para (str "2" <> superscript "n") + + , "Superscript multi char" =: + "2^{n-1}" =?> + para (str "2" <> superscript "n-1") + + , "Subscript single char" =: + "a_n" =?> + para (str "a" <> subscript "n") + + , "Subscript multi char" =: + "a_{n+1}" =?> + para (str "a" <> subscript "n+1") + + , "Markup-chars not occuring on word break are symbols" =: + unlines [ "this+that+ +so+on" + , "seven*eight* nine*" + , "+not+funny+" + ] =?> + para (spcSep [ "this+that+", "+so+on" + , "seven*eight*", "nine*" + , strikeout "not+funny" + ]) + + , "Markup may not span more than two lines" =: + unlines [ "/this *is", "not*", "emph/" ] =?> + para (spcSep [ "/this" + , (strong ("is" <> space <> "not")) + , "emph/" ]) + + , "Explicit link" =: + "[[http://zeitlens.com/][pseudo-random nonsense]]" =?> + (para $ link "http://zeitlens.com/" "" + ("pseudo-random" <> space <> "nonsense")) + + , "Self-link" =: + "[[http://zeitlens.com/]]" =?> + (para $ link "http://zeitlens.com/" "" "http://zeitlens.com/") + ] + + , testGroup "Meta Information" $ + [ "Comment" =: + "# Nothing to see here" =?> + (mempty::Blocks) + + , "Not a comment" =: + "#-tag" =?> + para "#-tag" + + , "Comment surrounded by Text" =: + unlines [ "Before" + , "# Comment" + , "After" + ] =?> + mconcat [ para "Before" + , para "After" + ] + + , "Title" =: + "#+TITLE: Hello, World" =?> + let titleInline = toList $ "Hello," <> space <> "World" + meta = setMeta "title" (MetaInlines titleInline) $ nullMeta + in Pandoc meta mempty + + , "Author" =: + "#+author: Albert /Emacs-Fanboy/ Krewinkel" =?> + let author = toList . spcSep $ [ "Albert", emph "Emacs-Fanboy", "Krewinkel" ] + meta = setMeta "author" (MetaInlines author) $ nullMeta + in Pandoc meta mempty + + , "Date" =: + "#+Date: Feb. *28*, 2014" =?> + let date = toList . spcSep $ [ "Feb.", (strong "28") <> ",", "2014" ] + meta = setMeta "date" (MetaInlines date) $ nullMeta + in Pandoc meta mempty + + , "Description" =: + "#+DESCRIPTION: Explanatory text" =?> + let description = toList . spcSep $ [ "Explanatory", "text" ] + meta = setMeta "description" (MetaInlines description) $ nullMeta + in Pandoc meta mempty + + , "Properties drawer" =: + unlines [ " :PROPERTIES:" + , " :setting: foo" + , " :END:" + ] =?> + (mempty::Blocks) + + , "Logbook drawer" =: + unlines [ " :LogBook:" + , " - State \"DONE\" from \"TODO\" [2014-03-03 Mon 11:00]" + , " :END:" + ] =?> + (mempty::Blocks) + + , "Drawer surrounded by text" =: + unlines [ "Before" + , ":PROPERTIES:" + , ":END:" + , "After" + ] =?> + para "Before" <> para "After" + + , "Drawer start is the only text in first line of a drawer" =: + unlines [ " :LOGBOOK: foo" + , " :END:" + ] =?> + para (spcSep [ ":LOGBOOK:", "foo", ":END:" ]) + + , "Drawers with unknown names are just text" =: + unlines [ ":FOO:" + , ":END:" + ] =?> + para (":FOO:" <> space <> ":END:") + ] + + , testGroup "Basic Blocks" $ + [ "Paragraph" =: + "Paragraph\n" =?> + para "Paragraph" + + , "First Level Header" =: + "* Headline\n" =?> + header 1 "Headline" + + , "Third Level Header" =: + "*** Third Level Headline\n" =?> + header 3 ("Third" <> space <> + "Level" <> space <> + "Headline") + + , "Compact Headers with Paragraph" =: + unlines [ "* First Level" + , "** Second Level" + , " Text" + ] =?> + mconcat [ header 1 ("First" <> space <> "Level") + , header 2 ("Second" <> space <> "Level") + , para "Text" + ] + + , "Separated Headers with Paragraph" =: + unlines [ "* First Level" + , "" + , "** Second Level" + , "" + , " Text" + ] =?> + mconcat [ header 1 ("First" <> space <> "Level") + , header 2 ("Second" <> space <> "Level") + , para "Text" + ] + + , "Headers not preceded by a blank line" =: + unlines [ "** eat dinner" + , "Spaghetti and meatballs tonight." + , "** walk dog" + ] =?> + mconcat [ header 2 ("eat" <> space <> "dinner") + , para $ spcSep [ "Spaghetti", "and", "meatballs", "tonight." ] + , header 2 ("walk" <> space <> "dog") + ] + + , "Paragraph starting with an asterisk" =: + "*five" =?> + para "*five" + + , "Paragraph containing asterisk at beginning of line" =: + unlines [ "lucky" + , "*star" + ] =?> + para ("lucky" <> space <> "*star") + + , "Example block" =: + unlines [ ": echo hello" + , ": echo dear tester" + ] =?> + codeBlockWith ("", ["example"], []) "echo hello\necho dear tester\n" + + , "Example block surrounded by text" =: + unlines [ "Greetings" + , ": echo hello" + , ": echo dear tester" + , "Bye" + ] =?> + mconcat [ para "Greetings" + , codeBlockWith ("", ["example"], []) + "echo hello\necho dear tester\n" + , para "Bye" + ] + + , "Horizontal Rule" =: + unlines [ "before" + , "-----" + , "after" + ] =?> + mconcat [ para "before" + , horizontalRule + , para "after" + ] + + , "Not a Horizontal Rule" =: + "----- five dashes" =?> + (para $ spcSep [ "-----", "five", "dashes" ]) + + , "Comment Block" =: + unlines [ "#+BEGIN_COMMENT" + , "stuff" + , "bla" + , "#+END_COMMENT"] =?> + (mempty::Blocks) + + , "Source Block in Text" =: + unlines [ "Low German greeting" + , " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] + , codeBlockWith attr' code' + ] + + , "Source Block" =: + unlines [ " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in codeBlockWith attr' code' + ] + + , testGroup "Lists" $ + [ "Simple Bullet Lists" =: + ("- Item1\n" ++ + "- Item2\n") =?> + bulletList [ plain "Item1" + , plain "Item2" + ] + + , "Indented Bullet Lists" =: + (" - Item1\n" ++ + " - Item2\n") =?> + bulletList [ plain "Item1" + , plain "Item2" + ] + + , "Multi-line Bullet Lists" =: + ("- *Fat\n" ++ + " Tony*\n" ++ + "- /Sideshow\n" ++ + " Bob/") =?> + bulletList [ plain $ strong ("Fat" <> space <> "Tony") + , plain $ emph ("Sideshow" <> space <> "Bob") + ] + + , "Nested Bullet Lists" =: + ("- Discovery\n" ++ + " + One More Time\n" ++ + " + Harder, Better, Faster, Stronger\n" ++ + "- Homework\n" ++ + " + Around the World\n"++ + "- Human After All\n" ++ + " + Technologic\n" ++ + " + Robot Rock\n") =?> + bulletList [ mconcat + [ para "Discovery" + , bulletList [ plain ("One" <> space <> + "More" <> space <> + "Time") + , plain ("Harder," <> space <> + "Better," <> space <> + "Faster," <> space <> + "Stronger") + ] + ] + , mconcat + [ para "Homework" + , bulletList [ plain ("Around" <> space <> + "the" <> space <> + "World") + ] + ] + , mconcat + [ para ("Human" <> space <> "After" <> space <> "All") + , bulletList [ plain "Technologic" + , plain ("Robot" <> space <> "Rock") + ] + ] + ] + + , "Simple Ordered List" =: + ("1. Item1\n" ++ + "2. Item2\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ plain "Item1" + , plain "Item2" + ] + in orderedListWith listStyle listStructure + + , "Simple Ordered List with Parens" =: + ("1) Item1\n" ++ + "2) Item2\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ plain "Item1" + , plain "Item2" + ] + in orderedListWith listStyle listStructure + + , "Indented Ordered List" =: + (" 1. Item1\n" ++ + " 2. Item2\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ plain "Item1" + , plain "Item2" + ] + in orderedListWith listStyle listStructure + + , "Nested Ordered Lists" =: + ("1. One\n" ++ + " 1. One-One\n" ++ + " 2. One-Two\n" ++ + "2. Two\n" ++ + " 1. Two-One\n"++ + " 2. Two-Two\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ mconcat + [ para "One" + , orderedList [ plain "One-One" + , plain "One-Two" + ] + ] + , mconcat + [ para "Two" + , orderedList [ plain "Two-One" + , plain "Two-Two" + ] + ] + ] + in orderedListWith listStyle listStructure + + , "Ordered List in Bullet List" =: + ("- Emacs\n" ++ + " 1. Org\n") =?> + bulletList [ (para "Emacs") <> + (orderedList [ plain "Org"]) + ] + + , "Bullet List in Ordered List" =: + ("1. GNU\n" ++ + " - Freedom\n") =?> + orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + ] + + , testGroup "Tables" + [ "Single cell table" =: + "|Test|" =?> + simpleTable' 1 mempty [[plain "Test"]] + + , "Multi cell table" =: + "| One | Two |" =?> + simpleTable' 2 mempty [ [ plain "One", plain "Two" ] ] + + , "Multi line table" =: + unlines [ "| One |" + , "| Two |" + , "| Three |" + ] =?> + simpleTable' 1 mempty + [ [ plain "One" ] + , [ plain "Two" ] + , [ plain "Three" ] + ] + + , "Empty table" =: + "||" =?> + simpleTable' 1 mempty mempty + + , "Glider Table" =: + unlines [ "| 1 | 0 | 0 |" + , "| 0 | 1 | 1 |" + , "| 1 | 1 | 0 |" + ] =?> + simpleTable' 3 mempty + [ [ plain "1", plain "0", plain "0" ] + , [ plain "0", plain "1", plain "1" ] + , [ plain "1", plain "1", plain "0" ] + ] + + , "Table between Paragraphs" =: + unlines [ "Before" + , "| One | Two |" + , "After" + ] =?> + mconcat [ para "Before" + , simpleTable' 2 mempty [ [ plain "One", plain "Two" ] ] + , para "After" + ] + + , "Table with Header" =: + unlines [ "| Species | Status |" + , "|--------------+--------------|" + , "| cervisiae | domesticated |" + , "| paradoxus | wild |" + ] =?> + simpleTable [ plain "Species", plain "Status" ] + [ [ plain "cervisiae", plain "domesticated" ] + , [ plain "paradoxus", plain "wild" ] + ] + + , "Table with final hline" =: + unlines [ "| cervisiae | domesticated |" + , "| paradoxus | wild |" + , "|--------------+--------------|" + ] =?> + simpleTable' 2 mempty + [ [ plain "cervisiae", plain "domesticated" ] + , [ plain "paradoxus", plain "wild" ] + ] + + , "Table in a box" =: + unlines [ "|---------|---------|" + , "| static | Haskell |" + , "| dynamic | Lisp |" + , "|---------+---------|" + ] =?> + simpleTable' 2 mempty + [ [ plain "static", plain "Haskell" ] + , [ plain "dynamic", plain "Lisp" ] + ] + + , "Table with alignment row" =: + unlines [ "| Numbers | Text | More |" + , "| <c> | <r> | |" + , "| 1 | One | foo |" + , "| 2 | Two | bar |" + ] =?> + table "" (zip [AlignCenter, AlignRight, AlignDefault] [0, 0, 0]) + [] + [ [ plain "Numbers", plain "Text", plain "More" ] + , [ plain "1" , plain "One" , plain "foo" ] + , [ plain "2" , plain "Two" , plain "bar" ] + ] + + , "Pipe within text doesn't start a table" =: + "Ceci n'est pas une | pipe " =?> + para (spcSep [ "Ceci", "n'est", "pas", "une", "|", "pipe" ]) + + , "Missing pipe at end of row" =: + "|incomplete-but-valid" =?> + simpleTable' 1 mempty [ [ plain "incomplete-but-valid" ] ] + + , "Table with differing row lengths" =: + unlines [ "| Numbers | Text " + , "|-" + , "| <c> | <r> |" + , "| 1 | One | foo |" + , "| 2" + ] =?> + table "" (zip [AlignCenter, AlignRight, AlignDefault] [0, 0, 0]) + [ plain "Numbers", plain "Text" , plain mempty ] + [ [ plain "1" , plain "One" , plain "foo" ] + , [ plain "2" , plain mempty , plain mempty ] + ] + ] + ] diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index ae521541a..74f8e5044 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -7,6 +7,7 @@ import GHC.IO.Encoding import qualified Tests.Old import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown +import qualified Tests.Readers.Org import qualified Tests.Readers.RST import qualified Tests.Writers.ConTeXt import qualified Tests.Writers.LaTeX @@ -31,6 +32,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Readers" [ testGroup "LaTeX" Tests.Readers.LaTeX.tests , testGroup "Markdown" Tests.Readers.Markdown.tests + , testGroup "Org" Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests ] ] |