From 24b2ac43b0a8596f7baea10579c95ee75b6e584f Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Tue, 4 Mar 2014 00:33:25 +0100
Subject: Add a simple Emacs Org-mode reader

The basic structure of org-mode documents is recognized; however,
org-mode features like todo markers, tags etc. are not supported yet.
---
 src/Text/Pandoc/Readers/Org.hs | 552 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 552 insertions(+)
 create mode 100644 src/Text/Pandoc/Readers/Org.hs

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
new file mode 100644
index 000000000..5dc250f04
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -0,0 +1,552 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Org
+   Copyright   : Copyright (C) 2014 Albert Krewinkel
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Albert Krewinkel <tarleb@moltkeplatz.de>
+
+Conversion of Org-Mode to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Org ( readOrg ) where
+
+import qualified Text.Pandoc.Builder as B
+import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import           Text.Pandoc.Definition
+import           Text.Pandoc.Options
+import           Text.Pandoc.Parsing hiding (orderedListMarker)
+import           Text.Pandoc.Shared (compactify')
+
+import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
+import           Control.Monad (guard, mzero)
+import           Data.Char (toLower)
+import           Data.List (foldl')
+import           Data.Maybe (listToMaybe, fromMaybe)
+import           Data.Monoid (mconcat, mempty, mappend)
+
+-- | Parse org-mode string and return a Pandoc document.
+readOrg :: ReaderOptions -- ^ Reader options
+        -> String        -- ^ String to parse (assuming @'\n'@ line endings)
+        -> Pandoc
+readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n")
+
+type OrgParser = Parser [Char] ParserState
+
+parseOrg:: OrgParser Pandoc
+parseOrg = do
+  blocks' <- B.toList <$> parseBlocks
+  st <- getState
+  let meta = stateMeta st
+  return $ Pandoc meta $ filter (/= Null) blocks'
+
+--
+-- parsing blocks
+--
+
+parseBlocks :: OrgParser Blocks
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: OrgParser Blocks
+block = choice [ mempty <$ blanklines
+               , orgBlock
+               , example
+               , drawer
+               , specialLine
+               , header
+               , hline
+               , list
+               , table
+               , paraOrPlain
+               ] <?> "block"
+
+--
+-- Org Blocks (#+BEGIN_... / #+END_...)
+--
+
+orgBlock :: OrgParser Blocks
+orgBlock = try $ do
+  (indent, blockType, args) <- blockHeader
+  blockStr <- rawBlockContent indent blockType
+  let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
+  case blockType of
+    "comment" -> return mempty
+    "src"     -> return $ B.codeBlockWith ("", classArgs, []) blockStr
+    _         -> B.divWith ("", [blockType], [])
+                            <$> (parseFromString parseBlocks blockStr)
+
+blockHeader :: OrgParser (Int, String, [String])
+blockHeader = (,,) <$> blockIndent
+                   <*> blockType
+                   <*> (skipSpaces *> blockArgs)
+ where blockIndent = length <$> many spaceChar
+       blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
+       blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline
+
+rawBlockContent :: Int -> String -> OrgParser String
+rawBlockContent indent blockType =
+  unlines . map commaEscaped <$> manyTill indentedLine blockEnder
+ where
+   indentedLine = try $ choice [ blankline         *> pure "\n"
+                               , indentWith indent *> anyLine
+                               ]
+   blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith :: Int -> OrgParser String
+indentWith num = do
+  tabStop <- getOption readerTabStop
+  if (num < tabStop)
+     then count num (char ' ')
+     else choice [ try (count num (char ' '))
+                 , try (char '\t' >> count (num - tabStop) (char ' ')) ]
+
+translateLang :: String -> String
+translateLang "sh" = "bash"
+translateLang cs = cs
+
+commaEscaped :: String -> String
+commaEscaped (',':cs@('*':_))     = cs
+commaEscaped (',':cs@('#':'+':_)) = cs
+commaEscaped cs                   = cs
+
+example :: OrgParser Blocks
+example = try $
+  B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine
+
+exampleLine :: OrgParser String
+exampleLine = try $ string ": " *> anyLine
+
+-- Drawers for properties or a logbook
+drawer :: OrgParser Blocks
+drawer = try $ do
+  drawerStart
+  manyTill drawerLine (try drawerEnd)
+  return mempty
+
+drawerStart :: OrgParser String
+drawerStart = try $
+  skipSpaces *> drawerName <* skipSpaces <* newline
+ where drawerName = try $  char ':' *> validDrawerName <* char ':'
+       validDrawerName =  stringAnyCase "PROPERTIES"
+                          <|> stringAnyCase "LOGBOOK"
+
+drawerLine :: OrgParser String
+drawerLine = try $ anyLine
+
+drawerEnd :: OrgParser String
+drawerEnd = try $
+  skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
+
+
+-- Comments, Options and Metadata
+specialLine :: OrgParser Blocks
+specialLine = try $ metaLine <|> commentLine
+
+metaLine :: OrgParser Blocks
+metaLine = try $ metaLineStart *> declarationLine
+
+commentLine :: OrgParser Blocks
+commentLine = try $ commentLineStart *> anyLine *> pure mempty
+
+-- The order, in which blocks are tried, makes sure that we're not looking at
+-- the beginning of a block, so we don't need to check for it
+metaLineStart :: OrgParser String
+metaLineStart = try $ mappend <$> many spaceChar <*> string "#+"
+
+commentLineStart :: OrgParser String
+commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
+
+declarationLine :: OrgParser Blocks
+declarationLine = try $ do
+  meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
+  updateState $ \st -> st { stateMeta = stateMeta st <> meta' }
+  return mempty
+
+metaValue :: OrgParser MetaValue
+metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine
+
+metaKey :: OrgParser [Char]
+metaKey = map toLower <$> many1 (noneOf ": \n\r")
+                      <*  char ':'
+                      <*  skipSpaces
+
+-- | Headers
+header :: OrgParser Blocks
+header = try $
+  B.header <$> headerStart
+           <*> (trimInlines <$> restOfLine)
+
+headerStart :: OrgParser Int
+headerStart = try $
+  (length <$> many1 (char '*')) <* many1 (char ' ')
+
+-- Horizontal Line (five dashes or more)
+hline :: OrgParser Blocks
+hline = try $ do
+  skipSpaces
+  string "-----"
+  many (char '-')
+  skipSpaces
+  newline
+  return B.horizontalRule
+
+--
+-- Tables
+--
+
+data OrgTableRow = OrgContentRow [Blocks]
+                 | OrgAlignRow [Alignment]
+                 | OrgHlineRow
+ deriving (Eq, Show)
+
+type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]])
+
+table :: OrgParser Blocks
+table = try $ do
+  lookAhead tableStart
+  (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows
+  return $ B.table "" (zip aligns widths) heads lns
+
+tableStart :: OrgParser Char
+tableStart = try $ skipSpaces *> char '|'
+
+tableRows :: OrgParser [OrgTableRow]
+tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
+
+tableContentRow :: OrgParser OrgTableRow
+tableContentRow = try $
+  OrgContentRow <$> (tableStart *> manyTill tableContentCell newline)
+
+tableContentCell :: OrgParser Blocks
+tableContentCell = try $
+  B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell)
+
+endOfCell :: OrgParser Char
+-- endOfCell =  char '|' <|> newline
+endOfCell = try $ char '|' <|> lookAhead newline
+
+tableAlignRow :: OrgParser OrgTableRow
+tableAlignRow = try $
+  OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)
+
+tableAlignCell :: OrgParser Alignment
+tableAlignCell =
+  choice [ try $ emptyCell *> return (AlignDefault)
+         , try $ skipSpaces
+                   *> char '<'
+                   *> tableAlignFromChar
+                   <* many digit
+                   <* char '>'
+                   <* emptyCell
+         ] <?> "alignment info"
+    where emptyCell = try $ skipSpaces *> endOfCell
+
+tableAlignFromChar :: OrgParser Alignment
+tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
+                                  , char 'c' *> return AlignCenter
+                                  , char 'r' *> return AlignRight
+                                  ]
+
+tableHline :: OrgParser OrgTableRow
+tableHline = try $
+  OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
+
+tableContent :: [OrgTableRow]
+             -> OrgTableContent
+tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty)
+
+normalizeTable :: OrgTableContent
+               -> OrgTableContent
+normalizeTable (cols, aligns, widths, heads, lns) =
+  let aligns' = fillColumns aligns AlignDefault
+      widths' = fillColumns widths 0.0
+      heads'  = if heads == mempty
+                then heads
+                else fillColumns heads  (B.plain mempty)
+      lns'    = map (flip fillColumns (B.plain mempty)) lns
+      fillColumns base padding = take cols $ base ++ repeat padding
+  in (cols, aligns', widths', heads', lns')
+
+
+-- One or more horizontal rules after the first content line mark the previous
+-- line as a header.  All other horizontal lines are discarded.
+rowToContent :: OrgTableRow
+             -> OrgTableContent
+             -> OrgTableContent
+rowToContent OrgHlineRow        = maybeBodyToHeader
+rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs
+rowToContent (OrgAlignRow as)   = setLongestRow as . setAligns as
+
+setLongestRow :: [a]
+              -> OrgTableContent
+              -> OrgTableContent
+setLongestRow r (cols, aligns, widths, heads, lns) =
+  (max cols (length r), aligns, widths, heads, lns)
+
+maybeBodyToHeader :: OrgTableContent
+                  -> OrgTableContent
+maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, [])
+maybeBodyToHeader content                          = content
+
+appendToBody :: [Blocks]
+             -> OrgTableContent
+             -> OrgTableContent
+appendToBody r (cols, aligns, widths, heads, lns) =
+  (cols, aligns, widths, heads, lns ++ [r])
+
+setAligns :: [Alignment]
+          -> OrgTableContent
+          -> OrgTableContent
+setAligns aligns (cols, _, widths, heads, lns) =
+   (cols, aligns, widths, heads, lns)
+
+-- Paragraphs or Plain text
+paraOrPlain :: OrgParser Blocks
+paraOrPlain = try $
+  trimInlines . mconcat
+    <$> many1 inline
+    <**> option B.plain
+                (try $ newline *> pure B.para)
+
+restOfLine :: OrgParser Inlines
+restOfLine = mconcat <$> manyTill inline newline
+
+
+--
+-- list blocks
+--
+
+list :: OrgParser Blocks
+list = choice [ bulletList, orderedList ] <?> "list"
+
+bulletList :: OrgParser Blocks
+bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
+
+orderedList :: OrgParser Blocks
+orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
+
+genericListStart :: OrgParser String
+                 -> OrgParser Int
+genericListStart listMarker = try $
+  (+) <$> (length <$> many spaceChar)
+      <*> (length <$> listMarker <* many1 spaceChar)
+
+-- parses bullet list start and returns its length (excl. following whitespace)
+bulletListStart :: OrgParser Int
+bulletListStart = genericListStart bulletListMarker
+  where bulletListMarker = pure <$> oneOf "*-+"
+
+orderedListStart :: OrgParser Int
+orderedListStart = genericListStart orderedListMarker
+  -- Ordered list markers allowed in org-mode
+  where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+
+listItem :: OrgParser Int
+         -> OrgParser Blocks
+listItem start = try $ do
+  (markerLength, first) <- try (start >>= rawListItem)
+  rest <- many (listContinuation markerLength)
+  parseFromString parseBlocks $ concat (first:rest)
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem :: Int
+            -> OrgParser (Int, String)
+rawListItem markerLength = try $ do
+  firstLine <- anyLine
+  restLines <- many (listLine markerLength)
+  return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+
+-- continuation of a list item - indented and separated by blankline or endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Int
+                 -> OrgParser String
+listContinuation markerLength = try $
+  mappend <$> many blankline
+          <*> (concat <$> many1 (listLine markerLength))
+
+-- parse a line of a list item
+listLine :: Int
+         -> OrgParser String
+listLine markerLength = try $
+  indentWith markerLength *> anyLine
+    <**> pure (++ "\n")
+
+
+--
+-- inline
+--
+
+inline :: OrgParser Inlines
+inline = choice inlineParsers <?> "inline"
+  where inlineParsers = [ whitespace
+                        , link
+                        , str
+                        , endline
+                        , emph
+                        , strong
+                        , strikeout
+                        , underline
+                        , code
+                        , verbatim
+                        , subscript
+                        , superscript
+                        , symbol
+                        ]
+
+-- treat these as potentially non-text when parsing inline:
+specialChars :: [Char]
+specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
+
+
+whitespace :: OrgParser Inlines
+whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
+
+str :: OrgParser Inlines
+str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+            <* updateLastStrPos
+
+-- an endline character that can be treated as a space, not a structural break
+endline :: OrgParser Inlines
+endline = try $ do
+  newline
+  notFollowedBy blankline
+  notFollowedBy' exampleLine
+  notFollowedBy' hline
+  notFollowedBy' tableStart
+  notFollowedBy' drawerStart
+  notFollowedBy' headerStart
+  notFollowedBy' metaLineStart
+  notFollowedBy' commentLineStart
+  notFollowedBy' bulletListStart
+  notFollowedBy' orderedListStart
+  return B.space
+
+link :: OrgParser Inlines
+link = explicitLink <|> selfLink <?> "link"
+
+explicitLink :: OrgParser Inlines
+explicitLink = try $ do
+  char '['
+  src   <- enclosedRaw     (char '[') (char ']')
+  title <- enclosedInlines (char '[') (char ']')
+  char ']'
+  return $ B.link src "" title
+
+selfLink :: OrgParser Inlines
+selfLink = try $ do
+  src <- enclosedRaw (string "[[") (string "]]")
+  return $ B.link src "" (B.str src)
+
+emph      :: OrgParser Inlines
+emph      = B.emph         <$> inlinesEnclosedBy '/'
+
+strong    :: OrgParser Inlines
+strong    = B.strong       <$> inlinesEnclosedBy '*'
+
+strikeout :: OrgParser Inlines
+strikeout = B.strikeout    <$> inlinesEnclosedBy '+'
+
+-- There is no underline, so we use strong instead.
+underline :: OrgParser Inlines
+underline = B.strong       <$> inlinesEnclosedBy '_'
+
+code      :: OrgParser Inlines
+code      = B.code         <$> rawEnclosedBy '='
+
+verbatim  ::  OrgParser Inlines
+verbatim  = B.rawInline "" <$> rawEnclosedBy '~'
+
+subscript ::  OrgParser Inlines
+subscript = B.subscript    <$> (try $ char '_' *> maybeGroupedByBraces)
+
+superscript ::  OrgParser Inlines
+superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces)
+
+maybeGroupedByBraces :: OrgParser Inlines
+maybeGroupedByBraces = try $
+  choice [ try $ enclosedInlines (char '{') (char '}')
+         , B.str . (:"") <$> anyChar
+         ]
+
+symbol :: OrgParser Inlines
+symbol = B.str . (: "") <$> oneOf specialChars
+
+enclosedInlines :: OrgParser a
+                -> OrgParser b
+                -> OrgParser Inlines
+enclosedInlines start end = try $
+  trimInlines . mconcat <$> enclosed start end inline
+
+-- FIXME: This is a hack
+inlinesEnclosedBy :: Char
+                  -> OrgParser Inlines
+inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
+                                      (atEnd $ char c)
+
+enclosedRaw :: OrgParser a
+            -> OrgParser b
+            -> OrgParser String
+enclosedRaw start end = try $
+  start *> (onSingleLine <|> spanningTwoLines)
+ where onSingleLine = try $ many1Till (noneOf "\n\r") end
+       spanningTwoLines = try $
+         anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
+
+rawEnclosedBy :: Char
+              -> OrgParser String
+rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c)
+
+-- succeeds only if we're not right after a str (ie. in middle of word)
+atStart :: OrgParser a -> OrgParser a
+atStart p = do
+  pos <- getPosition
+  st <- getState
+  guard $ stateLastStrPos st /= Just pos
+  p
+
+-- | succeeds only if we're at the end of a word
+atEnd :: OrgParser a -> OrgParser a
+atEnd p = try $ p <* lookingAtEndOfWord
+ where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars
+
+postWordChars :: [Char]
+postWordChars = "\t\n\r !\"'),-.:?}"
+
+-- FIXME: These functions are hacks and should be replaced
+endsOnThisOrNextLine :: Char
+                     -> OrgParser ()
+endsOnThisOrNextLine c = do
+  inp <- getInput
+  let doOtherwise = \rest -> endsOnThisLine rest c (const mzero)
+  endsOnThisLine inp c doOtherwise
+
+endsOnThisLine :: [Char]
+               -> Char
+               -> ([Char] -> OrgParser ())
+               -> OrgParser ()
+endsOnThisLine input c doOnOtherLines = do
+  case break (`elem` c:"\n") input of
+    (_,'\n':rest)    -> doOnOtherLines rest
+    (_,_:rest@(n:_)) -> if n `elem` postWordChars
+                        then return ()
+                        else endsOnThisLine rest c doOnOtherLines
+    _                -> mzero
+
-- 
cgit v1.2.3


From 7cf7e45e4cbb99b320a92b4bd31e433f535d3ef7 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 4 Apr 2014 14:17:43 +0200
Subject: Org reader: Slight cleaning of table parsing code

---
 src/Text/Pandoc/Readers/Org.hs | 68 ++++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 33 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5dc250f04..8b155194b 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -217,13 +217,18 @@ data OrgTableRow = OrgContentRow [Blocks]
                  | OrgHlineRow
  deriving (Eq, Show)
 
-type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]])
+data OrgTable = OrgTable
+  { orgTableColumns    :: Int
+  , orgTableAlignments :: [Alignment]
+  , orgTableHeader     :: [Blocks]
+  , orgTableRows       :: [[Blocks]]
+  } deriving (Eq, Show)
 
 table :: OrgParser Blocks
 table = try $ do
   lookAhead tableStart
-  (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows
-  return $ B.table "" (zip aligns widths) heads lns
+  OrgTable _ aligns heads lns <- normalizeTable . rowsToTable <$> tableRows
+  return $ B.table "" (zip aligns $ repeat 0) heads lns
 
 tableStart :: OrgParser Char
 tableStart = try $ skipSpaces *> char '|'
@@ -237,10 +242,9 @@ tableContentRow = try $
 
 tableContentCell :: OrgParser Blocks
 tableContentCell = try $
-  B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell)
+  B.plain . trimInlines . mconcat <$> many1Till inline endOfCell
 
 endOfCell :: OrgParser Char
--- endOfCell =  char '|' <|> newline
 endOfCell = try $ char '|' <|> lookAhead newline
 
 tableAlignRow :: OrgParser OrgTableRow
@@ -269,54 +273,53 @@ tableHline :: OrgParser OrgTableRow
 tableHline = try $
   OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
 
-tableContent :: [OrgTableRow]
-             -> OrgTableContent
-tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty)
+rowsToTable :: [OrgTableRow]
+            -> OrgTable
+rowsToTable = foldl' (flip rowToContent) zeroTable
+  where zeroTable = OrgTable 0 mempty mempty mempty
 
-normalizeTable :: OrgTableContent
-               -> OrgTableContent
-normalizeTable (cols, aligns, widths, heads, lns) =
+normalizeTable :: OrgTable
+               -> OrgTable
+normalizeTable (OrgTable cols aligns heads lns) =
   let aligns' = fillColumns aligns AlignDefault
-      widths' = fillColumns widths 0.0
       heads'  = if heads == mempty
-                then heads
+                then mempty
                 else fillColumns heads  (B.plain mempty)
       lns'    = map (flip fillColumns (B.plain mempty)) lns
       fillColumns base padding = take cols $ base ++ repeat padding
-  in (cols, aligns', widths', heads', lns')
+  in OrgTable cols aligns' heads' lns'
 
 
 -- One or more horizontal rules after the first content line mark the previous
 -- line as a header.  All other horizontal lines are discarded.
 rowToContent :: OrgTableRow
-             -> OrgTableContent
-             -> OrgTableContent
+             -> OrgTable
+             -> OrgTable
 rowToContent OrgHlineRow        = maybeBodyToHeader
 rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs
 rowToContent (OrgAlignRow as)   = setLongestRow as . setAligns as
 
 setLongestRow :: [a]
-              -> OrgTableContent
-              -> OrgTableContent
-setLongestRow r (cols, aligns, widths, heads, lns) =
-  (max cols (length r), aligns, widths, heads, lns)
+              -> OrgTable
+              -> OrgTable
+setLongestRow rs t = t{ orgTableColumns = max (length rs) (orgTableColumns t) }
 
-maybeBodyToHeader :: OrgTableContent
-                  -> OrgTableContent
-maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, [])
-maybeBodyToHeader content                          = content
+maybeBodyToHeader :: OrgTable
+                  -> OrgTable
+maybeBodyToHeader t = case t of
+  OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
+         t{ orgTableHeader = b , orgTableRows = [] }
+  _   -> t
 
 appendToBody :: [Blocks]
-             -> OrgTableContent
-             -> OrgTableContent
-appendToBody r (cols, aligns, widths, heads, lns) =
-  (cols, aligns, widths, heads, lns ++ [r])
+             -> OrgTable
+             -> OrgTable
+appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] }
 
 setAligns :: [Alignment]
-          -> OrgTableContent
-          -> OrgTableContent
-setAligns aligns (cols, _, widths, heads, lns) =
-   (cols, aligns, widths, heads, lns)
+          -> OrgTable
+          -> OrgTable
+setAligns aligns t = t{ orgTableAlignments = aligns }
 
 -- Paragraphs or Plain text
 paraOrPlain :: OrgParser Blocks
@@ -549,4 +552,3 @@ endsOnThisLine input c doOnOtherLines = do
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
-
-- 
cgit v1.2.3


From d43c3e81017734170fb25460c4b9ab9cccb1e0db Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 4 Apr 2014 17:20:36 +0200
Subject: Org reader: Use specialized org parser state

The default pandoc ParserState is replaced with `OrgParserState`.  This
is done to simplify the introduction of new state fields required for
efficient Org parsing.
---
 src/Text/Pandoc/Readers/Org.hs | 48 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 41 insertions(+), 7 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 8b155194b..0ae4d231c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -29,15 +29,16 @@ Conversion of Org-Mode to 'Pandoc' document.
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
 import qualified Text.Pandoc.Builder as B
-import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..))
 import           Text.Pandoc.Definition
 import           Text.Pandoc.Options
-import           Text.Pandoc.Parsing hiding (orderedListMarker)
+import           Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos)
 import           Text.Pandoc.Shared (compactify')
 
 import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
 import           Control.Monad (guard, mzero)
 import           Data.Char (toLower)
+import           Data.Default
 import           Data.List (foldl')
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (mconcat, mempty, mappend)
@@ -46,15 +47,48 @@ import           Data.Monoid (mconcat, mempty, mappend)
 readOrg :: ReaderOptions -- ^ Reader options
         -> String        -- ^ String to parse (assuming @'\n'@ line endings)
         -> Pandoc
-readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n")
+readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n")
+
+type OrgParser = Parser [Char] OrgParserState
+
+-- | Org-mode parser state
+data OrgParserState = OrgParserState
+                      { orgOptions          :: ReaderOptions
+                      , orgInlineCharStack  :: [Char]
+                      , orgLastStrPos       :: Maybe SourcePos
+                      , orgMeta             :: Meta
+                      } deriving (Show)
+
+instance HasReaderOptions OrgParserState where
+  extractReaderOptions = orgOptions
+
+instance HasMeta OrgParserState where
+  setMeta field val st =
+    st{ orgMeta = setMeta field val $ orgMeta st }
+  deleteMeta field st =
+    st{ orgMeta = deleteMeta field $ orgMeta st }
+
+instance Default OrgParserState where
+  def = defaultOrgParserState
+
+defaultOrgParserState :: OrgParserState
+defaultOrgParserState = OrgParserState
+                        { orgOptions = def
+                        , orgInlineCharStack = []
+                        , orgLastStrPos = Nothing
+                        , orgMeta = nullMeta
+                        }
+
+updateLastStrPos :: OrgParser ()
+updateLastStrPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgLastStrPos = Just p }
 
-type OrgParser = Parser [Char] ParserState
 
 parseOrg:: OrgParser Pandoc
 parseOrg = do
   blocks' <- B.toList <$> parseBlocks
   st <- getState
-  let meta = stateMeta st
+  let meta = orgMeta st
   return $ Pandoc meta $ filter (/= Null) blocks'
 
 --
@@ -177,7 +211,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
 declarationLine :: OrgParser Blocks
 declarationLine = try $ do
   meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
-  updateState $ \st -> st { stateMeta = stateMeta st <> meta' }
+  updateState $ \st -> st { orgMeta  = orgMeta st <> meta' }
   return mempty
 
 metaValue :: OrgParser MetaValue
@@ -522,7 +556,7 @@ atStart :: OrgParser a -> OrgParser a
 atStart p = do
   pos <- getPosition
   st <- getState
-  guard $ stateLastStrPos st /= Just pos
+  guard $ orgLastStrPos st /= Just pos
   p
 
 -- | succeeds only if we're at the end of a word
-- 
cgit v1.2.3


From fd98532784e43ad73072f37a31af5ff40fdc1c56 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 5 Apr 2014 09:37:46 +0200
Subject: Org reader: Fix parsing of nested inlines

Text such as /*this*/ was not correctly parsed as a strong, emphasised
word.  This was due to the end-of-word recognition being to strict as it
did not accept markup chars as part of a word.  The fix involves an
additional parser state field, listing the markup chars which might be
parsed as part of a word.
---
 src/Text/Pandoc/Readers/Org.hs | 27 ++++++++++++++++++++-------
 tests/Tests/Readers/Org.hs     |  4 ++++
 2 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0ae4d231c..ad66caab9 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -535,8 +535,15 @@ enclosedInlines start end = try $
 -- FIXME: This is a hack
 inlinesEnclosedBy :: Char
                   -> OrgParser Inlines
-inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
-                                      (atEnd $ char c)
+inlinesEnclosedBy c = try $ do
+  updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) }
+  res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
+                         (atEnd $ char c)
+  updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st }
+  return res
+ where shift xs
+           | null xs   = []
+           | otherwise = tail xs
 
 enclosedRaw :: OrgParser a
             -> OrgParser b
@@ -561,11 +568,16 @@ atStart p = do
 
 -- | succeeds only if we're at the end of a word
 atEnd :: OrgParser a -> OrgParser a
-atEnd p = try $ p <* lookingAtEndOfWord
- where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars
+atEnd p = try $ do
+  p <* lookingAtEndOfWord
+ where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars
 
-postWordChars :: [Char]
-postWordChars = "\t\n\r !\"'),-.:?}"
+postWordChars :: OrgParser [Char]
+postWordChars = do
+  st <- getState
+  return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st)
+ where safeSecond (_:x2:_) = [x2]
+       safeSecond _        = []
 
 -- FIXME: These functions are hacks and should be replaced
 endsOnThisOrNextLine :: Char
@@ -580,9 +592,10 @@ endsOnThisLine :: [Char]
                -> ([Char] -> OrgParser ())
                -> OrgParser ()
 endsOnThisLine input c doOnOtherLines = do
+  postWordChars' <- postWordChars
   case break (`elem` c:"\n") input of
     (_,'\n':rest)    -> doOnOtherLines rest
-    (_,_:rest@(n:_)) -> if n `elem` postWordChars
+    (_,_:rest@(n:_)) -> if n `elem` postWordChars'
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 8c5982302..9091d9c74 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -42,6 +42,10 @@ tests =
           "*Cider*" =?>
           para (strong "Cider")
 
+      , "Strong Emphasis" =:
+        "/*strength*/" =?>
+        para (emph . strong $ "strength")
+
       , "Strikeout" =:
           "+Kill Bill+" =?>
           para (strikeout . spcSep $ [ "Kill", "Bill" ])
-- 
cgit v1.2.3


From d76d2b707b2b5cebb38122e117527a70996c2c4f Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 5 Apr 2014 09:09:44 +0200
Subject: Org reader: Provide more language identifier translations

Org-mode and Pandoc use different language identifiers, marking source
code as being written in a certain programming language.  This adds more
translations from identifiers as used in Org to identifiers used in
Pandoc.

The full list of identifiers used in Org and Pandoc is available through
http://orgmode.org/manual/Languages.html and `pandoc -v`, respectively.
---
 src/Text/Pandoc/Readers/Org.hs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ad66caab9..62088a04d 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -153,7 +153,14 @@ indentWith num = do
                  , try (char '\t' >> count (num - tabStop) (char ' ')) ]
 
 translateLang :: String -> String
-translateLang "sh" = "bash"
+translateLang "C"          = "c"
+translateLang "C++"        = "cpp"
+translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
+translateLang "js"         = "javascript"
+translateLang "lisp"       = "commonlisp"
+translateLang "R"          = "r"
+translateLang "sh"         = "bash"
+translateLang "sqlite"     = "sql"
 translateLang cs = cs
 
 commaEscaped :: String -> String
-- 
cgit v1.2.3


From 652c781e375f3678a0ec821663240d4958f324de Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 5 Apr 2014 16:10:52 +0200
Subject: Org reader: Support inline images

---
 src/Text/Pandoc/Readers/Org.hs | 34 ++++++++++++++++++++++++----------
 tests/Tests/Readers/Org.hs     | 12 ++++++++++--
 2 files changed, 34 insertions(+), 12 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 62088a04d..8b1b4fa23 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -39,7 +39,7 @@ import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**
 import           Control.Monad (guard, mzero)
 import           Data.Char (toLower)
 import           Data.Default
-import           Data.List (foldl')
+import           Data.List (foldl', isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (mconcat, mempty, mappend)
 
@@ -484,20 +484,26 @@ endline = try $ do
   return B.space
 
 link :: OrgParser Inlines
-link = explicitLink <|> selfLink <?> "link"
+link = explicitOrImageLink <|> selflinkOrImage <?> "link"
 
-explicitLink :: OrgParser Inlines
-explicitLink = try $ do
+explicitOrImageLink :: OrgParser Inlines
+explicitOrImageLink = try $ do
   char '['
-  src   <- enclosedRaw     (char '[') (char ']')
-  title <- enclosedInlines (char '[') (char ']')
+  src    <- enclosedRaw (char '[') (char ']')
+  title  <- enclosedRaw (char '[') (char ']')
+  title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n")
   char ']'
-  return $ B.link src "" title
+  return $ if (isImage src) && (isImage title)
+           then B.link src "" (B.image title "" "")
+           else B.link src "" title'
+ where butLast = reverse . tail . reverse
 
-selfLink :: OrgParser Inlines
-selfLink = try $ do
+selflinkOrImage :: OrgParser Inlines
+selflinkOrImage = try $ do
   src <- enclosedRaw (string "[[") (string "]]")
-  return $ B.link src "" (B.str src)
+  return $ if isImage src
+           then B.image src "" ""
+           else B.link src "" (B.str src)
 
 emph      :: OrgParser Inlines
 emph      = B.emph         <$> inlinesEnclosedBy '/'
@@ -606,3 +612,11 @@ endsOnThisLine input c doOnOtherLines = do
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
+
+isImage filename =
+  any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
+  any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+  ':' `notElem` filename
+ where
+   imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
+   protocols = [ "file", "http", "https" ]
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 9091d9c74..1088d6611 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -94,14 +94,22 @@ tests =
                        , (strong ("is" <> space <> "not"))
                        , "emph/" ])
 
+      , "Image" =:
+          "[[./sunset.jpg]]" =?>
+          (para $ image "./sunset.jpg" "" "")
+
       , "Explicit link" =:
-          "[[http://zeitlens.com/][pseudo-random nonsense]]" =?>
+          "[[http://zeitlens.com/][pseudo-random /nonsense/]]" =?>
           (para $ link "http://zeitlens.com/" ""
-                       ("pseudo-random" <> space <> "nonsense"))
+                       ("pseudo-random" <> space <> emph "nonsense"))
 
       , "Self-link" =:
           "[[http://zeitlens.com/]]" =?>
           (para $ link "http://zeitlens.com/" "" "http://zeitlens.com/")
+
+      , "Image link" =:
+          "[[sunset.png][dusk.svg]]" =?>
+          (para $ link "sunset.png" "" (image "dusk.svg" "" ""))
       ]
 
   , testGroup "Meta Information" $
-- 
cgit v1.2.3


From f2deb9d86d864c79c5ca18205b4ca565b7199413 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 5 Apr 2014 15:50:46 -0700
Subject: Org reader:  Added type signature.

---
 src/Text/Pandoc/Readers/Org.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 8b1b4fa23..5ad2531ac 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -613,6 +613,7 @@ endsOnThisLine input c doOnOtherLines = do
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
 
+isImage :: String -> Bool
 isImage filename =
   any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
   any (\x -> (x++":") `isPrefixOf` filename) protocols ||
-- 
cgit v1.2.3


From 4ebf6f6ebf7d679252ade08203ec13e3e92c2db5 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sun, 6 Apr 2014 19:09:33 +0200
Subject: Org reader: Minor code clean-up

---
 src/Text/Pandoc/Readers/Org.hs | 51 +++++++++++++++++-------------------------
 1 file changed, 21 insertions(+), 30 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5ad2531ac..6652925aa 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -268,8 +268,12 @@ data OrgTable = OrgTable
 table :: OrgParser Blocks
 table = try $ do
   lookAhead tableStart
-  OrgTable _ aligns heads lns <- normalizeTable . rowsToTable <$> tableRows
-  return $ B.table "" (zip aligns $ repeat 0) heads lns
+  orgToPandocTable . normalizeTable . rowsToTable <$> tableRows
+
+orgToPandocTable :: OrgTable
+                 -> Blocks
+orgToPandocTable (OrgTable _ aligns heads lns) =
+  B.table "" (zip aligns $ repeat 0) heads lns
 
 tableStart :: OrgParser Char
 tableStart = try $ skipSpaces *> char '|'
@@ -403,20 +407,14 @@ orderedListStart = genericListStart orderedListMarker
   -- Ordered list markers allowed in org-mode
   where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
 
+-- parse raw text for one list item, excluding start marker and continuations
 listItem :: OrgParser Int
          -> OrgParser Blocks
 listItem start = try $ do
-  (markerLength, first) <- try (start >>= rawListItem)
-  rest <- many (listContinuation markerLength)
-  parseFromString parseBlocks $ concat (first:rest)
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: Int
-            -> OrgParser (Int, String)
-rawListItem markerLength = try $ do
-  firstLine <- anyLine
-  restLines <- many (listLine markerLength)
-  return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+  markerLength <- try start
+  firstLine <- anyLineNewline
+  rest <- concat <$> many (listContinuation markerLength)
+  parseFromString parseBlocks $ firstLine ++ rest
 
 -- continuation of a list item - indented and separated by blankline or endline.
 -- Note: nested lists are parsed as continuations.
@@ -424,14 +422,11 @@ listContinuation :: Int
                  -> OrgParser String
 listContinuation markerLength = try $
   mappend <$> many blankline
-          <*> (concat <$> many1 (listLine markerLength))
+          <*> (concat <$> many1 listLine)
+ where listLine = try $ indentWith markerLength *> anyLineNewline
 
--- parse a line of a list item
-listLine :: Int
-         -> OrgParser String
-listLine markerLength = try $
-  indentWith markerLength *> anyLine
-    <**> pure (++ "\n")
+anyLineNewline :: OrgParser String
+anyLineNewline = (++ "\n") <$> anyLine
 
 
 --
@@ -491,12 +486,11 @@ explicitOrImageLink = try $ do
   char '['
   src    <- enclosedRaw (char '[') (char ']')
   title  <- enclosedRaw (char '[') (char ']')
-  title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n")
+  title' <- parseFromString (mconcat <$> many inline) title
   char ']'
   return $ if (isImage src) && (isImage title)
            then B.link src "" (B.image title "" "")
            else B.link src "" title'
- where butLast = reverse . tail . reverse
 
 selflinkOrImage :: OrgParser Inlines
 selflinkOrImage = try $ do
@@ -552,11 +546,8 @@ inlinesEnclosedBy c = try $ do
   updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) }
   res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
                          (atEnd $ char c)
-  updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st }
+  updateState $ \st -> st { orgInlineCharStack = drop 1 . orgInlineCharStack $ st }
   return res
- where shift xs
-           | null xs   = []
-           | otherwise = tail xs
 
 enclosedRaw :: OrgParser a
             -> OrgParser b
@@ -583,14 +574,13 @@ atStart p = do
 atEnd :: OrgParser a -> OrgParser a
 atEnd p = try $ do
   p <* lookingAtEndOfWord
- where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars
+ where lookingAtEndOfWord =
+           eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars
 
 postWordChars :: OrgParser [Char]
 postWordChars = do
   st <- getState
-  return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st)
- where safeSecond (_:x2:_) = [x2]
-       safeSecond _        = []
+  return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . orgInlineCharStack $ st)
 
 -- FIXME: These functions are hacks and should be replaced
 endsOnThisOrNextLine :: Char
@@ -608,6 +598,7 @@ endsOnThisLine input c doOnOtherLines = do
   postWordChars' <- postWordChars
   case break (`elem` c:"\n") input of
     (_,'\n':rest)    -> doOnOtherLines rest
+    (_,_:[])         -> return ()
     (_,_:rest@(n:_)) -> if n `elem` postWordChars'
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
-- 
cgit v1.2.3


From 480b33b7100048ef3fad51754ae76c21daa8b86f Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sun, 6 Apr 2014 14:49:57 +0200
Subject: Org reader: Add support for definition lists

---
 src/Text/Pandoc/Readers/Org.hs | 17 ++++++++++++++++-
 tests/Tests/Readers/Org.hs     | 26 +++++++++++++++++++++++---
 2 files changed, 39 insertions(+), 4 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 6652925aa..20bca3e28 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -383,7 +383,10 @@ restOfLine = mconcat <$> manyTill inline newline
 --
 
 list :: OrgParser Blocks
-list = choice [ bulletList, orderedList ] <?> "list"
+list = choice [ definitionList, bulletList, orderedList ] <?> "list"
+
+definitionList :: OrgParser Blocks
+definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart)
 
 bulletList :: OrgParser Blocks
 bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
@@ -407,6 +410,18 @@ orderedListStart = genericListStart orderedListMarker
   -- Ordered list markers allowed in org-mode
   where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
 
+definitionListItem :: OrgParser Int
+                   -> OrgParser (Inlines, [Blocks])
+definitionListItem parseMarkerGetLength = try $ do
+  markerLength <- parseMarkerGetLength
+  term <- manyTill (noneOf "\n\r") (try $ string "::")
+  first <- anyLineNewline
+  cont <- concat <$> many (listContinuation markerLength)
+  term' <- parseFromString inline term
+  contents' <- parseFromString parseBlocks $ first ++ cont
+  return (term', [contents'])
+
+
 -- parse raw text for one list item, excluding start marker and continuations
 listItem :: OrgParser Int
          -> OrgParser Blocks
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 1088d6611..eb9f4d741 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -43,8 +43,8 @@ tests =
           para (strong "Cider")
 
       , "Strong Emphasis" =:
-        "/*strength*/" =?>
-        para (emph . strong $ "strength")
+          "/*strength*/" =?>
+          para (emph . strong $ "strength")
 
       , "Strikeout" =:
           "+Kill Bill+" =?>
@@ -428,7 +428,27 @@ tests =
       , "Bullet List in Ordered List" =:
           ("1. GNU\n" ++
            "   - Freedom\n") =?>
-            orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ]
+          orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ]
+
+      , "Definition List" =:
+          unlines [ "- PLL :: phase-locked loop"
+                  , "- TTL ::"
+                  , "  transistor-transistor logic"
+                  , "- PSK::phase-shift keying"
+                  , ""
+                  , "  a digital modulation scheme"
+                  ] =?>
+          definitionList [ ("PLL", [ plain $ "phase-locked" <> space <> "loop" ])
+                         , ("TTL", [ plain $ "transistor-transistor" <> space <>
+                                               "logic" ])
+                         , ("PSK", [ mconcat
+                                     [ para  $ "phase-shift" <> space <> "keying"
+                                     , plain $ spcSep [ "a", "digital"
+                                                      , "modulation", "scheme" ]
+                                     ]
+                                   ]
+                                   )
+                         ]
       ]
 
   , testGroup "Tables"
-- 
cgit v1.2.3


From c47bd8404fda0a782719848ef190b56eb0fdb9dc Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Mon, 7 Apr 2014 11:00:30 +0200
Subject: Org reader: Support inline math (like $E=mc^2$)

Closes #1223.
---
 src/Text/Pandoc/Readers/Org.hs | 22 ++++++++++++++++------
 tests/Tests/Readers/Org.hs     |  4 ++++
 2 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 20bca3e28..2bb6ee122 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -459,6 +459,7 @@ inline = choice inlineParsers <?> "inline"
                         , strikeout
                         , underline
                         , code
+                        , math
                         , verbatim
                         , subscript
                         , superscript
@@ -530,10 +531,13 @@ underline = B.strong       <$> inlinesEnclosedBy '_'
 code      :: OrgParser Inlines
 code      = B.code         <$> rawEnclosedBy '='
 
-verbatim  ::  OrgParser Inlines
+math      :: OrgParser Inlines
+math      = B.math         <$> rawEnclosedBy '$'
+
+verbatim  :: OrgParser Inlines
 verbatim  = B.rawInline "" <$> rawEnclosedBy '~'
 
-subscript ::  OrgParser Inlines
+subscript :: OrgParser Inlines
 subscript = B.subscript    <$> (try $ char '_' *> maybeGroupedByBraces)
 
 superscript ::  OrgParser Inlines
@@ -580,18 +584,24 @@ rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c)
 -- succeeds only if we're not right after a str (ie. in middle of word)
 atStart :: OrgParser a -> OrgParser a
 atStart p = do
-  pos <- getPosition
-  st <- getState
-  guard $ orgLastStrPos st /= Just pos
+  guard =<< not <$> isRightAfterString
   p
 
 -- | succeeds only if we're at the end of a word
 atEnd :: OrgParser a -> OrgParser a
 atEnd p = try $ do
-  p <* lookingAtEndOfWord
+ p <* lookingAtEndOfWord
  where lookingAtEndOfWord =
            eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars
 
+isRightAfterString :: OrgParser Bool
+isRightAfterString = do
+  pos <- getPosition
+  st <- getState
+  -- the position `Nothing` isn't after a String, either, hence the double
+  -- negation
+  return $ not $ orgLastStrPos st /= Just pos
+
 postWordChars :: OrgParser [Char]
 postWordChars = do
   st <- getState
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index eb9f4d741..77b9d9327 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -54,6 +54,10 @@ tests =
           "=Robot.rock()=" =?>
           para (code "Robot.rock()")
 
+      , "Math" =:
+          "$E=mc^2$" =?>
+           para (math "E=mc^2")
+
       , "Verbatim" =:
           "~word for word~" =?>
           para (rawInline "" "word for word")
-- 
cgit v1.2.3


From 030020236c85c736892a6f8e0dcefca1681e5ce0 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Tue, 8 Apr 2014 22:39:25 +0200
Subject: Org reader: Precise rules for the recognition of markup

The inline parsers have been rewritten using the org source code as a
reference. This fixes a couple of bugs related to erroneous markup
recognition.
---
 src/Text/Pandoc/Readers/Org.hs | 380 ++++++++++++++++++++++++++++-------------
 tests/Tests/Readers/Org.hs     |  25 ++-
 2 files changed, 283 insertions(+), 122 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 2bb6ee122..392b17bbc 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -32,11 +32,12 @@ import qualified Text.Pandoc.Builder as B
 import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..))
 import           Text.Pandoc.Definition
 import           Text.Pandoc.Options
-import           Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos)
+import qualified Text.Pandoc.Parsing as P
+import           Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos)
 import           Text.Pandoc.Shared (compactify')
 
 import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
-import           Control.Monad (guard, mzero)
+import           Control.Monad (guard, when)
 import           Data.Char (toLower)
 import           Data.Default
 import           Data.List (foldl', isPrefixOf, isSuffixOf)
@@ -47,49 +48,100 @@ import           Data.Monoid (mconcat, mempty, mappend)
 readOrg :: ReaderOptions -- ^ Reader options
         -> String        -- ^ String to parse (assuming @'\n'@ line endings)
         -> Pandoc
-readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n")
+readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n")
 
 type OrgParser = Parser [Char] OrgParserState
 
+parseOrg:: OrgParser Pandoc
+parseOrg = do
+  blocks' <- B.toList <$> parseBlocks
+  st <- getState
+  let meta = orgStateMeta st
+  return $ Pandoc meta $ filter (/= Null) blocks'
+
+--
+-- Parser State for Org
+--
+
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
-                      { orgOptions          :: ReaderOptions
-                      , orgInlineCharStack  :: [Char]
-                      , orgLastStrPos       :: Maybe SourcePos
-                      , orgMeta             :: Meta
+                      { orgStateOptions              :: ReaderOptions
+                      , orgStateEmphasisCharStack    :: [Char]
+                      , orgStateEmphasisNewlines     :: Maybe Int
+                      , orgStateLastForbiddenCharPos :: Maybe SourcePos
+                      , orgStateLastPreCharPos       :: Maybe SourcePos
+                      , orgStateLastStrPos           :: Maybe SourcePos
+                      , orgStateMeta                 :: Meta
                       } deriving (Show)
 
 instance HasReaderOptions OrgParserState where
-  extractReaderOptions = orgOptions
+  extractReaderOptions = orgStateOptions
 
 instance HasMeta OrgParserState where
   setMeta field val st =
-    st{ orgMeta = setMeta field val $ orgMeta st }
+    st{ orgStateMeta = setMeta field val $ orgStateMeta st }
   deleteMeta field st =
-    st{ orgMeta = deleteMeta field $ orgMeta st }
+    st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
 
 instance Default OrgParserState where
   def = defaultOrgParserState
 
 defaultOrgParserState :: OrgParserState
 defaultOrgParserState = OrgParserState
-                        { orgOptions = def
-                        , orgInlineCharStack = []
-                        , orgLastStrPos = Nothing
-                        , orgMeta = nullMeta
+                        { orgStateOptions = def
+                        , orgStateEmphasisCharStack = []
+                        , orgStateEmphasisNewlines = Nothing
+                        , orgStateLastForbiddenCharPos = Nothing
+                        , orgStateLastPreCharPos = Nothing
+                        , orgStateLastStrPos = Nothing
+                        , orgStateMeta = nullMeta
                         }
 
 updateLastStrPos :: OrgParser ()
 updateLastStrPos = getPosition >>= \p ->
-  updateState $ \s -> s{ orgLastStrPos = Just p }
+  updateState $ \s -> s{ orgStateLastStrPos = Just p }
 
+updateLastForbiddenCharPos :: OrgParser ()
+updateLastForbiddenCharPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
 
-parseOrg:: OrgParser Pandoc
-parseOrg = do
-  blocks' <- B.toList <$> parseBlocks
+updateLastPreCharPos :: OrgParser ()
+updateLastPreCharPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
+
+pushToInlineCharStack :: Char -> OrgParser ()
+pushToInlineCharStack c = updateState $ \st ->
+  st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) }
+
+popInlineCharStack :: OrgParser ()
+popInlineCharStack = updateState $ \st ->
+  st { orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ st }
+
+surroundingEmphasisChar :: OrgParser [Char]
+surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
+
+startEmphasisNewlinesCounting :: Int -> OrgParser ()
+startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
+  s { orgStateEmphasisNewlines = Just maxNewlines }
+
+decEmphasisNewlinesCount :: OrgParser ()
+decEmphasisNewlinesCount = updateState $ \s ->
+      s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
+
+newlinesCountWithinLimits :: OrgParser Bool
+newlinesCountWithinLimits = do
   st <- getState
-  let meta = orgMeta st
-  return $ Pandoc meta $ filter (/= Null) blocks'
+  return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
+
+resetEmphasisNewlines :: OrgParser ()
+resetEmphasisNewlines = updateState $ \s ->
+  s{ orgStateEmphasisNewlines = Nothing }
+
+newline :: OrgParser Char
+newline =
+  P.newline
+       <* updateLastPreCharPos
+       <* updateLastForbiddenCharPos
 
 --
 -- parsing blocks
@@ -218,7 +270,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
 declarationLine :: OrgParser Blocks
 declarationLine = try $ do
   meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
-  updateState $ \st -> st { orgMeta  = orgMeta st <> meta' }
+  updateState $ \st -> st { orgStateMeta  = orgStateMeta st <> meta' }
   return mempty
 
 metaValue :: OrgParser MetaValue
@@ -449,22 +501,24 @@ anyLineNewline = (++ "\n") <$> anyLine
 --
 
 inline :: OrgParser Inlines
-inline = choice inlineParsers <?> "inline"
-  where inlineParsers = [ whitespace
-                        , link
-                        , str
-                        , endline
-                        , emph
-                        , strong
-                        , strikeout
-                        , underline
-                        , code
-                        , math
-                        , verbatim
-                        , subscript
-                        , superscript
-                        , symbol
-                        ]
+inline =
+  choice [ whitespace
+         , link
+         , str
+         , endline
+         , emph
+         , strong
+         , strikeout
+         , underline
+         , code
+         , math
+         , verbatim
+         , subscript
+         , superscript
+         , symbol
+         ] <* (guard =<< newlinesCountWithinLimits)
+  <?> "inline"
+
 
 -- treat these as potentially non-text when parsing inline:
 specialChars :: [Char]
@@ -472,7 +526,10 @@ specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
 
 
 whitespace :: OrgParser Inlines
-whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
+whitespace = B.space <$ skipMany1 spaceChar
+                     <* updateLastPreCharPos
+                     <* updateLastForbiddenCharPos
+             <?> "whitespace"
 
 str :: OrgParser Inlines
 str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
@@ -492,6 +549,9 @@ endline = try $ do
   notFollowedBy' commentLineStart
   notFollowedBy' bulletListStart
   notFollowedBy' orderedListStart
+  decEmphasisNewlinesCount
+  guard =<< newlinesCountWithinLimits
+  updateLastPreCharPos
   return B.space
 
 link :: OrgParser Inlines
@@ -500,42 +560,54 @@ link = explicitOrImageLink <|> selflinkOrImage <?> "link"
 explicitOrImageLink :: OrgParser Inlines
 explicitOrImageLink = try $ do
   char '['
-  src    <- enclosedRaw (char '[') (char ']')
+  src    <- linkTarget
   title  <- enclosedRaw (char '[') (char ']')
   title' <- parseFromString (mconcat <$> many inline) title
   char ']'
-  return $ if (isImage src) && (isImage title)
+  return $ if (isImageFilename src) && (isImageFilename title)
            then B.link src "" (B.image title "" "")
            else B.link src "" title'
 
 selflinkOrImage :: OrgParser Inlines
 selflinkOrImage = try $ do
-  src <- enclosedRaw (string "[[") (string "]]")
-  return $ if isImage src
+  src <- (char '[') *> linkTarget <* char ']'
+  return $ if isImageFilename src
            then B.image src "" ""
            else B.link src "" (B.str src)
 
+linkTarget :: OrgParser String
+linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
+
+isImageFilename :: String -> Bool
+isImageFilename filename =
+  any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
+  any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+  ':' `notElem` filename
+ where
+   imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
+   protocols = [ "file", "http", "https" ]
+
 emph      :: OrgParser Inlines
-emph      = B.emph         <$> inlinesEnclosedBy '/'
+emph      = B.emph         <$> emphasisBetween '/'
 
 strong    :: OrgParser Inlines
-strong    = B.strong       <$> inlinesEnclosedBy '*'
+strong    = B.strong       <$> emphasisBetween '*'
 
 strikeout :: OrgParser Inlines
-strikeout = B.strikeout    <$> inlinesEnclosedBy '+'
+strikeout = B.strikeout    <$> emphasisBetween '+'
 
 -- There is no underline, so we use strong instead.
 underline :: OrgParser Inlines
-underline = B.strong       <$> inlinesEnclosedBy '_'
+underline = B.strong       <$> emphasisBetween '_'
 
 code      :: OrgParser Inlines
-code      = B.code         <$> rawEnclosedBy '='
-
-math      :: OrgParser Inlines
-math      = B.math         <$> rawEnclosedBy '$'
+code      = B.code         <$> verbatimBetween '='
 
 verbatim  :: OrgParser Inlines
-verbatim  = B.rawInline "" <$> rawEnclosedBy '~'
+verbatim  = B.rawInline "" <$> verbatimBetween '~'
+
+math      :: OrgParser Inlines
+math      = B.math         <$> mathStringBetween '$'
 
 subscript :: OrgParser Inlines
 subscript = B.subscript    <$> (try $ char '_' *> maybeGroupedByBraces)
@@ -550,7 +622,72 @@ maybeGroupedByBraces = try $
          ]
 
 symbol :: OrgParser Inlines
-symbol = B.str . (: "") <$> oneOf specialChars
+symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+ where updatePositions c
+           | c `elem` emphasisPreChars = c <$ updateLastPreCharPos
+           | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
+           | otherwise = return c
+
+emphasisBetween :: Char
+                -> OrgParser Inlines
+emphasisBetween c = try $ do
+  startEmphasisNewlinesCounting emphasisAllowedNewlines
+  res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
+  isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
+  when isTopLevelEmphasis
+       resetEmphasisNewlines
+  return res
+
+verbatimBetween :: Char
+                -> OrgParser String
+verbatimBetween c = try $
+  emphasisStart c *>
+  many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
+
+-- | Parses a raw string delimited by @c@ using Org's math rules
+mathStringBetween :: Char
+                  -> OrgParser String
+mathStringBetween c = try $ do
+  mathStart c
+  body <- many1TillNOrLessNewlines mathAllowedNewlines
+                                   (noneOf (c:"\n\r"))
+                                   (lookAhead $ mathEnd c)
+  final <- mathEnd c
+  return $ body ++ [final]
+
+-- | Parses the start (opening character) of emphasis
+emphasisStart :: Char -> OrgParser Char
+emphasisStart c = try $ do
+  guard =<< afterEmphasisPreChar
+  guard =<< notAfterString
+  char c
+  lookAhead (noneOf emphasisForbiddenBorderChars)
+  pushToInlineCharStack c
+  return c
+
+-- | Parses the closing character of emphasis
+emphasisEnd :: Char -> OrgParser Char
+emphasisEnd c = try $ do
+  guard =<< notAfterForbiddenBorderChar
+  char c
+  eof <|> lookAhead (surroundingEmphasisChar >>= \x ->
+                         oneOf (x ++ emphasisPostChars))
+          *> return ()
+  updateLastStrPos
+  popInlineCharStack
+  return c
+
+mathStart :: Char -> OrgParser Char
+mathStart c = try $ do
+  char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
+
+mathEnd :: Char -> OrgParser Char
+mathEnd c = try $ do
+  res <- noneOf (c:mathForbiddenBorderChars)
+  char c
+  eof <|> (lookAhead $ oneOf mathPostChars *> pure ())
+  return res
+
 
 enclosedInlines :: OrgParser a
                 -> OrgParser b
@@ -558,16 +695,6 @@ enclosedInlines :: OrgParser a
 enclosedInlines start end = try $
   trimInlines . mconcat <$> enclosed start end inline
 
--- FIXME: This is a hack
-inlinesEnclosedBy :: Char
-                  -> OrgParser Inlines
-inlinesEnclosedBy c = try $ do
-  updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) }
-  res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
-                         (atEnd $ char c)
-  updateState $ \st -> st { orgInlineCharStack = drop 1 . orgInlineCharStack $ st }
-  return res
-
 enclosedRaw :: OrgParser a
             -> OrgParser b
             -> OrgParser String
@@ -577,63 +704,76 @@ enclosedRaw start end = try $
        spanningTwoLines = try $
          anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
 
-rawEnclosedBy :: Char
-              -> OrgParser String
-rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c)
-
--- succeeds only if we're not right after a str (ie. in middle of word)
-atStart :: OrgParser a -> OrgParser a
-atStart p = do
-  guard =<< not <$> isRightAfterString
-  p
-
--- | succeeds only if we're at the end of a word
-atEnd :: OrgParser a -> OrgParser a
-atEnd p = try $ do
- p <* lookingAtEndOfWord
- where lookingAtEndOfWord =
-           eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars
-
-isRightAfterString :: OrgParser Bool
-isRightAfterString = do
+-- | Like many1Till, but parses at most @n+1@ lines.  @p@ must not consume
+--   newlines.
+many1TillNOrLessNewlines :: Int
+                         -> OrgParser Char
+                         -> OrgParser a
+                         -> OrgParser String
+many1TillNOrLessNewlines n p end = try $
+  nMoreLines (Just n) mempty >>= oneOrMore
+ where
+   nMoreLines Nothing  cs = return cs
+   nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
+   nMoreLines k        cs = try $ (final k cs <|> rest k cs)
+                                  >>= uncurry nMoreLines
+   final _ cs = (\x -> (Nothing,      cs ++ x)) <$> (try finalLine)
+   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline)
+   finalLine = try $ manyTill p end
+   minus1 k = k - 1
+   oneOrMore cs = guard (not $ null cs) *> return cs
+
+-- Org allows customization of the way it reads emphasis.  We use the defaults
+-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
+-- for details).
+
+-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
+emphasisPreChars :: [Char]
+emphasisPreChars = "\t \"'({"
+
+-- | Chars allowed at after emphasis
+emphasisPostChars :: [Char]
+emphasisPostChars = "\t\n !\"'),-.:;?\\}"
+
+-- | Chars not allowed at the (inner) border of emphasis
+emphasisForbiddenBorderChars :: [Char]
+emphasisForbiddenBorderChars = "\t\n\r \"',"
+
+-- | The maximum number of newlines within
+emphasisAllowedNewlines :: Int
+emphasisAllowedNewlines = 1
+
+-- LaTeX-style math: see `org-latex-regexps` for details
+
+-- | Chars allowed after an inline ($...$) math statement
+mathPostChars :: [Char]
+mathPostChars = "\t\n \"',-.:;?"
+
+-- | Chars not allowed at the (inner) border of math
+mathForbiddenBorderChars :: [Char]
+mathForbiddenBorderChars = "\t\n\r ,;.$"
+
+-- | Maximum number of newlines in an inline math statement
+mathAllowedNewlines :: Int
+mathAllowedNewlines = 2
+
+-- | Whether we are right behind a char allowed before emphasis
+afterEmphasisPreChar :: OrgParser Bool
+afterEmphasisPreChar = do
   pos <- getPosition
-  st <- getState
-  -- the position `Nothing` isn't after a String, either, hence the double
-  -- negation
-  return $ not $ orgLastStrPos st /= Just pos
+  lastPrePos <- orgStateLastPreCharPos <$> getState
+  return $ lastPrePos == Nothing || lastPrePos == Just pos
 
-postWordChars :: OrgParser [Char]
-postWordChars = do
-  st <- getState
-  return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . orgInlineCharStack $ st)
-
--- FIXME: These functions are hacks and should be replaced
-endsOnThisOrNextLine :: Char
-                     -> OrgParser ()
-endsOnThisOrNextLine c = do
-  inp <- getInput
-  let doOtherwise = \rest -> endsOnThisLine rest c (const mzero)
-  endsOnThisLine inp c doOtherwise
-
-endsOnThisLine :: [Char]
-               -> Char
-               -> ([Char] -> OrgParser ())
-               -> OrgParser ()
-endsOnThisLine input c doOnOtherLines = do
-  postWordChars' <- postWordChars
-  case break (`elem` c:"\n") input of
-    (_,'\n':rest)    -> doOnOtherLines rest
-    (_,_:[])         -> return ()
-    (_,_:rest@(n:_)) -> if n `elem` postWordChars'
-                        then return ()
-                        else endsOnThisLine rest c doOnOtherLines
-    _                -> mzero
-
-isImage :: String -> Bool
-isImage filename =
-  any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
-  any (\x -> (x++":") `isPrefixOf` filename) protocols ||
-  ':' `notElem` filename
- where
-   imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
-   protocols = [ "file", "http", "https" ]
+-- | Whether we are right after the end of a string
+notAfterString :: OrgParser Bool
+notAfterString = do
+  pos <- getPosition
+  lastStrPos <- orgStateLastStrPos <$> getState
+  return $ lastStrPos /= Just pos
+
+-- | Whether the parser is right after a forbidden border char
+notAfterForbiddenBorderChar :: OrgParser Bool
+notAfterForbiddenBorderChar = do
+  pos <- getPosition
+  lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
+  return $ lastFBCPos /= Just pos
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 77b9d9327..efd8fe977 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -86,16 +86,37 @@ tests =
           unlines [ "this+that+ +so+on"
                   , "seven*eight* nine*"
                   , "+not+funny+"
+                  , "this == self"
                   ] =?>
           para (spcSep [ "this+that+", "+so+on"
                        , "seven*eight*", "nine*"
                        , strikeout "not+funny"
+                       , "this" <> space <> "==" <> space <> "self"
                        ])
 
+      , "Adherence to Org's rules for markup borders" =:
+          "/t/& a/ / ./r/ (*l*) /e/! /b/." =?>
+          para (spcSep [ emph $ "t/&" <> space <> "a"
+                       , "/"
+                       , "./r/"
+                       , "(" <> (strong "l") <> ")"
+                       , (emph "e") <> "!"
+                       , (emph "b") <> "."
+                       ])
+
+      , "Inline math must stay within three lines" =:
+          unlines [ "$a", "b", "c$", "$d", "e", "f", "g$" ] =?>
+          para ((math "a\nb\nc") <> space <>
+                spcSep [ "$d", "e", "f", "g$" ])
+
       , "Markup may not span more than two lines" =:
-          unlines [ "/this *is", "not*", "emph/" ] =?>
+          unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?>
           para (spcSep [ "/this"
-                       , (strong ("is" <> space <> "not"))
+                       , (strong (spcSep
+                                  [ "is"
+                                  , (strikeout ("totally" <> space <> "nice"))
+                                  , "not"
+                                  ]))
                        , "emph/" ])
 
       , "Image" =:
-- 
cgit v1.2.3


From 1715d7cee0b9388ac77b8b2a31fcbb00ead80adf Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 10 Apr 2014 15:11:03 +0200
Subject: Org reader: Support more inline/display math variants

Support all of the following variants as valid ways to define inline or
display math inlines:

  - `\[..\]` (display)
  - `$$..$$` (display)
  - `\(..\)` (inline)
  - `$..$`   (inline)

This closes #1223.  Again.
---
 src/Text/Pandoc/Readers/Org.hs | 28 ++++++++++++++++++++++++++--
 tests/Tests/Readers/Org.hs     | 36 ++++++++++++++++++++++++++++++------
 2 files changed, 56 insertions(+), 8 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 392b17bbc..1d0400d96 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -512,6 +512,7 @@ inline =
          , underline
          , code
          , math
+         , displayMath
          , verbatim
          , subscript
          , superscript
@@ -607,7 +608,15 @@ verbatim  :: OrgParser Inlines
 verbatim  = B.rawInline "" <$> verbatimBetween '~'
 
 math      :: OrgParser Inlines
-math      = B.math         <$> mathStringBetween '$'
+math      = B.math         <$> choice [ math1CharBetween '$'
+                                      , mathStringBetween '$'
+                                      , rawMathBetween "\\(" "\\)"
+                                      ]
+
+displayMath :: OrgParser Inlines
+displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
+                                       , rawMathBetween "$$"  "$$"
+                                       ]
 
 subscript :: OrgParser Inlines
 subscript = B.subscript    <$> (try $ char '_' *> maybeGroupedByBraces)
@@ -655,6 +664,21 @@ mathStringBetween c = try $ do
   final <- mathEnd c
   return $ body ++ [final]
 
+-- | Parse a single character between @c@ using math rules
+math1CharBetween :: Char
+                -> OrgParser String
+math1CharBetween c = try $ do
+  char c
+  res <- noneOf $ c:mathForbiddenBorderChars
+  char c
+  eof <|> lookAhead (oneOf mathPostChars) *> return ()
+  return [res]
+
+rawMathBetween :: String
+               -> String
+               -> OrgParser String
+rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
+
 -- | Parses the start (opening character) of emphasis
 emphasisStart :: Char -> OrgParser Char
 emphasisStart c = try $ do
@@ -747,7 +771,7 @@ emphasisAllowedNewlines = 1
 
 -- | Chars allowed after an inline ($...$) math statement
 mathPostChars :: [Char]
-mathPostChars = "\t\n \"',-.:;?"
+mathPostChars = "\t\n \"'),-.:;?"
 
 -- | Chars not allowed at the (inner) border of math
 mathForbiddenBorderChars :: [Char]
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index efd8fe977..9e9482e45 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -54,14 +54,26 @@ tests =
           "=Robot.rock()=" =?>
           para (code "Robot.rock()")
 
-      , "Math" =:
-          "$E=mc^2$" =?>
-           para (math "E=mc^2")
-
       , "Verbatim" =:
           "~word for word~" =?>
           para (rawInline "" "word for word")
 
+      , "Math $..$" =:
+          "$E=mc^2$" =?>
+           para (math "E=mc^2")
+
+      , "Math $$..$$" =:
+          "$$E=mc^2$$" =?>
+          para (displayMath "E=mc^2")
+
+      , "Math \\[..\\]" =:
+          "\\[E=ℎν\\]" =?>
+          para (displayMath "E=ℎν")
+
+      , "Math \\(..\\)" =:
+          "\\(σ_x σ_p ≥ \\frac{ℏ}{2}\\)" =?>
+          para (math "σ_x σ_p ≥ \\frac{ℏ}{2}")
+
       , "Symbol" =:
           "A * symbol" =?>
           para (str "A" <> space <> str "*" <> space <> "symbol")
@@ -86,14 +98,19 @@ tests =
           unlines [ "this+that+ +so+on"
                   , "seven*eight* nine*"
                   , "+not+funny+"
-                  , "this == self"
                   ] =?>
           para (spcSep [ "this+that+", "+so+on"
                        , "seven*eight*", "nine*"
                        , strikeout "not+funny"
-                       , "this" <> space <> "==" <> space <> "self"
                        ])
 
+      , "No empty markup" =:
+          -- FIXME: __ is erroneously parsed as subscript "_"
+          -- "// ** __ ++ == ~~ $$" =?>
+          -- para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ])
+          "// ** ++ == ~~ $$" =?>
+          para (spcSep [ "//", "**", "++", "==", "~~", "$$" ])
+
       , "Adherence to Org's rules for markup borders" =:
           "/t/& a/ / ./r/ (*l*) /e/! /b/." =?>
           para (spcSep [ emph $ "t/&" <> space <> "a"
@@ -109,6 +126,13 @@ tests =
           para ((math "a\nb\nc") <> space <>
                 spcSep [ "$d", "e", "f", "g$" ])
 
+      , "Single-character math" =:
+          "$a$ $b$! $c$?" =?>
+          para (spcSep [ math "a"
+                       , "$b$!"
+                       , (math "c") <> "?"
+                       ])
+
       , "Markup may not span more than two lines" =:
           unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?>
           para (spcSep [ "/this"
-- 
cgit v1.2.3


From ace8837cd691b17e994b41dcb797de6ca1940136 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 10 Apr 2014 17:25:05 +0200
Subject: Org reader: Improve code by following HLint recommendations

HLint's recommendations for better code are applied to the Org-mode
reader code.
---
 src/Text/Pandoc/Readers/Org.hs | 44 +++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 1d0400d96..29611e8cc 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -44,11 +44,14 @@ import           Data.List (foldl', isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (mconcat, mempty, mappend)
 
+-- Ignore HLint warnings to use String instead of [Char]
+{-# ANN module ("HLint: ignore Use String" :: String) #-}
+
 -- | Parse org-mode string and return a Pandoc document.
 readOrg :: ReaderOptions -- ^ Reader options
         -> String        -- ^ String to parse (assuming @'\n'@ line endings)
         -> Pandoc
-readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n")
+readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
 
 type OrgParser = Parser [Char] OrgParserState
 
@@ -111,7 +114,7 @@ updateLastPreCharPos = getPosition >>= \p ->
 
 pushToInlineCharStack :: Char -> OrgParser ()
 pushToInlineCharStack c = updateState $ \st ->
-  st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) }
+  st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st }
 
 popInlineCharStack :: OrgParser ()
 popInlineCharStack = updateState $ \st ->
@@ -176,7 +179,7 @@ orgBlock = try $ do
     "comment" -> return mempty
     "src"     -> return $ B.codeBlockWith ("", classArgs, []) blockStr
     _         -> B.divWith ("", [blockType], [])
-                            <$> (parseFromString parseBlocks blockStr)
+                 <$> parseFromString parseBlocks blockStr
 
 blockHeader :: OrgParser (Int, String, [String])
 blockHeader = (,,) <$> blockIndent
@@ -199,7 +202,7 @@ rawBlockContent indent blockType =
 indentWith :: Int -> OrgParser String
 indentWith num = do
   tabStop <- getOption readerTabStop
-  if (num < tabStop)
+  if num < tabStop
      then count num (char ' ')
      else choice [ try (count num (char ' '))
                  , try (char '\t' >> count (num - tabStop) (char ' ')) ]
@@ -242,7 +245,7 @@ drawerStart = try $
                           <|> stringAnyCase "LOGBOOK"
 
 drawerLine :: OrgParser String
-drawerLine = try $ anyLine
+drawerLine = try anyLine
 
 drawerEnd :: OrgParser String
 drawerEnd = try $
@@ -276,7 +279,7 @@ declarationLine = try $ do
 metaValue :: OrgParser MetaValue
 metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine
 
-metaKey :: OrgParser [Char]
+metaKey :: OrgParser String
 metaKey = map toLower <$> many1 (noneOf ": \n\r")
                       <*  char ':'
                       <*  skipSpaces
@@ -350,7 +353,7 @@ tableAlignRow = try $
 
 tableAlignCell :: OrgParser Alignment
 tableAlignCell =
-  choice [ try $ emptyCell *> return (AlignDefault)
+  choice [ try $ emptyCell *> return AlignDefault
          , try $ skipSpaces
                    *> char '<'
                    *> tableAlignFromChar
@@ -381,8 +384,8 @@ normalizeTable (OrgTable cols aligns heads lns) =
   let aligns' = fillColumns aligns AlignDefault
       heads'  = if heads == mempty
                 then mempty
-                else fillColumns heads  (B.plain mempty)
-      lns'    = map (flip fillColumns (B.plain mempty)) lns
+                else fillColumns heads (B.plain mempty)
+      lns'    = map (`fillColumns` B.plain mempty) lns
       fillColumns base padding = take cols $ base ++ repeat padding
   in OrgTable cols aligns' heads' lns'
 
@@ -565,13 +568,14 @@ explicitOrImageLink = try $ do
   title  <- enclosedRaw (char '[') (char ']')
   title' <- parseFromString (mconcat <$> many inline) title
   char ']'
-  return $ if (isImageFilename src) && (isImageFilename title)
-           then B.link src "" (B.image title "" "")
-           else B.link src "" title'
+  return . B.link src ""
+         $ if isImageFilename src && isImageFilename title
+           then B.image title "" ""
+           else title'
 
 selflinkOrImage :: OrgParser Inlines
 selflinkOrImage = try $ do
-  src <- (char '[') *> linkTarget <* char ']'
+  src <- char '[' *> linkTarget <* char ']'
   return $ if isImageFilename src
            then B.image src "" ""
            else B.link src "" (B.str src)
@@ -619,10 +623,10 @@ displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
                                        ]
 
 subscript :: OrgParser Inlines
-subscript = B.subscript    <$> (try $ char '_' *> maybeGroupedByBraces)
+subscript = B.subscript    <$> try (char '_' *> maybeGroupedByBraces)
 
 superscript ::  OrgParser Inlines
-superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces)
+superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces)
 
 maybeGroupedByBraces :: OrgParser Inlines
 maybeGroupedByBraces = try $
@@ -702,14 +706,14 @@ emphasisEnd c = try $ do
   return c
 
 mathStart :: Char -> OrgParser Char
-mathStart c = try $ do
+mathStart c = try $
   char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
 
 mathEnd :: Char -> OrgParser Char
 mathEnd c = try $ do
   res <- noneOf (c:mathForbiddenBorderChars)
   char c
-  eof <|> (lookAhead $ oneOf mathPostChars *> pure ())
+  eof <|> lookAhead (oneOf mathPostChars *> pure ())
   return res
 
 
@@ -741,8 +745,8 @@ many1TillNOrLessNewlines n p end = try $
    nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
    nMoreLines k        cs = try $ (final k cs <|> rest k cs)
                                   >>= uncurry nMoreLines
-   final _ cs = (\x -> (Nothing,      cs ++ x)) <$> (try finalLine)
-   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline)
+   final _ cs = (\x -> (Nothing,      cs ++ x)) <$> try finalLine
+   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
    finalLine = try $ manyTill p end
    minus1 k = k - 1
    oneOrMore cs = guard (not $ null cs) *> return cs
@@ -786,7 +790,7 @@ afterEmphasisPreChar :: OrgParser Bool
 afterEmphasisPreChar = do
   pos <- getPosition
   lastPrePos <- orgStateLastPreCharPos <$> getState
-  return $ lastPrePos == Nothing || lastPrePos == Just pos
+  return . fromMaybe True $ (== pos) <$> lastPrePos
 
 -- | Whether we are right after the end of a string
 notAfterString :: OrgParser Bool
-- 
cgit v1.2.3


From 6f19be7d40f583ee4e10fa2b0f20bd4f1fa80c43 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 11 Apr 2014 11:05:42 +0200
Subject: Org reader: Fix parsing of sub-/superscript expressions

This fixes the org-reader's handling of sub- and superscript
expressions.  Simple expressions (like `2^+10`), expressions in
parentheses (`a_(n+1)`) and nested sexp (like `a_(nested()parens)`) are
now read correctly.
---
 src/Text/Pandoc/Readers/Org.hs | 47 +++++++++++++++++++++++++++++++++---------
 tests/Tests/Readers/Org.hs     | 39 +++++++++++++++++++++++++++--------
 2 files changed, 67 insertions(+), 19 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 29611e8cc..ceac69367 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -622,17 +622,11 @@ displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
                                        , rawMathBetween "$$"  "$$"
                                        ]
 
-subscript :: OrgParser Inlines
-subscript = B.subscript    <$> try (char '_' *> maybeGroupedByBraces)
+subscript   :: OrgParser Inlines
+subscript   = B.subscript   <$> try (char '_' *> subOrSuperExpr)
 
-superscript ::  OrgParser Inlines
-superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces)
-
-maybeGroupedByBraces :: OrgParser Inlines
-maybeGroupedByBraces = try $
-  choice [ try $ enclosedInlines (char '{') (char '}')
-         , B.str . (:"") <$> anyChar
-         ]
+superscript :: OrgParser Inlines
+superscript = B.superscript <$> try (char '^' *> subOrSuperExpr)
 
 symbol :: OrgParser Inlines
 symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
@@ -805,3 +799,36 @@ notAfterForbiddenBorderChar = do
   pos <- getPosition
   lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
   return $ lastFBCPos /= Just pos
+
+-- | Read a sub- or superscript expression
+subOrSuperExpr :: OrgParser Inlines
+subOrSuperExpr = try $ do
+  choice [ balancedSexp '{' '}'
+         , balancedSexp '(' ')' >>= return . enclosing ('(', ')')
+         , simpleSubOrSuperString
+         ] >>= parseFromString (mconcat <$> many inline)
+
+-- | Read a balanced sexp
+balancedSexp :: Char
+             -> Char
+             -> OrgParser String
+balancedSexp l r = try $ do
+  char l
+  res <- concat <$> many (  many1 (noneOf ([l, r] ++ "\n\r"))
+                        <|> try (string [l, r])
+                        <|> enclosing (l, r) <$> balancedSexp l r
+                         )
+  char r
+  return res
+
+simpleSubOrSuperString :: OrgParser String
+simpleSubOrSuperString = try $
+  choice [ string "*"
+         , mappend <$> option [] ((:[]) <$> oneOf "+-")
+                   <*> many1 alphaNum
+         ]
+
+enclosing :: (a, a)
+          -> [a]
+          -> [a]
+enclosing (left, right) s = left : s ++ [right]
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 9e9482e45..49130f0ab 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -78,15 +78,15 @@ tests =
           "A * symbol" =?>
           para (str "A" <> space <> str "*" <> space <> "symbol")
 
-      , "Superscript single char" =:
-          "2^n" =?>
-          para (str "2" <> superscript "n")
+      , "Superscript simple expression" =:
+          "2^-λ" =?>
+          para (str "2" <> superscript "-λ")
 
       , "Superscript multi char" =:
           "2^{n-1}" =?>
           para (str "2" <> superscript "n-1")
 
-      , "Subscript single char" =:
+      , "Subscript simple expression" =:
           "a_n" =?>
           para (str "a" <> subscript "n")
 
@@ -105,11 +105,8 @@ tests =
                        ])
 
       , "No empty markup" =:
-          -- FIXME: __ is erroneously parsed as subscript "_"
-          -- "// ** __ ++ == ~~ $$" =?>
-          -- para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ])
-          "// ** ++ == ~~ $$" =?>
-          para (spcSep [ "//", "**", "++", "==", "~~", "$$" ])
+          "// ** __ ++ == ~~ $$" =?>
+          para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ])
 
       , "Adherence to Org's rules for markup borders" =:
           "/t/& a/ / ./r/ (*l*) /e/! /b/." =?>
@@ -143,6 +140,30 @@ tests =
                                   ]))
                        , "emph/" ])
 
+      , "Sub- and superscript expressions" =:
+         unlines [ "a_(a(b)(c)d)"
+                 , "e^(f(g)h)"
+                 , "i_(jk)l)"
+                 , "m^()n"
+                 , "o_{p{q{}r}}"
+                 , "s^{t{u}v}"
+                 , "w_{xy}z}"
+                 , "1^{}2"
+                 , "3_{{}}"
+                 , "4^(a(*b(c*)d))"
+                 ] =?>
+         para (spcSep [ "a" <> subscript "(a(b)(c)d)"
+                      , "e" <> superscript "(f(g)h)"
+                      , "i" <> (subscript "(jk)") <> "l)"
+                      , "m" <> (superscript "()") <> "n"
+                      , "o" <> subscript "p{q{}r}"
+                      , "s" <> superscript "t{u}v"
+                      , "w" <> (subscript "xy") <> "z}"
+                      , "1" <> (superscript "") <> "2"
+                      , "3" <> subscript "{}"
+                      , "4" <> superscript ("(a(" <> strong "b(c" <> ")d))")
+                      ])
+
       , "Image" =:
           "[[./sunset.jpg]]" =?>
           (para $ image "./sunset.jpg" "" "")
-- 
cgit v1.2.3


From ae4280fba528efe68c5955cb3ca0779e6910f43b Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 12 Apr 2014 00:17:46 +0200
Subject: Org reader: Add support for figures

Support for figures (images with name and caption) is added.
---
 src/Text/Pandoc/Readers/Org.hs | 57 ++++++++++++++++++++++++++++++++++++------
 tests/Tests/Readers/Org.hs     | 21 ++++++++++++++++
 2 files changed, 70 insertions(+), 8 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ceac69367..8f0ce61e0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -37,6 +37,7 @@ import           Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateL
 import           Text.Pandoc.Shared (compactify')
 
 import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
+import           Control.Arrow ((***))
 import           Control.Monad (guard, when)
 import           Data.Char (toLower)
 import           Data.Default
@@ -158,6 +159,7 @@ block = choice [ mempty <$ blanklines
                , orgBlock
                , example
                , drawer
+               , figure
                , specialLine
                , header
                , hline
@@ -252,6 +254,43 @@ drawerEnd = try $
   skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
 
 
+--
+-- Figures
+--
+
+-- Figures (Image on a line by itself, preceded by name and/or caption)
+figure :: OrgParser Blocks
+figure = try $ do
+  (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty)
+                <$> nameAndOrCaption
+  src <- skipSpaces *> selfTarget <* skipSpaces <* newline
+  guard (isImageFilename src)
+  return . B.para $ B.image src tit cap
+ where withFigPrefix cs = if "fig:" `isPrefixOf` cs
+                          then cs
+                          else "fig:" ++ cs
+
+nameAndOrCaption :: OrgParser (Maybe String, Maybe Inlines)
+nameAndOrCaption = try $ nameFirst <|> captionFirst
+ where
+   nameFirst = try $ do
+                 n <- name
+                 c <- optionMaybe caption
+                 return (Just n, c)
+   captionFirst = try $ do
+                 c <- caption
+                 n <- optionMaybe name
+                 return (n, Just c)
+
+caption :: OrgParser Inlines
+caption = try $ annotation "CAPTION" *> inlinesTillNewline
+
+name :: OrgParser String
+name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline
+
+annotation :: String -> OrgParser String
+annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':'
+
 -- Comments, Options and Metadata
 specialLine :: OrgParser Blocks
 specialLine = try $ metaLine <|> commentLine
@@ -277,7 +316,7 @@ declarationLine = try $ do
   return mempty
 
 metaValue :: OrgParser MetaValue
-metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine
+metaValue = MetaInlines . B.toList <$> inlinesTillNewline
 
 metaKey :: OrgParser String
 metaKey = map toLower <$> many1 (noneOf ": \n\r")
@@ -288,7 +327,7 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r")
 header :: OrgParser Blocks
 header = try $
   B.header <$> headerStart
-           <*> (trimInlines <$> restOfLine)
+           <*> inlinesTillNewline
 
 headerStart :: OrgParser Int
 headerStart = try $
@@ -424,13 +463,10 @@ setAligns aligns t = t{ orgTableAlignments = aligns }
 -- Paragraphs or Plain text
 paraOrPlain :: OrgParser Blocks
 paraOrPlain = try $
-  trimInlines . mconcat
-    <$> many1 inline
-    <**> option B.plain
-                (try $ newline *> pure B.para)
+  parseInlines <**> option B.plain (try $ newline *> pure B.para)
 
-restOfLine :: OrgParser Inlines
-restOfLine = mconcat <$> manyTill inline newline
+inlinesTillNewline :: OrgParser Inlines
+inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline
 
 
 --
@@ -523,6 +559,8 @@ inline =
          ] <* (guard =<< newlinesCountWithinLimits)
   <?> "inline"
 
+parseInlines :: OrgParser Inlines
+parseInlines = trimInlines . mconcat <$> many1 inline
 
 -- treat these as potentially non-text when parsing inline:
 specialChars :: [Char]
@@ -580,6 +618,9 @@ selflinkOrImage = try $ do
            then B.image src "" ""
            else B.link src "" (B.str src)
 
+selfTarget :: OrgParser String
+selfTarget = try $ char '[' *> linkTarget <* char ']'
+
 linkTarget :: OrgParser String
 linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
 
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 49130f0ab..99dadc0ac 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -377,6 +377,27 @@ tests =
               code' = "main = putStrLn greeting\n" ++
                        "  where greeting = \"moin\"\n"
           in codeBlockWith attr' code'
+
+      , "Figure" =:
+          unlines [ "#+caption: A very courageous man."
+                  , "#+name: goodguy"
+                  , "[[edward.jpg]]"
+                  ] =?>
+          para (image "edward.jpg" "fig:goodguy" "A very courageous man.")
+
+      , "Unnamed figure" =:
+          unlines [ "#+caption: A great whistleblower."
+                  , "[[snowden.png]]"
+                  ] =?>
+          para (image "snowden.png" "" "A great whistleblower.")
+
+      , "Figure with `fig:` prefix in name" =:
+          unlines [ "#+caption: Used as a metapher in evolutionary biology."
+                  , "#+name: fig:redqueen"
+                  , "[[the-red-queen.jpg]]"
+                  ] =?>
+          para (image "the-red-queen.jpg" "fig:redqueen"
+                      "Used as a metapher in evolutionary biology.")
       ]
 
   , testGroup "Lists" $
-- 
cgit v1.2.3


From 82d4160bdcc149df020d1f95f4a7d893a9ecb42a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 12 Apr 2014 11:07:38 +0200
Subject: Org reader: Read linebreaks

Linebreaks are marked by the string `\\` at the end of a line.
---
 src/Text/Pandoc/Readers/Org.hs | 4 ++++
 tests/Tests/Readers/Org.hs     | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ceac69367..19dd03c6b 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -506,6 +506,7 @@ anyLineNewline = (++ "\n") <$> anyLine
 inline :: OrgParser Inlines
 inline =
   choice [ whitespace
+         , linebreak
          , link
          , str
          , endline
@@ -535,6 +536,9 @@ whitespace = B.space <$ skipMany1 spaceChar
                      <* updateLastForbiddenCharPos
              <?> "whitespace"
 
+linebreak :: OrgParser Inlines
+linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline
+
 str :: OrgParser Inlines
 str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
             <* updateLastStrPos
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 49130f0ab..567cc4c41 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -94,6 +94,10 @@ tests =
           "a_{n+1}" =?>
           para (str "a" <> subscript "n+1")
 
+      , "Linebreak" =:
+          "line \\\\ \nbreak" =?>
+          para ("line" <> linebreak <> "break")
+
       , "Markup-chars not occuring on word break are symbols" =:
           unlines [ "this+that+ +so+on"
                   , "seven*eight* nine*"
-- 
cgit v1.2.3


From d4c1cd456c16298069b05e1da328e70dc87ea547 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 12 Apr 2014 21:44:54 -0700
Subject: Org reader: Removed ANN pragma.

This relies on Template Haskell, which causes problems in Windows
due to libraries with C dependencies.  We need to avoid using TH
in pandoc code.
---
 src/Text/Pandoc/Readers/Org.hs | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ceac69367..36b1ad287 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -44,9 +44,6 @@ import           Data.List (foldl', isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (mconcat, mempty, mappend)
 
--- Ignore HLint warnings to use String instead of [Char]
-{-# ANN module ("HLint: ignore Use String" :: String) #-}
-
 -- | Parse org-mode string and return a Pandoc document.
 readOrg :: ReaderOptions -- ^ Reader options
         -> String        -- ^ String to parse (assuming @'\n'@ line endings)
-- 
cgit v1.2.3


From 346bcea713f933a6cf48829d948e14b9c28b4798 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 16 Apr 2014 13:22:48 +0200
Subject: Org reader: Better module description, minor style changes

Use module description analogous to the markdown reader's.
Use (<$) where it makes sense.
---
 src/Text/Pandoc/Readers/Org.hs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index bda0b0262..c4ea64ba7 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -24,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
    Maintainer  : Albert Krewinkel <tarleb@moltkeplatz.de>
 
-Conversion of Org-Mode to 'Pandoc' document.
+Conversion of org-mode formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
@@ -711,7 +711,7 @@ math1CharBetween c = try $ do
   char c
   res <- noneOf $ c:mathForbiddenBorderChars
   char c
-  eof <|> lookAhead (oneOf mathPostChars) *> return ()
+  eof <|> () <$ lookAhead (oneOf mathPostChars)
   return [res]
 
 rawMathBetween :: String
@@ -734,12 +734,12 @@ emphasisEnd :: Char -> OrgParser Char
 emphasisEnd c = try $ do
   guard =<< notAfterForbiddenBorderChar
   char c
-  eof <|> lookAhead (surroundingEmphasisChar >>= \x ->
-                         oneOf (x ++ emphasisPostChars))
-          *> return ()
+  eof <|> () <$ lookAhead acceptablePostChars
   updateLastStrPos
   popInlineCharStack
   return c
+ where acceptablePostChars =
+           surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
 
 mathStart :: Char -> OrgParser Char
 mathStart c = try $
@@ -749,7 +749,7 @@ mathEnd :: Char -> OrgParser Char
 mathEnd c = try $ do
   res <- noneOf (c:mathForbiddenBorderChars)
   char c
-  eof <|> lookAhead (oneOf mathPostChars *> pure ())
+  eof <|> () <$ lookAhead (oneOf mathPostChars)
   return res
 
 
-- 
cgit v1.2.3


From 5fc252270c8332908e3ad9ec12d16c08c49de4a2 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Mon, 14 Apr 2014 15:04:32 +0200
Subject: Org reader: Fix code for subexpression parsing

---
 src/Text/Pandoc/Readers/Org.hs | 25 ++++---------------------
 1 file changed, 4 insertions(+), 21 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c4ea64ba7..ec0436f4c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -844,24 +844,12 @@ notAfterForbiddenBorderChar = do
 
 -- | Read a sub- or superscript expression
 subOrSuperExpr :: OrgParser Inlines
-subOrSuperExpr = try $ do
-  choice [ balancedSexp '{' '}'
-         , balancedSexp '(' ')' >>= return . enclosing ('(', ')')
+subOrSuperExpr = try $
+  choice [ id                   <$> charsInBalanced '{' '}' (noneOf "\n\r")
+         , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
          , simpleSubOrSuperString
          ] >>= parseFromString (mconcat <$> many inline)
-
--- | Read a balanced sexp
-balancedSexp :: Char
-             -> Char
-             -> OrgParser String
-balancedSexp l r = try $ do
-  char l
-  res <- concat <$> many (  many1 (noneOf ([l, r] ++ "\n\r"))
-                        <|> try (string [l, r])
-                        <|> enclosing (l, r) <$> balancedSexp l r
-                         )
-  char r
-  return res
+ where enclosing (left, right) s = left : s ++ [right]
 
 simpleSubOrSuperString :: OrgParser String
 simpleSubOrSuperString = try $
@@ -869,8 +857,3 @@ simpleSubOrSuperString = try $
          , mappend <$> option [] ((:[]) <$> oneOf "+-")
                    <*> many1 alphaNum
          ]
-
-enclosing :: (a, a)
-          -> [a]
-          -> [a]
-enclosing (left, right) s = left : s ++ [right]
-- 
cgit v1.2.3


From 92582c6272a3a171c406699e46e88afc4835d85c Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 16 Apr 2014 11:58:16 +0200
Subject: Org reader: introduce Reader environment around Blocks/Inlines

This introduces a Reader environment in the style of
Text.Pandoc.Parsing.F, but adapted to the Org reader parser.
---
 src/Text/Pandoc/Readers/Org.hs | 306 ++++++++++++++++++++++++-----------------
 1 file changed, 176 insertions(+), 130 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ec0436f4c..bdff4869c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-
 Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
 
@@ -29,21 +30,26 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
 import qualified Text.Pandoc.Builder as B
-import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..))
+import           Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
+                                     , trimInlines )
 import           Text.Pandoc.Definition
 import           Text.Pandoc.Options
 import qualified Text.Pandoc.Parsing as P
-import           Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos)
+import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
+                                            , newline, orderedListMarker
+                                            , updateLastStrPos )
 import           Text.Pandoc.Shared (compactify')
 
-import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
+import           Control.Applicative ( Applicative, pure
+                                     , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
 import           Control.Arrow ((***))
-import           Control.Monad (guard, when)
+import           Control.Monad (foldM, guard, liftM, liftM2, when)
+import           Control.Monad.Reader (Reader, runReader)
 import           Data.Char (toLower)
 import           Data.Default
-import           Data.List (foldl', isPrefixOf, isSuffixOf)
+import           Data.List (isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
-import           Data.Monoid (mconcat, mempty, mappend)
+import           Data.Monoid (Monoid, mconcat, mempty, mappend)
 
 -- | Parse org-mode string and return a Pandoc document.
 readOrg :: ReaderOptions -- ^ Reader options
@@ -55,10 +61,10 @@ type OrgParser = Parser [Char] OrgParserState
 
 parseOrg:: OrgParser Pandoc
 parseOrg = do
-  blocks' <- B.toList <$> parseBlocks
+  blocks' <- parseBlocks
   st <- getState
-  let meta = orgStateMeta st
-  return $ Pandoc meta $ filter (/= Null) blocks'
+  let meta = runF (orgStateMeta' st) st
+  return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st)
 
 --
 -- Parser State for Org
@@ -73,7 +79,8 @@ data OrgParserState = OrgParserState
                       , orgStateLastPreCharPos       :: Maybe SourcePos
                       , orgStateLastStrPos           :: Maybe SourcePos
                       , orgStateMeta                 :: Meta
-                      } deriving (Show)
+                      , orgStateMeta'                :: F Meta
+                      }
 
 instance HasReaderOptions OrgParserState where
   extractReaderOptions = orgStateOptions
@@ -96,6 +103,7 @@ defaultOrgParserState = OrgParserState
                         , orgStateLastPreCharPos = Nothing
                         , orgStateLastStrPos = Nothing
                         , orgStateMeta = nullMeta
+                        , orgStateMeta' = return nullMeta
                         }
 
 updateLastStrPos :: OrgParser ()
@@ -138,6 +146,27 @@ resetEmphasisNewlines :: OrgParser ()
 resetEmphasisNewlines = updateState $ \s ->
   s{ orgStateEmphasisNewlines = Nothing }
 
+
+--
+-- Adaptions and specializations of parsing utilities
+--
+
+newtype F a = F { unF :: Reader OrgParserState a
+                } deriving (Monad, Applicative, Functor)
+
+runF :: F a -> OrgParserState -> a
+runF = runReader . unF
+
+instance Monoid a => Monoid (F a) where
+  mempty = return mempty
+  mappend = liftM2 mappend
+  mconcat = fmap mconcat . sequence
+
+trimInlinesF :: F Inlines -> F Inlines
+trimInlinesF = liftM trimInlines
+
+
+-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
 newline :: OrgParser Char
 newline =
   P.newline
@@ -148,10 +177,10 @@ newline =
 -- parsing blocks
 --
 
-parseBlocks :: OrgParser Blocks
+parseBlocks :: OrgParser (F Blocks)
 parseBlocks = mconcat <$> manyTill block eof
 
-block :: OrgParser Blocks
+block :: OrgParser (F Blocks)
 block = choice [ mempty <$ blanklines
                , orgBlock
                , example
@@ -159,7 +188,7 @@ block = choice [ mempty <$ blanklines
                , figure
                , specialLine
                , header
-               , hline
+               , return <$> hline
                , list
                , table
                , paraOrPlain
@@ -169,15 +198,15 @@ block = choice [ mempty <$ blanklines
 -- Org Blocks (#+BEGIN_... / #+END_...)
 --
 
-orgBlock :: OrgParser Blocks
+orgBlock :: OrgParser (F Blocks)
 orgBlock = try $ do
   (indent, blockType, args) <- blockHeader
   blockStr <- rawBlockContent indent blockType
   let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
   case blockType of
     "comment" -> return mempty
-    "src"     -> return $ B.codeBlockWith ("", classArgs, []) blockStr
-    _         -> B.divWith ("", [blockType], [])
+    "src"     -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr
+    _         -> fmap (B.divWith ("", [blockType], []))
                  <$> parseFromString parseBlocks blockStr
 
 blockHeader :: OrgParser (Int, String, [String])
@@ -222,15 +251,16 @@ commaEscaped (',':cs@('*':_))     = cs
 commaEscaped (',':cs@('#':'+':_)) = cs
 commaEscaped cs                   = cs
 
-example :: OrgParser Blocks
-example = try $
-  B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine
+example :: OrgParser (F Blocks)
+example = try $ do
+  body <- unlines <$> many1 exampleLine
+  return . return $ B.codeBlockWith ("", ["example"], []) body
 
 exampleLine :: OrgParser String
 exampleLine = try $ string ": " *> anyLine
 
 -- Drawers for properties or a logbook
-drawer :: OrgParser Blocks
+drawer :: OrgParser (F Blocks)
 drawer = try $ do
   drawerStart
   manyTill drawerLine (try drawerEnd)
@@ -256,18 +286,20 @@ drawerEnd = try $
 --
 
 -- Figures (Image on a line by itself, preceded by name and/or caption)
-figure :: OrgParser Blocks
+figure :: OrgParser (F Blocks)
 figure = try $ do
   (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty)
                 <$> nameAndOrCaption
   src <- skipSpaces *> selfTarget <* skipSpaces <* newline
   guard (isImageFilename src)
-  return . B.para $ B.image src tit cap
+  return $ do
+    cap' <- cap
+    return $ B.para $ B.image src tit cap'
  where withFigPrefix cs = if "fig:" `isPrefixOf` cs
                           then cs
                           else "fig:" ++ cs
 
-nameAndOrCaption :: OrgParser (Maybe String, Maybe Inlines)
+nameAndOrCaption :: OrgParser (Maybe String, Maybe (F Inlines))
 nameAndOrCaption = try $ nameFirst <|> captionFirst
  where
    nameFirst = try $ do
@@ -279,7 +311,7 @@ nameAndOrCaption = try $ nameFirst <|> captionFirst
                  n <- optionMaybe name
                  return (n, Just c)
 
-caption :: OrgParser Inlines
+caption :: OrgParser (F Inlines)
 caption = try $ annotation "CAPTION" *> inlinesTillNewline
 
 name :: OrgParser String
@@ -289,8 +321,8 @@ annotation :: String -> OrgParser String
 annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':'
 
 -- Comments, Options and Metadata
-specialLine :: OrgParser Blocks
-specialLine = try $ metaLine <|> commentLine
+specialLine :: OrgParser (F Blocks)
+specialLine = fmap return . try $ metaLine <|> commentLine
 
 metaLine :: OrgParser Blocks
 metaLine = try $ metaLineStart *> declarationLine
@@ -308,12 +340,15 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
 
 declarationLine :: OrgParser Blocks
 declarationLine = try $ do
-  meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
-  updateState $ \st -> st { orgStateMeta  = orgStateMeta st <> meta' }
+  key <- metaKey
+  inlinesF <- metaInlines
+  updateState $ \st ->
+    let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
+    in st { orgStateMeta' = orgStateMeta' st <> meta' }
   return mempty
 
-metaValue :: OrgParser MetaValue
-metaValue = MetaInlines . B.toList <$> inlinesTillNewline
+metaInlines :: OrgParser (F MetaValue)
+metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
 
 metaKey :: OrgParser String
 metaKey = map toLower <$> many1 (noneOf ": \n\r")
@@ -321,16 +356,20 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r")
                       <*  skipSpaces
 
 -- | Headers
-header :: OrgParser Blocks
-header = try $
-  B.header <$> headerStart
-           <*> inlinesTillNewline
+header :: OrgParser (F Blocks)
+header = try $ do
+  level <- headerStart
+  title <- inlinesTillNewline
+  return $ B.header level <$> title
 
 headerStart :: OrgParser Int
 headerStart = try $
   (length <$> many1 (char '*')) <* many1 (char ' ')
 
--- Horizontal Line (five dashes or more)
+-- Don't use (or need) the reader wrapper here, we want hline to be
+-- @show@able.  Otherwise we can't use it with @notFollowedBy'@.
+
+-- | Horizontal Line (five -- dashes or more)
 hline :: OrgParser Blocks
 hline = try $ do
   skipSpaces
@@ -344,22 +383,23 @@ hline = try $ do
 -- Tables
 --
 
-data OrgTableRow = OrgContentRow [Blocks]
+data OrgTableRow = OrgContentRow (F [Blocks])
                  | OrgAlignRow [Alignment]
                  | OrgHlineRow
- deriving (Eq, Show)
 
 data OrgTable = OrgTable
   { orgTableColumns    :: Int
   , orgTableAlignments :: [Alignment]
   , orgTableHeader     :: [Blocks]
   , orgTableRows       :: [[Blocks]]
-  } deriving (Eq, Show)
+  }
 
-table :: OrgParser Blocks
+table :: OrgParser (F Blocks)
 table = try $ do
   lookAhead tableStart
-  orgToPandocTable . normalizeTable . rowsToTable <$> tableRows
+  do
+    rows <- tableRows
+    return $ return . orgToPandocTable . normalizeTable =<< rowsToTable rows
 
 orgToPandocTable :: OrgTable
                  -> Blocks
@@ -374,11 +414,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
 
 tableContentRow :: OrgParser OrgTableRow
 tableContentRow = try $
-  OrgContentRow <$> (tableStart *> manyTill tableContentCell newline)
+  OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
 
-tableContentCell :: OrgParser Blocks
+tableContentCell :: OrgParser (F Blocks)
 tableContentCell = try $
-  B.plain . trimInlines . mconcat <$> many1Till inline endOfCell
+  fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
 
 endOfCell :: OrgParser Char
 endOfCell = try $ char '|' <|> lookAhead newline
@@ -410,8 +450,8 @@ tableHline = try $
   OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
 
 rowsToTable :: [OrgTableRow]
-            -> OrgTable
-rowsToTable = foldl' (flip rowToContent) zeroTable
+            -> F OrgTable
+rowsToTable = foldM (flip rowToContent) zeroTable
   where zeroTable = OrgTable 0 mempty mempty mempty
 
 normalizeTable :: OrgTable
@@ -430,57 +470,64 @@ normalizeTable (OrgTable cols aligns heads lns) =
 -- line as a header.  All other horizontal lines are discarded.
 rowToContent :: OrgTableRow
              -> OrgTable
-             -> OrgTable
-rowToContent OrgHlineRow        = maybeBodyToHeader
-rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs
-rowToContent (OrgAlignRow as)   = setLongestRow as . setAligns as
+             -> F OrgTable
+rowToContent OrgHlineRow        t = maybeBodyToHeader t
+rowToContent (OrgAlignRow as)   t = setLongestRow as =<< setAligns as t
+rowToContent (OrgContentRow rf) t = do
+  rs <- rf
+  setLongestRow rs =<< appendToBody rs t
 
 setLongestRow :: [a]
               -> OrgTable
-              -> OrgTable
-setLongestRow rs t = t{ orgTableColumns = max (length rs) (orgTableColumns t) }
+              -> F OrgTable
+setLongestRow rs t =
+  return t{ orgTableColumns = max (length rs) (orgTableColumns t) }
 
 maybeBodyToHeader :: OrgTable
-                  -> OrgTable
+                  -> F OrgTable
 maybeBodyToHeader t = case t of
   OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
-         t{ orgTableHeader = b , orgTableRows = [] }
-  _   -> t
+         return t{ orgTableHeader = b , orgTableRows = [] }
+  _   -> return t
 
 appendToBody :: [Blocks]
              -> OrgTable
-             -> OrgTable
-appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] }
+             -> F OrgTable
+appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }
 
 setAligns :: [Alignment]
           -> OrgTable
-          -> OrgTable
-setAligns aligns t = t{ orgTableAlignments = aligns }
+          -> F OrgTable
+setAligns aligns t = return $ t{ orgTableAlignments = aligns }
 
 -- Paragraphs or Plain text
-paraOrPlain :: OrgParser Blocks
+paraOrPlain :: OrgParser (F Blocks)
 paraOrPlain = try $
-  parseInlines <**> option B.plain (try $ newline *> pure B.para)
+  parseInlines <**> (fmap <$> option B.plain (try $ newline *> pure B.para))
 
-inlinesTillNewline :: OrgParser Inlines
-inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline
+inlinesTillNewline :: OrgParser (F Inlines)
+inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
 
 
 --
 -- list blocks
 --
 
-list :: OrgParser Blocks
+list :: OrgParser (F Blocks)
 list = choice [ definitionList, bulletList, orderedList ] <?> "list"
 
-definitionList :: OrgParser Blocks
-definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart)
+definitionList :: OrgParser (F Blocks)
+definitionList = fmap B.definitionList . sequence
+                 <$> many1 (definitionListItem bulletListStart)
 
-bulletList :: OrgParser Blocks
-bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
+bulletList :: OrgParser (F Blocks)
+bulletList = fmap B.bulletList . fmap compactify' . sequence
+             <$> many1 (listItem bulletListStart)
 
-orderedList :: OrgParser Blocks
-orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
+orderedList :: OrgParser (F Blocks)
+-- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
+orderedList = fmap B.orderedList . fmap compactify' . sequence
+              <$> many1 (listItem orderedListStart)
 
 genericListStart :: OrgParser String
                  -> OrgParser Int
@@ -499,7 +546,7 @@ orderedListStart = genericListStart orderedListMarker
   where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
 
 definitionListItem :: OrgParser Int
-                   -> OrgParser (Inlines, [Blocks])
+                   -> OrgParser (F (Inlines, [Blocks]))
 definitionListItem parseMarkerGetLength = try $ do
   markerLength <- parseMarkerGetLength
   term <- manyTill (noneOf "\n\r") (try $ string "::")
@@ -507,12 +554,12 @@ definitionListItem parseMarkerGetLength = try $ do
   cont <- concat <$> many (listContinuation markerLength)
   term' <- parseFromString inline term
   contents' <- parseFromString parseBlocks $ first ++ cont
-  return (term', [contents'])
+  return $ (,) <$> term' <*> fmap (:[]) contents'
 
 
 -- parse raw text for one list item, excluding start marker and continuations
 listItem :: OrgParser Int
-         -> OrgParser Blocks
+         -> OrgParser (F Blocks)
 listItem start = try $ do
   markerLength <- try start
   firstLine <- anyLineNewline
@@ -536,11 +583,11 @@ anyLineNewline = (++ "\n") <$> anyLine
 -- inline
 --
 
-inline :: OrgParser Inlines
+inline :: OrgParser (F Inlines)
 inline =
   choice [ whitespace
          , linebreak
-         , link
+         , linkOrImage
          , str
          , endline
          , emph
@@ -557,29 +604,29 @@ inline =
          ] <* (guard =<< newlinesCountWithinLimits)
   <?> "inline"
 
-parseInlines :: OrgParser Inlines
-parseInlines = trimInlines . mconcat <$> many1 inline
+parseInlines :: OrgParser (F Inlines)
+parseInlines = trimInlinesF . mconcat <$> many1 inline
 
 -- treat these as potentially non-text when parsing inline:
 specialChars :: [Char]
 specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
 
 
-whitespace :: OrgParser Inlines
-whitespace = B.space <$ skipMany1 spaceChar
-                     <* updateLastPreCharPos
-                     <* updateLastForbiddenCharPos
+whitespace :: OrgParser (F Inlines)
+whitespace = pure B.space <$ skipMany1 spaceChar
+                          <* updateLastPreCharPos
+                          <* updateLastForbiddenCharPos
              <?> "whitespace"
 
-linebreak :: OrgParser Inlines
-linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline
+linebreak :: OrgParser (F Inlines)
+linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
 
-str :: OrgParser Inlines
-str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
-            <* updateLastStrPos
+str :: OrgParser (F Inlines)
+str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+      <* updateLastStrPos
 
 -- an endline character that can be treated as a space, not a structural break
-endline :: OrgParser Inlines
+endline :: OrgParser (F Inlines)
 endline = try $ do
   newline
   notFollowedBy blankline
@@ -595,29 +642,29 @@ endline = try $ do
   decEmphasisNewlinesCount
   guard =<< newlinesCountWithinLimits
   updateLastPreCharPos
-  return B.space
+  return . return $ B.space
 
-link :: OrgParser Inlines
-link = explicitOrImageLink <|> selflinkOrImage <?> "link"
+linkOrImage :: OrgParser (F Inlines)
+linkOrImage = explicitOrImageLink <|> selflinkOrImage <?> "link or image"
 
-explicitOrImageLink :: OrgParser Inlines
+explicitOrImageLink :: OrgParser (F Inlines)
 explicitOrImageLink = try $ do
   char '['
   src    <- linkTarget
   title  <- enclosedRaw (char '[') (char ']')
   title' <- parseFromString (mconcat <$> many inline) title
   char ']'
-  return . B.link src ""
-         $ if isImageFilename src && isImageFilename title
-           then B.image title "" ""
-           else title'
+  return $ B.link src ""  <$>
+         if isImageFilename src && isImageFilename title
+            then return $ B.image title mempty mempty
+            else title'
 
-selflinkOrImage :: OrgParser Inlines
+selflinkOrImage :: OrgParser (F Inlines)
 selflinkOrImage = try $ do
   src <- char '[' *> linkTarget <* char ']'
-  return $ if isImageFilename src
-           then B.image src "" ""
-           else B.link src "" (B.str src)
+  return . return $ if isImageFilename src
+                    then B.image src "" ""
+                    else B.link src "" (B.str src)
 
 selfTarget :: OrgParser String
 selfTarget = try $ char '[' *> linkTarget <* char ']'
@@ -634,51 +681,50 @@ isImageFilename filename =
    imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
    protocols = [ "file", "http", "https" ]
 
-emph      :: OrgParser Inlines
-emph      = B.emph         <$> emphasisBetween '/'
+emph      :: OrgParser (F Inlines)
+emph      = fmap B.emph         <$> emphasisBetween '/'
 
-strong    :: OrgParser Inlines
-strong    = B.strong       <$> emphasisBetween '*'
+strong    :: OrgParser (F Inlines)
+strong    = fmap B.strong       <$> emphasisBetween '*'
 
-strikeout :: OrgParser Inlines
-strikeout = B.strikeout    <$> emphasisBetween '+'
+strikeout :: OrgParser (F Inlines)
+strikeout = fmap B.strikeout    <$> emphasisBetween '+'
 
 -- There is no underline, so we use strong instead.
-underline :: OrgParser Inlines
-underline = B.strong       <$> emphasisBetween '_'
-
-code      :: OrgParser Inlines
-code      = B.code         <$> verbatimBetween '='
+underline :: OrgParser (F Inlines)
+underline = fmap B.strong       <$> emphasisBetween '_'
 
-verbatim  :: OrgParser Inlines
-verbatim  = B.rawInline "" <$> verbatimBetween '~'
+code      :: OrgParser (F Inlines)
+code      = return . B.code         <$> verbatimBetween '='
 
-math      :: OrgParser Inlines
-math      = B.math         <$> choice [ math1CharBetween '$'
-                                      , mathStringBetween '$'
-                                      , rawMathBetween "\\(" "\\)"
-                                      ]
+verbatim  :: OrgParser (F Inlines)
+verbatim  = return . B.rawInline "" <$> verbatimBetween '~'
 
-displayMath :: OrgParser Inlines
-displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
-                                       , rawMathBetween "$$"  "$$"
-                                       ]
+subscript   :: OrgParser (F Inlines)
+subscript   = fmap B.subscript   <$> try (char '_' *> subOrSuperExpr)
 
-subscript   :: OrgParser Inlines
-subscript   = B.subscript   <$> try (char '_' *> subOrSuperExpr)
+superscript :: OrgParser (F Inlines)
+superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
 
-superscript :: OrgParser Inlines
-superscript = B.superscript <$> try (char '^' *> subOrSuperExpr)
+math      :: OrgParser (F Inlines)
+math      = return . B.math      <$> choice [ math1CharBetween '$'
+                                            , mathStringBetween '$'
+                                            , rawMathBetween "\\(" "\\)"
+                                            ]
 
-symbol :: OrgParser Inlines
-symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+displayMath :: OrgParser (F Inlines)
+displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
+                                                , rawMathBetween "$$"  "$$"
+                                                ]
+symbol :: OrgParser (F Inlines)
+symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
  where updatePositions c
            | c `elem` emphasisPreChars = c <$ updateLastPreCharPos
            | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
            | otherwise = return c
 
 emphasisBetween :: Char
-                -> OrgParser Inlines
+                -> OrgParser (F Inlines)
 emphasisBetween c = try $ do
   startEmphasisNewlinesCounting emphasisAllowedNewlines
   res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
@@ -755,9 +801,9 @@ mathEnd c = try $ do
 
 enclosedInlines :: OrgParser a
                 -> OrgParser b
-                -> OrgParser Inlines
+                -> OrgParser (F Inlines)
 enclosedInlines start end = try $
-  trimInlines . mconcat <$> enclosed start end inline
+  trimInlinesF . mconcat <$> enclosed start end inline
 
 enclosedRaw :: OrgParser a
             -> OrgParser b
@@ -843,7 +889,7 @@ notAfterForbiddenBorderChar = do
   return $ lastFBCPos /= Just pos
 
 -- | Read a sub- or superscript expression
-subOrSuperExpr :: OrgParser Inlines
+subOrSuperExpr :: OrgParser (F Inlines)
 subOrSuperExpr = try $
   choice [ id                   <$> charsInBalanced '{' '}' (noneOf "\n\r")
          , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
-- 
cgit v1.2.3


From 0672f58a445c289c58e42cffbbf32a273e801e39 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sun, 6 Apr 2014 18:43:49 +0200
Subject: Org reader: Support footnotes

---
 src/Text/Pandoc/Readers/Org.hs | 66 ++++++++++++++++++++++++++++++++++++++++--
 tests/Tests/Readers/Org.hs     |  4 +++
 2 files changed, 68 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index bdff4869c..17f8a1c9e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -44,7 +44,7 @@ import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
 import           Control.Arrow ((***))
 import           Control.Monad (foldM, guard, liftM, liftM2, when)
-import           Control.Monad.Reader (Reader, runReader)
+import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (toLower)
 import           Data.Default
 import           Data.List (isPrefixOf, isSuffixOf)
@@ -59,7 +59,7 @@ readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
 
 type OrgParser = Parser [Char] OrgParserState
 
-parseOrg:: OrgParser Pandoc
+parseOrg :: OrgParser Pandoc
 parseOrg = do
   blocks' <- parseBlocks
   st <- getState
@@ -70,6 +70,9 @@ parseOrg = do
 -- Parser State for Org
 --
 
+type OrgNoteRecord = (String, F Blocks)
+type OrgNoteTable = [OrgNoteRecord]
+
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
                       { orgStateOptions              :: ReaderOptions
@@ -80,6 +83,7 @@ data OrgParserState = OrgParserState
                       , orgStateLastStrPos           :: Maybe SourcePos
                       , orgStateMeta                 :: Meta
                       , orgStateMeta'                :: F Meta
+                      , orgStateNotes'               :: OrgNoteTable
                       }
 
 instance HasReaderOptions OrgParserState where
@@ -104,6 +108,7 @@ defaultOrgParserState = OrgParserState
                         , orgStateLastStrPos = Nothing
                         , orgStateMeta = nullMeta
                         , orgStateMeta' = return nullMeta
+                        , orgStateNotes' = []
                         }
 
 updateLastStrPos :: OrgParser ()
@@ -146,6 +151,11 @@ resetEmphasisNewlines :: OrgParser ()
 resetEmphasisNewlines = updateState $ \s ->
   s{ orgStateEmphasisNewlines = Nothing }
 
+addToNotesTable :: OrgNoteRecord -> OrgParser ()
+addToNotesTable note = do
+  oldnotes <- orgStateNotes' <$> getState
+  updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
+
 
 --
 -- Adaptions and specializations of parsing utilities
@@ -157,6 +167,12 @@ newtype F a = F { unF :: Reader OrgParserState a
 runF :: F a -> OrgParserState -> a
 runF = runReader . unF
 
+askF :: F OrgParserState
+askF = F ask
+
+asksF :: (OrgParserState -> a) -> F a
+asksF f = F $ asks f
+
 instance Monoid a => Monoid (F a) where
   mempty = return mempty
   mappend = liftM2 mappend
@@ -191,6 +207,7 @@ block = choice [ mempty <$ blanklines
                , return <$> hline
                , list
                , table
+               , noteBlock
                , paraOrPlain
                ] <?> "block"
 
@@ -500,6 +517,16 @@ setAligns :: [Alignment]
           -> F OrgTable
 setAligns aligns t = return $ t{ orgTableAlignments = aligns }
 
+--
+-- Footnote defintions
+--
+noteBlock :: OrgParser (F Blocks)
+noteBlock = try $ do
+  ref <- noteMarker
+  content <- skipSpaces *> paraOrPlain
+  addToNotesTable (ref, content)
+  return mempty
+
 -- Paragraphs or Plain text
 paraOrPlain :: OrgParser (F Blocks)
 paraOrPlain = try $
@@ -587,6 +614,7 @@ inline :: OrgParser (F Inlines)
 inline =
   choice [ whitespace
          , linebreak
+         , footnote
          , linkOrImage
          , str
          , endline
@@ -632,6 +660,7 @@ endline = try $ do
   notFollowedBy blankline
   notFollowedBy' exampleLine
   notFollowedBy' hline
+  notFollowedBy' noteMarker
   notFollowedBy' tableStart
   notFollowedBy' drawerStart
   notFollowedBy' headerStart
@@ -644,6 +673,39 @@ endline = try $ do
   updateLastPreCharPos
   return . return $ B.space
 
+footnote :: OrgParser (F Inlines)
+footnote = try $ inlineNote <|> referencedNote
+
+inlineNote :: OrgParser (F Inlines)
+inlineNote = try $ do
+  string "[fn:"
+  ref <- many alphaNum
+  char ':'
+  note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
+  when (not $ null ref) $
+       addToNotesTable ("fn:" ++ ref, note)
+  return $ B.note <$> note
+
+referencedNote :: OrgParser (F Inlines)
+referencedNote = try $ do
+  ref <- noteMarker
+  return $ do
+    notes <- asksF orgStateNotes'
+    case lookup ref notes of
+      Nothing   -> return $ B.str $ "[" ++ ref ++ "]"
+      Just contents  -> do
+        st <- askF
+        let contents' = runF contents st{ orgStateNotes' = [] }
+        return $ B.note contents'
+
+noteMarker :: OrgParser String
+noteMarker = try $ do
+  char '['
+  choice [ many1Till digit (char ']')
+         , (++) <$> string "fn:"
+                <*> many1Till (noneOf "\n\r\t ") (char ']')
+         ]
+
 linkOrImage :: OrgParser (F Inlines)
 linkOrImage = explicitOrImageLink <|> selflinkOrImage <?> "link or image"
 
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index f39bd7992..7f9c5f1d5 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -98,6 +98,10 @@ tests =
           "line \\\\ \nbreak" =?>
           para ("line" <> linebreak <> "break")
 
+      , "Inline note" =:
+          "[fn::Schreib mir eine E-Mail]" =?>
+          para (note $ para "Schreib mir eine E-Mail")
+
       , "Markup-chars not occuring on word break are symbols" =:
           unlines [ "this+that+ +so+on"
                   , "seven*eight* nine*"
-- 
cgit v1.2.3


From 6d6724cf2c6ae6bcc0df312c476e45644c972a85 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 17 Apr 2014 18:09:27 +0200
Subject: Org reader: Support more types of '#+BEGIN_<type>' blocks

Support for standard org-blocks is improved.  The parser now handles
"HTML", "LATEX", "ASCII", "EXAMPLE", "QUOTE" and "VERSE" blocks in a
sensible fashion.
---
 src/Text/Pandoc/Readers/Org.hs | 41 +++++++++++++++---
 tests/Tests/Readers/Org.hs     | 97 ++++++++++++++++++++++++++++++++----------
 2 files changed, 108 insertions(+), 30 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 17f8a1c9e..88e81f5fc 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -37,6 +37,7 @@ import           Text.Pandoc.Options
 import qualified Text.Pandoc.Parsing as P
 import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                             , newline, orderedListMarker
+                                            , parseFromString
                                             , updateLastStrPos )
 import           Text.Pandoc.Shared (compactify')
 
@@ -47,7 +48,7 @@ import           Control.Monad (foldM, guard, liftM, liftM2, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (toLower)
 import           Data.Default
-import           Data.List (isPrefixOf, isSuffixOf)
+import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (Monoid, mconcat, mempty, mappend)
 
@@ -156,6 +157,16 @@ addToNotesTable note = do
   oldnotes <- orgStateNotes' <$> getState
   updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
 
+-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
+-- of the state saved and restored.
+parseFromString :: OrgParser a -> String -> OrgParser a
+parseFromString parser str' = do
+  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
+  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
+  result <- P.parseFromString parser str'
+  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+  return result
+
 
 --
 -- Adaptions and specializations of parsing utilities
@@ -218,13 +229,27 @@ block = choice [ mempty <$ blanklines
 orgBlock :: OrgParser (F Blocks)
 orgBlock = try $ do
   (indent, blockType, args) <- blockHeader
-  blockStr <- rawBlockContent indent blockType
+  content <- rawBlockContent indent blockType
+  contentBlocks <- parseFromString parseBlocks (content ++ "\n")
   let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
   case blockType of
     "comment" -> return mempty
-    "src"     -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr
-    _         -> fmap (B.divWith ("", [blockType], []))
-                 <$> parseFromString parseBlocks blockStr
+    "src"     -> returnF $ B.codeBlockWith ("", classArgs, []) content
+    "html"    -> returnF $ B.rawBlock "html" content
+    "latex"   -> returnF $ B.rawBlock "latex" content
+    "ascii"   -> returnF $ B.rawBlock "ascii" content
+    "example" -> returnF $ exampleCode content
+    "quote"   -> return  $ B.blockQuote <$> contentBlocks
+    "verse"   -> parseVerse content
+    _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
+ where
+   returnF :: a -> OrgParser (F a)
+   returnF = return . return
+
+   parseVerse :: String -> OrgParser (F Blocks)
+   parseVerse cs =
+       fmap B.para . mconcat . intersperse (pure B.linebreak)
+       <$> mapM (parseFromString parseInlines) (lines cs)
 
 blockHeader :: OrgParser (Int, String, [String])
 blockHeader = (,,) <$> blockIndent
@@ -270,8 +295,10 @@ commaEscaped cs                   = cs
 
 example :: OrgParser (F Blocks)
 example = try $ do
-  body <- unlines <$> many1 exampleLine
-  return . return $ B.codeBlockWith ("", ["example"], []) body
+  return . return . exampleCode =<< unlines <$> many1 exampleLine
+
+exampleCode :: String -> Blocks
+exampleCode = B.codeBlockWith ("", ["example"], [])
 
 exampleLine :: OrgParser String
 exampleLine = try $ string ": " *> anyLine
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 7f9c5f1d5..7d5bfe650 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -363,29 +363,6 @@ tests =
                   , "#+END_COMMENT"] =?>
           (mempty::Blocks)
 
-      , "Source Block in Text" =:
-          unlines [ "Low German greeting"
-                  , "  #+BEGIN_SRC haskell"
-                  , "  main = putStrLn greeting"
-                  , "    where greeting = \"moin\""
-                  , "  #+END_SRC" ] =?>
-          let attr' = ("", ["haskell"], [])
-              code' = "main = putStrLn greeting\n" ++
-                       "  where greeting = \"moin\"\n"
-          in mconcat [ para $ spcSep [ "Low", "German", "greeting"  ]
-                     , codeBlockWith attr' code'
-                     ]
-
-      , "Source Block" =:
-          unlines [ "  #+BEGIN_SRC haskell"
-                  , "  main = putStrLn greeting"
-                  , "    where greeting = \"moin\""
-                  , "  #+END_SRC" ] =?>
-          let attr' = ("", ["haskell"], [])
-              code' = "main = putStrLn greeting\n" ++
-                       "  where greeting = \"moin\"\n"
-          in codeBlockWith attr' code'
-
       , "Figure" =:
           unlines [ "#+caption: A very courageous man."
                   , "#+name: goodguy"
@@ -661,4 +638,78 @@ tests =
                 , [ plain "2"      , plain mempty , plain mempty  ]
                 ]
       ]
+
+    , testGroup "Blocks"
+      [ "Source block" =:
+           unlines [ "  #+BEGIN_SRC haskell"
+                   , "  main = putStrLn greeting"
+                   , "    where greeting = \"moin\""
+                   , "  #+END_SRC" ] =?>
+           let attr' = ("", ["haskell"], [])
+               code' = "main = putStrLn greeting\n" ++
+                       "  where greeting = \"moin\"\n"
+           in codeBlockWith attr' code'
+
+      , "Source block between paragraphs" =:
+           unlines [ "Low German greeting"
+                   , "  #+BEGIN_SRC haskell"
+                   , "  main = putStrLn greeting"
+                   , "    where greeting = \"Moin!\""
+                   , "  #+END_SRC" ] =?>
+           let attr' = ("", ["haskell"], [])
+               code' = "main = putStrLn greeting\n" ++
+                        "  where greeting = \"Moin!\"\n"
+           in mconcat [ para $ spcSep [ "Low", "German", "greeting"  ]
+                      , codeBlockWith attr' code'
+                      ]
+
+      , "Example block" =:
+           unlines [ "#+begin_example"
+                   , "A chosen representation of"
+                   , "a rule."
+                   , "#+eND_exAMPle"
+                   ] =?>
+           codeBlockWith ("", ["example"], [])
+                         "A chosen representation of\na rule.\n"
+
+      , "HTML block" =:
+           unlines [ "#+BEGIN_HTML"
+                   , "<aside>HTML5 is pretty nice.</aside>"
+                   , "#+END_HTML"
+                   ] =?>
+           rawBlock "html" "<aside>HTML5 is pretty nice.</aside>\n"
+
+      , "Quote block" =:
+           unlines [ "#+BEGIN_QUOTE"
+                   , "/Niemand/ hat die Absicht, eine Mauer zu errichten!"
+                   , "#+END_QUOTE"
+                   ] =?>
+           blockQuote (para (spcSep [ emph "Niemand", "hat", "die", "Absicht,"
+                                    , "eine", "Mauer", "zu", "errichten!"
+                                    ]))
+
+      , "Verse block" =:
+          unlines [ "The first lines of Goethe's /Faust/:"
+                  , "#+begin_verse"
+                  , "Habe nun, ach! Philosophie,"
+                  , "Juristerei und Medizin,"
+                  , "Und leider auch Theologie!"
+                  , "Durchaus studiert, mit heißem Bemühn."
+                  , "#+end_verse"
+                  ] =?>
+          mconcat
+          [ para $ spcSep [ "The", "first", "lines", "of"
+                          , "Goethe's", emph "Faust" <> ":"]
+          , para $ mconcat
+              [ spcSep [ "Habe", "nun,", "ach!", "Philosophie," ]
+              , linebreak
+              , spcSep [ "Juristerei", "und", "Medizin," ]
+              , linebreak
+              , spcSep [ "Und", "leider", "auch", "Theologie!" ]
+              , linebreak
+              , spcSep [ "Durchaus", "studiert,", "mit", "heißem", "Bemühn." ]
+              ]
+          ]
+
+      ]
   ]
-- 
cgit v1.2.3


From f19d7233d8d3e47912b760fc62a253e5baf8275a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 18 Apr 2014 08:33:25 +0200
Subject: Org reader: Fix parsing of loose lists

Loose lists (i.e. lists with blankline separated items), were parsed as
multiple lists, each containing a single item.  This patch fixes this
issue.
---
 src/Text/Pandoc/Readers/Org.hs | 11 +++++++----
 tests/Tests/Readers/Org.hs     | 21 ++++++++++++++++-----
 2 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 88e81f5fc..1fa8d4d5e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -605,9 +605,10 @@ definitionListItem parseMarkerGetLength = try $ do
   markerLength <- parseMarkerGetLength
   term <- manyTill (noneOf "\n\r") (try $ string "::")
   first <- anyLineNewline
+  blank <- option "" ("\n" <$ blankline)
   cont <- concat <$> many (listContinuation markerLength)
   term' <- parseFromString inline term
-  contents' <- parseFromString parseBlocks $ first ++ cont
+  contents' <- parseFromString parseBlocks $ first ++ blank ++ cont
   return $ (,) <$> term' <*> fmap (:[]) contents'
 
 
@@ -617,16 +618,18 @@ listItem :: OrgParser Int
 listItem start = try $ do
   markerLength <- try start
   firstLine <- anyLineNewline
+  blank <- option "" ("\n" <$ blankline)
   rest <- concat <$> many (listContinuation markerLength)
-  parseFromString parseBlocks $ firstLine ++ rest
+  parseFromString parseBlocks $ firstLine ++ blank ++ rest
 
 -- continuation of a list item - indented and separated by blankline or endline.
 -- Note: nested lists are parsed as continuations.
 listContinuation :: Int
                  -> OrgParser String
 listContinuation markerLength = try $
-  mappend <$> many blankline
-          <*> (concat <$> many1 listLine)
+  notFollowedBy' blankline
+  *> (mappend <$> (concat <$> many1 listLine)
+              <*> many blankline)
  where listLine = try $ indentWith markerLength *> anyLineNewline
 
 anyLineNewline :: OrgParser String
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 7d5bfe650..572fc501f 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -518,13 +518,24 @@ tests =
                          , ("TTL", [ plain $ "transistor-transistor" <> space <>
                                                "logic" ])
                          , ("PSK", [ mconcat
-                                     [ para  $ "phase-shift" <> space <> "keying"
-                                     , plain $ spcSep [ "a", "digital"
-                                                      , "modulation", "scheme" ]
+                                     [ para $ "phase-shift" <> space <> "keying"
+                                     , para $ spcSep [ "a", "digital"
+                                                     , "modulation", "scheme" ]
                                      ]
-                                   ]
-                                   )
+                                   ])
                          ]
+
+      , "Loose bullet list" =:
+          unlines [ "- apple"
+                  , ""
+                  , "- orange"
+                  , ""
+                  , "- peach"
+                  ] =?>
+          bulletList [ para "apple"
+                     , para "orange"
+                     , para "peach"
+                     ]
       ]
 
   , testGroup "Tables"
-- 
cgit v1.2.3


From 09441b65a83f372410394a88af7808f494c3aa57 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 18 Apr 2014 10:15:58 +0200
Subject: Org reader: Add support for plain LaTeX fragments

This adds support for LaTeX fragments like the following:

```
\begin{equation}
\int fg \mathrm{d}x
\end{equation}
```
---
 src/Text/Pandoc/Readers/Org.hs | 41 ++++++++++++++++++++++++++++++++++++++++-
 tests/Tests/Readers/Org.hs     | 20 +++++++++++++++++++-
 2 files changed, 59 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 1fa8d4d5e..66cfe720e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -218,6 +218,7 @@ block = choice [ mempty <$ blanklines
                , return <$> hline
                , list
                , table
+               , latexFragment
                , noteBlock
                , paraOrPlain
                ] <?> "block"
@@ -544,6 +545,41 @@ setAligns :: [Alignment]
           -> F OrgTable
 setAligns aligns t = return $ t{ orgTableAlignments = aligns }
 
+
+--
+-- LaTeX fragments
+--
+latexFragment :: OrgParser (F Blocks)
+latexFragment = try $ do
+  envName <- latexEnvStart
+  content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
+  return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
+ where
+   c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
+                              , c
+                              , "\\end{", e, "}\n"
+                              ]
+
+latexEnvStart :: OrgParser String
+latexEnvStart = try $ do
+  skipSpaces *> string "\\begin{"
+             *> latexEnvName
+             <* string "}"
+             <* blankline
+
+latexEnd :: String -> OrgParser ()
+latexEnd envName = try $
+  () <$ skipSpaces
+     <* string ("\\end{" ++ envName ++ "}")
+     <* blankline
+
+-- | Parses a LaTeX environment name.
+latexEnvName :: OrgParser String
+latexEnvName = try $ do
+  mappend <$> many1 alphaNum
+          <*> option "" (string "*")
+
+
 --
 -- Footnote defintions
 --
@@ -683,7 +719,9 @@ str :: OrgParser (F Inlines)
 str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
       <* updateLastStrPos
 
--- an endline character that can be treated as a space, not a structural break
+-- | An endline character that can be treated as a space, not a structural
+-- break.  This should reflect the values of the Emacs variable
+-- @org-element-pagaraph-separate@.
 endline :: OrgParser (F Inlines)
 endline = try $ do
   newline
@@ -695,6 +733,7 @@ endline = try $ do
   notFollowedBy' drawerStart
   notFollowedBy' headerStart
   notFollowedBy' metaLineStart
+  notFollowedBy' latexEnvStart
   notFollowedBy' commentLineStart
   notFollowedBy' bulletListStart
   notFollowedBy' orderedListStart
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 572fc501f..1ac2c1fd8 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -650,7 +650,7 @@ tests =
                 ]
       ]
 
-    , testGroup "Blocks"
+    , testGroup "Blocks and fragments"
       [ "Source block" =:
            unlines [ "  #+BEGIN_SRC haskell"
                    , "  main = putStrLn greeting"
@@ -722,5 +722,23 @@ tests =
               ]
           ]
 
+      , "LaTeX fragment" =:
+          unlines [ "\\begin{equation}"
+                  , "X_i = \\begin{cases}"
+                  , "      G_{\\alpha(i)} & \\text{if }\\alpha(i-1) = \\alpha(i)\\\\"
+                  , "      C_{\\alpha(i)} & \\text{otherwise}"
+                  , "      \\end{cases}"
+                  , "\\end{equation}"
+                  ] =?>
+          rawBlock "latex"
+                   (unlines [ "\\begin{equation}"
+                            , "X_i = \\begin{cases}"
+                            , "      G_{\\alpha(i)} & \\text{if }\\alpha(i-1) =" ++
+                              " \\alpha(i)\\\\"
+                            , "      C_{\\alpha(i)} & \\text{otherwise}"
+                            , "      \\end{cases}"
+                            , "\\end{equation}"
+                            ])
+
       ]
   ]
-- 
cgit v1.2.3


From 6ded3d41d94c1e90d1d30a1f99ddad62e62d9ce6 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 18 Apr 2014 20:47:50 +0200
Subject: Org reader: Apply captions to code blocks and tables

The `Table` blocktype already takes the caption as an argument, while code
blocks are wrapped in a `Div` block together with a labelling `Span`.
---
 src/Text/Pandoc/Readers/Org.hs | 134 +++++++++++++++++++++++++++++------------
 tests/Tests/Readers/Org.hs     |  31 +++++++++-
 2 files changed, 124 insertions(+), 41 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 66cfe720e..025158060 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -43,13 +43,13 @@ import           Text.Pandoc.Shared (compactify')
 
 import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
-import           Control.Arrow ((***))
 import           Control.Monad (foldM, guard, liftM, liftM2, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (toLower)
 import           Data.Default
 import           Data.List (intersperse, isPrefixOf, isSuffixOf)
-import           Data.Maybe (listToMaybe, fromMaybe)
+import qualified Data.Map as M
+import           Data.Maybe (listToMaybe, fromMaybe, isJust)
 import           Data.Monoid (Monoid, mconcat, mempty, mappend)
 
 -- | Parse org-mode string and return a Pandoc document.
@@ -74,9 +74,12 @@ parseOrg = do
 type OrgNoteRecord = (String, F Blocks)
 type OrgNoteTable = [OrgNoteRecord]
 
+type OrgBlockAttributes = M.Map String String
+
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
                       { orgStateOptions              :: ReaderOptions
+                      , orgStateBlockAttributes      :: OrgBlockAttributes
                       , orgStateEmphasisCharStack    :: [Char]
                       , orgStateEmphasisNewlines     :: Maybe Int
                       , orgStateLastForbiddenCharPos :: Maybe SourcePos
@@ -102,6 +105,7 @@ instance Default OrgParserState where
 defaultOrgParserState :: OrgParserState
 defaultOrgParserState = OrgParserState
                         { orgStateOptions = def
+                        , orgStateBlockAttributes = M.empty
                         , orgStateEmphasisCharStack = []
                         , orgStateEmphasisNewlines = Nothing
                         , orgStateLastForbiddenCharPos = Nothing
@@ -112,6 +116,19 @@ defaultOrgParserState = OrgParserState
                         , orgStateNotes' = []
                         }
 
+addBlockAttribute :: String -> String -> OrgParser ()
+addBlockAttribute key val = updateState $ \s ->
+  let attrs = orgStateBlockAttributes s
+  in s{ orgStateBlockAttributes = M.insert key val attrs }
+
+lookupBlockAttribute :: String -> OrgParser (Maybe String)
+lookupBlockAttribute key =
+  M.lookup key . orgStateBlockAttributes <$> getState
+
+resetBlockAttributes :: OrgParser ()
+resetBlockAttributes = updateState $ \s ->
+  s{ orgStateBlockAttributes = orgStateBlockAttributes def }
+
 updateLastStrPos :: OrgParser ()
 updateLastStrPos = getPosition >>= \p ->
   updateState $ \s -> s{ orgStateLastStrPos = Just p }
@@ -125,19 +142,19 @@ updateLastPreCharPos = getPosition >>= \p ->
   updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
 
 pushToInlineCharStack :: Char -> OrgParser ()
-pushToInlineCharStack c = updateState $ \st ->
-  st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st }
+pushToInlineCharStack c = updateState $ \s ->
+  s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
 
 popInlineCharStack :: OrgParser ()
-popInlineCharStack = updateState $ \st ->
-  st { orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ st }
+popInlineCharStack = updateState $ \s ->
+  s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
 
 surroundingEmphasisChar :: OrgParser [Char]
 surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
 
 startEmphasisNewlinesCounting :: Int -> OrgParser ()
 startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
-  s { orgStateEmphasisNewlines = Just maxNewlines }
+  s{ orgStateEmphasisNewlines = Just maxNewlines }
 
 decEmphasisNewlinesCount :: OrgParser ()
 decEmphasisNewlinesCount = updateState $ \s ->
@@ -209,20 +226,50 @@ parseBlocks = mconcat <$> manyTill block eof
 
 block :: OrgParser (F Blocks)
 block = choice [ mempty <$ blanklines
-               , orgBlock
+               , optionalAttributes $ choice
+                 [ orgBlock
+                 , figure
+                 , table
+                 ]
                , example
                , drawer
-               , figure
                , specialLine
                , header
                , return <$> hline
                , list
-               , table
                , latexFragment
                , noteBlock
                , paraOrPlain
                ] <?> "block"
 
+optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks)
+optionalAttributes parser = try $
+  resetBlockAttributes *> parseBlockAttributes *> parser
+
+parseBlockAttributes :: OrgParser ()
+parseBlockAttributes = do
+  attrs <- many attribute
+  () <$ mapM (uncurry parseAndAddAttribute) attrs
+ where
+   attribute :: OrgParser (String, String)
+   attribute = try $ do
+         key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':')
+         val <- skipSpaces *> anyLine
+         return (map toLower key, val)
+
+parseAndAddAttribute :: String -> String -> OrgParser ()
+parseAndAddAttribute key value = do
+  let key' = map toLower key
+  () <$ addBlockAttribute key' value
+
+lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines))
+lookupInlinesAttr attr = try $ do
+  val <- lookupBlockAttribute attr
+  maybe (return Nothing)
+        (fmap Just . parseFromString parseInlines)
+        val
+
+
 --
 -- Org Blocks (#+BEGIN_... / #+END_...)
 --
@@ -235,13 +282,13 @@ orgBlock = try $ do
   let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
   case blockType of
     "comment" -> return mempty
-    "src"     -> returnF $ B.codeBlockWith ("", classArgs, []) content
     "html"    -> returnF $ B.rawBlock "html" content
     "latex"   -> returnF $ B.rawBlock "latex" content
     "ascii"   -> returnF $ B.rawBlock "ascii" content
     "example" -> returnF $ exampleCode content
     "quote"   -> return  $ B.blockQuote <$> contentBlocks
     "verse"   -> parseVerse content
+    "src"     -> codeBlockWithAttr classArgs content
     _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
  where
    returnF :: a -> OrgParser (F a)
@@ -260,6 +307,18 @@ blockHeader = (,,) <$> blockIndent
        blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
        blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline
 
+codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks)
+codeBlockWithAttr classArgs content = do
+  identifier <- fromMaybe "" <$> lookupBlockAttribute "name"
+  caption <- lookupInlinesAttr "caption"
+  let codeBlck = B.codeBlockWith (identifier, classArgs, []) content
+  return $ maybe (pure codeBlck) (labelDiv codeBlck) caption
+ where
+   labelDiv blk value =
+       B.divWith nullAttr <$> (mappend <$> labelledBlock value
+                                       <*> pure blk)
+   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+
 rawBlockContent :: Int -> String -> OrgParser String
 rawBlockContent indent blockType =
   unlines . map commaEscaped <$> manyTill indentedLine blockEnder
@@ -333,38 +392,26 @@ drawerEnd = try $
 -- Figures (Image on a line by itself, preceded by name and/or caption)
 figure :: OrgParser (F Blocks)
 figure = try $ do
-  (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty)
-                <$> nameAndOrCaption
+  (cap, nam) <- nameAndCaption
   src <- skipSpaces *> selfTarget <* skipSpaces <* newline
   guard (isImageFilename src)
   return $ do
     cap' <- cap
-    return $ B.para $ B.image src tit cap'
- where withFigPrefix cs = if "fig:" `isPrefixOf` cs
-                          then cs
-                          else "fig:" ++ cs
-
-nameAndOrCaption :: OrgParser (Maybe String, Maybe (F Inlines))
-nameAndOrCaption = try $ nameFirst <|> captionFirst
+    return $ B.para $ B.image src nam cap'
  where
-   nameFirst = try $ do
-                 n <- name
-                 c <- optionMaybe caption
-                 return (Just n, c)
-   captionFirst = try $ do
-                 c <- caption
-                 n <- optionMaybe name
-                 return (n, Just c)
-
-caption :: OrgParser (F Inlines)
-caption = try $ annotation "CAPTION" *> inlinesTillNewline
-
-name :: OrgParser String
-name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline
-
-annotation :: String -> OrgParser String
-annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':'
+   nameAndCaption =
+       do
+         maybeCap <- lookupInlinesAttr "caption"
+         maybeNam <- lookupBlockAttribute "name"
+         guard $ isJust maybeCap || isJust maybeNam
+         return ( fromMaybe mempty maybeCap
+                , maybe mempty withFigPrefix maybeNam )
+   withFigPrefix cs =
+       if "fig:" `isPrefixOf` cs
+       then cs
+       else "fig:" ++ cs
 
+--
 -- Comments, Options and Metadata
 specialLine :: OrgParser (F Blocks)
 specialLine = fmap return . try $ metaLine <|> commentLine
@@ -400,6 +447,10 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r")
                       <*  char ':'
                       <*  skipSpaces
 
+--
+-- Headers
+--
+
 -- | Headers
 header :: OrgParser (F Blocks)
 header = try $ do
@@ -411,6 +462,7 @@ headerStart :: OrgParser Int
 headerStart = try $
   (length <$> many1 (char '*')) <* many1 (char ' ')
 
+
 -- Don't use (or need) the reader wrapper here, we want hline to be
 -- @show@able.  Otherwise we can't use it with @notFollowedBy'@.
 
@@ -444,12 +496,14 @@ table = try $ do
   lookAhead tableStart
   do
     rows <- tableRows
-    return $ return . orgToPandocTable . normalizeTable =<< rowsToTable rows
+    cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption"
+    return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows
 
 orgToPandocTable :: OrgTable
+                 -> Inlines
                  -> Blocks
-orgToPandocTable (OrgTable _ aligns heads lns) =
-  B.table "" (zip aligns $ repeat 0) heads lns
+orgToPandocTable (OrgTable _ aligns heads lns) caption =
+  B.table caption (zip aligns $ repeat 0) heads lns
 
 tableStart :: OrgParser Char
 tableStart = try $ skipSpaces *> char '|'
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 1ac2c1fd8..80a95d36b 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -8,7 +8,7 @@ import Tests.Arbitrary()
 import Text.Pandoc.Builder
 import Text.Pandoc
 import Data.List (intersperse)
-import Data.Monoid (mempty, mconcat)
+import Data.Monoid (mempty, mappend, mconcat)
 
 org :: String -> Pandoc
 org = readOrg def
@@ -648,6 +648,18 @@ tests =
                 [ [ plain "1"      , plain "One"  , plain "foo"  ]
                 , [ plain "2"      , plain mempty , plain mempty  ]
                 ]
+
+      , "Table with caption" =:
+          unlines [ "#+CAPTION: Hitchhiker's Multiplication Table"
+                  , "| x |  6 |"
+                  , "| 9 | 42 |"
+                  ] =?>
+          table "Hitchhiker's Multiplication Table"
+                [(AlignDefault, 0), (AlignDefault, 0)]
+                []
+                [ [ plain "x", plain "6" ]
+                , [ plain "9", plain "42" ]
+                ]
       ]
 
     , testGroup "Blocks and fragments"
@@ -740,5 +752,22 @@ tests =
                             , "\\end{equation}"
                             ])
 
+      , "Code block with caption" =:
+          unlines [ "#+CAPTION: Functor laws in Haskell"
+                  , "#+NAME: functor-laws"
+                  , "#+BEGIN_SRC haskell"
+                  , "fmap id = id"
+                  , "fmap (p . q) = (fmap p) . (fmap q)"
+                  , "#+END_SRC"
+                  ] =?>
+          divWith
+             nullAttr
+             (mappend
+              (plain $ spanWith ("", ["label"], [])
+                                (spcSep [ "Functor", "laws", "in", "Haskell" ]))
+              (codeBlockWith ("functor-laws", ["haskell"], [])
+                             (unlines [ "fmap id = id"
+                                      , "fmap (p . q) = (fmap p) . (fmap q)"
+                                      ])))
       ]
   ]
-- 
cgit v1.2.3


From a69416091ba035ab1661ff306ef3e51fd926488b Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 19 Apr 2014 11:25:39 +0200
Subject: Org reader: Fix distinction of images and normal links

Fixed a false assumption about the precedence of (&&) vs (||).
---
 src/Text/Pandoc/Readers/Org.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 025158060..66211b20e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -860,8 +860,8 @@ linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
 isImageFilename :: String -> Bool
 isImageFilename filename =
   any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
-  any (\x -> (x++":") `isPrefixOf` filename) protocols ||
-  ':' `notElem` filename
+  (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+   ':' `notElem` filename)
  where
    imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
    protocols = [ "file", "http", "https" ]
-- 
cgit v1.2.3


From 8e91d362a392d1ee90a497f39cfcf90fee8d8da0 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 19 Apr 2014 13:15:47 +0200
Subject: Org reader: Fix parsing of footnotes

Footnotes can consist of multiple blocks and end only at a header or at
the beginning of another footnote.  This fixes the previous behavior,
which restricted notes to a single paragraph.
---
 src/Text/Pandoc/Readers/Org.hs |  8 ++++++--
 tests/Tests/Readers/Org.hs     | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 66211b20e..0bc0a2668 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -639,10 +639,14 @@ latexEnvName = try $ do
 --
 noteBlock :: OrgParser (F Blocks)
 noteBlock = try $ do
-  ref <- noteMarker
-  content <- skipSpaces *> paraOrPlain
+  ref <- noteMarker <* skipSpaces
+  content <- mconcat <$> blocksTillHeaderOrNote
   addToNotesTable (ref, content)
   return mempty
+ where
+   blocksTillHeaderOrNote =
+     many1Till block (eof <|> () <$ lookAhead noteMarker
+                          <|> () <$ lookAhead headerStart)
 
 -- Paragraphs or Plain text
 paraOrPlain :: OrgParser (F Blocks)
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 80a95d36b..4cc405c0f 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -383,6 +383,48 @@ tests =
                   ] =?>
           para (image "the-red-queen.jpg" "fig:redqueen"
                       "Used as a metapher in evolutionary biology.")
+
+      , "Footnote" =:
+          unlines [ "A footnote[1]"
+                  , ""
+                  , "[1] First paragraph"
+                  , ""
+                  , "second paragraph"
+                  ] =?>
+          para (mconcat
+                [ "A", space, "footnote"
+                , note $ mconcat [ para ("First" <> space <> "paragraph")
+                                 , para ("second" <> space <> "paragraph")
+                                 ]
+                ])
+
+      , "Two footnotes" =:
+          unlines [ "Footnotes[fn:1][fn:2]"
+                  , ""
+                  , "[fn:1] First note."
+                  , ""
+                  , "[fn:2] Second note."
+                  ] =?>
+          para (mconcat
+                [ "Footnotes"
+                , note $ para ("First" <> space <> "note.")
+                , note $ para ("Second" <> space <> "note.")
+                ])
+
+      , "Footnote followed by header" =:
+          unlines [ "Another note[fn:yay]"
+                  , ""
+                  , "[fn:yay] This is great!"
+                  , ""
+                  , "** Headline"
+                  ] =?>
+          mconcat
+          [ para (mconcat
+                  [ "Another", space, "note"
+                  , note $ para ("This" <> space <> "is" <> space <> "great!")
+                  ])
+          , header 2 "Headline"
+          ]
       ]
 
   , testGroup "Lists" $
-- 
cgit v1.2.3


From 8276449520ba85c78f0b4e919fbc9bcf893a74f0 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 19 Apr 2014 15:05:03 +0200
Subject: Org reader: Allow for compact definition lists

Use `Text.Pandoc.Shared.compactify'DL` to allow for compact definition
lists.
---
 src/Text/Pandoc/Readers/Org.hs |  4 ++--
 tests/Tests/Readers/Org.hs     | 12 ++++++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0bc0a2668..c71cc24be 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -39,7 +39,7 @@ import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                             , newline, orderedListMarker
                                             , parseFromString
                                             , updateLastStrPos )
-import           Text.Pandoc.Shared (compactify')
+import           Text.Pandoc.Shared (compactify', compactify'DL)
 
 import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
@@ -665,7 +665,7 @@ list :: OrgParser (F Blocks)
 list = choice [ definitionList, bulletList, orderedList ] <?> "list"
 
 definitionList :: OrgParser (F Blocks)
-definitionList = fmap B.definitionList . sequence
+definitionList = fmap B.definitionList . fmap compactify'DL . sequence
                  <$> many1 (definitionListItem bulletListStart)
 
 bulletList :: OrgParser (F Blocks)
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 4cc405c0f..f62b73ce4 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -567,6 +567,18 @@ tests =
                                    ])
                          ]
 
+      , "Compact definition list" =:
+          unlines [ "- ATP :: adenosine 5' triphosphate"
+                  , "- DNA :: deoxyribonucleic acid"
+                  , "- PCR :: polymerase chain reaction"
+                  , ""
+                  ] =?>
+          definitionList
+          [ ("ATP", [ plain $ spcSep [ "adenosine", "5'", "triphosphate" ] ])
+          , ("DNA", [ plain $ spcSep [ "deoxyribonucleic", "acid" ] ])
+          , ("PCR", [ plain $ spcSep [ "polymerase", "chain", "reaction" ] ])
+          ]
+
       , "Loose bullet list" =:
           unlines [ "- apple"
                   , ""
-- 
cgit v1.2.3


From c128daba9dee096ce0e78b81a381f43337b74285 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 24 Apr 2014 17:42:01 +0200
Subject: Org reader: Recognize plain and angle links

This adds support for plain links (like http://zeitlens.com) and angle
links (like <http://moltkeplatz.de>).
---
 src/Text/Pandoc/Readers/Org.hs | 34 +++++++++++++++++++++++++---------
 tests/Tests/Readers/Org.hs     | 14 ++++++++++++++
 2 files changed, 39 insertions(+), 9 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c71cc24be..7a50b1db9 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -45,7 +45,7 @@ import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
 import           Control.Monad (foldM, guard, liftM, liftM2, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
-import           Data.Char (toLower)
+import           Data.Char (isAlphaNum, toLower)
 import           Data.Default
 import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
@@ -209,6 +209,9 @@ instance Monoid a => Monoid (F a) where
 trimInlinesF :: F Inlines -> F Inlines
 trimInlinesF = liftM trimInlines
 
+returnF :: a -> OrgParser (F a)
+returnF = return . return
+
 
 -- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
 newline :: OrgParser Char
@@ -291,9 +294,6 @@ orgBlock = try $ do
     "src"     -> codeBlockWithAttr classArgs content
     _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
  where
-   returnF :: a -> OrgParser (F a)
-   returnF = return . return
-
    parseVerse :: String -> OrgParser (F Blocks)
    parseVerse cs =
        fmap B.para . mconcat . intersperse (pure B.linebreak)
@@ -834,7 +834,11 @@ noteMarker = try $ do
          ]
 
 linkOrImage :: OrgParser (F Inlines)
-linkOrImage = explicitOrImageLink <|> selflinkOrImage <?> "link or image"
+linkOrImage = explicitOrImageLink
+              <|> selflinkOrImage
+              <|> angleLink
+              <|> plainLink
+              <?> "link or image"
 
 explicitOrImageLink :: OrgParser (F Inlines)
 explicitOrImageLink = try $ do
@@ -851,15 +855,27 @@ explicitOrImageLink = try $ do
 selflinkOrImage :: OrgParser (F Inlines)
 selflinkOrImage = try $ do
   src <- char '[' *> linkTarget <* char ']'
-  return . return $ if isImageFilename src
-                    then B.image src "" ""
-                    else B.link src "" (B.str src)
+  returnF $ if isImageFilename src
+            then B.image src "" ""
+            else B.link src "" (B.str src)
+
+plainLink :: OrgParser (F Inlines)
+plainLink = try $ do
+  (orig, src) <- uri
+  returnF $ B.link src "" (B.str orig)
+
+angleLink :: OrgParser (F Inlines)
+angleLink = try $ do
+  char '<'
+  link <- plainLink
+  char '>'
+  return link
 
 selfTarget :: OrgParser String
 selfTarget = try $ char '[' *> linkTarget <* char ']'
 
 linkTarget :: OrgParser String
-linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
+linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
 
 isImageFilename :: String -> Bool
 isImageFilename filename =
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index f62b73ce4..ed774f527 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -188,6 +188,20 @@ tests =
       , "Image link" =:
           "[[sunset.png][dusk.svg]]" =?>
           (para $ link "sunset.png" "" (image "dusk.svg" "" ""))
+
+      , "Plain link" =:
+          "Posts on http://zeitlens.com/ can be funny at times." =?>
+          (para $ spcSep [ "Posts", "on"
+                         , link "http://zeitlens.com/" "" "http://zeitlens.com/"
+                         , "can", "be", "funny", "at", "times."
+                         ])
+
+      , "Angle link" =:
+          "Look at <http://moltkeplatz.de> for fnords." =?>
+          (para $ spcSep [ "Look", "at"
+                         , link "http://moltkeplatz.de" "" "http://moltkeplatz.de"
+                         , "for", "fnords."
+                         ])
       ]
 
   , testGroup "Meta Information" $
-- 
cgit v1.2.3


From 2f724aaaa4875a21560870d25c3d212f974c6dde Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 24 Apr 2014 17:42:01 +0200
Subject: Org reader: Read anchors as empty spans

Anchors (like <<this>>) are parsed as empty spans.
---
 src/Text/Pandoc/Readers/Org.hs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 7a50b1db9..7f1893936 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -740,6 +740,7 @@ inline =
          , linebreak
          , footnote
          , linkOrImage
+         , anchor
          , str
          , endline
          , emph
@@ -886,6 +887,30 @@ isImageFilename filename =
    imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
    protocols = [ "file", "http", "https" ]
 
+-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
+-- @anchor-id@ set as id.  Legal anchors in org-mode are defined through
+-- @org-target-regexp@, which is fairly liberal.  Since no link is created if
+-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
+-- an anchor.
+
+anchor :: OrgParser (F Inlines)
+anchor =  try $ pure <$> (B.spanWith <$> attributes <*> pure mempty)
+ where
+       name = string "<<"
+              *> many1 (noneOf "\t\n\r<>\"' ")
+              <* string ">>"
+       attributes = name >>= \n -> return (solidify n, [], [])
+
+-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'.  This mirrors
+-- the org function @org-export-solidify-link-text@.
+
+solidify :: String -> String
+solidify = map replaceSpecialChar
+ where replaceSpecialChar c
+           | isAlphaNum c    = c
+           | c `elem` "_.-:" = c
+           | otherwise       = '-'
+
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
 
-- 
cgit v1.2.3


From 2eec20d92fd0f498da5b66ac03cf6f8159392323 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 25 Apr 2014 15:29:28 +0200
Subject: Org reader: Enable internal links

Internal links in Org are possible by using an anchor-name as the target
of a link:

[[some-anchor][This]] is an internal link.

It links <<some-anchor>> here.
---
 src/Text/Pandoc/Readers/Org.hs | 50 ++++++++++++++++++++++++++++++++----------
 tests/Tests/Readers/Org.hs     | 25 +++++++++++++++++++++
 2 files changed, 63 insertions(+), 12 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 7f1893936..0e52bff90 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -79,6 +79,7 @@ type OrgBlockAttributes = M.Map String String
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
                       { orgStateOptions              :: ReaderOptions
+                      , orgStateAnchorIds            :: [String]
                       , orgStateBlockAttributes      :: OrgBlockAttributes
                       , orgStateEmphasisCharStack    :: [Char]
                       , orgStateEmphasisNewlines     :: Maybe Int
@@ -105,6 +106,7 @@ instance Default OrgParserState where
 defaultOrgParserState :: OrgParserState
 defaultOrgParserState = OrgParserState
                         { orgStateOptions = def
+                        , orgStateAnchorIds = []
                         , orgStateBlockAttributes = M.empty
                         , orgStateEmphasisCharStack = []
                         , orgStateEmphasisNewlines = Nothing
@@ -116,6 +118,10 @@ defaultOrgParserState = OrgParserState
                         , orgStateNotes' = []
                         }
 
+recordAnchorId :: String -> OrgParser ()
+recordAnchorId i = updateState $ \s ->
+  s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
+
 addBlockAttribute :: String -> String -> OrgParser ()
 addBlockAttribute key val = updateState $ \s ->
   let attrs = orgStateBlockAttributes s
@@ -848,17 +854,14 @@ explicitOrImageLink = try $ do
   title  <- enclosedRaw (char '[') (char ']')
   title' <- parseFromString (mconcat <$> many inline) title
   char ']'
-  return $ B.link src ""  <$>
-         if isImageFilename src && isImageFilename title
-            then return $ B.image title mempty mempty
-            else title'
+  return $ if isImageFilename src && isImageFilename title
+           then pure $ B.link src "" $ B.image title mempty mempty
+           else linkToInlinesF src =<< title'
 
 selflinkOrImage :: OrgParser (F Inlines)
 selflinkOrImage = try $ do
   src <- char '[' *> linkTarget <* char ']'
-  returnF $ if isImageFilename src
-            then B.image src "" ""
-            else B.link src "" (B.str src)
+  return $ linkToInlinesF src (B.str src)
 
 plainLink :: OrgParser (F Inlines)
 plainLink = try $ do
@@ -878,6 +881,26 @@ selfTarget = try $ char '[' *> linkTarget <* char ']'
 linkTarget :: OrgParser String
 linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
 
+linkToInlinesF :: String -> Inlines -> F Inlines
+linkToInlinesF s@('#':_) = pure . B.link s ""
+linkToInlinesF s
+    | isImageFilename s = const . pure $ B.image s "" ""
+    | isUri s           = pure . B.link s ""
+    | isRelativeUrl s   = pure . B.link s ""
+linkToInlinesF s = \title -> do
+  anchorB <- (s `elem`) <$> asksF orgStateAnchorIds
+  if anchorB
+    then pure $ B.link ('#':s) "" title
+    else pure $ B.emph title
+
+isRelativeUrl :: String -> Bool
+isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s)
+
+isUri :: String -> Bool
+isUri s = let (scheme, path) = break (== ':') s
+          in all (\c -> isAlphaNum c || c `elem` ".-") scheme
+             && not (null path)
+
 isImageFilename :: String -> Bool
 isImageFilename filename =
   any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
@@ -894,12 +917,15 @@ isImageFilename filename =
 -- an anchor.
 
 anchor :: OrgParser (F Inlines)
-anchor =  try $ pure <$> (B.spanWith <$> attributes <*> pure mempty)
+anchor =  try $ do
+  anchorId <- parseAnchor
+  recordAnchorId anchorId
+  returnF $ B.spanWith (solidify anchorId, [], []) mempty
  where
-       name = string "<<"
-              *> many1 (noneOf "\t\n\r<>\"' ")
-              <* string ">>"
-       attributes = name >>= \n -> return (solidify n, [], [])
+       parseAnchor = string "<<"
+                     *> many1 (noneOf "\t\n\r<>\"' ")
+                     <* string ">>"
+                     <* skipSpaces
 
 -- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'.  This mirrors
 -- the org function @org-export-solidify-link-text@.
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index ed774f527..96747d148 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -202,6 +202,11 @@ tests =
                          , link "http://moltkeplatz.de" "" "http://moltkeplatz.de"
                          , "for", "fnords."
                          ])
+
+      , "Anchor" =:
+          "<<anchor>> Link here later." =?>
+          (para $ spanWith ("anchor", [], []) mempty <>
+                  "Link" <> space <> "here" <> space <> "later.")
       ]
 
   , testGroup "Meta Information" $
@@ -279,6 +284,26 @@ tests =
                   , ":END:"
                   ] =?>
           para (":FOO:" <> space <> ":END:")
+
+      , "Anchor reference" =:
+          unlines [ "<<link-here>> Target."
+                  , ""
+                  , "[[link-here][See here!]]"
+                  ] =?>
+          (para (spanWith ("link-here", [], []) mempty <> "Target.") <>
+           para (link "#link-here" "" ("See" <> space <> "here!")))
+
+      , "Search links are read as emph" =:
+          "[[Wally][Where's Wally?]]" =?>
+          (para (emph $ "Where's" <> space <> "Wally?"))
+
+      , "Link to nonexistent anchor" =:
+          unlines [ "<<link-here>> Target."
+                  , ""
+                  , "[[link$here][See here!]]"
+                  ] =?>
+          (para (spanWith ("link-here", [], []) mempty <> "Target.") <>
+           para (emph ("See" <> space <> "here!")))
       ]
 
   , testGroup "Basic Blocks" $
-- 
cgit v1.2.3


From 8726eebcd363ccb33ea8c297b004feca7ef37ceb Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 30 Apr 2014 11:16:01 +0200
Subject: Org reader: Add support for custom link types

Org allows users to define their own custom link types.  E.g., in a
document with a lot of links to Wikipedia articles, one can define a
custom wikipedia link-type via

    #+LINK: wp https://en.wikipedia.org/wiki/

This allows to write [[wp:Org_mode][Org-mode]] instead of the
equivallent [[https://en.wikipedia.org/wiki/Org_mode][Org-mode]].
---
 src/Text/Pandoc/Readers/Org.hs | 68 +++++++++++++++++++++++++++++++++++++-----
 tests/Tests/Readers/Org.hs     | 26 ++++++++++++++++
 2 files changed, 86 insertions(+), 8 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0e52bff90..d68ef45ef 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -43,7 +43,7 @@ import           Text.Pandoc.Shared (compactify', compactify'DL)
 
 import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
-import           Control.Monad (foldM, guard, liftM, liftM2, when)
+import           Control.Monad (foldM, guard, liftM, liftM2, mzero, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (isAlphaNum, toLower)
 import           Data.Default
@@ -51,6 +51,7 @@ import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
 import           Data.Maybe (listToMaybe, fromMaybe, isJust)
 import           Data.Monoid (Monoid, mconcat, mempty, mappend)
+import           Network.HTTP (urlEncode)
 
 -- | Parse org-mode string and return a Pandoc document.
 readOrg :: ReaderOptions -- ^ Reader options
@@ -76,6 +77,8 @@ type OrgNoteTable = [OrgNoteRecord]
 
 type OrgBlockAttributes = M.Map String String
 
+type OrgLinkFormatters = M.Map String (String -> String)
+
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
                       { orgStateOptions              :: ReaderOptions
@@ -86,6 +89,7 @@ data OrgParserState = OrgParserState
                       , orgStateLastForbiddenCharPos :: Maybe SourcePos
                       , orgStateLastPreCharPos       :: Maybe SourcePos
                       , orgStateLastStrPos           :: Maybe SourcePos
+                      , orgStateLinkFormatters       :: OrgLinkFormatters
                       , orgStateMeta                 :: Meta
                       , orgStateMeta'                :: F Meta
                       , orgStateNotes'               :: OrgNoteTable
@@ -113,6 +117,7 @@ defaultOrgParserState = OrgParserState
                         , orgStateLastForbiddenCharPos = Nothing
                         , orgStateLastPreCharPos = Nothing
                         , orgStateLastStrPos = Nothing
+                        , orgStateLinkFormatters = M.empty
                         , orgStateMeta = nullMeta
                         , orgStateMeta' = return nullMeta
                         , orgStateNotes' = []
@@ -175,6 +180,13 @@ resetEmphasisNewlines :: OrgParser ()
 resetEmphasisNewlines = updateState $ \s ->
   s{ orgStateEmphasisNewlines = Nothing }
 
+addLinkFormat :: String
+              -> (String -> String)
+              -> OrgParser ()
+addLinkFormat key formatter = updateState $ \s ->
+  let fs = orgStateLinkFormatters s
+  in s{ orgStateLinkFormatters = M.insert key formatter fs }
+
 addToNotesTable :: OrgNoteRecord -> OrgParser ()
 addToNotesTable note = do
   oldnotes <- orgStateNotes' <$> getState
@@ -423,7 +435,8 @@ specialLine :: OrgParser (F Blocks)
 specialLine = fmap return . try $ metaLine <|> commentLine
 
 metaLine :: OrgParser Blocks
-metaLine = try $ metaLineStart *> declarationLine
+metaLine = try $ mempty
+              <$ (metaLineStart *> (optionLine <|> declarationLine))
 
 commentLine :: OrgParser Blocks
 commentLine = try $ commentLineStart *> anyLine *> pure mempty
@@ -436,14 +449,14 @@ metaLineStart = try $ mappend <$> many spaceChar <*> string "#+"
 commentLineStart :: OrgParser String
 commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
 
-declarationLine :: OrgParser Blocks
+declarationLine :: OrgParser ()
 declarationLine = try $ do
   key <- metaKey
   inlinesF <- metaInlines
   updateState $ \st ->
     let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
     in st { orgStateMeta' = orgStateMeta' st <> meta' }
-  return mempty
+  return ()
 
 metaInlines :: OrgParser (F MetaValue)
 metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
@@ -453,6 +466,35 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r")
                       <*  char ':'
                       <*  skipSpaces
 
+optionLine :: OrgParser ()
+optionLine = try $ do
+  key <- metaKey
+  case key of
+    "link" -> parseLinkFormat >>= uncurry addLinkFormat
+    _      -> mzero
+
+parseLinkFormat :: OrgParser ((String, String -> String))
+parseLinkFormat = try $ do
+  linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
+  linkSubst <- parseFormat
+  return (linkType, linkSubst)
+
+-- | An ad-hoc, single-argument-only implementation of a printf-style format
+-- parser.
+parseFormat :: OrgParser (String -> String)
+parseFormat = try $ do
+  replacePlain <|> replaceUrl <|> justAppend
+ where
+   -- inefficient, but who cares
+   replacePlain = try $ (\x -> concat . flip intersperse x)
+                     <$> sequence [tillSpecifier 's', rest]
+   replaceUrl   = try $ (\x -> concat . flip intersperse x . urlEncode)
+                     <$> sequence [tillSpecifier 'h', rest]
+   justAppend   = try $ (++) <$> rest
+
+   rest            = manyTill anyChar         (eof <|> () <$ oneOf "\n\r")
+   tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+
 --
 -- Headers
 --
@@ -850,13 +892,15 @@ linkOrImage = explicitOrImageLink
 explicitOrImageLink :: OrgParser (F Inlines)
 explicitOrImageLink = try $ do
   char '['
-  src    <- linkTarget
+  srcF   <- applyCustomLinkFormat =<< linkTarget
   title  <- enclosedRaw (char '[') (char ']')
   title' <- parseFromString (mconcat <$> many inline) title
   char ']'
-  return $ if isImageFilename src && isImageFilename title
-           then pure $ B.link src "" $ B.image title mempty mempty
-           else linkToInlinesF src =<< title'
+  return $ do
+    src <- srcF
+    if isImageFilename src && isImageFilename title
+      then pure $ B.link src "" $ B.image title mempty mempty
+      else linkToInlinesF src =<< title'
 
 selflinkOrImage :: OrgParser (F Inlines)
 selflinkOrImage = try $ do
@@ -881,6 +925,14 @@ selfTarget = try $ char '[' *> linkTarget <* char ']'
 linkTarget :: OrgParser String
 linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
 
+applyCustomLinkFormat :: String -> OrgParser (F String)
+applyCustomLinkFormat link = do
+  let (linkType, rest) = break (== ':') link
+  return $ do
+    formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
+    return $ maybe link ($ drop 1 rest) formatter
+
+
 linkToInlinesF :: String -> Inlines -> F Inlines
 linkToInlinesF s@('#':_) = pure . B.link s ""
 linkToInlinesF s
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 96747d148..78684f0f1 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -304,6 +304,32 @@ tests =
                   ] =?>
           (para (spanWith ("link-here", [], []) mempty <> "Target.") <>
            para (emph ("See" <> space <> "here!")))
+
+      , "Link abbreviation" =:
+          unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s"
+                  , "[[wp:Org_mode][Wikipedia on Org-mode]]"
+                  ] =?>
+          (para (link "https://en.wikipedia.org/wiki/Org_mode" ""
+                      ("Wikipedia" <> space <> "on" <> space <> "Org-mode")))
+
+      , "Link abbreviation, defined after first use" =:
+          unlines [ "[[zl:non-sense][Non-sense articles]]"
+                  , "#+LINK: zl http://zeitlens.com/tags/%s.html"
+                  ] =?>
+          (para (link "http://zeitlens.com/tags/non-sense.html" ""
+                      ("Non-sense" <> space <> "articles")))
+
+      , "Link abbreviation, URL encoded arguments" =:
+          unlines [ "#+link: expl http://example.com/%h/foo"
+                  , "[[expl:Hello, World!][Moin!]]"
+                  ] =?>
+          (para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!"))
+
+      , "Link abbreviation, append arguments" =:
+          unlines [ "#+link: expl http://example.com/"
+                  , "[[expl:foo][bar]]"
+                  ] =?>
+          (para (link "http://example.com/foo" "" "bar"))
       ]
 
   , testGroup "Basic Blocks" $
-- 
cgit v1.2.3


From 71bd4fb2b3778d3906a63938625ebcadca40b8c8 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Mon, 5 May 2014 14:39:25 +0200
Subject: Org reader: Read inline code blocks

Org's inline code blocks take forms like `src_haskell(print "hi")` and
are frequently used to include results from computations called from
within the document.  The blocks are read as inline code and marked with
the special class `rundoc-block`.  Proper handling and execution of
these blocks is the subject of a separate library, rundoc, which is
work in progress.

This closes #1278.
---
 src/Text/Pandoc/Readers/Org.hs | 43 +++++++++++++++++++++++++++++++++++++++---
 tests/Tests/Readers/Org.hs     | 18 ++++++++++++++++++
 2 files changed, 58 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index d68ef45ef..dba61dfe0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -43,6 +43,7 @@ import           Text.Pandoc.Shared (compactify', compactify'DL)
 
 import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
+import           Control.Arrow (first)
 import           Control.Monad (foldM, guard, liftM, liftM2, mzero, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (isAlphaNum, toLower)
@@ -721,7 +722,6 @@ bulletList = fmap B.bulletList . fmap compactify' . sequence
              <$> many1 (listItem bulletListStart)
 
 orderedList :: OrgParser (F Blocks)
--- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
 orderedList = fmap B.orderedList . fmap compactify' . sequence
               <$> many1 (listItem orderedListStart)
 
@@ -746,11 +746,11 @@ definitionListItem :: OrgParser Int
 definitionListItem parseMarkerGetLength = try $ do
   markerLength <- parseMarkerGetLength
   term <- manyTill (noneOf "\n\r") (try $ string "::")
-  first <- anyLineNewline
+  line1 <- anyLineNewline
   blank <- option "" ("\n" <$ blankline)
   cont <- concat <$> many (listContinuation markerLength)
   term' <- parseFromString inline term
-  contents' <- parseFromString parseBlocks $ first ++ blank ++ cont
+  contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
   return $ (,) <$> term' <*> fmap (:[]) contents'
 
 
@@ -789,6 +789,7 @@ inline =
          , footnote
          , linkOrImage
          , anchor
+         , inlineCodeBlock
          , str
          , endline
          , emph
@@ -989,6 +990,42 @@ solidify = map replaceSpecialChar
            | c `elem` "_.-:" = c
            | otherwise       = '-'
 
+-- | Parses an inline code block and marks it as an babel block.
+inlineCodeBlock :: OrgParser (F Inlines)
+inlineCodeBlock = try $ do
+  string "src_"
+  lang <- many1 orgArgWordChar
+  opts <- option [] $ enclosedByPair '[' ']' blockOption
+  inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
+  let attrClasses = [translateLang lang, rundocBlockClass]
+  let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
+  returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ where enclosedByPair s e p = char s *> many1Till p (char e)
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = "rundoc-block"
+
+blockOption :: OrgParser (String, String)
+blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
+
+orgArgKey :: OrgParser String
+orgArgKey = try $
+  skipSpaces *> char ':'
+             *> many1 orgArgWordChar
+             <* many1 spaceChar
+
+orgArgValue :: OrgParser String
+orgArgValue = try $
+  skipSpaces *> many1 orgArgWordChar
+             <* skipSpaces
+
+orgArgWordChar :: OrgParser Char
+orgArgWordChar = alphaNum <|> oneOf "-_"
+
+toRundocAttrib :: (String, String) -> (String, String)
+toRundocAttrib = first ("rundoc-" ++)
+
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
 
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 78684f0f1..949976aba 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -207,6 +207,24 @@ tests =
           "<<anchor>> Link here later." =?>
           (para $ spanWith ("anchor", [], []) mempty <>
                   "Link" <> space <> "here" <> space <> "later.")
+
+      , "Inline code block" =:
+          "src_emacs-lisp{(message \"Hello\")}" =?>
+          (para $ codeWith ( ""
+                           , [ "commonlisp", "rundoc-block" ]
+                           , [ ("rundoc-language", "emacs-lisp") ])
+                           "(message \"Hello\")")
+
+      , "Inline code block with arguments" =:
+          "src_sh[:export both :results output]{echo 'Hello, World'}" =?>
+          (para $ codeWith ( ""
+                           , [ "bash", "rundoc-block" ]
+                           , [ ("rundoc-language", "sh")
+                             , ("rundoc-export", "both")
+                             , ("rundoc-results", "output")
+                             ]
+                           )
+                           "echo 'Hello, World'")
       ]
 
   , testGroup "Meta Information" $
-- 
cgit v1.2.3


From 7760504bb26f215e7d0c57da843f1f1dcc8c1186 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 8 May 2014 17:01:58 +0200
Subject: Org reader: refactor #+BEGIN..#+END block parsing code

---
 src/Text/Pandoc/Readers/Org.hs | 122 +++++++++++++++++++++++++++--------------
 1 file changed, 80 insertions(+), 42 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index dba61dfe0..9df8ce0b3 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -50,7 +50,7 @@ import           Data.Char (isAlphaNum, toLower)
 import           Data.Default
 import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
-import           Data.Maybe (listToMaybe, fromMaybe, isJust)
+import           Data.Maybe (fromMaybe, isJust)
 import           Data.Monoid (Monoid, mconcat, mempty, mappend)
 import           Network.HTTP (urlEncode)
 
@@ -162,7 +162,8 @@ popInlineCharStack = updateState $ \s ->
   s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
 
 surroundingEmphasisChar :: OrgParser [Char]
-surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
+surroundingEmphasisChar =
+  take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
 
 startEmphasisNewlinesCounting :: Int -> OrgParser ()
 startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
@@ -170,7 +171,7 @@ startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
 
 decEmphasisNewlinesCount :: OrgParser ()
 decEmphasisNewlinesCount = updateState $ \s ->
-      s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
+  s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
 
 newlinesCountWithinLimits :: OrgParser Bool
 newlinesCountWithinLimits = do
@@ -296,41 +297,60 @@ lookupInlinesAttr attr = try $ do
 -- Org Blocks (#+BEGIN_... / #+END_...)
 --
 
+type BlockProperties = (Int, String)  -- (Indentation, Block-Type)
+
 orgBlock :: OrgParser (F Blocks)
 orgBlock = try $ do
-  (indent, blockType, args) <- blockHeader
-  content <- rawBlockContent indent blockType
-  contentBlocks <- parseFromString parseBlocks (content ++ "\n")
-  let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
-  case blockType of
-    "comment" -> return mempty
-    "html"    -> returnF $ B.rawBlock "html" content
-    "latex"   -> returnF $ B.rawBlock "latex" content
-    "ascii"   -> returnF $ B.rawBlock "ascii" content
-    "example" -> returnF $ exampleCode content
-    "quote"   -> return  $ B.blockQuote <$> contentBlocks
-    "verse"   -> parseVerse content
-    "src"     -> codeBlockWithAttr classArgs content
-    _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
+  blockProp@(_, blkType) <- blockHeaderStart
+  ($ blockProp) $
+    case blkType of
+      "comment" -> withRaw'   (const mempty)
+      "html"    -> withRaw'   (return . (B.rawBlock blkType))
+      "latex"   -> withRaw'   (return . (B.rawBlock blkType))
+      "ascii"   -> withRaw'   (return . (B.rawBlock blkType))
+      "example" -> withRaw'   (return . exampleCode)
+      "quote"   -> withParsed (fmap B.blockQuote)
+      "verse"   -> verseBlock
+      "src"     -> codeBlock
+      _         -> withParsed (fmap $ divWithClass blkType)
+
+blockHeaderStart :: OrgParser (Int, String)
+blockHeaderStart = try $ (,) <$> indent <*> blockType
  where
-   parseVerse :: String -> OrgParser (F Blocks)
-   parseVerse cs =
-       fmap B.para . mconcat . intersperse (pure B.linebreak)
-       <$> mapM (parseFromString parseInlines) (lines cs)
-
-blockHeader :: OrgParser (Int, String, [String])
-blockHeader = (,,) <$> blockIndent
-                   <*> blockType
-                   <*> (skipSpaces *> blockArgs)
- where blockIndent = length <$> many spaceChar
-       blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
-       blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline
-
-codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks)
-codeBlockWithAttr classArgs content = do
-  identifier <- fromMaybe "" <$> lookupBlockAttribute "name"
-  caption <- lookupInlinesAttr "caption"
-  let codeBlck = B.codeBlockWith (identifier, classArgs, []) content
+  indent    = length      <$> many spaceChar
+  blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar)
+
+withRaw'   :: (String   -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withRaw'   f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
+
+withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
+
+ignHeaders :: OrgParser ()
+ignHeaders = (() <$ newline) <|> (() <$ anyLine)
+
+divWithClass :: String -> Blocks -> Blocks
+divWithClass cls = B.divWith ("", [cls], [])
+
+verseBlock :: BlockProperties -> OrgParser (F Blocks)
+verseBlock blkProp = try $ do
+  ignHeaders
+  content <- rawBlockContent blkProp
+  fmap B.para . mconcat . intersperse (pure B.linebreak)
+    <$> mapM (parseFromString parseInlines) (lines content)
+
+codeBlock :: BlockProperties -> OrgParser (F Blocks)
+codeBlock blkProp = do
+  skipSpaces
+  language      <- optionMaybe orgArgWord
+  (classes, kv) <- codeHeaderArgs
+  id'           <- fromMaybe "" <$> lookupBlockAttribute "name"
+  caption       <- lookupInlinesAttr "caption"
+  content       <- rawBlockContent blkProp
+  let attr      = ( id'
+                  , maybe id (\l -> (l:)) language $ classes
+                  , kv )
+  let codeBlck  = B.codeBlockWith attr content
   return $ maybe (pure codeBlck) (labelDiv codeBlck) caption
  where
    labelDiv blk value =
@@ -338,14 +358,21 @@ codeBlockWithAttr classArgs content = do
                                        <*> pure blk)
    labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
 
-rawBlockContent :: Int -> String -> OrgParser String
-rawBlockContent indent blockType =
+rawBlockContent :: BlockProperties -> OrgParser String
+rawBlockContent (indent, blockType) = try $
   unlines . map commaEscaped <$> manyTill indentedLine blockEnder
  where
-   indentedLine = try $ choice [ blankline         *> pure "\n"
-                               , indentWith indent *> anyLine
-                               ]
-   blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+   indentedLine = try $
+     choice [ blankline         *> pure "\n"
+            , indentWith indent *> anyLine
+            ]
+   blockEnder = try $
+     indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+
+parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
+parsedBlockContent blkProps = try $ do
+  raw <- rawBlockContent blkProps
+  parseFromString parseBlocks (raw ++ "\n")
 
 -- indent by specified number of spaces (or equiv. tabs)
 indentWith :: Int -> OrgParser String
@@ -356,6 +383,13 @@ indentWith num = do
      else choice [ try (count num (char ' '))
                  , try (char '\t' >> count (num - tabStop) (char ' ')) ]
 
+orgArgWord :: OrgParser String
+orgArgWord = many1 orgArgWordChar
+
+codeHeaderArgs :: OrgParser ([String], [(String, String)])
+codeHeaderArgs =
+  (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline
+
 translateLang :: String -> String
 translateLang "C"          = "c"
 translateLang "C++"        = "cpp"
@@ -1002,9 +1036,13 @@ inlineCodeBlock = try $ do
   returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
  where enclosedByPair s e p = char s *> many1Till p (char e)
 
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
 -- | The class-name used to mark rundoc blocks.
 rundocBlockClass :: String
-rundocBlockClass = "rundoc-block"
+rundocBlockClass = rundocPrefix ++ "block"
 
 blockOption :: OrgParser (String, String)
 blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
-- 
cgit v1.2.3


From 757c4f68f3f3cab99db9499936e3ae4775ebbddf Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 9 May 2014 18:07:37 +0200
Subject: Org reader: Support arguments for code blocks

The general form of source block headers
(`#+BEGIN_SRC <language> <switches> <header arguments>`) was not
recognized by the reader.  This patch adds support for the above form,
adds header arguments to the block's key-value pairs and marks the block
as a rundoc block if header arguments are present.

This closes #1286.
---
 src/Text/Pandoc/Readers/Org.hs | 98 ++++++++++++++++++++++++------------------
 tests/Tests/Readers/Org.hs     | 14 ++++++
 2 files changed, 70 insertions(+), 42 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 9df8ce0b3..c05ac92d0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -276,7 +276,7 @@ parseBlockAttributes = do
  where
    attribute :: OrgParser (String, String)
    attribute = try $ do
-         key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':')
+         key <- metaLineStart *> many1Till nonspaceChar (char ':')
          val <- skipSpaces *> anyLine
          return (map toLower key, val)
 
@@ -342,16 +342,11 @@ verseBlock blkProp = try $ do
 codeBlock :: BlockProperties -> OrgParser (F Blocks)
 codeBlock blkProp = do
   skipSpaces
-  language      <- optionMaybe orgArgWord
-  (classes, kv) <- codeHeaderArgs
+  (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
   id'           <- fromMaybe "" <$> lookupBlockAttribute "name"
-  caption       <- lookupInlinesAttr "caption"
   content       <- rawBlockContent blkProp
-  let attr      = ( id'
-                  , maybe id (\l -> (l:)) language $ classes
-                  , kv )
-  let codeBlck  = B.codeBlockWith attr content
-  return $ maybe (pure codeBlck) (labelDiv codeBlck) caption
+  let codeBlck  = B.codeBlockWith ( id', classes, kv ) content
+  maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption"
  where
    labelDiv blk value =
        B.divWith nullAttr <$> (mappend <$> labelledBlock value
@@ -383,12 +378,33 @@ indentWith num = do
      else choice [ try (count num (char ' '))
                  , try (char '\t' >> count (num - tabStop) (char ' ')) ]
 
+type SwitchOption = (Char, Maybe String)
+
 orgArgWord :: OrgParser String
 orgArgWord = many1 orgArgWordChar
 
+-- | Parse code block arguments
+-- TODO: We currently don't handle switches.
 codeHeaderArgs :: OrgParser ([String], [(String, String)])
-codeHeaderArgs =
-  (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline
+codeHeaderArgs = try $ do
+  language   <- skipSpaces *> orgArgWord
+  _          <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
+  parameters <- manyTill blockOption newline
+  let pandocLang = translateLang language
+  return $
+    if hasRundocParameters parameters
+    then ( [ pandocLang, rundocBlockClass ]
+         , map toRundocAttrib (("language", language) : parameters)
+         )
+    else ([ pandocLang ], parameters)
+ where hasRundocParameters = not . null
+
+switch :: OrgParser SwitchOption
+switch = try $ simpleSwitch <|> lineNumbersSwitch
+ where
+   simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
+   lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
+                       (string "-l \"" *> many1Till nonspaceChar (char '"'))
 
 translateLang :: String -> String
 translateLang "C"          = "c"
@@ -401,6 +417,32 @@ translateLang "sh"         = "bash"
 translateLang "sqlite"     = "sql"
 translateLang cs = cs
 
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = rundocPrefix ++ "block"
+
+blockOption :: OrgParser (String, String)
+blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
+
+orgArgKey :: OrgParser String
+orgArgKey = try $
+  skipSpaces *> char ':'
+             *> many1 orgArgWordChar
+
+orgArgValue :: OrgParser String
+orgArgValue = try $
+  skipSpaces *> many1 orgArgWordChar <* skipSpaces
+
+orgArgWordChar :: OrgParser Char
+orgArgWordChar = alphaNum <|> oneOf "-_"
+
+toRundocAttrib :: (String, String) -> (String, String)
+toRundocAttrib = first ("rundoc-" ++)
+
 commaEscaped :: String -> String
 commaEscaped (',':cs@('*':_))     = cs
 commaEscaped (',':cs@('#':'+':_)) = cs
@@ -425,7 +467,7 @@ drawer = try $ do
 
 drawerStart :: OrgParser String
 drawerStart = try $
-  skipSpaces *> drawerName <* skipSpaces <* newline
+  skipSpaces *> drawerName <* skipSpaces <* P.newline
  where drawerName = try $  char ':' *> validDrawerName <* char ':'
        validDrawerName =  stringAnyCase "PROPERTIES"
                           <|> stringAnyCase "LOGBOOK"
@@ -435,7 +477,7 @@ drawerLine = try anyLine
 
 drawerEnd :: OrgParser String
 drawerEnd = try $
-  skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
+  skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* P.newline
 
 
 --
@@ -446,7 +488,7 @@ drawerEnd = try $
 figure :: OrgParser (F Blocks)
 figure = try $ do
   (cap, nam) <- nameAndCaption
-  src <- skipSpaces *> selfTarget <* skipSpaces <* newline
+  src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
   guard (isImageFilename src)
   return $ do
     cap' <- cap
@@ -1036,34 +1078,6 @@ inlineCodeBlock = try $ do
   returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
  where enclosedByPair s e p = char s *> many1Till p (char e)
 
--- | Prefix used for Rundoc classes and arguments.
-rundocPrefix :: String
-rundocPrefix = "rundoc-"
-
--- | The class-name used to mark rundoc blocks.
-rundocBlockClass :: String
-rundocBlockClass = rundocPrefix ++ "block"
-
-blockOption :: OrgParser (String, String)
-blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
-
-orgArgKey :: OrgParser String
-orgArgKey = try $
-  skipSpaces *> char ':'
-             *> many1 orgArgWordChar
-             <* many1 spaceChar
-
-orgArgValue :: OrgParser String
-orgArgValue = try $
-  skipSpaces *> many1 orgArgWordChar
-             <* skipSpaces
-
-orgArgWordChar :: OrgParser Char
-orgArgWordChar = alphaNum <|> oneOf "-_"
-
-toRundocAttrib :: (String, String) -> (String, String)
-toRundocAttrib = first ("rundoc-" ++)
-
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
 
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 949976aba..a78e8861f 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -822,6 +822,20 @@ tests =
            in mconcat [ para $ spcSep [ "Low", "German", "greeting"  ]
                       , codeBlockWith attr' code'
                       ]
+      , "Source block with rundoc/babel arguments" =:
+           unlines [ "#+BEGIN_SRC emacs-lisp :exports both"
+                   , "(progn (message \"Hello, World!\")"
+                   , "       (+ 23 42))"
+                   , "#+END_SRC" ] =?>
+           let classes = [ "commonlisp"  -- as kate doesn't know emacs-lisp syntax
+                         , "rundoc-block"
+                         ]
+               params = [ ("rundoc-language", "emacs-lisp")
+                        , ("rundoc-exports", "both")
+                        ]
+               code' = unlines [ "(progn (message \"Hello, World!\")"
+                               , "       (+ 23 42))" ]
+           in codeBlockWith ("", classes, params) code'
 
       , "Example block" =:
            unlines [ "#+begin_example"
-- 
cgit v1.2.3


From 07694b30184bcf2ed0e2998016df394f47a1996f Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 9 May 2014 18:23:23 +0200
Subject: Org reader: Fix parsing of blank lines within blocks

Blank lines were parsed as two newlines instead of just one.
Thanks to Xiao Hanyu (@xiaohanyu) for pointing this out.
---
 src/Text/Pandoc/Readers/Org.hs | 8 ++------
 tests/Tests/Readers/Org.hs     | 9 +++++++++
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c05ac92d0..0f218d43f 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -357,12 +357,8 @@ rawBlockContent :: BlockProperties -> OrgParser String
 rawBlockContent (indent, blockType) = try $
   unlines . map commaEscaped <$> manyTill indentedLine blockEnder
  where
-   indentedLine = try $
-     choice [ blankline         *> pure "\n"
-            , indentWith indent *> anyLine
-            ]
-   blockEnder = try $
-     indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+   indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
+   blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
 
 parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
 parsedBlockContent blkProps = try $ do
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index a78e8861f..87b0d0c90 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -920,5 +920,14 @@ tests =
                              (unlines [ "fmap id = id"
                                       , "fmap (p . q) = (fmap p) . (fmap q)"
                                       ])))
+
+      , "Convert blank lines in blocks to single newlines" =:
+          unlines [ "#+begin_html"
+                  , ""
+                  , "<span>boring</span>"
+                  , ""
+                  , "#+end_html"
+                  ] =?>
+          rawBlock "html" "\n<span>boring</span>\n\n"
       ]
   ]
-- 
cgit v1.2.3


From c5fd631b550a3b05b60de1684c80387bc46a88cc Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 10 May 2014 11:25:20 +0200
Subject: Org reader: Fix block parameter reader, relax constraints

The reader produced wrong results for block containing non-letter chars
in their parameter arguments.  This patch relaxes constraints in that it
allows block header arguments to contain any non-space character (except
for ']' for inline blocks).

Thanks to Xiao Hanyu for noticing this.
---
 src/Text/Pandoc/Readers/Org.hs | 19 +++++++++++++------
 tests/Tests/Readers/Org.hs     | 12 ++++++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0f218d43f..2e4a29beb 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -318,7 +318,7 @@ blockHeaderStart :: OrgParser (Int, String)
 blockHeaderStart = try $ (,) <$> indent <*> blockType
  where
   indent    = length      <$> many spaceChar
-  blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar)
+  blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
 
 withRaw'   :: (String   -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
 withRaw'   f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
@@ -422,16 +422,23 @@ rundocBlockClass :: String
 rundocBlockClass = rundocPrefix ++ "block"
 
 blockOption :: OrgParser (String, String)
-blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
+blockOption = try $ (,) <$> orgArgKey <*> orgParamValue
+
+inlineBlockOption :: OrgParser (String, String)
+inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue
 
 orgArgKey :: OrgParser String
 orgArgKey = try $
   skipSpaces *> char ':'
              *> many1 orgArgWordChar
 
-orgArgValue :: OrgParser String
-orgArgValue = try $
-  skipSpaces *> many1 orgArgWordChar <* skipSpaces
+orgParamValue :: OrgParser String
+orgParamValue = try $
+  skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces
+
+orgInlineParamValue :: OrgParser String
+orgInlineParamValue = try $
+  skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces
 
 orgArgWordChar :: OrgParser Char
 orgArgWordChar = alphaNum <|> oneOf "-_"
@@ -1067,7 +1074,7 @@ inlineCodeBlock :: OrgParser (F Inlines)
 inlineCodeBlock = try $ do
   string "src_"
   lang <- many1 orgArgWordChar
-  opts <- option [] $ enclosedByPair '[' ']' blockOption
+  opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
   inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
   let attrClasses = [translateLang lang, rundocBlockClass]
   let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 87b0d0c90..4ef7a7731 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -929,5 +929,17 @@ tests =
                   , "#+end_html"
                   ] =?>
           rawBlock "html" "\n<span>boring</span>\n\n"
+
+      , "Non-letter chars in source block parameters" =:
+          unlines [ "#+BEGIN_SRC C :tangle xxxx.c :city Zürich"
+                  , "code body"
+                  , "#+END_SRC"
+                  ] =?>
+          let classes = [ "c", "rundoc-block" ]
+              params  = [ ("rundoc-language", "C")
+                        , ("rundoc-tangle", "xxxx.c")
+                        , ("rundoc-city", "Zürich")
+                        ]
+          in codeBlockWith ( "", classes, params) "code body\n"
       ]
   ]
-- 
cgit v1.2.3


From 9df589b9c5a4f2dcb19445239dfae41b54625330 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 14 May 2014 14:45:37 +0200
Subject: Introduce class HasLastStrPosition, generalize functions

Both `ParserState` and `OrgParserState` keep track of the parser position at
which the last string ended.  This patch introduces a new class
`HasLastStrPosition` and makes the above types instances of that class.  This
enables the generalization of functions updating the state or checking if one
is right after a string.
---
 src/Text/Pandoc/Parsing.hs          | 32 +++++++++++++++++++++++---------
 src/Text/Pandoc/Readers/Markdown.hs | 11 +++--------
 src/Text/Pandoc/Readers/Org.hs      | 11 ++++-------
 3 files changed, 30 insertions(+), 24 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index d1e55cbc4..344f6c7ba 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~),
                              withRaw,
                              escaped,
                              characterReference,
-                             updateLastStrPos,
                              anyOrderedListMarker,
                              orderedListMarker,
                              charRef,
@@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~),
                              testStringWith,
                              guardEnabled,
                              guardDisabled,
+                             updateLastStrPos,
+                             notAfterString,
                              ParserState (..),
                              HasReaderOptions (..),
                              HasHeaderMap (..),
                              HasIdentifierList (..),
                              HasMacros (..),
+                             HasLastStrPosition (..),
                              defaultParserState,
                              HeaderType (..),
                              ParserContext (..),
@@ -904,6 +906,14 @@ instance HasMacros ParserState where
   extractMacros        = stateMacros
   updateMacros f st    = st{ stateMacros = f $ stateMacros st }
 
+class HasLastStrPosition st where
+  setLastStrPos  :: SourcePos -> st -> st
+  getLastStrPos  :: st -> Maybe SourcePos
+
+instance HasLastStrPosition ParserState where
+  setLastStrPos pos st = st{ stateLastStrPos = Just pos }
+  getLastStrPos st     = stateLastStrPos st
+
 defaultParserState :: ParserState
 defaultParserState =
     ParserState { stateOptions         = def,
@@ -938,6 +948,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
 guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
 guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
 
+-- | Update the position on which the last string ended.
+updateLastStrPos :: HasLastStrPosition st => Parser s st ()
+updateLastStrPos = getPosition >>= updateState . setLastStrPos
+
+-- | Whether we are right after the end of a string.
+notAfterString :: HasLastStrPosition st => Parser s st Bool
+notAfterString = do
+  pos <- getPosition
+  st  <- getState
+  return $ getLastStrPos st /= Just pos
+
 data HeaderType
     = SingleHeader Char  -- ^ Single line of characters underneath
     | DoubleHeader Char  -- ^ Lines of characters above and below
@@ -1049,17 +1070,11 @@ charOrRef cs =
                        guard (c `elem` cs)
                        return c)
 
-updateLastStrPos :: Parser [Char] ParserState ()
-updateLastStrPos = getPosition >>= \p ->
-  updateState $ \s -> s{ stateLastStrPos = Just p }
-
 singleQuoteStart :: Parser [Char] ParserState ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
-  pos <- getPosition
-  st <- getState
   -- single quote start can't be right after str
-  guard $ stateLastStrPos st /= Just pos
+  guard =<< notAfterString
   () <$ charOrRef "'\8216\145"
 
 singleQuoteEnd :: Parser [Char] st ()
@@ -1156,4 +1171,3 @@ applyMacros' target = do
      then do macros <- extractMacros `fmap` getState
              return $ applyMacros macros target
      else return target
-
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index d1637b701..1ac98e94c 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1474,9 +1474,7 @@ strongOrEmph =  enclosure '*' <|> (checkIntraword >> enclosure '_')
   where  checkIntraword = do
            exts <- getOption readerExtensions
            when (Ext_intraword_underscores `Set.member` exts) $ do
-             pos <- getPosition
-             lastStrPos <- stateLastStrPos <$> getState
-             guard $ lastStrPos /= Just pos
+             guard =<< notAfterString
 
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (Show b)
@@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n')
 str :: MarkdownParser (F Inlines)
 str = do
   result <- many1 alphaNum
-  pos <- getPosition
-  updateState $ \s -> s{ stateLastStrPos = Just pos }
+  updateLastStrPos
   let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
   isSmart <- getOption readerSmart
   if isSmart
@@ -1821,9 +1818,7 @@ citeKey :: MarkdownParser (Bool, String)
 citeKey = try $ do
   -- make sure we're not right after an alphanumeric,
   -- since foo@bar.baz is probably an email address
-  lastStrPos <- stateLastStrPos <$> getState
-  pos <- getPosition
-  guard $ lastStrPos /= Just pos
+  guard =<< notAfterString
   suppress_author <- option False (char '-' >> return True)
   char '@'
   first <- letter <|> char '_'
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 2e4a29beb..5dbcaee98 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -105,6 +105,10 @@ instance HasMeta OrgParserState where
   deleteMeta field st =
     st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
 
+instance HasLastStrPosition OrgParserState where
+  getLastStrPos = orgStateLastStrPos
+  setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
+
 instance Default OrgParserState where
   def = defaultOrgParserState
 
@@ -1274,13 +1278,6 @@ afterEmphasisPreChar = do
   lastPrePos <- orgStateLastPreCharPos <$> getState
   return . fromMaybe True $ (== pos) <$> lastPrePos
 
--- | Whether we are right after the end of a string
-notAfterString :: OrgParser Bool
-notAfterString = do
-  pos <- getPosition
-  lastStrPos <- orgStateLastStrPos <$> getState
-  return $ lastStrPos /= Just pos
-
 -- | Whether the parser is right after a forbidden border char
 notAfterForbiddenBorderChar :: OrgParser Bool
 notAfterForbiddenBorderChar = do
-- 
cgit v1.2.3


From ceeb701c254c6dc4c054e10dd151d9ef6f751ad7 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 14 May 2014 14:49:30 +0200
Subject: Org reader: support Pandocs citation extension

Citations are defined via the "normal citation" syntax used in markdown,
with the sole difference that newlines are not allowed between "[...]".
This is for consistency, as org-mode generally disallows newlines
between square brackets.

The extension is turned on by default and can be turned off via the
default syntax-extension mechanism, i.e. by specifying "org-citation" as
the input format.
Move `citeKey` from Readers.Markdown into Parsing

The function can be used by other readers, so it is made accessible for
all parsers.
---
 src/Text/Pandoc.hs             |  2 +-
 src/Text/Pandoc/Readers/Org.hs | 55 ++++++++++++++++++++++++++++++++++++++++--
 tests/Tests/Readers/Org.hs     | 22 +++++++++++++++++
 3 files changed, 76 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index dd5bc18f6..130338f0e 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
 getDefaultExtensions "markdown_github" = githubMarkdownExtensions
 getDefaultExtensions "markdown"        = pandocExtensions
 getDefaultExtensions "plain"           = pandocExtensions
+getDefaultExtensions "org"             = Set.fromList [Ext_citations]
 getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
 getDefaultExtensions _                 = Set.fromList [Ext_auto_identifiers]
 
@@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
 
 writeJSON :: WriterOptions -> Pandoc -> String
 writeJSON _ = UTF8.toStringLazy . encode
-
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5dbcaee98..86dda2732 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -869,6 +869,7 @@ inline :: OrgParser (F Inlines)
 inline =
   choice [ whitespace
          , linebreak
+         , cite
          , footnote
          , linkOrImage
          , anchor
@@ -933,6 +934,51 @@ endline = try $ do
   updateLastPreCharPos
   return . return $ B.space
 
+cite :: OrgParser (F Inlines)
+cite = try $ do
+  guardEnabled Ext_citations
+  (cs, raw) <- withRaw normalCite
+  return $ (flip B.cite (B.text raw)) <$> cs
+
+normalCite :: OrgParser (F [Citation])
+normalCite = try $  char '['
+                 *> skipSpaces
+                 *> citeList
+                 <* skipSpaces
+                 <* char ']'
+
+citeList :: OrgParser (F [Citation])
+citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
+
+citation :: OrgParser (F Citation)
+citation = try $ do
+  pref <- prefix
+  (suppress_author, key) <- citeKey
+  suff <- suffix
+  return $ do
+    x <- pref
+    y <- suff
+    return $ Citation{ citationId      = key
+                     , citationPrefix  = B.toList x
+                     , citationSuffix  = B.toList y
+                     , citationMode    = if suppress_author
+                                            then SuppressAuthor
+                                            else NormalCitation
+                     , citationNoteNum = 0
+                     , citationHash    = 0
+                     }
+ where
+   prefix = trimInlinesF . mconcat <$>
+            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+   suffix = try $ do
+     hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
+     skipSpaces
+     rest <- trimInlinesF . mconcat <$>
+             many (notFollowedBy (oneOf ";]") *> inline)
+     return $ if hasSpace
+              then (B.space <>) <$> rest
+              else rest
+
 footnote :: OrgParser (F Inlines)
 footnote = try $ inlineNote <|> referencedNote
 
@@ -1007,7 +1053,7 @@ selfTarget :: OrgParser String
 selfTarget = try $ char '[' *> linkTarget <* char ']'
 
 linkTarget :: OrgParser String
-linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
+linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
 
 applyCustomLinkFormat :: String -> OrgParser (F String)
 applyCustomLinkFormat link = do
@@ -1083,7 +1129,12 @@ inlineCodeBlock = try $ do
   let attrClasses = [translateLang lang, rundocBlockClass]
   let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
   returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
- where enclosedByPair s e p = char s *> many1Till p (char e)
+
+enclosedByPair :: Char          -- ^ opening char
+               -> Char          -- ^ closing char
+               -> OrgParser a   -- ^ parser
+               -> OrgParser [a]
+enclosedByPair s e p = char s *> many1Till p (char e)
 
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 4ef7a7731..ca97ba348 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -225,6 +225,28 @@ tests =
                              ]
                            )
                            "echo 'Hello, World'")
+
+      , "Citation" =:
+          "[@nonexistent]" =?>
+          let citation = Citation
+                         { citationId = "nonexistent"
+                         , citationPrefix = []
+                         , citationSuffix = []
+                         , citationMode = NormalCitation
+                         , citationNoteNum = 0
+                         , citationHash = 0}
+          in (para $ cite [citation] "[@nonexistent]")
+
+      , "Citation containing text" =:
+          "[see @item1 p. 34-35]" =?>
+          let citation = Citation
+                         { citationId = "item1"
+                         , citationPrefix = [Str "see"]
+                         , citationSuffix = [Space ,Str "p.",Space,Str "34-35"]
+                         , citationMode = NormalCitation
+                         , citationNoteNum = 0
+                         , citationHash = 0}
+          in (para $ cite [citation] "[see @item1 p. 34-35]")
       ]
 
   , testGroup "Meta Information" $
-- 
cgit v1.2.3


From 3238a2f9191b83864abd682261634a603ec89056 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Tue, 20 May 2014 22:29:21 +0200
Subject: Org reader: support for inline LaTeX

Inline LaTeX is now accepted and parsed by the org-mode reader.  Both,
math symbols (like \tau) and LaTeX commands (like \cite{Coffee}), can be
used without any further escaping.
---
 src/Text/Pandoc/Readers/LaTeX.hs |  1 +
 src/Text/Pandoc/Readers/Org.hs   | 32 +++++++++++++++++++++++++++++++-
 tests/Tests/Readers/Org.hs       | 27 +++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 6f870318f..7fc587882 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -31,6 +31,7 @@ Conversion of LaTeX to 'Pandoc' document.
 module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                    rawLaTeXInline,
                                    rawLaTeXBlock,
+                                   inlineCommand,
                                    handleIncludes
                                  ) where
 
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 86dda2732..c3ea8d7c2 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -39,12 +39,15 @@ import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                             , newline, orderedListMarker
                                             , parseFromString
                                             , updateLastStrPos )
+import           Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import           Text.Pandoc.Shared (compactify', compactify'DL)
+import           Text.Parsec.Pos (updatePosString)
+import           Text.TeXMath (texMathToPandoc, DisplayType(..))
 
 import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
 import           Control.Arrow (first)
-import           Control.Monad (foldM, guard, liftM, liftM2, mzero, when)
+import           Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (isAlphaNum, toLower)
 import           Data.Default
@@ -886,6 +889,7 @@ inline =
          , verbatim
          , subscript
          , superscript
+         , inlineLaTeX
          , symbol
          ] <* (guard =<< newlinesCountWithinLimits)
   <?> "inline"
@@ -1351,3 +1355,29 @@ simpleSubOrSuperString = try $
          , mappend <$> option [] ((:[]) <$> oneOf "+-")
                    <*> many1 alphaNum
          ]
+
+inlineLaTeX :: OrgParser (F Inlines)
+inlineLaTeX = try $ do
+  cmd <- inlineLaTeXCommand
+  maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd
+ where
+   parseAsMath :: String -> Maybe Inlines
+   parseAsMath cs = maybeRight $ B.fromList <$> texMathToPandoc DisplayInline cs
+
+   parseAsInlineLaTeX :: String -> Maybe Inlines
+   parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
+
+   state :: ParserState
+   state = def{ stateOptions = def{ readerParseRaw = True }}
+
+maybeRight :: Either a b -> Maybe b
+maybeRight = either (const Nothing) Just
+
+inlineLaTeXCommand :: OrgParser String
+inlineLaTeXCommand = try $ do
+  rest <- getInput
+  pos <- getPosition
+  case runParser rawLaTeXInline def "source" rest of
+    Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest)
+                                 <* (setPosition $ updatePosString pos cs)
+    _ -> mzero
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index ca97ba348..4ed77887f 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -247,6 +247,33 @@ tests =
                          , citationNoteNum = 0
                          , citationHash = 0}
           in (para $ cite [citation] "[see @item1 p. 34-35]")
+
+      , "Inline LaTeX symbol" =:
+          "\\dots" =?>
+          para "…"
+
+      , "Inline LaTeX command" =:
+          "\\textit{Emphasised}" =?>
+          para (emph "Emphasised")
+
+      , "Inline LaTeX math symbol" =:
+          "\\tau" =?>
+          para (emph "τ")
+
+      , "Unknown inline LaTeX command" =:
+          "\\notacommand{foo}" =?>
+          para (rawInline "latex" "\\notacommand{foo}")
+
+      , "LaTeX citation" =:
+          "\\cite{Coffee}" =?>
+          let citation = Citation
+                         { citationId = "Coffee"
+                         , citationPrefix = []
+                         , citationSuffix = []
+                         , citationMode = AuthorInText
+                         , citationNoteNum = 0
+                         , citationHash = 0}
+          in (para . cite [citation] $ rawInline "latex" "\\cite{Coffee}")
       ]
 
   , testGroup "Meta Information" $
-- 
cgit v1.2.3


From 01ef573ac2f6620e9f70ae8965e5ccc664e3aec4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 14:18:06 -0700
Subject: Org reader:  fixed #1342.

This change rewrites `inlineLaTeXCommand` so that parsec will
know when input is being consumed.  Previously a run-time
error would be produced with some input involving raw latex.
(I believe this does not affect the last release, as the inline
latex reading was added recently.)
---
 src/Text/Pandoc/Readers/Org.hs | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c3ea8d7c2..0e872abf0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -38,10 +38,9 @@ import qualified Text.Pandoc.Parsing as P
 import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                             , newline, orderedListMarker
                                             , parseFromString
-                                            , updateLastStrPos )
+                                            )
 import           Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import           Text.Pandoc.Shared (compactify', compactify'DL)
-import           Text.Parsec.Pos (updatePosString)
 import           Text.TeXMath (texMathToPandoc, DisplayType(..))
 
 import           Control.Applicative ( Applicative, pure
@@ -148,10 +147,6 @@ resetBlockAttributes :: OrgParser ()
 resetBlockAttributes = updateState $ \s ->
   s{ orgStateBlockAttributes = orgStateBlockAttributes def }
 
-updateLastStrPos :: OrgParser ()
-updateLastStrPos = getPosition >>= \p ->
-  updateState $ \s -> s{ orgStateLastStrPos = Just p }
-
 updateLastForbiddenCharPos :: OrgParser ()
 updateLastForbiddenCharPos = getPosition >>= \p ->
   updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
@@ -1376,8 +1371,9 @@ maybeRight = either (const Nothing) Just
 inlineLaTeXCommand :: OrgParser String
 inlineLaTeXCommand = try $ do
   rest <- getInput
-  pos <- getPosition
   case runParser rawLaTeXInline def "source" rest of
-    Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest)
-                                 <* (setPosition $ updatePosString pos cs)
+    Right (RawInline _ cs) -> do
+      let len = length cs
+      count len anyChar
+      return cs
     _ -> mzero
-- 
cgit v1.2.3


From 78ee2416d105bd25337819a49835623a8a296224 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 22:03:26 -0700
Subject: Org reader:  make tildes create inline code.

Closes #1345.  Also relabeled 'code' and 'verbatim' parsers
to accord with the org-mode manual.

I'm not sure what the distinction between code and verbatim
is supposed to be, but I'm pretty sure both should be represented
as Code inlines in pandoc.  The previous behavior resulted in the
text not appearing in any output format.
---
 src/Text/Pandoc/Readers/Org.hs | 8 ++++----
 tests/Tests/Readers/Org.hs     | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org.hs')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0e872abf0..7a35e2ca0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1148,11 +1148,11 @@ strikeout = fmap B.strikeout    <$> emphasisBetween '+'
 underline :: OrgParser (F Inlines)
 underline = fmap B.strong       <$> emphasisBetween '_'
 
-code      :: OrgParser (F Inlines)
-code      = return . B.code         <$> verbatimBetween '='
-
 verbatim  :: OrgParser (F Inlines)
-verbatim  = return . B.rawInline "" <$> verbatimBetween '~'
+verbatim  = return . B.code     <$> verbatimBetween '='
+
+code      :: OrgParser (F Inlines)
+code      = return . B.code     <$> verbatimBetween '~'
 
 subscript   :: OrgParser (F Inlines)
 subscript   = fmap B.subscript   <$> try (char '_' *> subOrSuperExpr)
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 4ed77887f..f8240ca3d 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -50,13 +50,13 @@ tests =
           "+Kill Bill+" =?>
           para (strikeout . spcSep $ [ "Kill", "Bill" ])
 
-      , "Code" =:
+      , "Verbatim" =:
           "=Robot.rock()=" =?>
           para (code "Robot.rock()")
 
-      , "Verbatim" =:
+      , "Code" =:
           "~word for word~" =?>
-          para (rawInline "" "word for word")
+          para (code "word for word")
 
       , "Math $..$" =:
           "$E=mc^2$" =?>
-- 
cgit v1.2.3