From fd98532784e43ad73072f37a31af5ff40fdc1c56 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 5 Apr 2014 09:37:46 +0200 Subject: Org reader: Fix parsing of nested inlines Text such as /*this*/ was not correctly parsed as a strong, emphasised word. This was due to the end-of-word recognition being to strict as it did not accept markup chars as part of a word. The fix involves an additional parser state field, listing the markup chars which might be parsed as part of a word. --- src/Text/Pandoc/Readers/Org.hs | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0ae4d231c..ad66caab9 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -535,8 +535,15 @@ enclosedInlines start end = try $ -- FIXME: This is a hack inlinesEnclosedBy :: Char -> OrgParser Inlines -inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) - (atEnd $ char c) +inlinesEnclosedBy c = try $ do + updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) } + res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) + (atEnd $ char c) + updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st } + return res + where shift xs + | null xs = [] + | otherwise = tail xs enclosedRaw :: OrgParser a -> OrgParser b @@ -561,11 +568,16 @@ atStart p = do -- | succeeds only if we're at the end of a word atEnd :: OrgParser a -> OrgParser a -atEnd p = try $ p <* lookingAtEndOfWord - where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars +atEnd p = try $ do + p <* lookingAtEndOfWord + where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars -postWordChars :: [Char] -postWordChars = "\t\n\r !\"'),-.:?}" +postWordChars :: OrgParser [Char] +postWordChars = do + st <- getState + return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st) + where safeSecond (_:x2:_) = [x2] + safeSecond _ = [] -- FIXME: These functions are hacks and should be replaced endsOnThisOrNextLine :: Char @@ -580,9 +592,10 @@ endsOnThisLine :: [Char] -> ([Char] -> OrgParser ()) -> OrgParser () endsOnThisLine input c doOnOtherLines = do + postWordChars' <- postWordChars case break (`elem` c:"\n") input of (_,'\n':rest) -> doOnOtherLines rest - (_,_:rest@(n:_)) -> if n `elem` postWordChars + (_,_:rest@(n:_)) -> if n `elem` postWordChars' then return () else endsOnThisLine rest c doOnOtherLines _ -> mzero -- cgit v1.2.3