diff options
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 68 | ||||
-rw-r--r-- | tests/Tests/Readers/Org.hs | 26 |
3 files changed, 89 insertions, 9 deletions
diff --git a/.gitignore b/.gitignore index 98fe2fd6d..97150be15 100644 --- a/.gitignore +++ b/.gitignore @@ -4,11 +4,13 @@ README.* !README.Debian INSTALL.* .configure-stamp +.cabal-sandbox +cabal.sandbox.config +pandoc.cabal.orig man/man?/*.1 man/man?/*.5 man/man?/*.html *.diff -pandoc.cabal.orig *.o *.hi *.pyc diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0e52bff90..d68ef45ef 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,7 +43,7 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) -import Control.Monad (foldM, guard, liftM, liftM2, when) +import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) import Data.Default @@ -51,6 +51,7 @@ import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M import Data.Maybe (listToMaybe, fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) +import Network.HTTP (urlEncode) -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options @@ -76,6 +77,8 @@ type OrgNoteTable = [OrgNoteRecord] type OrgBlockAttributes = M.Map String String +type OrgLinkFormatters = M.Map String (String -> String) + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions @@ -86,6 +89,7 @@ data OrgParserState = OrgParserState , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos + , orgStateLinkFormatters :: OrgLinkFormatters , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable @@ -113,6 +117,7 @@ defaultOrgParserState = OrgParserState , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing + , orgStateLinkFormatters = M.empty , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] @@ -175,6 +180,13 @@ resetEmphasisNewlines :: OrgParser () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } +addLinkFormat :: String + -> (String -> String) + -> OrgParser () +addLinkFormat key formatter = updateState $ \s -> + let fs = orgStateLinkFormatters s + in s{ orgStateLinkFormatters = M.insert key formatter fs } + addToNotesTable :: OrgNoteRecord -> OrgParser () addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState @@ -423,7 +435,8 @@ specialLine :: OrgParser (F Blocks) specialLine = fmap return . try $ metaLine <|> commentLine metaLine :: OrgParser Blocks -metaLine = try $ metaLineStart *> declarationLine +metaLine = try $ mempty + <$ (metaLineStart *> (optionLine <|> declarationLine)) commentLine :: OrgParser Blocks commentLine = try $ commentLineStart *> anyLine *> pure mempty @@ -436,14 +449,14 @@ metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" commentLineStart :: OrgParser String commentLineStart = try $ mappend <$> many spaceChar <*> string "# " -declarationLine :: OrgParser Blocks +declarationLine :: OrgParser () declarationLine = try $ do key <- metaKey inlinesF <- metaInlines updateState $ \st -> let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta in st { orgStateMeta' = orgStateMeta' st <> meta' } - return mempty + return () metaInlines :: OrgParser (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline @@ -453,6 +466,35 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces +optionLine :: OrgParser () +optionLine = try $ do + key <- metaKey + case key of + "link" -> parseLinkFormat >>= uncurry addLinkFormat + _ -> mzero + +parseLinkFormat :: OrgParser ((String, String -> String)) +parseLinkFormat = try $ do + linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkSubst <- parseFormat + return (linkType, linkSubst) + +-- | An ad-hoc, single-argument-only implementation of a printf-style format +-- parser. +parseFormat :: OrgParser (String -> String) +parseFormat = try $ do + replacePlain <|> replaceUrl <|> justAppend + where + -- inefficient, but who cares + replacePlain = try $ (\x -> concat . flip intersperse x) + <$> sequence [tillSpecifier 's', rest] + replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + <$> sequence [tillSpecifier 'h', rest] + justAppend = try $ (++) <$> rest + + rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + -- -- Headers -- @@ -850,13 +892,15 @@ linkOrImage = explicitOrImageLink explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do char '[' - src <- linkTarget + srcF <- applyCustomLinkFormat =<< linkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ if isImageFilename src && isImageFilename title - then pure $ B.link src "" $ B.image title mempty mempty - else linkToInlinesF src =<< title' + return $ do + src <- srcF + if isImageFilename src && isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do @@ -881,6 +925,14 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +applyCustomLinkFormat :: String -> OrgParser (F String) +applyCustomLinkFormat link = do + let (linkType, rest) = break (== ':') link + return $ do + formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters + return $ maybe link ($ drop 1 rest) formatter + + linkToInlinesF :: String -> Inlines -> F Inlines linkToInlinesF s@('#':_) = pure . B.link s "" linkToInlinesF s diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 96747d148..78684f0f1 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -304,6 +304,32 @@ tests = ] =?> (para (spanWith ("link-here", [], []) mempty <> "Target.") <> para (emph ("See" <> space <> "here!"))) + + , "Link abbreviation" =: + unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s" + , "[[wp:Org_mode][Wikipedia on Org-mode]]" + ] =?> + (para (link "https://en.wikipedia.org/wiki/Org_mode" "" + ("Wikipedia" <> space <> "on" <> space <> "Org-mode"))) + + , "Link abbreviation, defined after first use" =: + unlines [ "[[zl:non-sense][Non-sense articles]]" + , "#+LINK: zl http://zeitlens.com/tags/%s.html" + ] =?> + (para (link "http://zeitlens.com/tags/non-sense.html" "" + ("Non-sense" <> space <> "articles"))) + + , "Link abbreviation, URL encoded arguments" =: + unlines [ "#+link: expl http://example.com/%h/foo" + , "[[expl:Hello, World!][Moin!]]" + ] =?> + (para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!")) + + , "Link abbreviation, append arguments" =: + unlines [ "#+link: expl http://example.com/" + , "[[expl:foo][bar]]" + ] =?> + (para (link "http://example.com/foo" "" "bar")) ] , testGroup "Basic Blocks" $ |