{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {- | Module : Text.Pandoc.Writers.Org Copyright : © 2010-2015 Puneeth Chaganti 2010-2021 John MacFarlane 2016-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel Stability : alpha Portability : portable Conversion of 'Pandoc' documents to Emacs Org-Mode. Org-Mode: -} module Text.Pandoc.Writers.Org (writeOrg) where import Control.Monad.State.Strict import Data.Char (isAlphaNum, isDigit) import Data.List (intersect, intersperse, partition, transpose) import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared data WriterState = WriterState { stNotes :: [[Block]] , stHasMath :: Bool , stOptions :: WriterOptions } type Org = StateT WriterState -- | Convert Pandoc to Org. writeOrg :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeOrg opts document = do let st = WriterState { stNotes = [], stHasMath = False, stOptions = opts } evalStateT (pandocToOrg document) st -- | Return Org representation of document. pandocToOrg :: PandocMonad m => Pandoc -> Org m Text pandocToOrg (Pandoc meta blocks) = do opts <- gets stOptions let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing metadata <- metaToContext opts blockListToOrg (fmap chomp . inlineListToOrg) meta body <- blockListToOrg blocks notes <- gets (reverse . stNotes) >>= notesToOrg hasMath <- gets stHasMath let main = body $+$ notes let context = defField "body" main . defField "math" hasMath $ metadata return $ render colwidth $ case writerTemplate opts of Nothing -> main Just tpl -> renderTemplate tpl context -- | Return Org representation of notes. notesToOrg :: PandocMonad m => [[Block]] -> Org m (Doc Text) notesToOrg notes = vsep <$> zipWithM noteToOrg [1..] notes -- | Return Org representation of a note. noteToOrg :: PandocMonad m => Int -> [Block] -> Org m (Doc Text) noteToOrg num note = do contents <- blockListToOrg note let marker = "[fn:" ++ show num ++ "] " return $ hang (length marker) (text marker) contents -- | Escape special characters for Org. escapeString :: Text -> Text escapeString t | T.all (\c -> c < '\x2013' || c > '\x2026') t = t | otherwise = T.concatMap escChar t where escChar '\x2013' = "--" escChar '\x2014' = "---" escChar '\x2019' = "'" escChar '\x2026' = "..." escChar c = T.singleton c isRawFormat :: Format -> Bool isRawFormat f = f == Format "latex" || f == Format "tex" || f == Format "org" -- | Convert Pandoc block element to Org. blockToOrg :: PandocMonad m => Block -- ^ Block element -> Org m (Doc Text) blockToOrg Null = return empty blockToOrg (Div attr bs) = divToOrg attr bs blockToOrg (Plain inlines) = inlineListToOrg inlines -- title beginning with fig: indicates that the image is a figure blockToOrg (Para [Image attr txt (src,tgt)]) | Just tit <- T.stripPrefix "fig:" tgt = do capt <- if null txt then return empty else ("#+caption: " <>) `fmap` inlineListToOrg txt img <- inlineToOrg (Image attr txt (src,tit)) return $ capt $$ img $$ blankline blockToOrg (Para inlines) = do contents <- inlineListToOrg inlines return $ contents <> blankline blockToOrg (LineBlock lns) = do let splitStanza [] = [] splitStanza xs = case break (== mempty) xs of (l, []) -> [l] (l, _:r) -> l : splitStanza r let joinWithLinefeeds = nowrap . mconcat . intersperse cr let joinWithBlankLines = mconcat . intersperse blankline let prettifyStanza ls = joinWithLinefeeds <$> mapM inlineListToOrg ls contents <- joinWithBlankLines <$> mapM prettifyStanza (splitStanza lns) return $ blankline $$ "#+begin_verse" $$ nest 2 contents $$ "#+end_verse" <> blankline blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+begin_html" $$ nest 2 (literal str) $$ "#+end_html" $$ blankline blockToOrg b@(RawBlock f str) | isRawFormat f = return $ literal str | otherwise = do report $ BlockNotRendered b return empty blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline blockToOrg (Header level attr inlines) = do contents <- inlineListToOrg inlines let headerStr = text $ if level > 999 then " " else replicate level '*' let drawerStr = if attr == nullAttr then empty else cr <> nest (level + 1) (propertiesDrawer attr) return $ headerStr <> " " <> contents <> drawerStr <> cr blockToOrg (CodeBlock (_,classes,kvs) str) = do let startnum = maybe "" (\x -> " " <> trimr x) $ lookup "startFrom" kvs let numberlines = if "numberLines" `elem` classes then if "continuedSourceBlock" `elem` classes then " +n" <> startnum else " -n" <> startnum else "" let at = map pandocLangToOrg classes `intersect` orgLangIdentifiers let (beg, end) = case at of [] -> ("#+begin_example" <> numberlines, "#+end_example") (x:_) -> ("#+begin_src " <> x <> numberlines, "#+end_src") return $ literal beg $$ nest 2 (literal str) $$ text end $$ blankline blockToOrg (BlockQuote blocks) = do contents <- blockListToOrg blocks return $ blankline $$ "#+begin_quote" $$ nest 2 contents $$ "#+end_quote" $$ blankline blockToOrg (Table _ blkCapt specs thead tbody tfoot) = do let (caption', _, _, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot caption'' <- inlineListToOrg caption' let caption = if null caption' then empty else "#+caption: " <> caption'' headers' <- mapM blockListToOrg headers rawRows <- mapM (mapM blockListToOrg) rows let numChars = maybe 0 maximum . nonEmpty . map offset -- FIXME: width is not being used. let widthsInChars = map numChars $ transpose (headers' : rawRows) -- FIXME: Org doesn't allow blocks with height more than 1. let hpipeBlocks blocks = hcat [beg, middle, end] where sep' = vfill " | " beg = vfill "| " end = vfill " |" middle = hcat $ intersperse sep' blocks let makeRow = hpipeBlocks . zipWith lblock widthsInChars let head' = makeRow headers' rows' <- mapM (\row -> do cols <- mapM blockListToOrg row return $ makeRow cols) rows let border ch = char '|' <> char ch <> (hcat . intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> char ch <> char '|' let body = vcat rows' let head'' = if all null headers then empty else head' $$ border '-' return $ head'' $$ body $$ caption $$ blankline blockToOrg (BulletList items) = do contents <- mapM bulletListItemToOrg items -- ensure that sublists have preceding blank line return $ blankline $$ (if isTightList items then vcat else vsep) contents $$ blankline blockToOrg (OrderedList (start, _, delim) items) = do let delim' = case delim of TwoParens -> OneParen x -> x let markers = take (length items) $ orderedListMarkers (start, Decimal, delim') let maxMarkerLength = maybe 0 maximum . nonEmpty $ map T.length markers let markers' = map (\m -> let s = maxMarkerLength - T.length m in m <> T.replicate s " ") markers contents <- zipWithM orderedListItemToOrg markers' items -- ensure that sublists have preceding blank line return $ blankline $$ (if isTightList items then vcat else vsep) contents $$ blankline blockToOrg (DefinitionList items) = do contents <- mapM definitionListItemToOrg items return $ vcat contents $$ blankline -- | Convert bullet list item (list of blocks) to Org. bulletListItemToOrg :: PandocMonad m => [Block] -> Org m (Doc Text) bulletListItemToOrg items = do exts <- gets $ writerExtensions . stOptions contents <- blockListToOrg (taskListItemToOrg exts items) return $ hang 2 "- " contents $$ if endsWithPlain items then cr else blankline -- | Convert ordered list item (a list of blocks) to Org. orderedListItemToOrg :: PandocMonad m => Text -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) -> Org m (Doc Text) orderedListItemToOrg marker items = do exts <- gets $ writerExtensions . stOptions contents <- blockListToOrg (taskListItemToOrg exts items) return $ hang (T.length marker + 1) (literal marker <> space) contents $$ if endsWithPlain items then cr else blankline -- | Convert a list item containing text starting with @U+2610 BALLOT BOX@ -- or @U+2612 BALLOT BOX WITH X@ to org checkbox syntax (e.g. @[X]@). taskListItemToOrg :: Extensions -> [Block] -> [Block] taskListItemToOrg = handleTaskListItem toOrg where toOrg (Str "☐" : Space : is) = Str "[ ]" : Space : is toOrg (Str "☒" : Space : is) = Str "[X]" : Space : is toOrg is = is -- | Convert definition list item (label, list of blocks) to Org. definitionListItemToOrg :: PandocMonad m => ([Inline], [[Block]]) -> Org m (Doc Text) definitionListItemToOrg (label, defs) = do label' <- inlineListToOrg label contents <- vcat <$> mapM blockListToOrg defs return $ hang 2 "- " (label' <> " :: " <> contents) $$ if isTightList defs then cr else blankline -- | Convert list of key/value pairs to Org :PROPERTIES: drawer. propertiesDrawer :: Attr -> Doc Text propertiesDrawer (ident, classes, kv) = let drawerStart = text ":PROPERTIES:" drawerEnd = text ":END:" kv' = if classes == mempty then kv else ("CLASS", T.unwords classes):kv kv'' = if ident == mempty then kv' else ("CUSTOM_ID", ident):kv' properties = vcat $ map kvToOrgProperty kv'' in drawerStart <> cr <> properties <> cr <> drawerEnd where kvToOrgProperty :: (Text, Text) -> Doc Text kvToOrgProperty (key, value) = text ":" <> literal key <> text ": " <> literal value <> cr -- | The different methods to represent a Div block. data DivBlockType = GreaterBlock Text Attr -- ^ Greater block like @center@ or @quote@. | Drawer Text Attr -- ^ Org drawer with of given name; keeps -- key-value pairs. | UnwrappedWithAnchor Text -- ^ Not mapped to other type, only -- identifier is retained (if any). -- | Gives the most suitable method to render a list of blocks -- with attributes. divBlockType :: Attr-> DivBlockType divBlockType (ident, classes, kvs) -- if any class is named "drawer", then output as org :drawer: | ([_], drawerName:classes') <- partition (== "drawer") classes = Drawer drawerName (ident, classes', kvs) -- if any class is either @center@ or @quote@, then use a org block. | (blockName:classes'', classes') <- partition isGreaterBlockClass classes = GreaterBlock blockName (ident, classes' <> classes'', kvs) -- if no better method is found, unwrap div and set anchor | otherwise = UnwrappedWithAnchor ident where isGreaterBlockClass :: Text -> Bool isGreaterBlockClass = (`elem` ["center", "quote"]) . T.toLower -- | Converts a Div to an org-mode element. divToOrg :: PandocMonad m => Attr -> [Block] -> Org m (Doc Text) divToOrg attr bs = do contents <- blockListToOrg bs case divBlockType attr of GreaterBlock blockName attr' -> -- Write as greater block. The ID, if present, is added via -- the #+name keyword; other classes and key-value pairs -- are kept as #+attr_html attributes. return $ blankline $$ attrHtml attr' $$ "#+begin_" <> literal blockName $$ contents $$ "#+end_" <> literal blockName $$ blankline Drawer drawerName (_,_,kvs) -> do -- Write as drawer. Only key-value pairs are retained. let keys = vcat $ map (\(k,v) -> ":" <> literal k <> ":" <> space <> literal v) kvs return $ ":" <> literal drawerName <> ":" $$ cr $$ keys $$ blankline $$ contents $$ blankline $$ text ":END:" $$ blankline UnwrappedWithAnchor ident -> do -- Unwrap the div. All attributes are discarded, except for -- the identifier, which is added as an anchor before the -- div contents. let contents' = if T.null ident then contents else "<<" <> literal ident <> ">>" $$ contents return (blankline $$ contents' $$ blankline) attrHtml :: Attr -> Doc Text attrHtml ("" , [] , []) = mempty attrHtml (ident, classes, kvs) = let name = if T.null ident then mempty else "#+name: " <> literal ident <> cr keyword = "#+attr_html" classKv = ("class", T.unwords classes) kvStrings = map (\(k,v) -> ":" <> k <> " " <> v) (classKv:kvs) in name <> keyword <> ": " <> literal (T.unwords kvStrings) <> cr -- | Convert list of Pandoc block elements to Org. blockListToOrg :: PandocMonad m => [Block] -- ^ List of block elements -> Org m (Doc Text) blockListToOrg blocks = vcat <$> mapM blockToOrg blocks -- | Convert list of Pandoc inline elements to Org. inlineListToOrg :: PandocMonad m => [Inline] -> Org m (Doc Text) inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst) where -- Prevent note refs and list markers from wrapping, see #4171 -- and #7132. fixMarkers [] = [] fixMarkers (Space : x : rest) | shouldFix x = Str " " : x : fixMarkers rest fixMarkers (SoftBreak : x : rest) | shouldFix x = Str " " : x : fixMarkers rest fixMarkers (x : rest) = x : fixMarkers rest shouldFix Note{} = True -- Prevent footnotes shouldFix (Str "-") = True -- Prevent bullet list items shouldFix (Str x) -- Prevent ordered list items | Just (cs, c) <- T.unsnoc x = T.all isDigit cs && (c == '.' || c == ')') shouldFix _ = False -- | Convert Pandoc inline element to Org. inlineToOrg :: PandocMonad m => Inline -> Org m (Doc Text) inlineToOrg (Span (uid, [], []) []) = return $ "<<" <> literal uid <> ">>" inlineToOrg (Span _ lst) = inlineListToOrg lst inlineToOrg (Emph lst) = do contents <- inlineListToOrg lst return $ "/" <> contents <> "/" inlineToOrg (Underline lst) = do contents <- inlineListToOrg lst return $ "_" <> contents <> "_" inlineToOrg (Strong lst) = do contents <- inlineListToOrg lst return $ "*" <> contents <> "*" inlineToOrg (Strikeout lst) = do contents <- inlineListToOrg lst return $ "+" <> contents <> "+" inlineToOrg (Superscript lst) = do contents <- inlineListToOrg lst return $ "^{" <> contents <> "}" inlineToOrg (Subscript lst) = do contents <- inlineListToOrg lst return $ "_{" <> contents <> "}" inlineToOrg (SmallCaps lst) = inlineListToOrg lst inlineToOrg (Quoted SingleQuote lst) = do contents <- inlineListToOrg lst return $ "'" <> contents <> "'" inlineToOrg (Quoted DoubleQuote lst) = do contents <- inlineListToOrg lst return $ "\"" <> contents <> "\"" inlineToOrg (Cite _ lst) = inlineListToOrg lst inlineToOrg (Code _ str) = return $ "=" <> literal str <> "=" inlineToOrg (Str str) = return . literal $ escapeString str inlineToOrg (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath then "\\(" <> literal str <> "\\)" else "\\[" <> literal str <> "\\]" inlineToOrg il@(RawInline f str) | isRawFormat f = return $ literal str | otherwise = do report $ InlineNotRendered il return empty inlineToOrg LineBreak = return (text "\\\\" <> cr) inlineToOrg Space = return space inlineToOrg SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of WrapPreserve -> return cr WrapAuto -> return space WrapNone -> return space inlineToOrg (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink return $ "[[" <> literal (orgPath x) <> "]]" _ -> do contents <- inlineListToOrg txt return $ "[[" <> literal (orgPath src) <> "][" <> contents <> "]]" inlineToOrg (Image _ _ (source, _)) = return $ "[[" <> literal (orgPath source) <> "]]" inlineToOrg (Note contents) = do -- add to notes in state notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } let ref = tshow $ length notes + 1 return $ "[fn:" <> literal ref <> "]" orgPath :: Text -> Text orgPath src = case T.uncons src of Nothing -> "" -- wiki link Just ('#', _) -> src -- internal link _ | isUrl src -> src _ | isFilePath src -> src _ -> "file:" <> src where isFilePath :: Text -> Bool isFilePath cs = any (`T.isPrefixOf` cs) ["/", "./", "../", "file:"] isUrl :: Text -> Bool isUrl cs = let (scheme, path) = T.break (== ':') cs in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme && not (T.null path) -- | Translate from pandoc's programming language identifiers to those used by -- org-mode. pandocLangToOrg :: Text -> Text pandocLangToOrg cs = case cs of "c" -> "C" "commonlisp" -> "lisp" "r" -> "R" "bash" -> "shell" "lillypond" -> "ly" _ -> cs -- | List of language identifiers recognized by org-mode. orgLangIdentifiers :: [Text] orgLangIdentifiers = [ "abc", "asymptote", "awk", "axiom", "C", "cpp", "calc", "clojure","comint" , "coq", "css", "D", "ditaa", "dot", "ebnf", "elixir", "eukleides", "fomus" , "forth", "F90", "gnuplot", "Translate", "groovy", "haskell" , "browser" , "request", "io", "ipython", "J", "java", "js", "julia", "kotlin", "latex" , "ledger", "ly", "lisp", "Flavored", "makefile", "mathematica", "mathomatic" , "matlab", "max", "mongo", "mscgen", "cypher", "Caml", "octave" , "org", "oz" , "perl", "picolisp", "plantuml", "processing", "prolog", "python" , "R" , "rec", "ruby", "sass", "scala", "scheme", "screen", "sed", "shell", "shen" , "sql", "sqlite", "stan", "ML", "stata", "tcl", "typescript", "vala" ]