aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-12-06 23:39:08 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-12-06 23:40:51 -0800
commit51142c6803cab7486dbe962f274174d246e571aa (patch)
tree104627f744bcb738cdec5fd1956bc88330f57edb /src/Text/Pandoc
parent72075423d08e7cd2052f5325a17d52fc1d7682b3 (diff)
downloadpandoc-51142c6803cab7486dbe962f274174d246e571aa.tar.gz
Ipynb reader & writer: properly handle cell "id".
This is passed through if it exists (in Nb4); otherwise the writer will add a random one so that cells all have an "id". Closes #7728.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers/Ipynb.hs22
-rw-r--r--src/Text/Pandoc/Writers/Ipynb.hs58
2 files changed, 56 insertions, 24 deletions
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index e6de29b44..8e742a888 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -77,7 +77,10 @@ cellToBlocks opts lang c = do
let Source ts = cellSource c
let source = mconcat ts
let kvs = jsonMetaToPairs (cellMetadata c)
- let attachments = maybe mempty M.toList $ cellAttachments c
+ let attachments = case cellAttachments c of
+ Nothing -> mempty
+ Just (MimeAttachments m) -> M.toList m
+ let ident = fromMaybe mempty $ cellId c
mapM_ addAttachment attachments
case cellType c of
Ipynb.Markdown -> do
@@ -86,12 +89,12 @@ cellToBlocks opts lang c = do
else do
Pandoc _ bs <- walk fixImage <$> readMarkdown opts source
return bs
- return $ B.divWith ("",["cell","markdown"],kvs)
+ return $ B.divWith (ident,["cell","markdown"],kvs)
$ B.fromList bs
Ipynb.Heading lev -> do
Pandoc _ bs <- readMarkdown opts
(T.replicate lev "#" <> " " <> source)
- return $ B.divWith ("",["cell","markdown"],kvs)
+ return $ B.divWith (ident,["cell","markdown"],kvs)
$ B.fromList bs
Ipynb.Raw -> do
-- we use ipynb to indicate no format given (a wildcard in nbformat)
@@ -108,11 +111,12 @@ cellToBlocks opts lang c = do
"text/restructuredtext" -> "rst"
"text/asciidoc" -> "asciidoc"
_ -> format
- return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source
+ return $ B.divWith (ident,["cell","raw"],kvs)
+ $ B.rawBlock format' source
Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do
outputBlocks <- mconcat <$> mapM outputToBlock outputs
let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec
- return $ B.divWith ("",["cell","code"],kvs') $
+ return $ B.divWith (ident,["cell","code"],kvs') $
B.codeBlockWith ("",[lang],[]) source
<> outputBlocks
@@ -161,7 +165,7 @@ outputToBlock Err{ errName = ename,
-- the output format.
handleData :: PandocMonad m
=> JSONMeta -> MimeBundle -> m B.Blocks
-handleData metadata (MimeBundle mb) =
+handleData (JSONMeta metadata) (MimeBundle mb) =
mconcat <$> mapM dataBlock (M.toList mb)
where
@@ -209,7 +213,7 @@ handleData metadata (MimeBundle mb) =
dataBlock _ = return mempty
jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue
-jsonMetaToMeta = M.map valueToMetaValue
+jsonMetaToMeta (JSONMeta m) = M.map valueToMetaValue m
where
valueToMetaValue :: Value -> MetaValue
valueToMetaValue x@Object{} =
@@ -228,11 +232,11 @@ jsonMetaToMeta = M.map valueToMetaValue
valueToMetaValue Aeson.Null = MetaString ""
jsonMetaToPairs :: JSONMeta -> [(Text, Text)]
-jsonMetaToPairs = M.toList . M.map
+jsonMetaToPairs (JSONMeta m) = M.toList . M.map
(\case
String t
| not (T.all isDigit t)
, t /= "true"
, t /= "false"
-> t
- x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x)
+ x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) $ m
diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs
index 30ef100ad..b81771bb2 100644
--- a/src/Text/Pandoc/Writers/Ipynb.hs
+++ b/src/Text/Pandoc/Writers/Ipynb.hs
@@ -37,6 +37,8 @@ import qualified Data.ByteString.Lazy as BL
import Data.Aeson.Encode.Pretty (Config(..), defConfig,
encodePretty', keyOrder, Indent(Spaces))
import Text.DocLayout (literal)
+import Text.Pandoc.UUID (getRandomUUID)
+import Data.Char (isAscii, isAlphaNum)
writeIpynb :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeIpynb opts d = do
@@ -79,7 +81,7 @@ pandocToNotebook opts (Pandoc meta blocks) = do
let metadata = case fromJSON metadata' of
Error _ -> mempty -- TODO warning here? shouldn't happen
Success x -> x
- cells <- extractCells opts blocks
+ cells <- extractCells nbformat opts blocks
return $ Notebook{
notebookMetadata = metadata
, notebookFormat = nbformat
@@ -97,23 +99,26 @@ addAttachment (Image attr lab (src,tit))
return $ Image attr lab ("attachment:" <> src, tit)
addAttachment x = return x
-extractCells :: PandocMonad m => WriterOptions -> [Block] -> m [Ipynb.Cell a]
-extractCells _ [] = return []
-extractCells opts (Div (_id,classes,kvs) xs : bs)
+extractCells :: PandocMonad m
+ => (Int, Int) -> WriterOptions -> [Block] -> m [Ipynb.Cell a]
+extractCells _ _ [] = return []
+extractCells nbformat opts (Div (ident,classes,kvs) xs : bs)
| "cell" `elem` classes
, "markdown" `elem` classes = do
let meta = pairsToJSONMeta kvs
(newdoc, attachments) <-
runStateT (walkM addAttachment (Pandoc nullMeta xs)) mempty
source <- writeMarkdown opts{ writerTemplate = Nothing } newdoc
+ uuid <- uuidFrom nbformat ident
(Ipynb.Cell{
cellType = Markdown
+ , cellId = uuid
, cellSource = Source $ breakLines $ T.stripEnd source
, cellMetadata = meta
, cellAttachments = if M.null attachments
then Nothing
- else Just attachments } :)
- <$> extractCells opts bs
+ else Just $ MimeAttachments attachments } :)
+ <$> extractCells nbformat opts bs
| "cell" `elem` classes
, "code" `elem` classes = do
let (codeContent, rest) =
@@ -123,14 +128,16 @@ extractCells opts (Div (_id,classes,kvs) xs : bs)
let meta = pairsToJSONMeta kvs
outputs <- catMaybes <$> mapM blockToOutput rest
let exeCount = lookup "execution_count" kvs >>= safeRead
+ uuid <- uuidFrom nbformat ident
(Ipynb.Cell{
cellType = Ipynb.Code {
codeExecutionCount = exeCount
, codeOutputs = outputs
}
+ , cellId = uuid
, cellSource = Source $ breakLines codeContent
, cellMetadata = meta
- , cellAttachments = Nothing } :) <$> extractCells opts bs
+ , cellAttachments = Nothing } :) <$> extractCells nbformat opts bs
| "cell" `elem` classes
, "raw" `elem` classes =
case consolidateAdjacentRawBlocks xs of
@@ -150,33 +157,54 @@ extractCells opts (Div (_id,classes,kvs) xs : bs)
"rst" -> "text/restructuredtext"
"asciidoc" -> "text/asciidoc"
_ -> f
+ uuid <- uuidFrom nbformat ident
(Ipynb.Cell{
cellType = Raw
+ , cellId = uuid
, cellSource = Source $ breakLines raw
, cellMetadata = if format' == "ipynb" -- means no format given
then mempty
- else M.insert "raw_mimetype"
+ else JSONMeta $ M.insert "raw_mimetype"
(Aeson.String format') mempty
- , cellAttachments = Nothing } :) <$> extractCells opts bs
- _ -> extractCells opts bs
-extractCells opts (CodeBlock (_id,classes,kvs) raw : bs)
+ , cellAttachments = Nothing } :) <$> extractCells nbformat opts bs
+ _ -> extractCells nbformat opts bs
+extractCells nbformat opts (CodeBlock (ident,classes,kvs) raw : bs)
| "code" `elem` classes = do
let meta = pairsToJSONMeta kvs
let exeCount = lookup "execution_count" kvs >>= safeRead
+ uuid <- uuidFrom nbformat ident
(Ipynb.Cell{
cellType = Ipynb.Code {
codeExecutionCount = exeCount
, codeOutputs = []
}
+ , cellId = uuid
, cellSource = Source $ breakLines raw
, cellMetadata = meta
- , cellAttachments = Nothing } :) <$> extractCells opts bs
-extractCells opts (b:bs) = do
+ , cellAttachments = Nothing } :) <$> extractCells nbformat opts bs
+extractCells nbformat opts (b:bs) = do
let isCodeOrDiv (CodeBlock (_,cl,_) _) = "code" `elem` cl
isCodeOrDiv (Div (_,cl,_) _) = "cell" `elem` cl
isCodeOrDiv _ = False
let (mds, rest) = break isCodeOrDiv bs
- extractCells opts (Div ("",["cell","markdown"],[]) (b:mds) : rest)
+ extractCells nbformat opts
+ (Div ("",["cell","markdown"],[]) (b:mds) : rest)
+
+-- Return Nothing if nbformat < 4.5.
+-- Otherwise construct a UUID, using the existing identifier
+-- if it is a valid UUID, otherwise constructing a new one.
+uuidFrom :: PandocMonad m => (Int, Int) -> Text -> m (Maybe Text)
+uuidFrom nbformat ident =
+ if nbformat >= (4,5)
+ then
+ if isValidUUID ident
+ then return $ Just ident
+ else Just . T.pack . drop 9 . show <$> getRandomUUID
+ else return Nothing
+ where
+ isValidUUID t = not (T.null t) && T.length t <= 64 &&
+ T.all isValidUUIDChar t
+ isValidUUIDChar c = isAscii c && (isAlphaNum c || c == '-' || c == '_')
blockToOutput :: PandocMonad m => Block -> m (Maybe (Output a))
blockToOutput (Div (_,["output","stream",sname],_) (CodeBlock _ t:_)) =
@@ -229,7 +257,7 @@ extractData bs = do
go (mmap, meta) b = (mmap, meta) <$ report (BlockNotRendered b)
pairsToJSONMeta :: [(Text, Text)] -> JSONMeta
-pairsToJSONMeta kvs =
+pairsToJSONMeta kvs = JSONMeta $
M.fromList [(k, case Aeson.decode (UTF8.fromTextLazy $ TL.fromStrict v) of
Just val -> val
Nothing -> String v)