diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2021-12-06 23:39:08 -0800 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2021-12-06 23:40:51 -0800 | 
| commit | 51142c6803cab7486dbe962f274174d246e571aa (patch) | |
| tree | 104627f744bcb738cdec5fd1956bc88330f57edb /src/Text | |
| parent | 72075423d08e7cd2052f5325a17d52fc1d7682b3 (diff) | |
| download | pandoc-51142c6803cab7486dbe962f274174d246e571aa.tar.gz | |
Ipynb reader & writer: properly handle cell "id".
This is passed through if it exists (in Nb4); otherwise
the writer will add a random one so that cells all have
an "id".
Closes #7728.
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Readers/Ipynb.hs | 22 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Ipynb.hs | 58 | 
2 files changed, 56 insertions, 24 deletions
| diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index e6de29b44..8e742a888 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -77,7 +77,10 @@ cellToBlocks opts lang c = do    let Source ts = cellSource c    let source = mconcat ts    let kvs = jsonMetaToPairs (cellMetadata c) -  let attachments = maybe mempty M.toList $ cellAttachments c +  let attachments = case cellAttachments c of +                      Nothing -> mempty +                      Just (MimeAttachments m) -> M.toList m +  let ident = fromMaybe mempty $ cellId c    mapM_ addAttachment attachments    case cellType c of      Ipynb.Markdown -> do @@ -86,12 +89,12 @@ cellToBlocks opts lang c = do                 else do                   Pandoc _ bs <- walk fixImage <$> readMarkdown opts source                   return bs -      return $ B.divWith ("",["cell","markdown"],kvs) +      return $ B.divWith (ident,["cell","markdown"],kvs)               $ B.fromList bs      Ipynb.Heading lev -> do        Pandoc _ bs <- readMarkdown opts          (T.replicate lev "#" <> " " <> source) -      return $ B.divWith ("",["cell","markdown"],kvs) +      return $ B.divWith (ident,["cell","markdown"],kvs)               $ B.fromList bs      Ipynb.Raw -> do        -- we use ipynb to indicate no format given (a wildcard in nbformat) @@ -108,11 +111,12 @@ cellToBlocks opts lang c = do                "text/restructuredtext" -> "rst"                "text/asciidoc"         -> "asciidoc"                _                       -> format -      return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source +      return $ B.divWith (ident,["cell","raw"],kvs) +             $ B.rawBlock format' source      Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do        outputBlocks <- mconcat <$> mapM outputToBlock outputs        let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec -      return $ B.divWith ("",["cell","code"],kvs') $ +      return $ B.divWith (ident,["cell","code"],kvs') $          B.codeBlockWith ("",[lang],[]) source          <> outputBlocks @@ -161,7 +165,7 @@ outputToBlock Err{ errName = ename,  -- the output format.  handleData :: PandocMonad m             => JSONMeta -> MimeBundle -> m B.Blocks -handleData metadata (MimeBundle mb) = +handleData (JSONMeta metadata) (MimeBundle mb) =    mconcat <$> mapM dataBlock (M.toList mb)    where @@ -209,7 +213,7 @@ handleData metadata (MimeBundle mb) =      dataBlock _ = return mempty  jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue -jsonMetaToMeta = M.map valueToMetaValue +jsonMetaToMeta (JSONMeta m) = M.map valueToMetaValue m    where      valueToMetaValue :: Value -> MetaValue      valueToMetaValue x@Object{} = @@ -228,11 +232,11 @@ jsonMetaToMeta = M.map valueToMetaValue      valueToMetaValue Aeson.Null = MetaString ""  jsonMetaToPairs :: JSONMeta -> [(Text, Text)] -jsonMetaToPairs = M.toList . M.map +jsonMetaToPairs (JSONMeta m) = M.toList . M.map    (\case        String t          | not (T.all isDigit t)          , t /= "true"          , t /= "false"                   -> t -      x          -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) +      x          -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) $ m diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs index 30ef100ad..b81771bb2 100644 --- a/src/Text/Pandoc/Writers/Ipynb.hs +++ b/src/Text/Pandoc/Writers/Ipynb.hs @@ -37,6 +37,8 @@ import qualified Data.ByteString.Lazy as BL  import Data.Aeson.Encode.Pretty (Config(..), defConfig,             encodePretty', keyOrder, Indent(Spaces))  import Text.DocLayout (literal) +import Text.Pandoc.UUID (getRandomUUID) +import Data.Char (isAscii, isAlphaNum)  writeIpynb :: PandocMonad m => WriterOptions -> Pandoc -> m Text  writeIpynb opts d = do @@ -79,7 +81,7 @@ pandocToNotebook opts (Pandoc meta blocks) = do    let metadata = case fromJSON metadata' of                     Error _ -> mempty -- TODO warning here? shouldn't happen                     Success x -> x -  cells <- extractCells opts blocks +  cells <- extractCells nbformat opts blocks    return $ Notebook{         notebookMetadata = metadata       , notebookFormat = nbformat @@ -97,23 +99,26 @@ addAttachment (Image attr lab (src,tit))    return $ Image attr lab ("attachment:" <> src, tit)  addAttachment x = return x -extractCells :: PandocMonad m => WriterOptions -> [Block] -> m [Ipynb.Cell a] -extractCells _ [] = return [] -extractCells opts (Div (_id,classes,kvs) xs : bs) +extractCells :: PandocMonad m +             => (Int, Int) -> WriterOptions -> [Block] -> m [Ipynb.Cell a] +extractCells _ _ [] = return [] +extractCells nbformat opts (Div (ident,classes,kvs) xs : bs)    | "cell" `elem` classes    , "markdown" `elem` classes = do        let meta = pairsToJSONMeta kvs        (newdoc, attachments) <-          runStateT (walkM addAttachment (Pandoc nullMeta xs)) mempty        source <- writeMarkdown opts{ writerTemplate = Nothing } newdoc +      uuid <- uuidFrom nbformat ident        (Ipynb.Cell{            cellType = Markdown +        , cellId = uuid          , cellSource = Source $ breakLines $ T.stripEnd source          , cellMetadata = meta          , cellAttachments = if M.null attachments                                 then Nothing -                               else Just attachments } :) -            <$> extractCells opts bs +                               else Just $ MimeAttachments attachments } :) +            <$> extractCells nbformat opts bs    | "cell" `elem` classes    , "code" `elem` classes = do        let (codeContent, rest) = @@ -123,14 +128,16 @@ extractCells opts (Div (_id,classes,kvs) xs : bs)        let meta = pairsToJSONMeta kvs        outputs <- catMaybes <$> mapM blockToOutput rest        let exeCount = lookup "execution_count" kvs >>= safeRead +      uuid <- uuidFrom nbformat ident        (Ipynb.Cell{            cellType = Ipynb.Code {                  codeExecutionCount = exeCount                , codeOutputs = outputs                } +        , cellId = uuid          , cellSource = Source $ breakLines codeContent          , cellMetadata = meta -        , cellAttachments = Nothing } :) <$> extractCells opts bs +        , cellAttachments = Nothing } :) <$> extractCells nbformat opts bs    | "cell" `elem` classes    , "raw" `elem` classes =        case consolidateAdjacentRawBlocks xs of @@ -150,33 +157,54 @@ extractCells opts (Div (_id,classes,kvs) xs : bs)                    "rst"      -> "text/restructuredtext"                    "asciidoc" -> "text/asciidoc"                    _          -> f +          uuid <- uuidFrom nbformat ident            (Ipynb.Cell{                cellType = Raw +            , cellId = uuid              , cellSource = Source $ breakLines raw              , cellMetadata = if format' == "ipynb" -- means no format given                                  then mempty -                                else M.insert "raw_mimetype" +                                else JSONMeta $ M.insert "raw_mimetype"                                         (Aeson.String format') mempty -            , cellAttachments = Nothing } :) <$> extractCells opts bs -        _ -> extractCells opts bs -extractCells opts (CodeBlock (_id,classes,kvs) raw : bs) +            , cellAttachments = Nothing } :) <$> extractCells nbformat opts bs +        _ -> extractCells nbformat opts bs +extractCells nbformat opts (CodeBlock (ident,classes,kvs) raw : bs)    | "code" `elem` classes = do        let meta = pairsToJSONMeta kvs        let exeCount = lookup "execution_count" kvs >>= safeRead +      uuid <- uuidFrom nbformat ident        (Ipynb.Cell{            cellType = Ipynb.Code {                  codeExecutionCount = exeCount                , codeOutputs = []                } +        , cellId = uuid          , cellSource = Source $ breakLines raw          , cellMetadata = meta -        , cellAttachments = Nothing } :) <$> extractCells opts bs -extractCells opts (b:bs) = do +        , cellAttachments = Nothing } :) <$> extractCells nbformat opts bs +extractCells nbformat opts (b:bs) = do        let isCodeOrDiv (CodeBlock (_,cl,_) _) = "code" `elem` cl            isCodeOrDiv (Div (_,cl,_) _)       = "cell" `elem` cl            isCodeOrDiv _                      = False        let (mds, rest) = break isCodeOrDiv bs -      extractCells opts (Div ("",["cell","markdown"],[]) (b:mds) : rest) +      extractCells nbformat opts +        (Div ("",["cell","markdown"],[]) (b:mds) : rest) + +-- Return Nothing if nbformat < 4.5. +-- Otherwise construct a UUID, using the existing identifier +-- if it is a valid UUID, otherwise constructing a new one. +uuidFrom :: PandocMonad m => (Int, Int) -> Text -> m (Maybe Text) +uuidFrom nbformat ident = +  if nbformat >= (4,5) +     then +       if isValidUUID ident +          then return $ Just ident +          else Just . T.pack . drop 9 . show <$> getRandomUUID +     else return Nothing + where +  isValidUUID t = not (T.null t) && T.length t <= 64 && +                  T.all isValidUUIDChar t +  isValidUUIDChar c = isAscii c && (isAlphaNum c || c == '-' || c == '_')  blockToOutput :: PandocMonad m => Block -> m (Maybe (Output a))  blockToOutput (Div (_,["output","stream",sname],_) (CodeBlock _ t:_)) = @@ -229,7 +257,7 @@ extractData bs = do      go (mmap, meta) b = (mmap, meta) <$ report (BlockNotRendered b)  pairsToJSONMeta :: [(Text, Text)] -> JSONMeta -pairsToJSONMeta kvs = +pairsToJSONMeta kvs = JSONMeta $    M.fromList [(k, case Aeson.decode (UTF8.fromTextLazy $ TL.fromStrict v) of                             Just val -> val                             Nothing  -> String v) | 
