From 1fde92053fb1763c6af913aa9628827dff9ef899 Mon Sep 17 00:00:00 2001 From: mb21 Date: Sun, 10 Jan 2016 13:30:32 +0100 Subject: LaTeX writer: figure label --- src/Text/Pandoc/Writers/LaTeX.hs | 43 ++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 2dcbf62bf..7b2911bcf 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -408,7 +408,7 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure -blockToLaTeX (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do +blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do inNote <- gets stInNote modify $ \st -> st{ stInMinipage = True, stNotes = [] } capt <- inlineListToLaTeX txt @@ -420,13 +420,14 @@ blockToLaTeX (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do else brackets <$> inlineListToLaTeX (walk deNote txt) img <- inlineToLaTeX (Image attr txt (src,tit)) let footnotes = notesToLaTeX notes + figure <- refLabel ident $ cr <> + "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ + ("\\caption" <> captForLof <> braces capt) $$ + "\\end{figure}" <> cr return $ if inNote -- can't have figures in notes then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" - else "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ - ("\\caption" <> captForLof <> braces capt) $$ - "\\end{figure}" $$ - footnotes + else figure $$ footnotes -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do beamer <- writerBeamer `fmap` gets stOptions @@ -717,9 +718,8 @@ sectionHeader :: Bool -- True for unnumbered -> Int -> [Inline] -> State WriterState Doc -sectionHeader unnumbered ref level lst = do +sectionHeader unnumbered ident level lst = do txt <- inlineListToLaTeX lst - lab <- text `fmap` toLabel ref plain <- stringToLaTeX TextString $ foldl (++) "" $ map stringify lst let noNote (Note _) = Str "" noNote x = x @@ -742,16 +742,6 @@ sectionHeader unnumbered ref level lst = do book <- gets stBook opts <- gets stOptions let level' = if book || writerChapters opts then level - 1 else level - internalLinks <- gets stInternalLinks - let refLabel x = (if ref `elem` internalLinks - then text "\\hypertarget" - <> braces lab - <> braces x - else x) - let headerWith x y = refLabel $ text x <> y <> - if null ref - then empty - else text "\\label" <> braces lab let sectionType = case level' of 0 | writerBeamer opts -> "part" | otherwise -> "chapter" @@ -767,16 +757,31 @@ sectionHeader unnumbered ref level lst = do -- needed for \paragraph, \subparagraph in quote environment -- see http://tex.stackexchange.com/questions/169830/ else empty + stuffing' <- refLabel ident $ text ('\\':sectionType) <> stuffing return $ if level' > 5 then txt - else prefix $$ - headerWith ('\\':sectionType) stuffing + else prefix $$ stuffing' $$ if unnumbered then "\\addcontentsline{toc}" <> braces (text sectionType) <> braces txtNoNotes else empty +-- | Append label to x and wrap in hypertarget +refLabel :: String -> Doc -> State WriterState Doc +refLabel ident x = do + ref <- text `fmap` toLabel ident + internalLinks <- gets stInternalLinks + let hypertarget y = if ident `elem` internalLinks + then text "\\hypertarget" + <> braces ref + <> braces y + else y + label = if null ident + then empty + else text "\\label" <> braces ref + return $ hypertarget $ x <> label + -- | Convert list of inline elements to LaTeX. inlineListToLaTeX :: [Inline] -- ^ Inlines to convert -> State WriterState Doc -- cgit v1.2.3 From 928a05073f87341b753f80a9b92f06dc0a19f466 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 7 Mar 2016 09:16:39 -0800 Subject: Stack-based appveyor setup. --- appveyor.yml | 47 ++++++++++++++--------------------------------- 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 00a1aab34..f2fe828fa 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,38 +1,19 @@ -install: -- cmd: 'git submodule update --init' -- ps: | - choco install haskellplatform -version 2014.2.0.0 -y - # Haskell Platfrom package doesn't update PATH for the current shell instance +cache: +- "c:\\sr" # stack root, short paths == fewer problems - $env:Path += ";${env:ProgramFiles}\Haskell Platform\2014.2.0.0\bin" - $env:Path += ";${env:ProgramFiles}\Haskell Platform\2014.2.0.0\lib\extralibs\bin" - $env:Path += ";${env:ProgramFiles}\Haskell Platform\2014.2.0.0\mingw\bin" - # choco install wixtoolset - cabal sandbox init - $env:Path += ";.\.cabal-sandbox\bin" - cabal update - cabal install --force hsb2hs +build: off -build_script: -- cmd: | - cabal install --force --enable-tests -fembed_data_files +before_test: +- curl -ostack.zip -L --insecure http://www.stackage.org/stack/windows-i386 +- 7z x stack.zip stack.exe -# after_build: -# - cmd: | -# cabal install -fembed_data_files pandoc-citeproc -# strip .\.cabal-sandbox\bin\pandoc.exe -# strip .\.cabal-sandbox\bin\pandoc-citeproc.exe -# .\.cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.html -S README -o README.html -# .\.cabal-sandbox\bin\pandoc.exe -s --template data\templates\default.rtf COPYING -t rtf -S -o COPYING.rtf -# copy COPYRIGHT COPYRIGHT.txt -# for /f "tokens=2 delims= " %%a in ('.\.cabal-sandbox\bin\pandoc --version') do ( set "VERSION=%%a" && exit ) -# if "%VERSION%" == "" ( echo "Error: could not determine version number." && exit /b 1 ) -# cd windows -# echo Creating msi... -# candle -dVERSION=%VERSION% pandoc.wxs -# if %errorlevel% neq 0 exit /b %errorlevel% -# light -sw1076 -ext WixUIExtension -ext WixUtilExtension -out pandoc-%VERSION%-windows.msi pandoc.wixobj +clone_folder: "c:\\stack" +environment: + global: + STACK_ROOT: "c:\\sr" test_script: -- cmd: | - cabal test +- stack setup > nul +# The ugly echo "" hack is to avoid complaints about 0 being an invalid file +# descriptor +- echo "" | stack --no-terminal test -- cgit v1.2.3 From 0b9c54d9f31db88d5cd8e888921dffc2a108f8d4 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 8 Mar 2016 00:27:09 -0500 Subject: Docx reader: update feature checklist. The feature checklist in the source code was out of date. Update. --- src/Text/Pandoc/Readers/Docx.hs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index eb71d8dd8..c399a2174 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -50,8 +50,7 @@ implemented, [-] means partially implemented): * Inlines - [X] Str - - [X] Emph (From italics. `underline` currently read as span. In - future, it might optionally be emph as well) + - [X] Emph (italics and underline both read as Emph) - [X] Strong - [X] Strikeout - [X] Superscript @@ -62,11 +61,10 @@ implemented, [-] means partially implemented): - [X] Code (styled with `VerbatimChar`) - [X] Space - [X] LineBreak (these are invisible in Word: entered with Shift-Return) - - [ ] Math + - [X] Math - [X] Link (links to an arbitrary bookmark create a span with the target as id and "anchor" class) - - [-] Image (Links to path in archive. Future option for - data-encoded URI likely.) + - [X] Image - [X] Note (Footnotes and Endnotes are silently combined.) -} -- cgit v1.2.3 From 6bfaa5ad15d2c3acfc61ddf5ec442ca733016373 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 8 Mar 2016 10:08:14 -0800 Subject: DokuWiki writer: use $$ for display math. --- src/Text/Pandoc/Writers/DokuWiki.hs | 5 ++++- tests/writer.dokuwiki | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index f1088b158..56e2b9027 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -452,8 +452,11 @@ inlineToDokuWiki _ (Code _ str) = inlineToDokuWiki _ (Str str) = return $ escapeString str -inlineToDokuWiki _ (Math _ str) = return $ "$" ++ str ++ "$" +inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped + where delim = case mathType of + DisplayMath -> "$$" + InlineMath -> "$" inlineToDokuWiki _ (RawInline f str) | f == Format "dokuwiki" = return str diff --git a/tests/writer.dokuwiki b/tests/writer.dokuwiki index fe1f8296a..79fcdde8a 100644 --- a/tests/writer.dokuwiki +++ b/tests/writer.dokuwiki @@ -459,7 +459,7 @@ Ellipses…and…and…. * $\alpha \wedge \omega$ * $223$ * $p$-Tree - * Here’s some display math: $\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$ + * Here’s some display math: $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$ * Here’s one that has a line break in it: $\alpha + \omega \times x^2$. These shouldn’t be math: -- cgit v1.2.3 From 4ed64835cb475f3da80ed7b729516c7a90891d94 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 9 Mar 2016 08:33:13 -0800 Subject: Markdown reader: don't cross line boundary parsing pipe table row. Previously an emph element could be parsed across the newline at the end of the pipe table row. I thought this would help with #2765, but it doesn't. --- src/Text/Pandoc/Readers/Markdown.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 587726084..0eeda0fee 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1372,7 +1372,13 @@ sepPipe = try $ do -- parse a row, also returning probable alignments for org-table cells pipeTableRow :: MarkdownParser (F [Blocks]) -pipeTableRow = do +pipeTableRow = try $ do + scanForPipe + raw <- anyLine + parseFromString pipeTableRow' (raw ++ "\n") + +pipeTableRow' :: MarkdownParser (F [Blocks]) +pipeTableRow' = do skipMany spaceChar openPipe <- (True <$ char '|') <|> return False let cell = mconcat <$> -- cgit v1.2.3 From 6e950a8eb5001314869013395c9c72ee05079110 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 9 Mar 2016 08:44:31 -0800 Subject: Markdown reader: allow `+` separators in pipe table cells. We already allowed them in the header, but not in the body rows, for some reason. This gives compatibility with org-mode tables. --- src/Text/Pandoc/Readers/Markdown.hs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 0eeda0fee..6caf1728c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1382,16 +1382,14 @@ pipeTableRow' = do skipMany spaceChar openPipe <- (True <$ char '|') <|> return False let cell = mconcat <$> - many (notFollowedBy (blankline <|> char '|') >> inline) - first <- cell - rest <- many $ sepPipe *> cell + many (notFollowedBy (blankline <|> oneOf "+|") >> inline) + cells <- cell `sepBy1` sepPipe -- surrounding pipes needed for a one-column table: - guard $ not (null rest && not openPipe) + guard $ not (length cells == 1 && not openPipe) optional (char '|') blankline - let cells = sequence (first:rest) return $ do - cells' <- cells + cells' <- sequence cells return $ map (\ils -> case trimInlines ils of -- cgit v1.2.3 From 54a68616d7f9259840fd8a884d806782a73236a9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 9 Mar 2016 10:11:32 -0800 Subject: Markdown reader: Clean up pipe table parsing. --- src/Text/Pandoc/Readers/Markdown.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 6caf1728c..c99838352 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1354,16 +1354,18 @@ pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak + let heads' = take (length aligns) <$> heads lines' <- many pipeTableRow + let lines'' = map (take (length aligns) <$>) lines' let maxlength = maximum $ - map (\x -> length . stringify $ runF x def) (heads : lines') + map (\x -> length . stringify $ runF x def) (heads' : lines'') numColumns <- getOption readerColumns let widths = if maxlength > numColumns then map (\len -> fromIntegral (len + 1) / fromIntegral numColumns) seplengths else replicate (length aligns) 0.0 - return $ (aligns, widths, heads, sequence lines') + return $ (aligns, widths, heads', sequence lines'') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1375,19 +1377,17 @@ pipeTableRow :: MarkdownParser (F [Blocks]) pipeTableRow = try $ do scanForPipe raw <- anyLine - parseFromString pipeTableRow' (raw ++ "\n") + parseFromString pipeTableRow' raw pipeTableRow' :: MarkdownParser (F [Blocks]) pipeTableRow' = do skipMany spaceChar openPipe <- (True <$ char '|') <|> return False - let cell = mconcat <$> - many (notFollowedBy (blankline <|> oneOf "+|") >> inline) - cells <- cell `sepBy1` sepPipe + let cell = mconcat <$> (many (notFollowedBy (char '|') >> inline)) + cells <- cell `sepEndBy1` (char '|') -- surrounding pipes needed for a one-column table: guard $ not (length cells == 1 && not openPipe) - optional (char '|') - blankline + spaces >> eof return $ do cells' <- sequence cells return $ map -- cgit v1.2.3 From 2b55b76ebec87f4d35b2e641e054bd6dfc74be09 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 9 Mar 2016 11:46:00 -0800 Subject: Markdown reader: Improved pipe table parsing. Fixes #2765. Added test case. --- src/Text/Pandoc/Readers/Markdown.hs | 30 +++++++++++++++--------------- tests/pipe-tables.native | 16 +++++++++++++++- tests/pipe-tables.txt | 8 ++++++++ 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index c99838352..b5d175453 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1376,25 +1376,25 @@ sepPipe = try $ do pipeTableRow :: MarkdownParser (F [Blocks]) pipeTableRow = try $ do scanForPipe - raw <- anyLine - parseFromString pipeTableRow' raw - -pipeTableRow' :: MarkdownParser (F [Blocks]) -pipeTableRow' = do skipMany spaceChar openPipe <- (True <$ char '|') <|> return False - let cell = mconcat <$> (many (notFollowedBy (char '|') >> inline)) - cells <- cell `sepEndBy1` (char '|') + -- split into cells + let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') + <|> void (noneOf "|\n\r") + let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>= + parseFromString pipeTableCell + cells <- cellContents `sepEndBy1` (char '|') -- surrounding pipes needed for a one-column table: guard $ not (length cells == 1 && not openPipe) - spaces >> eof - return $ do - cells' <- sequence cells - return $ map - (\ils -> - case trimInlines ils of - ils' | B.isNull ils' -> mempty - | otherwise -> B.plain $ ils') cells' + blankline + return $ sequence cells + +pipeTableCell :: MarkdownParser (F Blocks) +pipeTableCell = do + result <- many inline + if null result + then return mempty + else return $ B.plain . mconcat <$> sequence result pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) pipeTableHeaderPart = try $ do diff --git a/tests/pipe-tables.native b/tests/pipe-tables.native index 6cd37f6ff..63c2c17bc 100644 --- a/tests/pipe-tables.native +++ b/tests/pipe-tables.native @@ -98,4 +98,18 @@ ,Para [Str "Pipe",Space,Str "table",Space,Str "with",Space,Str "no",Space,Str "body:"] ,Table [] [AlignDefault] [0.0] [[Plain [Str "Header"]]] - []] + [] +,Para [Str "Pipe",Space,Str "table",Space,Str "with",Space,Str "tricky",Space,Str "cell",Space,Str "contents",Space,Str "(see",Space,Str "#2765):"] +,Table [] [AlignLeft,AlignRight,AlignRight] [0.0,0.0,0.0] + [[] + ,[Plain [Str "IP_gene8-_1st"]] + ,[Plain [Str "IP_gene8+_1st"]]] + [[[Plain [Str "IP_gene8-_1st"]] + ,[Plain [Str "1.0000000"]] + ,[Plain [Str "0.4357325"]]] + ,[[Plain [Str "IP_gene8+_1st"]] + ,[Plain [Str "0.4357325"]] + ,[Plain [Str "1.0000000"]]] + ,[[Plain [Str "foo",Code ("",[],[]) "bar|baz"]] + ,[Plain [Str "and|escaped"]] + ,[Plain [Str "3.0000000"]]]]] diff --git a/tests/pipe-tables.txt b/tests/pipe-tables.txt index e93f64af9..c27c71113 100644 --- a/tests/pipe-tables.txt +++ b/tests/pipe-tables.txt @@ -72,3 +72,11 @@ Pipe table with no body: | Header | | ------ | +Pipe table with tricky cell contents (see #2765): + +| | IP_gene8-_1st| IP_gene8+_1st| +|:--------------|-------------:|-------------:| +|IP_gene8-_1st | 1.0000000| 0.4357325| +|IP_gene8+_1st | 0.4357325| 1.0000000| +|foo`bar|baz` | and\|escaped | 3.0000000| + -- cgit v1.2.3 From 139fa54d48a878c91f6e56c17ee50e9d589d379f Mon Sep 17 00:00:00 2001 From: mb21 Date: Wed, 9 Mar 2016 23:16:02 +0100 Subject: Docx Writer: handle image alt text closes #2754 --- src/Text/Pandoc/Writers/Docx.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 150e19043..a841e1b66 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1103,7 +1103,7 @@ inlineToOpenXML opts (Link _ txt (src,_)) = do M.insert src i extlinks } return i return [ mknode "w:hyperlink" [("r:id",id')] contents ] -inlineToOpenXML opts (Image attr alt (src, tit)) = do +inlineToOpenXML opts (Image attr alt (src, _)) = do -- first, check to see if we've already done this image pageWidth <- gets stPrintWidth imgs <- gets stImages @@ -1154,7 +1154,7 @@ inlineToOpenXML opts (Image attr alt (src, tit)) = do mknode "wp:inline" [] [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () - , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] () + , mknode "wp:docPr" [("descr",stringify alt),("id","1"),("name","Picture")] () , graphic ] let imgext = case mt >>= extensionFromMimeType of Just x -> '.':x -- cgit v1.2.3 From a485c42d78d8bc819f7ad1bef137d54a324c5ea9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 10 Mar 2016 19:59:55 -0800 Subject: Fixed behavior of base tag. + If the base path does not end with slash, the last component will be replaced. E.g. base = `http://example.com/foo` combines with `bar.html` to give `http://example.com/bar.html`. + If the href begins with a slash, the whole path of the base is replaced. E.g. base = `http://example.com/foo/` combines with `/bar.html` to give `http://example.com/bar.html`. Closes #2777. --- src/Text/Pandoc/Readers/HTML.hs | 28 +++++++++++----------------- tests/Tests/Readers/HTML.hs | 5 ++++- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 69df13aac..959a2d16f 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -63,7 +63,7 @@ import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) -import Network.URI (isURI) +import Network.URI (URI, parseURIReference, nonStrictRelativeTo) import Text.Pandoc.Error import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Pandoc.Compat.Monoid ((<>)) @@ -103,7 +103,7 @@ data HTMLState = HTMLState { parserState :: ParserState, noteTable :: [(String, Blocks)], - baseHref :: Maybe String, + baseHref :: Maybe URI, identifiers :: Set.Set String, headerMap :: M.Map Inlines String } @@ -145,15 +145,9 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag return mempty pBaseTag = do bt <- pSatisfy (~== TagOpen "base" []) - let baseH = fromAttrib "href" bt - if null baseH - then return mempty - else do - let baseH' = case reverse baseH of - '/':_ -> baseH - _ -> baseH ++ "/" - updateState $ \st -> st{ baseHref = Just baseH' } - return mempty + updateState $ \st -> st{ baseHref = + parseURIReference $ fromAttrib "href" bt } + return mempty block :: TagParser Blocks block = do @@ -610,9 +604,9 @@ pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) mbBaseHref <- baseHref <$> getState let url' = fromAttrib "href" tag - let url = case (isURI url', mbBaseHref) of - (False, Just h) -> h ++ url' - _ -> url' + let url = case (parseURIReference url', mbBaseHref) of + (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag let cls = words $ fromAttrib "class" tag @@ -624,9 +618,9 @@ pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") mbBaseHref <- baseHref <$> getState let url' = fromAttrib "src" tag - let url = case (isURI url', mbBaseHref) of - (False, Just h) -> h ++ url' - _ -> url' + let url = case (parseURIReference url', mbBaseHref) of + (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag let uid = fromAttrib "id" tag diff --git a/tests/Tests/Readers/HTML.hs b/tests/Tests/Readers/HTML.hs index 2eb87a2f3..ff27b8aed 100644 --- a/tests/Tests/Readers/HTML.hs +++ b/tests/Tests/Readers/HTML.hs @@ -15,11 +15,14 @@ html = handleError . readHtml def tests :: [Test] tests = [ testGroup "base tag" [ test html "simple" $ - "\"Stickman\"" =?> + "\"Stickman\"" =?> plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman")) , test html "slash at end of base" $ "\"Stickman\"" =?> plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman")) + , test html "slash at beginning of href" $ + "\"Stickman\"" =?> + plain (image "http://www.w3schools.com/stickman.gif" "" (text "Stickman")) , test html "absolute URL" $ "\"Stickman\"" =?> plain (image "http://example.com/stickman.gif" "" (text "Stickman")) -- cgit v1.2.3 From 102ba9ecb869da80fac03480b2dd03a695a4f78c Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 10 Mar 2016 15:19:55 -0500 Subject: Docx Reader: Add state to the parser, for warnings In order to be able to collect warnings during parsing, we add a state monad transformer to the D monad. At the moment, this only includes a list of warning strings (nothing currently triggers them, however). We use StateT instead of WriterT to correspond more closely with the warnings behavior in T.P.Parsing. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index eec8b12c9..e4cfe4930 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -50,6 +50,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , Row(..) , Cell(..) , archiveToDocx + , archiveToDocxWithWarnings ) where import Codec.Archive.Zip import Text.XML.Light @@ -60,6 +61,7 @@ import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Reader +import Control.Monad.State import Control.Applicative ((<|>)) import qualified Data.Map as M import Text.Pandoc.Compat.Except @@ -81,16 +83,20 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show +data ReaderState = ReaderState { stateWarnings :: [String] } + deriving Show + + data DocxError = DocxError | WrongElem deriving Show instance Error DocxError where noMsg = WrongElem -type D = ExceptT DocxError (Reader ReaderEnv) +type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState)) -runD :: D a -> ReaderEnv -> Either DocxError a -runD dx re = runReader (runExceptT dx) re +runD :: D a -> ReaderEnv -> ReaderState -> (Either DocxError a, ReaderState) +runD dx re rs = runState (runReaderT (runExceptT dx) re) rs maybeToD :: Maybe a -> D a maybeToD (Just a) = return a @@ -257,7 +263,10 @@ type Author = String type ChangeDate = String archiveToDocx :: Archive -> Either DocxError Docx -archiveToDocx archive = do +archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive + +archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) +archiveToDocxWithWarnings archive = do let notes = archiveToNotes archive numbering = archiveToNumbering archive rels = archiveToRelationships archive @@ -265,8 +274,12 @@ archiveToDocx archive = do (styles, parstyles) = archiveToStyles archive rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles InDocument - doc <- runD (archiveToDocument archive) rEnv - return $ Docx doc + rState = ReaderState { stateWarnings = [] } + (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState + case eitherDoc of + Right doc -> Right (Docx doc, stateWarnings st) + Left e -> Left e + archiveToDocument :: Archive -> D Document -- cgit v1.2.3 From ee03e954d0d3cb76971c91001348762f55224890 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 12 Mar 2016 10:18:01 -0500 Subject: Add readDocxWithWarnings The regular readDocx just becomes a special case. --- src/Text/Pandoc/Readers/Docx.hs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index c399a2174..604bc20de 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -69,7 +69,8 @@ implemented, [-] means partially implemented): -} module Text.Pandoc.Readers.Docx - ( readDocx + ( readDocxWithWarnings + , readDocx ) where import Codec.Archive.Zip @@ -96,14 +97,22 @@ import qualified Data.Sequence as Seq (null) import Text.Pandoc.Error import Text.Pandoc.Compat.Except +readDocxWithWarnings :: ReaderOptions + -> B.ByteString + -> Either PandocError (Pandoc, MediaBag, [String]) +readDocxWithWarnings opts bytes = + case archiveToDocxWithWarnings (toArchive bytes) of + Right (docx, warnings) -> do + (meta, blks, mediaBag) <- docxToOutput opts docx + return (Pandoc meta blks, mediaBag, warnings) + Left _ -> Left (ParseFailure "couldn't parse docx file") + readDocx :: ReaderOptions -> B.ByteString -> Either PandocError (Pandoc, MediaBag) -readDocx opts bytes = - case archiveToDocx (toArchive bytes) of - Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag)) - <$> (docxToOutput opts docx) - Left _ -> Left (ParseFailure "couldn't parse docx file") +readDocx opts bytes = do + (pandoc, mediaBag, _) <- readDocxWithWarnings opts bytes + return (pandoc, mediaBag) data DState = DState { docxAnchorMap :: M.Map String String , docxMediaBag :: MediaBag -- cgit v1.2.3 From 68fd333ec4475c5a524004bcf2e76a7959dc3afa Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 12 Mar 2016 10:24:39 -0500 Subject: Add a general ByteStringReader with warnings. Have docx reader use it. --- src/Text/Pandoc.hs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 4b2397eb9..d59ee7846 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -223,6 +223,14 @@ mkStringReaderWithWarnings r = StringReader $ \o s -> mkBSReader :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)) -> Reader mkBSReader r = ByteStringReader (\o s -> return $ r o s) +mkBSReaderWithWarnings :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag, [String])) -> Reader +mkBSReaderWithWarnings r = ByteStringReader $ \o s -> + case r o s of + Left err -> return $ Left err + Right (doc, mediaBag, warnings) -> do + mapM_ warn warnings + return $ Right (doc, mediaBag) + -- | Association list of formats and readers. readers :: [(String, Reader)] readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) @@ -243,7 +251,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("latex" , mkStringReader readLaTeX) ,("haddock" , mkStringReader readHaddock) ,("twiki" , mkStringReader readTWiki) - ,("docx" , mkBSReader readDocx) + ,("docx" , mkBSReaderWithWarnings readDocxWithWarnings) ,("odt" , mkBSReader readOdt) ,("t2t" , mkStringReader readTxt2TagsNoMacros) ,("epub" , mkBSReader readEPUB) -- cgit v1.2.3 From 5c055b4cf3cdfac534a74c5c5775aa2d58889150 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 20 Feb 2016 21:27:08 -0500 Subject: Introduce file-scope parsing (parse-before-combine) Traditionally pandoc operates on multiple files by first concetenating them (around extra line breaks) and then processing the joined file. So it only parses a multi-file document at the document scope. This has the benefit that footnotes and links can be in different files, but it also introduces a couple of difficulties: - it is difficult to join files with footnotes without some sort of preprocessing, which makes it difficult to write academic documents in small pieces. - it makes it impossible to process multiple binary input files, which can't be catted. - it makes it impossible to process files from different input formats. This commit introduces alternative method. Instead of catting the files first, it parses the files first, and then combines the parsed output. This makes it impossible to have links across multiple files, and auto-identified headers won't work correctly if headers in multiple files have the same name. On the other hand, footnotes across multiple files will work correctly and will allow more freedom for input formats. Since ByteStringReaders can currently only read one binary file, and will ignore subsequent files, we also changes the behavior to automatically parse before combining if using the ByteStringReader. If we use one file, it will work as normal. If there is more than one file it will combine them after parsing (assuming that the format is the same). Note that this is intended to be an optional method, defaulting to off. Turn it on with `--file-scope`. --- pandoc.hs | 27 +++++++++++++++++++++++---- src/Text/Pandoc/Options.hs | 2 ++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index 72a7592d8..59277690a 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -215,6 +215,7 @@ data Opt = Opt , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media , optTrace :: Bool -- ^ Print debug information , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. + , optFileScope :: Bool -- ^ Parse input files before combining , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX } @@ -278,6 +279,7 @@ defaultOpts = Opt , optExtractMedia = Nothing , optTrace = False , optTrackChanges = AcceptChanges + , optFileScope = False , optKaTeXStylesheet = Nothing , optKaTeXJS = Nothing } @@ -387,6 +389,11 @@ options = "accept|reject|all") "" -- "Accepting or reject MS Word track-changes."" + , Option "" ["file-scope"] + (NoArg + (\opt -> return opt { optFileScope = True })) + "" -- "Parse input files before combining" + , Option "" ["extract-media"] (ReqArg (\arg opt -> @@ -1117,6 +1124,7 @@ convertWithOpts opts args = do , optExtractMedia = mbExtractMedia , optTrace = trace , optTrackChanges = trackChanges + , optFileScope = fileScope , optKaTeXStylesheet = katexStylesheet , optKaTeXJS = katexJS } = opts @@ -1269,6 +1277,7 @@ convertWithOpts opts args = do , readerDefaultImageExtension = defaultImageExtension , readerTrace = trace , readerTrackChanges = trackChanges + , readerFileScope = fileScope } when (not (isTextFormat format) && outputFile == "-") $ @@ -1301,13 +1310,23 @@ convertWithOpts opts args = do then handleIncludes else return . Right - (doc, media) <- fmap handleError $ - case reader of + let sourceToDoc :: [FilePath] -> IO (Pandoc, MediaBag) + sourceToDoc sources' = fmap handleError $ + case reader of StringReader r-> do - srcs <- convertTabs . intercalate "\n" <$> readSources sources + srcs <- convertTabs . intercalate "\n" <$> readSources sources' doc <- handleIncludes' srcs either (return . Left) (\s -> fmap (,mempty) <$> r readerOpts s) doc - ByteStringReader r -> readFiles sources >>= r readerOpts + ByteStringReader r -> readFiles sources' >>= r readerOpts + + -- We parse first if fileScope is set OR if the reader is a + -- BSReader. So, if it's a StringReader AND not fileScope, we + -- don't. + (doc, media) <- case reader of + (StringReader _) | not fileScope -> sourceToDoc sources + _ -> do + pairs <- mapM (\s -> sourceToDoc [s]) sources + return (mconcat $ map fst pairs, mconcat $ map snd pairs) let writerOptions = def { writerStandalone = standalone', writerTemplate = templ, diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 333f499fb..b5736c63d 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -264,6 +264,7 @@ data ReaderOptions = ReaderOptions{ , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrace :: Bool -- ^ Print debugging info , readerTrackChanges :: TrackChanges + , readerFileScope :: Bool -- ^ Parse before combining } deriving (Show, Read, Data, Typeable, Generic) instance Default ReaderOptions @@ -280,6 +281,7 @@ instance Default ReaderOptions , readerDefaultImageExtension = "" , readerTrace = False , readerTrackChanges = AcceptChanges + , readerFileScope = False } -- -- cgit v1.2.3 From c7c4ee46f8c0c72a66d260a3aca00a74f47dc729 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 20 Feb 2016 22:04:29 -0500 Subject: README: Add description of `--file-scope` option. --- README | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README b/README index 7f6feaf6f..13bdfbecf 100644 --- a/README +++ b/README @@ -388,6 +388,14 @@ Reader options require different kinds of images. Currently this option only affects the Markdown and LaTeX readers. +`--file-scope` + +: Parse each file individually before combining for multifile + documents. This will allow footnotes in different files with the + same identifiers to work as expected. If this option is set, + footnotes and links will not work across files. Reading binary + files (docx, odt, epub) implies `--file-scope`. + `--filter=`*EXECUTABLE* : Specify an executable to be used as a filter transforming the -- cgit v1.2.3 From 09b4f294bf4584f0010abc331b7e44bb26189865 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 21 Feb 2016 14:37:31 -0500 Subject: pandoc.hs: Also use filescope for json files. JSON files have metadata and list structure, so they can't be simply catted, but they're useful as intermediate build files in large projects. --- pandoc.hs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index 59277690a..39335785c 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1319,14 +1319,16 @@ convertWithOpts opts args = do either (return . Left) (\s -> fmap (,mempty) <$> r readerOpts s) doc ByteStringReader r -> readFiles sources' >>= r readerOpts - -- We parse first if fileScope is set OR if the reader is a - -- BSReader. So, if it's a StringReader AND not fileScope, we - -- don't. + -- We parse first if (1) fileScope is set, (2), it's a binary + -- reader, or (3) we're reading JSON. This is easier to do of an AND + -- of negatives as opposed to an OR of positives, so we do default + -- parsing if it's a StringReader AND (fileScope is set AND it's not + -- a JSON reader). (doc, media) <- case reader of - (StringReader _) | not fileScope -> sourceToDoc sources - _ -> do - pairs <- mapM (\s -> sourceToDoc [s]) sources - return (mconcat $ map fst pairs, mconcat $ map snd pairs) + (StringReader _) | not fileScope && readerName' /= "json" -> + sourceToDoc sources + _ -> do pairs <- mapM (\s -> sourceToDoc [s]) sources + return (mconcat $ map fst pairs, mconcat $ map snd pairs) let writerOptions = def { writerStandalone = standalone', writerTemplate = templ, -- cgit v1.2.3 From 855c8b43f0497125f8d24b113ce0df92ed7d074b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 16 Mar 2016 12:50:32 -0400 Subject: Docx reader: Don't make numbered heads into lists. Word uses list numbering styles to number its headings. We only call something a numbered list if it does not also heave a heading style. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index e4cfe4930..cbdd86221 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -589,12 +589,14 @@ elemToBodyPart ns element sty <- asks envParStyles let parstyle = elemToParagraphStyle ns element sty parparts <- mapD (elemToParPart ns) (elChildren element) - case pNumInfo parstyle of - Just (numId, lvl) -> do - num <- asks envNumbering - let levelInfo = lookupLevel numId lvl num - return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> return $ Paragraph parstyle parparts + -- Word uses list enumeration for numbered headings, so we only + -- want to infer a list from the styles if it is NOT a heading. + case pHeading parstyle of + Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do + num <- asks envNumbering + let levelInfo = lookupLevel numId lvl num + return $ ListItem parstyle numId lvl levelInfo parparts + _ -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChild (elemName ns "w" "tblPr") element -- cgit v1.2.3 From 7f4a40474c77a72cb66b9f583d241c1f21ef695f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 16 Mar 2016 12:56:17 -0400 Subject: Docx reader: Add test for enumerated headers. We don't want them to turn into a list. --- tests/Tests/Readers/Docx.hs | 6 +++++- tests/docx/enumerated_headings.docx | Bin 0 -> 12539 bytes tests/docx/enumerated_headings.native | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 tests/docx/enumerated_headings.docx create mode 100644 tests/docx/enumerated_headings.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 9284d165a..e09d56529 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -169,9 +169,13 @@ tests = [ testGroup "inlines" "docx/already_auto_ident.docx" "docx/already_auto_ident.native" , testCompare - "numbered headers automatically made into list" + "single numbered item not made into list" "docx/numbered_header.docx" "docx/numbered_header.native" + , testCompare + "enumerated headers not made into numbered list" + "docx/enumerated_headings.docx" + "docx/enumerated_headings.native" , testCompare "i18n blocks (headers and blockquotes)" "docx/i18n_blocks.docx" diff --git a/tests/docx/enumerated_headings.docx b/tests/docx/enumerated_headings.docx new file mode 100644 index 000000000..afa84748a Binary files /dev/null and b/tests/docx/enumerated_headings.docx differ diff --git a/tests/docx/enumerated_headings.native b/tests/docx/enumerated_headings.native new file mode 100644 index 000000000..67c0df5e0 --- /dev/null +++ b/tests/docx/enumerated_headings.native @@ -0,0 +1,4 @@ +[Header 1 ("h1",[],[]) [Str "H1"] +,Header 2 ("h2",[],[]) [Str "H2"] +,Header 3 ("h3",[],[]) [Str "H3"] +,Para [Str "And",Space,Str "some",Space,Str "text"]] -- cgit v1.2.3 From 28c7617f19c4d6dd69e2aa9c904af13e11e4e639 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 18 Mar 2016 09:38:26 -0400 Subject: Docx reader: Handle alternate content Some word functions -- especially graphics -- give various choices for content so there can be backwards compatibility. This follows the largely undocumented feature by working through the choices until we find one that works. Note that we had to split out the processing of child elems of runs into a separate function so we can recurse properly. Any processing of an element *within* a run (other than a plain run) should go into `childElemToRun`. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 51 +++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index cbdd86221..364483929 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -717,36 +717,58 @@ elemToExtent drawingElem = getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem >>= findAttr (QName at Nothing Nothing) >>= safeRead -elemToRun :: NameSpaces -> Element -> D Run -elemToRun ns element - | isElem ns "w" "r" element - , Just drawingElem <- findChild (elemName ns "w" "drawing") element = + +childElemToRun :: NameSpaces -> Element -> D Run +childElemToRun ns element + | isElem ns "w" "drawing" element = let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" - drawing = findElement (QName "blip" (Just a_ns) (Just "a")) drawingElem + drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= - (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent drawingElem) + (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent element) Nothing -> throwError WrongElem -elemToRun ns element - | isElem ns "w" "r" element - , Just ref <- findChild (elemName ns "w" "footnoteReference") element - , Just fnId <- findAttr (elemName ns "w" "id") ref = do +childElemToRun ns element + | isElem ns "w" "footnoteReference" element + , Just fnId <- findAttr (elemName ns "w" "id") element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Footnote bps Nothing -> return $ Footnote [] -elemToRun ns element - | isElem ns "w" "r" element - , Just ref <- findChild (elemName ns "w" "endnoteReference") element - , Just enId <- findAttr (elemName ns "w" "id") ref = do +childElemToRun ns element + | isElem ns "w" "endnoteReference" element + , Just enId <- findAttr (elemName ns "w" "id") element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Endnote bps Nothing -> return $ Endnote [] +childElemToRun _ _ = throwError WrongElem + +elemToRun :: NameSpaces -> Element -> D Run +elemToRun ns element + | isElem ns "w" "r" element + , Just altCont <- findChild (elemName ns "mc" "AlternateContent") element = + do let choices = findChildren (elemName ns "mc" "Choice") altCont + choiceChildren = map head $ filter (not . null) $ map elChildren choices + outputs <- mapD (childElemToRun ns) choiceChildren + case outputs of + r : _ -> return r + [] -> throwError WrongElem +elemToRun ns element + | isElem ns "w" "r" element + , Just drawingElem <- findChild (elemName ns "w" "drawing") element = + childElemToRun ns drawingElem +elemToRun ns element + | isElem ns "w" "r" element + , Just ref <- findChild (elemName ns "w" "footnoteReference") element = + childElemToRun ns ref +elemToRun ns element + | isElem ns "w" "r" element + , Just ref <- findChild (elemName ns "w" "endnoteReference") element = + childElemToRun ns ref elemToRun ns element | isElem ns "w" "r" element = do runElems <- elemToRunElems ns element @@ -955,3 +977,4 @@ elemToRunElems _ _ = throwError WrongElem setFont :: Maybe Font -> ReaderEnv -> ReaderEnv setFont f s = s{envFont = f} + -- cgit v1.2.3 From 8d1c01809e0f45b54584bcd2a93b66a5e44968c1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 18 Mar 2016 14:54:09 -0700 Subject: README: document that --toc works with docx. Closes #2787. --- README | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README b/README index 13bdfbecf..ed407fe9d 100644 --- a/README +++ b/README @@ -545,9 +545,9 @@ General writer options `--toc`, `--table-of-contents` : Include an automatically generated table of contents (or, in - the case of `latex`, `context`, and `rst`, an instruction to create + the case of `latex`, `context`, `docx`, and `rst`, an instruction to create one) in the output document. This option has no effect on `man`, - `docbook`, `slidy`, `slideous`, `s5`, `docx`, or `odt` output. + `docbook`, `slidy`, `slideous`, `s5`, or `odt` output. `--toc-depth=`*NUMBER* -- cgit v1.2.3 From e821b05125c8b18a1c09a5d4fd62ee7483704bbb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 18 Mar 2016 16:16:18 -0700 Subject: LaTeX writer: Avoid double toprule in headerless table with caption. Closes #2742. --- src/Text/Pandoc/Writers/LaTeX.hs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 4e4279ec5..52f525fe7 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -571,18 +571,21 @@ blockToLaTeX (Header level (id',classes,_) lst) = do blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty - else ($$ "\\midrule\n") `fmap` - (tableRowToLaTeX True aligns widths) heads + else do + contents <- (tableRowToLaTeX True aligns widths) heads + return ("\\toprule" $$ contents $$ "\\midrule") let endhead = if all null heads then empty else text "\\endhead" + let endfirsthead = if all null heads + then empty + else text "\\endfirsthead" captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "\\caption" <> braces captionText - <> "\\tabularnewline\n\\toprule\n" - <> headers - <> "\\endfirsthead" + else text "\\caption" <> braces captionText <> "\\tabularnewline" + $$ headers + $$ endfirsthead rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns modify $ \s -> s{ stTable = True } @@ -590,7 +593,7 @@ blockToLaTeX (Table caption aligns widths heads rows) = do braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end $$ capt - $$ "\\toprule" + $$ (if all null heads then "\\toprule" else empty) $$ headers $$ endhead $$ vcat rows' -- cgit v1.2.3 From 976e7e2054c2a4c889c3f02b83fdd092513f22b4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 18 Mar 2016 16:36:56 -0700 Subject: ConTeXt writer: fix whitespace at line beginning in line blocks. Add a `\strut` after `\crlf` before space. Closes #2744, #2745. Thanks to @c-foster. This uses the fix suggested by @c-foster. Mid-line spaces are still not supported, because of limitations of the Markdown parser. --- src/Text/Pandoc/Writers/ConTeXt.hs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 498e2d10f..8d54d62bd 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -279,7 +279,17 @@ blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst -- | Convert list of inline elements to ConTeXt. inlineListToConTeXt :: [Inline] -- ^ Inlines to convert -> State WriterState Doc -inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt lst +inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst + -- We add a \strut after a line break that precedes a space, + -- or the space gets swallowed + where addStruts (LineBreak : s : xs) | isSpacey s = + LineBreak : RawInline (Format "context") "\\strut " : s : + addStruts xs + addStruts (x:xs) = x : addStruts xs + addStruts [] = [] + isSpacey Space = True + isSpacey (Str ('\160':_)) = True + isSpacey _ = False -- | Convert inline element to ConTeXt inlineToConTeXt :: Inline -- ^ Inline to convert -- cgit v1.2.3 From 9d669014a12cf4fc2bcb7d29130b0d0ecefce792 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 18 Mar 2016 22:43:10 -0700 Subject: Updated changelog. --- changelog | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/changelog b/changelog index 7ca45044a..0f4a7de4c 100644 --- a/changelog +++ b/changelog @@ -1,3 +1,149 @@ +pandoc (1.17) + + * Added `--file-scope` option (Jesse Rosenthal). + Traditionally pandoc operates on multiple files by first concatenating + them (around extra line breaks) and then processing the joined file. So + it only parses a multi-file document at the document scope. This has the + benefit that footnotes and links can be in different files, but it also + introduces some difficulties: (a) it is difficult to join files with + footnotes without some sort of preprocessing, which makes it difficult + to write academic documents in small pieces; (b) it makes it impossible + to process multiple binary input files, which can't be catted; (c) it + makes it impossible to process files from different input formats. + The `--file-scope` option causes pandoc to parse the files first, + and then combine the parsed output, instead of combining before + parsing. This makes it impossible to have links across multiple files, + and auto-identified headers won't work correctly if headers in multiple + files have the same name. On the other hand, footnotes across multiple + files will work correctly and will allow more freedom for input formats. + `--file-scope` is selected automatically for binary input files (which + cannot be concatenated anyway) and for pandoc json. + + * Add TEI Writer (csforste) and `tei` output format. + + * Added a general `ByteStringReader` with warnings, used by the docx + reader (API change, Jesse Rosenthal). + + * Add `readDocxWithWarnings` (API change, Jesse Rosenthal). + + * Changed type of Shared.uniqueIdent argument from [String] to Set String. + This avoids performance problems in documents with many identically + named headers (API change, #2671). + + * Removed `tex_math_single_backslash` from `markdown_github` options + (#2707). + + * Make language extensions as well as full language names + trigger syntax highlighting. For example, `py` will now work as + well as `python` (jgm/highlighting-kate#83). + + * Docx reader (Jesse Rosenthal): + + + Handle alternate content. Some word functions (especially graphics) + give various choices for content so there can be backwards compatibility. + + Don't turn numbered headers into lists. + + Docx Reader: Add state to the parser, for warnings + + Update feature checklist in source code. + + Get rid of `Modifiable` typeclass. + + Add tests for adjacent hyperlinks. + + Add a "Link" modifier to `Reducible`. We want to make sure that + links have their spaces removed, and are appropriately smushed + together (#2689). + + * HTML reader: Fixed behavior of base tag (#2777). + + + If the base path does not end with slash, the last component + will be replaced. E.g. base = `http://example.com/foo` + combines with `bar.html` to give `http://example.com/bar.html`. + + If the href begins with a slash, the whole path of the base + is replaced. E.g. base = `http://example.com/foo/` combines + with `/bar.html` to give `http://example.com/bar.html`. + + Rewrote `htmlInBalanced`. This version avoids an exponential + performance problem with `