diff options
Diffstat (limited to 'src/Text/Pandoc')
| -rw-r--r-- | src/Text/Pandoc/MIME.hs | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc/Pretty.hs | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 17 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 4 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 11 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 10 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 20 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 47 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 24 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/ICML.hs | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 13 |
12 files changed, 103 insertions, 51 deletions
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 75b4ff0d2..2fdba93e0 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -328,7 +328,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("oth","application/vnd.oasis.opendocument.text-web") ,("otp","application/vnd.oasis.opendocument.presentation-template") ,("ots","application/vnd.oasis.opendocument.spreadsheet-template") - ,("otf","application/x-font-opentype") + ,("otf","application/vnd.ms-opentype") ,("ott","application/vnd.oasis.opendocument.text-template") ,("oza","application/x-oz-application") ,("p","text/x-pascal") diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 9ee7fe94a..2f2656086 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -534,4 +534,4 @@ charWidth c = -- | Get real length of string, taking into account combining and double-wide -- characters. realLength :: String -> Int -realLength = sum . map charWidth +realLength = foldr (\a b -> charWidth a + b) 0 diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 29b661d10..5fd6b7a81 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -663,17 +663,20 @@ elemToParPart ns element return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttr (elemName ns "w" "anchor") element = do + , Just relId <- findAttr (elemName ns "r" "id") element = do runs <- mapD (elemToRun ns) (elChildren element) - return $ InternalHyperLink anchor runs + rels <- asks envRelationships + case lookupRelationship relId rels of + Just target -> do + case findAttr (elemName ns "w" "anchor") element of + Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs + Nothing -> return $ ExternalHyperLink target runs + Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttr (elemName ns "r" "id") element = do + , Just anchor <- findAttr (elemName ns "w" "anchor") element = do runs <- mapD (elemToRun ns) (elChildren element) - rels <- asks envRelationships - return $ case lookupRelationship relId rels of - Just target -> ExternalHyperLink target runs - Nothing -> ExternalHyperLink "" runs + return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "m" "oMath" element = (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4e0bb375a..2a23f2a62 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -740,7 +740,7 @@ pSpace = many1 (satisfy isSpace) >> return B.space -- eitherBlockOrInline :: [String] -eitherBlockOrInline = ["audio", "applet", "button", "iframe", +eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed", "del", "ins", "progress", "map", "area", "noscript", "script", "object", "svg", "video", "source"] @@ -758,7 +758,7 @@ blockHtmlTags :: [String] blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside", "blockquote", "body", "button", "canvas", "caption", "center", "col", "colgroup", "dd", "dir", "div", - "dl", "dt", "embed", "fieldset", "figcaption", "figure", + "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "isindex", "menu", "noframes", "ol", "output", "p", "pre", diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 9f51e9a8f..9420d602f 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -494,6 +494,7 @@ inlineCommands = M.fromList $ , ("citealp", citation "citealp" NormalCitation False) , ("citealp*", citation "citealp*" NormalCitation False) , ("autocite", citation "autocite" NormalCitation False) + , ("smartcite", citation "smartcite" NormalCitation False) , ("footcite", inNote <$> citation "footcite" NormalCitation False) , ("parencite", citation "parencite" NormalCitation False) , ("supercite", citation "supercite" NormalCitation False) @@ -516,6 +517,7 @@ inlineCommands = M.fromList $ , ("supercites", citation "supercites" NormalCitation True) , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) , ("Autocite", citation "Autocite" NormalCitation False) + , ("Smartcite", citation "Smartcite" NormalCitation False) , ("Footcite", citation "Footcite" NormalCitation False) , ("Parencite", citation "Parencite" NormalCitation False) , ("Supercite", citation "Supercite" NormalCitation False) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 7a3be8291..b8487b4e6 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -117,6 +117,12 @@ isBlank _ = False -- auxiliary functions -- +-- | Succeeds when we're in list context. +inList :: MarkdownParser () +inList = do + ctx <- stateParserContext <$> getState + guard (ctx == ListItemState) + isNull :: F Inlines -> Bool isNull ils = B.isNull $ runF ils def @@ -926,6 +932,8 @@ para = try $ do <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced) <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header) <|> (guardEnabled Ext_lists_without_preceding_blankline >> + -- Avoid creating a paragraph in a nested list. + notFollowedBy' inList >> () <$ lookAhead listStart) <|> do guardEnabled Ext_native_divs inHtmlBlock <- stateInHtmlBlock <$> getState @@ -1610,8 +1618,7 @@ endline = try $ do newline notFollowedBy blankline -- parse potential list-starts differently if in a list: - st <- getState - when (stateParserContext st == ListItemState) $ notFollowedBy listStart + notFollowedBy (inList >> listStart) guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 4c34b7bd5..579e38a38 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -879,18 +879,18 @@ bulletListStart = bulletListStart' Nothing bulletListStart' :: Maybe Int -> OrgParser Int -- returns length of bulletList prefix, inclusive of marker -bulletListStart' Nothing = do ind <- many spaceChar +bulletListStart' Nothing = do ind <- length <$> many spaceChar + when (ind == 0) $ notFollowedBy (char '*') oneOf bullets many1 spaceChar - return $ length ind + 1 + return (ind + 1) -- Unindented lists are legal, but they can't use '*' bullets -- We return n to maintain compatibility with the generic listItem bulletListStart' (Just n) = do count (n-1) spaceChar - oneOf validBullets + when (n == 1) $ notFollowedBy (char '*') + oneOf bullets many1 spaceChar return n - where validBullets = if n == 1 then noAsterisks else bullets - noAsterisks = filter (/= '*') bullets bullets :: String bullets = "*+-" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index e5eccb116..732956981 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -47,7 +47,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import qualified Text.Pandoc.Builder as B import Data.Monoid (mconcat, mempty) import Data.Sequence (viewr, ViewR(..)) -import Data.Char (toLower, isHexDigit) +import Data.Char (toLower, isHexDigit, isSpace) -- | Parse reStructuredText string and return Pandoc document. readRST :: ReaderOptions -- ^ Reader options @@ -335,6 +335,13 @@ indentedBlock = try $ do optional blanklines return $ unlines lns +quotedBlock :: Parser [Char] st [Char] +quotedBlock = try $ do + quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" + lns <- many1 $ lookAhead (char quote) >> anyLine + optional blanklines + return $ unlines lns + codeBlockStart :: Parser [Char] st Char codeBlockStart = string "::" >> blankline >> blankline @@ -342,7 +349,8 @@ codeBlock :: Parser [Char] st Blocks codeBlock = try $ codeBlockStart >> codeBlockBody codeBlockBody :: Parser [Char] st Blocks -codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> indentedBlock +codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> + (indentedBlock <|> quotedBlock) lhsCodeBlock :: RSTParser Blocks lhsCodeBlock = try $ do @@ -513,7 +521,6 @@ directive = try $ do -- TODO: line-block, parsed-literal, table, csv-table, list-table -- date -- include --- class -- title directive' :: RSTParser Blocks directive' = do @@ -594,6 +601,13 @@ directive' = do Just t -> B.link (escapeURI $ trim t) "" $ B.image src "" alt Nothing -> B.image src "" alt + "class" -> do + let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) + -- directive content or the first immediately following element + children <- case body of + "" -> block + _ -> parseFromString parseBlocks body' + return $ B.divWith attrs children _ -> return mempty -- TODO: diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 8740e7cef..5b9cc62ab 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, isPrefixOf, isSuffixOf ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -64,6 +64,7 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) import Data.Maybe (fromMaybe, mapMaybe) +import Data.Char (isDigit) data ListMarker = NoMarker | BulletMarker @@ -105,6 +106,7 @@ data WriterState = WriterState{ , stChangesAuthor :: String , stChangesDate :: String , stPrintWidth :: Integer + , stHeadingStyles :: [(Int,String)] } defaultWriterState :: WriterState @@ -124,6 +126,7 @@ defaultWriterState = WriterState{ , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" , stPrintWidth = 1 + , stHeadingStyles = [] } type WS a = StateT WriterState IO a @@ -205,11 +208,37 @@ writeDocx opts doc@(Pandoc meta _) = do <*> (read <$> mbAttrMarLeft ::Maybe Integer) ) + -- styles + let stylepath = "word/styles.xml" + styledoc <- parseXml refArchive distArchive stylepath + + -- parse styledoc for heading styles + let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . + filter ((==Just "xmlns") . qPrefix . attrKey) . + elAttribs $ styledoc + let headingStyles = + let + mywURI = lookup "w" styleNamespaces + myName name = QName name mywURI (Just "w") + getAttrStyleId = findAttr (myName "styleId") + getNameVal = findChild (myName "name") >=> findAttr (myName "val") + getNum s | not $ null s, all isDigit s = Just (read s :: Int) + | otherwise = Nothing + getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum + getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum + toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId + toMap getF = mapMaybe (toTuple getF) $ + findChildren (myName "style") styledoc + select a b | not $ null a = a + | otherwise = b + in + select (toMap getEngHeader) (toMap getIntHeader) + ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime - , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) } - + , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) + , stHeadingStyles = headingStyles} let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -363,8 +392,6 @@ writeDocx opts doc@(Pandoc meta _) = do -- styles let newstyles = styleToOpenXml $ writerHighlightStyle opts - let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive distArchive stylepath let styledoc' = styledoc{ elContent = elContent styledoc ++ [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' @@ -616,8 +643,9 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do return (header ++ rest) blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do - paraProps <- withParaProp (pStyle $ "Heading" ++ show lev) $ - getParaProps False + headingStyles <- gets stHeadingStyles + paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $ + getParaProps False contents <- inlinesToOpenXML opts lst usedIdents <- gets stSectionIds let bookmarkName = if null ident @@ -687,7 +715,8 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do let mkgridcol w = mknode "w:gridCol" [("w:w", show (floor (textwidth * w) :: Integer))] () return $ - mknode "w:tbl" [] + caption' ++ + [mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","TableNormal")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : @@ -699,7 +728,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do else map mkgridcol widths) : [ mkrow True headers' | not (all null headers) ] ++ map (mkrow False) rows' - ) : caption' + )] blockToOpenXML opts (BulletList lst) = do let marker = BulletMarker addList marker diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 53574711f..2291c7184 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -358,8 +358,9 @@ writeEPUB opts doc@(Pandoc meta _) = do Nothing -> return ([],[]) Just img -> do let coverImage = "media/" ++ takeFileName img - let cpContent = renderHtml $ writeHtml opts' - (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) + let cpContent = renderHtml $ writeHtml + opts'{ writerVariables = ("coverpage","true"):vars } + (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) imgContent <- B.readFile img return ( [mkEntry "cover.xhtml" cpContent] , [mkEntry coverImage imgContent] ) @@ -611,17 +612,14 @@ writeEPUB opts doc@(Pandoc meta _) = do (_:_) -> [unode "ol" ! [("class","toc")] $ subs] let navtag = if epub3 then "nav" else "div" - let navData = UTF8.fromStringLazy $ ppTopElement $ - unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml") - ,("xmlns:epub","http://www.idpf.org/2007/ops")] $ - [ unode "head" $ - [ unode "title" plainTitle - , unode "link" ! [("rel","stylesheet"),("type","text/css"),("href","stylesheet.css")] $ () ] - , unode "body" $ - unode navtag ! [("epub:type","toc") | epub3] $ - [ unode "h1" ! [("id","toc-title")] $ plainTitle - , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1] - ] + let navBlocks = [RawBlock (Format "html") $ ppElement $ + unode navtag ! [("epub:type","toc") | epub3] $ + [ unode "h1" ! [("id","toc-title")] $ plainTitle + , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]] + let navData = renderHtml $ writeHtml opts' + (Pandoc (setMeta "title" + (walk removeNote $ fromList $ docTitle' meta) nullMeta) + navBlocks) let navEntry = mkEntry "nav.xhtml" navData -- mimetype diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index ae20efd4b..181c63df7 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -399,7 +399,7 @@ inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:styl inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"] inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"] -inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst] +inlineToICML opts style (Cite _ lst) = inlinesToICML opts style lst inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str inlineToICML _ style Space = charStyle style space inlineToICML _ style LineBreak = charStyle style $ text lineSeparator diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index a96670c96..5ba4c9983 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -173,7 +173,7 @@ blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do capt <- inlineListToRST txt let fig = "figure:: " <> text src let alt = ":alt: " <> if null tit then capt else text tit - return $ hang 3 ".. " $ fig $$ alt $+$ capt $$ blankline + return $ hang 3 ".. " (fig $$ alt $+$ capt) $$ blankline blockToRST (Para inlines) | LineBreak `elem` inlines = do -- use line block if LineBreaks lns <- mapM inlineListToRST $ splitBy (==LineBreak) inlines @@ -239,8 +239,7 @@ blockToRST (Table caption _ widths headers rows) = do middle = hcat $ intersperse sep' blocks let makeRow = hpipeBlocks . zipWith lblock widthsInChars let head' = makeRow headers' - rows' <- mapM (\row -> do cols <- mapM blockListToRST row - return $ makeRow cols) rows + let rows' = map makeRow rawRows let border ch = char '+' <> char ch <> (hcat $ intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> @@ -253,7 +252,7 @@ blockToRST (Table caption _ widths headers rows) = do blockToRST (BulletList items) = do contents <- mapM bulletListItemToRST items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (OrderedList (start, style', delim) items) = do let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim then take (length items) $ repeat "#." @@ -265,11 +264,11 @@ blockToRST (OrderedList (start, style', delim) items) = do contents <- mapM (\(item, num) -> orderedListItemToRST item num) $ zip markers' items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (DefinitionList items) = do contents <- mapM definitionListItemToRST items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline -- | Convert bullet list item (list of blocks) to RST. bulletListItemToRST :: [Block] -> State WriterState Doc @@ -427,7 +426,7 @@ inlineToRST (Image alternate (source, tit)) = do return $ "|" <> label <> "|" inlineToRST (Note contents) = do -- add to notes in state - notes <- get >>= return . stNotes + notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } let ref = show $ (length notes) + 1 return $ " [" <> text ref <> "]_" |
