From ac06ca2b00f1c0b25b02b1e25aca8dd28961240d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 27 Oct 2010 18:22:45 -0700 Subject: Changes to use citeproc 0.3. Patch from Andrea Rossato. Note: the markdown syntax is preliminary and will probably change. --- src/Text/Pandoc/Biblio.hs | 75 ++++++++++++++++++++++++++++++++----- src/Text/Pandoc/Definition.hs | 16 +++++++- src/Text/Pandoc/Readers/Markdown.hs | 32 ++++++++++------ 3 files changed, 100 insertions(+), 23 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 436eadd68..cbf6191f8 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Copyright : Copyright (C) 2008 Andrea Rossato License : GNU GPL, version 2 or above - Maintainer : Andrea Rossato + Maintainer : Andrea Rossato Stability : alpha Portability : portable -} @@ -31,7 +31,9 @@ module Text.Pandoc.Biblio ( processBiblio ) where import Control.Monad ( when ) import Data.List -import Text.CSL +import Data.Unique +import Text.CSL hiding ( Cite(..), Citation(..) ) +import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition -- | Process a 'Pandoc' document by adding citations formatted @@ -42,25 +44,78 @@ processBiblio cf r p else do when (null cf) $ error "Missing the needed citation style file" csl <- readCSLFile cf - let groups = queryWith getCite p - result = citeproc csl r groups + p' <- if styleClass csl == "note" + then processNote p + else processWithM setHash p + let groups = if styleClass csl /= "note" + then queryWith getCitation p' + else getNoteCitations p' + result = citeproc' csl r (setNearNote csl $ map (map toCslCite) groups) cits_map = zip groups (citations result) biblioList = map (read . renderPandoc' csl) (bibliography result) - Pandoc m b = processWith (processCite csl cits_map) p + Pandoc m b = processWith (processCite csl cits_map) p' return $ Pandoc m $ b ++ biblioList -- | Substitute 'Cite' elements with formatted citations. -processCite :: Style -> [([Target],[FormattedOutput])] -> Inline -> Inline +processCite :: Style -> [([Citation],[FormattedOutput])] -> Inline -> Inline processCite s cs il | Cite t _ <- il = Cite t (process t) | otherwise = il where - process t = case elemIndex t (map fst cs) of - Just i -> read . renderPandoc s $ snd (cs !! i) + process t = case lookup t cs of + Just i -> read $ renderPandoc s i Nothing -> [Str ("Error processing " ++ show t)] -- | Retrieve all citations from a 'Pandoc' docuument. To be used with -- 'queryWith'. -getCite :: Inline -> [[(String,String)]] -getCite i | Cite t _ <- i = [t] +getCitation :: Inline -> [[Citation]] +getCitation i | Cite t _ <- i = [t] + | otherwise = [] + +getNote :: Inline -> [Inline] +getNote i | Note _ <- i = [i] + | otherwise = [] + +getCite :: Inline -> [Inline] +getCite i | Cite _ _ <- i = [i] | otherwise = [] + +getNoteCitations :: Pandoc -> [[Citation]] +getNoteCitations + = let cits = concat . flip (zipWith $ setCiteNoteNum) [1..] . + map (queryWith getCite) . queryWith getNote + in queryWith getCitation . cits + +setHash :: Citation -> IO Citation +setHash (Citation i p l nn ao na _) + = hashUnique `fmap` newUnique >>= return . Citation i p l nn ao na + +processNote :: Pandoc -> IO Pandoc +processNote p = do + p' <- processWithM setHash p + let cits = queryWith getCite p' + ncits = map (queryWith getCite) $ queryWith getNote p' + needNote = cits \\ concat ncits + return $ processWith (mvCiteInNote needNote) p' + +mvCiteInNote :: [Inline] -> Inline -> Inline +mvCiteInNote is i = if i `elem` is then Note [Para [i]] else i + +setCiteNoteNum :: [Inline] -> Int -> [Inline] +setCiteNoteNum ((Cite cs o):xs) n = Cite (setCitationNoteNum n cs) o : setCiteNoteNum xs n +setCiteNoteNum _ _ = [] + +setCitationNoteNum :: Int -> [Citation] -> [Citation] +setCitationNoteNum i = map $ \c -> c { citationNoteNum = i} + +toCslCite :: Citation -> CSL.Cite +toCslCite (Citation i p l nn ao na _) + = let (la,lo) = parseLocator l + in emptyCite { CSL.citeId = i + , CSL.citePrefix = p + , CSL.citeLabel = la + , CSL.citeLocator = lo + , CSL.citeNoteNumber = show nn + , CSL.authorOnly = ao + , CSL.suppressAuthor = na + } diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index fffca3b2e..bec216b5d 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -112,7 +112,7 @@ data Inline | Subscript [Inline] -- ^ Subscripted text (list of inlines) | SmallCaps [Inline] -- ^ Small caps text (list of inlines) | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines) - | Cite [Target] [Inline] -- ^ Citation (list of inlines) + | Cite [Citation] [Inline] -- ^ Citation (list of inlines) | Code String -- ^ Inline code (literal) | Space -- ^ Inter-word space | EmDash -- ^ Em dash @@ -129,6 +129,20 @@ data Inline | Note [Block] -- ^ Footnote or endnote deriving (Show, Eq, Ord, Read, Typeable, Data) +data Citation = Citation { citationId :: String + , citationPrefix :: String + , citationLocator :: String + , citationNoteNum :: Int + , citationAutOnly :: Bool + , citationNoAut :: Bool + , citationHash :: Int + } + deriving (Show, Ord, Read, Typeable, Data) + +instance Eq Citation where + (==) (Citation _ _ _ _ _ _ ha) + (Citation _ _ _ _ _ _ hb) = ha == hb + -- | Applies a transformation on @a@s to matching elements in a @b@. processWith :: (Data a, Data b) => (a -> a) -> b -> b processWith f = everywhere (mkT f) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 8c6a90edb..030da9167 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1316,27 +1316,35 @@ inlineCitation = try $ do then return $ Cite citations [] else fail "no citation found" -chkCit :: Target -> GenParser Char ParserState (Maybe Target) +chkCit :: Citation -> GenParser Char ParserState (Maybe Citation) chkCit t = do st <- getState - case lookupKeySrc (stateKeys st) (Key [Str $ fst t]) of + case lookupKeySrc (stateKeys st) (Key [Str $ citationId t]) of Just _ -> fail "This is a link" - Nothing -> if elem (fst t) $ stateCitations st + Nothing -> if elem (citationId t) $ stateCitations st then return $ Just t else return $ Nothing citeMarker :: GenParser Char ParserState String citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']') -parseCitation :: GenParser Char ParserState [(String,String)] -parseCitation = try $ sepBy (parseLabel) (oneOf ";") +parseCitation :: GenParser Char ParserState [Citation] +parseCitation = try $ sepBy (parseLabel) (skipMany1 $ char ';') -parseLabel :: GenParser Char ParserState (String,String) +parseLabel :: GenParser Char ParserState Citation parseLabel = try $ do - res <- sepBy (skipSpaces >> optional newline >> skipSpaces >> many1 (noneOf "@;")) (oneOf "@") - case res of - [lab,loc] -> return (lab, loc) - [lab] -> return (lab, "" ) - _ -> return ("" , "" ) - + r <- many (noneOf ";") + let t' s = if s /= [] then tail s else [] + trim = unwords . words + pref = takeWhile (/= '@') r + rest = t' $ dropWhile (/= '@') r + cit = takeWhile (/= ',') rest + loc = t' $ dropWhile (/= ',') rest + (p,na) = if pref /= [] && last pref == '-' + then (init pref, True ) + else (pref , False) + (p',o) = if p /= [] && last p == '+' + then (init p , True ) + else (p , False) + return $ Citation cit (trim p') (trim loc) 0 o na 0 #endif -- cgit v1.2.3 From 075840231bf6ab63d032e39651286e4fee5aa555 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 2 Nov 2010 21:10:33 -0700 Subject: Improve footnote generation of in-text citations w/ note styles. Patch from Andrea Rossato. --- src/Text/Pandoc/Biblio.hs | 142 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 118 insertions(+), 24 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index cbf6191f8..d4b72c9ad 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -30,6 +30,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA module Text.Pandoc.Biblio ( processBiblio ) where import Control.Monad ( when ) +import Data.Char ( toUpper, isPunctuation ) import Data.List import Data.Unique import Text.CSL hiding ( Cite(..), Citation(..) ) @@ -44,17 +45,18 @@ processBiblio cf r p else do when (null cf) $ error "Missing the needed citation style file" csl <- readCSLFile cf - p' <- if styleClass csl == "note" - then processNote p - else processWithM setHash p - let groups = if styleClass csl /= "note" - then queryWith getCitation p' - else getNoteCitations p' - result = citeproc' csl r (setNearNote csl $ map (map toCslCite) groups) - cits_map = zip groups (citations result) + p' <- processWithM setHash p + let (nts,grps) = if styleClass csl /= "note" + then (,) [] $ queryWith getCitation p' + else let cits = queryWith getCite p' + ncits = map (queryWith getCite) $ queryWith getNote p' + needNt = cits \\ concat ncits + in (,) needNt $ getNoteCitations needNt p' + result = citeproc' csl r (setNearNote csl $ map (map toCslCite) grps) + cits_map = zip grps (citations result) biblioList = map (read . renderPandoc' csl) (bibliography result) Pandoc m b = processWith (processCite csl cits_map) p' - return $ Pandoc m $ b ++ biblioList + return . generateNotes nts . Pandoc m $ b ++ biblioList -- | Substitute 'Cite' elements with formatted citations. processCite :: Style -> [([Citation],[FormattedOutput])] -> Inline -> Inline @@ -70,7 +72,7 @@ processCite s cs il -- 'queryWith'. getCitation :: Inline -> [[Citation]] getCitation i | Cite t _ <- i = [t] - | otherwise = [] + | otherwise = [] getNote :: Inline -> [Inline] getNote i | Note _ <- i = [i] @@ -80,26 +82,118 @@ getCite :: Inline -> [Inline] getCite i | Cite _ _ <- i = [i] | otherwise = [] -getNoteCitations :: Pandoc -> [[Citation]] -getNoteCitations - = let cits = concat . flip (zipWith $ setCiteNoteNum) [1..] . - map (queryWith getCite) . queryWith getNote - in queryWith getCitation . cits +getNoteCitations :: [Inline] -> Pandoc -> [[Citation]] +getNoteCitations needNote + = let mvCite i = if i `elem` needNote then Note [Para [i]] else i + setNote = processWith mvCite + getCits = concat . flip (zipWith $ setCiteNoteNum) [1..] . + map (queryWith getCite) . queryWith getNote . setNote + in queryWith getCitation . getCits setHash :: Citation -> IO Citation setHash (Citation i p l nn ao na _) = hashUnique `fmap` newUnique >>= return . Citation i p l nn ao na -processNote :: Pandoc -> IO Pandoc -processNote p = do - p' <- processWithM setHash p - let cits = queryWith getCite p' - ncits = map (queryWith getCite) $ queryWith getNote p' - needNote = cits \\ concat ncits - return $ processWith (mvCiteInNote needNote) p' +generateNotes :: [Inline] -> Pandoc -> Pandoc +generateNotes needNote = processWith (mvCiteInNote needNote) -mvCiteInNote :: [Inline] -> Inline -> Inline -mvCiteInNote is i = if i `elem` is then Note [Para [i]] else i +procInlines :: ([Inline] -> [Inline]) -> Block -> Block +procInlines f b + | Plain inls <- b = Plain $ f inls + | Para inls <- b = Para $ f inls + | Header i inls <- b = Header i $ f inls + | otherwise = b + +mvCiteInNote :: [Inline] -> Block -> Block +mvCiteInNote is = procInlines mvCite + where + elem_ x xs = case x of Cite cs _ -> (Cite cs []) `elem` xs; _ -> False + mvCite :: [Inline] -> [Inline] + mvCite inls + | x:i:xs <- inls + , x == Space, i `elem_` is = mvInNote i : mvCite xs + | i:xs <- inls, i `elem_` is = mvInNote i : mvCite xs + | i:xs <- inls, Note _ <- i = checkNt i : mvCite xs + | i:xs <- inls = i : mvCite xs + | otherwise = [] + mvInNote i + | Cite t o <- i = Note [Para [Cite t $ toCapital o]] + | otherwise = Note [Para [i ]] + checkPt i + | Cite c o : xs <- i + , headInline xs == lastInline o + , isPunct o = Cite c (initInline o) : checkPt xs + | x:xs <- i = x : checkPt xs + | otherwise = [] + isPunct = and . map isPunctuation . lastInline + checkNt = processWith $ procInlines checkPt + +headInline :: [Inline] -> String +headInline [] = [] +headInline (i:_) + | Str s <- i = head' s + | Space <- i = " " + | otherwise = headInline $ getInline i + where + head' s = if s /= [] then [head s] else [] + +lastInline :: [Inline] -> String +lastInline [] = [] +lastInline (i:[]) + | Str s <- i = last' s + | Space <- i = " " + | otherwise = lastInline $ getInline i + where + last' s = if s /= [] then [last s] else [] +lastInline (_:xs) = lastInline xs + +initInline :: [Inline] -> [Inline] +initInline [] = [] +initInline (i:[]) + | Str s <- i = return $ Str (init' s) + | Emph is <- i = return $ Emph (initInline is) + | Strong is <- i = return $ Strong (initInline is) + | Strikeout is <- i = return $ Strikeout (initInline is) + | Superscript is <- i = return $ Superscript (initInline is) + | Subscript is <- i = return $ Subscript (initInline is) + | Quoted q is <- i = return $ Quoted q (initInline is) + | SmallCaps is <- i = return $ SmallCaps (initInline is) + | Link is t <- i = return $ Link (initInline is) t + | otherwise = [] + where + init' s = if s /= [] then init s else [] +initInline (i:xs) = i : initInline xs + +toCapital :: [Inline] -> [Inline] +toCapital = mapHeadInline toCap + where + toCap s = if s /= [] then toUpper (head s) : tail s else [] + +mapHeadInline :: (String -> String) -> [Inline] -> [Inline] +mapHeadInline _ [] = [] +mapHeadInline f (i:xs) + | Str s <- i = Str (f s) : xs + | Emph is <- i = Emph (mapHeadInline f is) : xs + | Strong is <- i = Strong (mapHeadInline f is) : xs + | Strikeout is <- i = Strikeout (mapHeadInline f is) : xs + | Superscript is <- i = Superscript (mapHeadInline f is) : xs + | Subscript is <- i = Subscript (mapHeadInline f is) : xs + | Quoted q is <- i = Quoted q (mapHeadInline f is) : xs + | SmallCaps is <- i = SmallCaps (mapHeadInline f is) : xs + | Link is t <- i = Link (mapHeadInline f is) t : xs + | otherwise = [] + +getInline :: Inline -> [Inline] +getInline i + | Emph is <- i = is + | Strong is <- i = is + | Strikeout is <- i = is + | Superscript is <- i = is + | Subscript is <- i = is + | Quoted _ is <- i = is + | SmallCaps is <- i = is + | Link is _ <- i = is + | otherwise = [] setCiteNoteNum :: [Inline] -> Int -> [Inline] setCiteNoteNum ((Cite cs o):xs) n = Cite (setCitationNoteNum n cs) o : setCiteNoteNum xs n -- cgit v1.2.3 From 5e1dc6adda5b11a1d5e51dcccae4801f60b64f4b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 3 Nov 2010 12:58:29 -0700 Subject: Biblio: Improve footnote generation. Patch from Andrea Rossato. --- src/Text/Pandoc/Biblio.hs | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index d4b72c9ad..8a9b21b4e 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -107,26 +107,35 @@ procInlines f b mvCiteInNote :: [Inline] -> Block -> Block mvCiteInNote is = procInlines mvCite where - elem_ x xs = case x of Cite cs _ -> (Cite cs []) `elem` xs; _ -> False mvCite :: [Inline] -> [Inline] mvCite inls + | x:i:xs <- inls, startWPt xs + , x == Space, i `elem_` is = split i xs ++ mvCite (tailInline xs) | x:i:xs <- inls - , x == Space, i `elem_` is = mvInNote i : mvCite xs - | i:xs <- inls, i `elem_` is = mvInNote i : mvCite xs - | i:xs <- inls, Note _ <- i = checkNt i : mvCite xs - | i:xs <- inls = i : mvCite xs + , x == Space, i `elem_` is = mvInNote i : mvCite xs + | i:xs <- inls, i `elem_` is + , startWPt xs = split i xs ++ mvCite (tailInline xs) + | i:xs <- inls, Note _ <- i = checkNt i : mvCite xs + | i:xs <- inls = i : mvCite xs | otherwise = [] + elem_ x xs = case x of Cite cs _ -> (Cite cs []) `elem` xs; _ -> False + split i xs = Str (headInline xs) : mvInNote i : [] mvInNote i - | Cite t o <- i = Note [Para [Cite t $ toCapital o]] - | otherwise = Note [Para [i ]] + | Cite t o <- i = Note [Para [Cite t $ sanitize o]] + | otherwise = Note [Para [i ]] + sanitize i + | endWPt i = toCapital i + | otherwise = toCapital (i ++ [Str "."]) + checkPt i | Cite c o : xs <- i , headInline xs == lastInline o - , isPunct o = Cite c (initInline o) : checkPt xs + , endWPt o = Cite c (initInline o) : checkPt xs | x:xs <- i = x : checkPt xs | otherwise = [] - isPunct = and . map isPunctuation . lastInline - checkNt = processWith $ procInlines checkPt + endWPt = and . map isPunctuation . lastInline + startWPt = and . map isPunctuation . headInline + checkNt = processWith $ procInlines checkPt headInline :: [Inline] -> String headInline [] = [] @@ -164,6 +173,11 @@ initInline (i:[]) init' s = if s /= [] then init s else [] initInline (i:xs) = i : initInline xs +tailInline :: [Inline] -> [Inline] +tailInline = mapHeadInline tail' + where + tail' s = if s /= [] then tail s else [] + toCapital :: [Inline] -> [Inline] toCapital = mapHeadInline toCap where -- cgit v1.2.3 From 5871c4d51f0614ca82c12f6289f8241dfa209e4f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 4 Nov 2010 09:11:15 -0700 Subject: Biblio: small fix to detection of punctuation (A. Rossato). --- src/Text/Pandoc/Biblio.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 8a9b21b4e..16215505e 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -30,7 +30,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA module Text.Pandoc.Biblio ( processBiblio ) where import Control.Monad ( when ) -import Data.Char ( toUpper, isPunctuation ) +import Data.Char ( toUpper ) import Data.List import Data.Unique import Text.CSL hiding ( Cite(..), Citation(..) ) @@ -129,12 +129,12 @@ mvCiteInNote is = procInlines mvCite checkPt i | Cite c o : xs <- i - , headInline xs == lastInline o + , endWPt o, startWPt xs , endWPt o = Cite c (initInline o) : checkPt xs | x:xs <- i = x : checkPt xs | otherwise = [] - endWPt = and . map isPunctuation . lastInline - startWPt = and . map isPunctuation . headInline + endWPt = and . map (`elem` ".,;:!?") . lastInline + startWPt = and . map (`elem` ".,;:!?") . headInline checkNt = processWith $ procInlines checkPt headInline :: [Inline] -> String -- cgit v1.2.3 From db037418477d9b85be15bc8f76b0ebc016f03668 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 5 Nov 2010 17:06:15 -0700 Subject: Removed Text.Pandoc.Definition, bump version to 1.7. We now get Text.Pandoc.Definition from the new pandoc-types package. This will make it possible for other programs to supply output in Pandoc format, without depending on the whole pandoc package. --- pandoc.cabal | 6 +- src/Text/Pandoc/Definition.hs | 165 ------------------------------------------ 2 files changed, 3 insertions(+), 168 deletions(-) delete mode 100644 src/Text/Pandoc/Definition.hs (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index c760df2be..aad149c8a 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,5 +1,5 @@ Name: pandoc -Version: 1.6.1 +Version: 1.7 Cabal-Version: >= 1.2 Build-Type: Custom License: GPL @@ -158,7 +158,8 @@ Library bytestring >= 0.9, zip-archive >= 0.1.1.4, utf8-string >= 0.3, old-time >= 1, HTTP >= 4000.0.5, texmath >= 0.4, xml >= 1.3.5 && < 1.4, - random, extensible-exceptions + random, extensible-exceptions, + pandoc-types == 1.7.* if impl(ghc >= 6.10) Build-depends: base >= 4 && < 5, syb else @@ -180,7 +181,6 @@ Library Exposed-Modules: Text.Pandoc, Text.Pandoc.Blocks, - Text.Pandoc.Definition, Text.Pandoc.CharacterReferences, Text.Pandoc.Shared, Text.Pandoc.Parsing, diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs deleted file mode 100644 index bec216b5d..000000000 --- a/src/Text/Pandoc/Definition.hs +++ /dev/null @@ -1,165 +0,0 @@ -{-# OPTIONS_GHC -fglasgow-exts #-} -- for deriving Typeable -{- -Copyright (C) 2006-2010 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Definition - Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Definition of 'Pandoc' data structure for format-neutral representation -of documents. --} -module Text.Pandoc.Definition where - -import Data.Generics - -data Pandoc = Pandoc Meta [Block] deriving (Eq, Ord, Read, Show, Typeable, Data) - --- | Bibliographic information for the document: title, authors, date. -data Meta = Meta { docTitle :: [Inline] - , docAuthors :: [[Inline]] - , docDate :: [Inline] } - deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | Alignment of a table column. -data Alignment = AlignLeft - | AlignRight - | AlignCenter - | AlignDefault deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | List attributes. -type ListAttributes = (Int, ListNumberStyle, ListNumberDelim) - --- | Style of list numbers. -data ListNumberStyle = DefaultStyle - | Example - | Decimal - | LowerRoman - | UpperRoman - | LowerAlpha - | UpperAlpha deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | Delimiter of list numbers. -data ListNumberDelim = DefaultDelim - | Period - | OneParen - | TwoParens deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | Attributes: identifier, classes, key-value pairs -type Attr = (String, [String], [(String, String)]) - --- | Block element. -data Block - = Plain [Inline] -- ^ Plain text, not a paragraph - | Para [Inline] -- ^ Paragraph - | CodeBlock Attr String -- ^ Code block (literal) with attributes - | RawHtml String -- ^ Raw HTML block (literal) - | BlockQuote [Block] -- ^ Block quote (list of blocks) - | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes - -- and a list of items, each a list of blocks) - | BulletList [[Block]] -- ^ Bullet list (list of items, each - -- a list of blocks) - | DefinitionList [([Inline],[[Block]])] -- ^ Definition list - -- Each list item is a pair consisting of a - -- term (a list of inlines) and one or more - -- definitions (each a list of blocks) - | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) - | HorizontalRule -- ^ Horizontal rule - | Table [Inline] [Alignment] [Double] [[Block]] [[[Block]]] -- ^ Table, - -- with caption, column alignments, - -- relative column widths (0 = default), - -- column headers (each a list of blocks), and - -- rows (each a list of lists of blocks) - | Null -- ^ Nothing - deriving (Eq, Ord, Read, Show, Typeable, Data) - --- | Type of quotation marks to use in Quoted inline. -data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Ord, Read, Typeable, Data) - --- | Link target (URL, title). -type Target = (String, String) - --- | Type of math element (display or inline). -data MathType = DisplayMath | InlineMath deriving (Show, Eq, Ord, Read, Typeable, Data) - --- | Inline elements. -data Inline - = Str String -- ^ Text (string) - | Emph [Inline] -- ^ Emphasized text (list of inlines) - | Strong [Inline] -- ^ Strongly emphasized text (list of inlines) - | Strikeout [Inline] -- ^ Strikeout text (list of inlines) - | Superscript [Inline] -- ^ Superscripted text (list of inlines) - | Subscript [Inline] -- ^ Subscripted text (list of inlines) - | SmallCaps [Inline] -- ^ Small caps text (list of inlines) - | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines) - | Cite [Citation] [Inline] -- ^ Citation (list of inlines) - | Code String -- ^ Inline code (literal) - | Space -- ^ Inter-word space - | EmDash -- ^ Em dash - | EnDash -- ^ En dash - | Apostrophe -- ^ Apostrophe - | Ellipses -- ^ Ellipses - | LineBreak -- ^ Hard line break - | Math MathType String -- ^ TeX math (literal) - | TeX String -- ^ LaTeX code (literal) - | HtmlInline String -- ^ HTML code (literal) - | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target - | Image [Inline] Target -- ^ Image: alt text (list of inlines), target - -- and target - | Note [Block] -- ^ Footnote or endnote - deriving (Show, Eq, Ord, Read, Typeable, Data) - -data Citation = Citation { citationId :: String - , citationPrefix :: String - , citationLocator :: String - , citationNoteNum :: Int - , citationAutOnly :: Bool - , citationNoAut :: Bool - , citationHash :: Int - } - deriving (Show, Ord, Read, Typeable, Data) - -instance Eq Citation where - (==) (Citation _ _ _ _ _ _ ha) - (Citation _ _ _ _ _ _ hb) = ha == hb - --- | Applies a transformation on @a@s to matching elements in a @b@. -processWith :: (Data a, Data b) => (a -> a) -> b -> b -processWith f = everywhere (mkT f) - --- | Like 'processWith', but with monadic transformations. -processWithM :: (Monad m, Data a, Data b) => (a -> m a) -> b -> m b -processWithM f = everywhereM (mkM f) - --- | Runs a query on matching @a@ elements in a @c@. -queryWith :: (Data a, Data c) => (a -> [b]) -> c -> [b] -queryWith f = everything (++) ([] `mkQ` f) - -{-# DEPRECATED processPandoc "Use processWith instead" #-} -processPandoc :: Data a => (a -> a) -> Pandoc -> Pandoc -processPandoc = processWith - -{-# DEPRECATED queryPandoc "Use queryWith instead" #-} -queryPandoc :: Data a => (a -> [b]) -> Pandoc -> [b] -queryPandoc = queryWith - -- cgit v1.2.3 From f7f6b2427d5bef595b819d30f16fb332397349d3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 6 Nov 2010 14:43:23 -0700 Subject: Changes to use citeproc-hs 0.3. --- pandoc.cabal | 2 +- src/Text/Pandoc/Biblio.hs | 113 +++++++----------------------------- src/Text/Pandoc/Readers/Markdown.hs | 6 +- 3 files changed, 27 insertions(+), 94 deletions(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index aad149c8a..058378199 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -168,7 +168,7 @@ Library Build-depends: highlighting-kate >= 0.2.7.1 cpp-options: -D_HIGHLIGHTING if flag(citeproc) - Build-depends: citeproc-hs >= 0.2 + Build-depends: citeproc-hs >= 0.3 && < 0.4 cpp-options: -D_CITEPROC if impl(ghc >= 6.12) Ghc-Options: -O2 -Wall -fno-warn-unused-do-bind diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 16215505e..d8a4659e7 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -30,7 +30,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA module Text.Pandoc.Biblio ( processBiblio ) where import Control.Monad ( when ) -import Data.Char ( toUpper ) import Data.List import Data.Unique import Text.CSL hiding ( Cite(..), Citation(..) ) @@ -52,9 +51,9 @@ processBiblio cf r p ncits = map (queryWith getCite) $ queryWith getNote p' needNt = cits \\ concat ncits in (,) needNt $ getNoteCitations needNt p' - result = citeproc' csl r (setNearNote csl $ map (map toCslCite) grps) + result = citeproc csl r (setNearNote csl $ map (map toCslCite) grps) cits_map = zip grps (citations result) - biblioList = map (read . renderPandoc' csl) (bibliography result) + biblioList = map (renderPandoc' csl) (bibliography result) Pandoc m b = processWith (processCite csl cits_map) p' return . generateNotes nts . Pandoc m $ b ++ biblioList @@ -65,7 +64,7 @@ processCite s cs il | otherwise = il where process t = case lookup t cs of - Just i -> read $ renderPandoc s i + Just i -> renderPandoc s i Nothing -> [Str ("Error processing " ++ show t)] -- | Retrieve all citations from a 'Pandoc' docuument. To be used with @@ -91,8 +90,8 @@ getNoteCitations needNote in queryWith getCitation . getCits setHash :: Citation -> IO Citation -setHash (Citation i p l nn ao na _) - = hashUnique `fmap` newUnique >>= return . Citation i p l nn ao na +setHash (Citation i p l cm nn _) + = hashUnique `fmap` newUnique >>= return . Citation i p l cm nn generateNotes :: [Inline] -> Pandoc -> Pandoc generateNotes needNote = processWith (mvCiteInNote needNote) @@ -109,12 +108,12 @@ mvCiteInNote is = procInlines mvCite where mvCite :: [Inline] -> [Inline] mvCite inls - | x:i:xs <- inls, startWPt xs - , x == Space, i `elem_` is = split i xs ++ mvCite (tailInline xs) + | x:i:xs <- inls, startWithPunct xs + , x == Space, i `elem_` is = split i xs ++ mvCite (tailFirstInlineStr xs) | x:i:xs <- inls , x == Space, i `elem_` is = mvInNote i : mvCite xs | i:xs <- inls, i `elem_` is - , startWPt xs = split i xs ++ mvCite (tailInline xs) + , startWithPunct xs = split i xs ++ mvCite (tailFirstInlineStr xs) | i:xs <- inls, Note _ <- i = checkNt i : mvCite xs | i:xs <- inls = i : mvCite xs | otherwise = [] @@ -124,91 +123,17 @@ mvCiteInNote is = procInlines mvCite | Cite t o <- i = Note [Para [Cite t $ sanitize o]] | otherwise = Note [Para [i ]] sanitize i - | endWPt i = toCapital i - | otherwise = toCapital (i ++ [Str "."]) + | endWithPunct i = toCapital i + | otherwise = toCapital (i ++ [Str "."]) checkPt i | Cite c o : xs <- i - , endWPt o, startWPt xs - , endWPt o = Cite c (initInline o) : checkPt xs - | x:xs <- i = x : checkPt xs - | otherwise = [] - endWPt = and . map (`elem` ".,;:!?") . lastInline - startWPt = and . map (`elem` ".,;:!?") . headInline + , endWithPunct o, startWithPunct xs + , endWithPunct o = Cite c (initInline o) : checkPt xs + | x:xs <- i = x : checkPt xs + | otherwise = [] checkNt = processWith $ procInlines checkPt -headInline :: [Inline] -> String -headInline [] = [] -headInline (i:_) - | Str s <- i = head' s - | Space <- i = " " - | otherwise = headInline $ getInline i - where - head' s = if s /= [] then [head s] else [] - -lastInline :: [Inline] -> String -lastInline [] = [] -lastInline (i:[]) - | Str s <- i = last' s - | Space <- i = " " - | otherwise = lastInline $ getInline i - where - last' s = if s /= [] then [last s] else [] -lastInline (_:xs) = lastInline xs - -initInline :: [Inline] -> [Inline] -initInline [] = [] -initInline (i:[]) - | Str s <- i = return $ Str (init' s) - | Emph is <- i = return $ Emph (initInline is) - | Strong is <- i = return $ Strong (initInline is) - | Strikeout is <- i = return $ Strikeout (initInline is) - | Superscript is <- i = return $ Superscript (initInline is) - | Subscript is <- i = return $ Subscript (initInline is) - | Quoted q is <- i = return $ Quoted q (initInline is) - | SmallCaps is <- i = return $ SmallCaps (initInline is) - | Link is t <- i = return $ Link (initInline is) t - | otherwise = [] - where - init' s = if s /= [] then init s else [] -initInline (i:xs) = i : initInline xs - -tailInline :: [Inline] -> [Inline] -tailInline = mapHeadInline tail' - where - tail' s = if s /= [] then tail s else [] - -toCapital :: [Inline] -> [Inline] -toCapital = mapHeadInline toCap - where - toCap s = if s /= [] then toUpper (head s) : tail s else [] - -mapHeadInline :: (String -> String) -> [Inline] -> [Inline] -mapHeadInline _ [] = [] -mapHeadInline f (i:xs) - | Str s <- i = Str (f s) : xs - | Emph is <- i = Emph (mapHeadInline f is) : xs - | Strong is <- i = Strong (mapHeadInline f is) : xs - | Strikeout is <- i = Strikeout (mapHeadInline f is) : xs - | Superscript is <- i = Superscript (mapHeadInline f is) : xs - | Subscript is <- i = Subscript (mapHeadInline f is) : xs - | Quoted q is <- i = Quoted q (mapHeadInline f is) : xs - | SmallCaps is <- i = SmallCaps (mapHeadInline f is) : xs - | Link is t <- i = Link (mapHeadInline f is) t : xs - | otherwise = [] - -getInline :: Inline -> [Inline] -getInline i - | Emph is <- i = is - | Strong is <- i = is - | Strikeout is <- i = is - | Superscript is <- i = is - | Subscript is <- i = is - | Quoted _ is <- i = is - | SmallCaps is <- i = is - | Link is _ <- i = is - | otherwise = [] - setCiteNoteNum :: [Inline] -> Int -> [Inline] setCiteNoteNum ((Cite cs o):xs) n = Cite (setCitationNoteNum n cs) o : setCiteNoteNum xs n setCiteNoteNum _ _ = [] @@ -217,13 +142,17 @@ setCitationNoteNum :: Int -> [Citation] -> [Citation] setCitationNoteNum i = map $ \c -> c { citationNoteNum = i} toCslCite :: Citation -> CSL.Cite -toCslCite (Citation i p l nn ao na _) +toCslCite (Citation i p l cm nn _) = let (la,lo) = parseLocator l + citMode = case cm of + AuthorOnly -> (True, False) + SuppressAuthor -> (False,True ) + NormalCitation -> (False,False) in emptyCite { CSL.citeId = i , CSL.citePrefix = p , CSL.citeLabel = la , CSL.citeLocator = lo , CSL.citeNoteNumber = show nn - , CSL.authorOnly = ao - , CSL.suppressAuthor = na + , CSL.authorOnly = fst citMode + , CSL.suppressAuthor = snd citMode } diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 030da9167..0256184f6 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1346,5 +1346,9 @@ parseLabel = try $ do (p',o) = if p /= [] && last p == '+' then (init p , True ) else (p , False) - return $ Citation cit (trim p') (trim loc) 0 o na 0 + mode = case (na,o) of + (True, False) -> SuppressAuthor + (False,True ) -> AuthorOnly + _ -> NormalCitation + return $ Citation cit (trim p') (trim loc) mode 0 0 #endif -- cgit v1.2.3 From 23c6f56bc5bb5f7a994a60ccf1bb914366d74f81 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 6 Nov 2010 14:58:54 -0700 Subject: Removed CITEPROC CPP conditionals from library code. By Cabal policy, the API should not change depending on flags. --- src/Text/Pandoc/Parsing.hs | 4 ---- src/Text/Pandoc/Readers/Markdown.hs | 4 ---- 2 files changed, 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index dce99fd75..47e97c7cc 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -586,9 +586,7 @@ data ParserState = ParserState stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? stateKeys :: KeyTable, -- ^ List of reference keys -#ifdef _CITEPROC stateCitations :: [String], -- ^ List of available citations -#endif stateNotes :: NoteTable, -- ^ List of notes stateTabStop :: Int, -- ^ Tab stop stateStandalone :: Bool, -- ^ Parse bibliographic info? @@ -616,9 +614,7 @@ defaultParserState = stateQuoteContext = NoQuote, stateSanitizeHTML = False, stateKeys = M.empty, -#ifdef _CITEPROC stateCitations = [], -#endif stateNotes = [], stateTabStop = 4, stateStandalone = False, diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 0256184f6..a2ee93f42 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -912,9 +912,7 @@ inlineParsers = [ str , note , inlineNote , link -#ifdef _CITEPROC , inlineCitation -#endif , image , math , strikeout @@ -1305,7 +1303,6 @@ rawHtmlInline' = do else anyHtmlInlineTag return $ HtmlInline result -#ifdef _CITEPROC inlineCitation :: GenParser Char ParserState Inline inlineCitation = try $ do failIfStrict @@ -1351,4 +1348,3 @@ parseLabel = try $ do (False,True ) -> AuthorOnly _ -> NormalCitation return $ Citation cit (trim p') (trim loc) mode 0 0 -#endif -- cgit v1.2.3 From 36d4e649a6a21da84b2ae88be2b66ed82c0f082d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Nov 2010 21:30:34 -0800 Subject: Added support for textual citations (but not yet markdown syntax). Patch from Andrea Rossato. --- src/Text/Pandoc/Biblio.hs | 45 ++++++++++++++++++++++--------------- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 2 files changed, 28 insertions(+), 19 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index d8a4659e7..c334d89ce 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -54,19 +54,27 @@ processBiblio cf r p result = citeproc csl r (setNearNote csl $ map (map toCslCite) grps) cits_map = zip grps (citations result) biblioList = map (renderPandoc' csl) (bibliography result) - Pandoc m b = processWith (processCite csl cits_map) p' + Pandoc m b = processWith (procInlines $ processCite csl cits_map) p' return . generateNotes nts . Pandoc m $ b ++ biblioList -- | Substitute 'Cite' elements with formatted citations. -processCite :: Style -> [([Citation],[FormattedOutput])] -> Inline -> Inline -processCite s cs il - | Cite t _ <- il = Cite t (process t) - | otherwise = il +processCite :: Style -> [([Citation],[FormattedOutput])] -> [Inline] -> [Inline] +processCite _ _ [] = [] +processCite s cs (i:is) + | Cite t _ <- i = process t ++ processCite s cs is + | otherwise = i : processCite s cs is where process t = case lookup t cs of - Just i -> renderPandoc s i + Just x -> if isTextualCitation t && x /= [] + then renderPandoc s [head x] ++ [Space] ++ + [Cite t $ renderPandoc s $ tail x] + else [Cite t $ renderPandoc s x] Nothing -> [Str ("Error processing " ++ show t)] +isTextualCitation :: [Citation] -> Bool +isTextualCitation (c:_) = citationMode c == AuthorInText +isTextualCitation _ = False + -- | Retrieve all citations from a 'Pandoc' docuument. To be used with -- 'queryWith'. getCitation :: Inline -> [[Citation]] @@ -109,22 +117,22 @@ mvCiteInNote is = procInlines mvCite mvCite :: [Inline] -> [Inline] mvCite inls | x:i:xs <- inls, startWithPunct xs - , x == Space, i `elem_` is = split i xs ++ mvCite (tailFirstInlineStr xs) + , x == Space, i `elem_` is = switch i xs ++ mvCite (tailFirstInlineStr xs) | x:i:xs <- inls - , x == Space, i `elem_` is = mvInNote i : mvCite xs + , x == Space, i `elem_` is = mvInNote i : mvCite xs | i:xs <- inls, i `elem_` is - , startWithPunct xs = split i xs ++ mvCite (tailFirstInlineStr xs) - | i:xs <- inls, Note _ <- i = checkNt i : mvCite xs - | i:xs <- inls = i : mvCite xs + , startWithPunct xs = switch i xs ++ mvCite (tailFirstInlineStr xs) + | i:xs <- inls, Note _ <- i = checkNt i : mvCite xs + | i:xs <- inls = i : mvCite xs | otherwise = [] - elem_ x xs = case x of Cite cs _ -> (Cite cs []) `elem` xs; _ -> False - split i xs = Str (headInline xs) : mvInNote i : [] + elem_ x xs = case x of Cite cs _ -> (Cite cs []) `elem` xs; _ -> False + switch i xs = Str (headInline xs) : mvInNote i : [] mvInNote i | Cite t o <- i = Note [Para [Cite t $ sanitize o]] | otherwise = Note [Para [i ]] sanitize i - | endWithPunct i = toCapital i - | otherwise = toCapital (i ++ [Str "."]) + | endWithPunct i = toCapital i + | otherwise = toCapital (i ++ [Str "."]) checkPt i | Cite c o : xs <- i @@ -142,10 +150,10 @@ setCitationNoteNum :: Int -> [Citation] -> [Citation] setCitationNoteNum i = map $ \c -> c { citationNoteNum = i} toCslCite :: Citation -> CSL.Cite -toCslCite (Citation i p l cm nn _) +toCslCite (Citation i p l cm nn h) = let (la,lo) = parseLocator l citMode = case cm of - AuthorOnly -> (True, False) + AuthorInText -> (True, False) SuppressAuthor -> (False,True ) NormalCitation -> (False,False) in emptyCite { CSL.citeId = i @@ -153,6 +161,7 @@ toCslCite (Citation i p l cm nn _) , CSL.citeLabel = la , CSL.citeLocator = lo , CSL.citeNoteNumber = show nn - , CSL.authorOnly = fst citMode + , CSL.authorInText = fst citMode , CSL.suppressAuthor = snd citMode + , CSL.citeHash = h } diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b7c5220d1..7a42d903e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1345,6 +1345,6 @@ parseLabel = try $ do else (p , False) mode = case (na,o) of (True, False) -> SuppressAuthor - (False,True ) -> AuthorOnly + (False,True ) -> AuthorInText _ -> NormalCitation return $ Citation cit (trim p') (trim loc) mode 0 0 -- cgit v1.2.3 From 5c6dc5767df68ae739138ae38f2baf75949ef3e6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Nov 2010 22:35:04 -0800 Subject: Biblio: Use a Map for the lookup table. --- src/Text/Pandoc/Biblio.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index c334d89ce..bca24d815 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -32,6 +32,7 @@ module Text.Pandoc.Biblio ( processBiblio ) where import Control.Monad ( when ) import Data.List import Data.Unique +import qualified Data.Map as M import Text.CSL hiding ( Cite(..), Citation(..) ) import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition @@ -52,19 +53,19 @@ processBiblio cf r p needNt = cits \\ concat ncits in (,) needNt $ getNoteCitations needNt p' result = citeproc csl r (setNearNote csl $ map (map toCslCite) grps) - cits_map = zip grps (citations result) + cits_map = M.fromList $ zip grps (citations result) biblioList = map (renderPandoc' csl) (bibliography result) Pandoc m b = processWith (procInlines $ processCite csl cits_map) p' return . generateNotes nts . Pandoc m $ b ++ biblioList -- | Substitute 'Cite' elements with formatted citations. -processCite :: Style -> [([Citation],[FormattedOutput])] -> [Inline] -> [Inline] +processCite :: Style -> M.Map [Citation] [FormattedOutput] -> [Inline] -> [Inline] processCite _ _ [] = [] processCite s cs (i:is) | Cite t _ <- i = process t ++ processCite s cs is | otherwise = i : processCite s cs is where - process t = case lookup t cs of + process t = case M.lookup t cs of Just x -> if isTextualCitation t && x /= [] then renderPandoc s [head x] ++ [Space] ++ [Cite t $ renderPandoc s $ tail x] -- cgit v1.2.3 From 79bab2d210ffadaf4f3b6a2a7ebc33ea546dd5e0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 12 Nov 2010 00:37:44 -0800 Subject: Revised citation parsers for markdown reader. Added a form for in-text citations: @doe99 [30; see also @smith99]. --- src/Text/Pandoc/Readers/Markdown.hs | 140 ++++++++++++++++++++++++------------ 1 file changed, 96 insertions(+), 44 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 7a42d903e..eb9646df2 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -373,6 +373,7 @@ attributes = try $ do attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) attribute = identifierAttr <|> classAttr <|> keyValAttr + identifier :: GenParser Char st [Char] identifier = do first <- letter @@ -912,7 +913,7 @@ inlineParsers = [ str , note , inlineNote , link - , inlineCitation + , cite , image , math , strikeout @@ -1303,48 +1304,99 @@ rawHtmlInline' = do else choice [htmlComment, anyHtmlInlineTag] return $ HtmlInline result -inlineCitation :: GenParser Char ParserState Inline -inlineCitation = try $ do +-- Citations + +cite :: GenParser Char ParserState Inline +cite = do failIfStrict - cit <- citeMarker - let citations = readWith parseCitation defaultParserState cit - mr <- mapM chkCit citations - if catMaybes mr /= [] - then return $ Cite citations [] - else fail "no citation found" - -chkCit :: Citation -> GenParser Char ParserState (Maybe Citation) -chkCit t = do + textualCite <|> normalCite + +spnl :: GenParser Char st () +spnl = try $ skipSpaces >> optional newline >> skipSpaces >> + notFollowedBy (char '\n') + +textualCite :: GenParser Char ParserState Inline +textualCite = try $ do + key <- citeKey st <- getState - case lookupKeySrc (stateKeys st) (Key [Str $ citationId t]) of - Just _ -> fail "This is a link" - Nothing -> if elem (citationId t) $ stateCitations st - then return $ Just t - else return $ Nothing - -citeMarker :: GenParser Char ParserState String -citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']') - -parseCitation :: GenParser Char ParserState [Citation] -parseCitation = try $ sepBy (parseLabel) (skipMany1 $ char ';') - -parseLabel :: GenParser Char ParserState Citation -parseLabel = try $ do - r <- many (noneOf ";") - let t' s = if s /= [] then tail s else [] - trim = unwords . words - pref = takeWhile (/= '@') r - rest = t' $ dropWhile (/= '@') r - cit = takeWhile (/= ',') rest - loc = t' $ dropWhile (/= ',') rest - (p,na) = if pref /= [] && last pref == '-' - then (init pref, True ) - else (pref , False) - (p',o) = if p /= [] && last p == '+' - then (init p , True ) - else (p , False) - mode = case (na,o) of - (True, False) -> SuppressAuthor - (False,True ) -> AuthorInText - _ -> NormalCitation - return $ Citation cit (trim p') (trim loc) mode 0 0 + unless (key `elem` stateCitations st) $ + fail "not a citation" + let first = Citation{ citationId = key + , citationPrefix = "" + , citationLocator = "" + , citationMode = AuthorInText + , citationNoteNum = 0 + , citationHash = 0 + } + option (Cite [first] []) $ try $ do + spnl + char '[' + spnl + bareloc <- option "" locator + rest <- many $ try $ do + char ';' + spnl + citation + spnl + char ']' + let first' = if null bareloc + then first + else first{ citationLocator = bareloc + , citationMode = AuthorInText } + return $ Cite (first' : rest) [] + +normalCite :: GenParser Char ParserState Inline +normalCite = try $ do + cites <- citeList + return $ Cite cites [] + +citeKey :: GenParser Char st String +citeKey = try $ do + char '@' + first <- letter + rest <- many $ noneOf ",;]@ \t\n" + return (first:rest) + +locator :: GenParser Char st String +locator = try $ do + optional $ char ',' + spnl + -- TODO should eventually be list of inlines + many1 $ (char '\\' >> oneOf "];\n") <|> noneOf "];\n" <|> + (char '\n' >> notFollowedBy blankline >> return ' ') + +prefix :: GenParser Char st String +prefix = try $ liftM removeLeadingTrailingSpace $ + many $ (char '\\' >> anyChar) <|> noneOf "@]\n" <|> + (char '-' >> notFollowedBy (char '@') >> return '-') <|> + (char '\n' >> notFollowedBy blankline >> return ' ') + +citeList :: GenParser Char st [Citation] +citeList = try $ do + char '[' + spnl + first <- citation + spnl + rest <- many $ try $ do + char ';' + spnl + citation + spnl + char ']' + return (first:rest) + +citation :: GenParser Char st Citation +citation = try $ do + suppress_auth <- option False (char '-' >> return True) + pref <- prefix + key <- citeKey + loc <- locator + return $ Citation{ citationId = key + , citationPrefix = pref + , citationLocator = loc + , citationMode = if suppress_auth + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } -- cgit v1.2.3 From 1fa2973da690ba81a80337d9d3f0f6e2c786b602 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 12 Nov 2010 19:30:59 -0800 Subject: Repairs to citation parser + citation test suite. --- src/Text/Pandoc/Readers/Markdown.hs | 16 ++++++++-------- tests/markdown-citations.plain | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index eb9646df2..0d0e850bc 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1332,9 +1332,9 @@ textualCite = try $ do spnl char '[' spnl - bareloc <- option "" locator + bareloc <- option "" $ notFollowedBy (oneOf "-@") >> locator rest <- many $ try $ do - char ';' + optional $ char ';' spnl citation spnl @@ -1366,10 +1366,10 @@ locator = try $ do (char '\n' >> notFollowedBy blankline >> return ' ') prefix :: GenParser Char st String -prefix = try $ liftM removeLeadingTrailingSpace $ - many $ (char '\\' >> anyChar) <|> noneOf "@]\n" <|> - (char '-' >> notFollowedBy (char '@') >> return '-') <|> - (char '\n' >> notFollowedBy blankline >> return ' ') +prefix = liftM removeLeadingTrailingSpace $ + many $ (char '\\' >> anyChar) <|> noneOf "-@]\n" <|> + (try $ char '-' >> notFollowedBy (char '@') >> return '-') <|> + (try $ char '\n' >> notFollowedBy blankline >> return ' ') citeList :: GenParser Char st [Citation] citeList = try $ do @@ -1387,10 +1387,10 @@ citeList = try $ do citation :: GenParser Char st Citation citation = try $ do - suppress_auth <- option False (char '-' >> return True) pref <- prefix + suppress_auth <- option False (char '-' >> return True) key <- citeKey - loc <- locator + loc <- option "" locator return $ Citation{ citationId = key , citationPrefix = pref , citationLocator = loc diff --git a/tests/markdown-citations.plain b/tests/markdown-citations.plain index 35218d954..b809842be 100644 --- a/tests/markdown-citations.plain +++ b/tests/markdown-citations.plain @@ -5,8 +5,8 @@ Pandoc with citeproc-hs @nonexistent -Doe (2005) says blah. Doe (2005, 30) says blah. Doe (2005; 2006, -30; see also Doe and Roe 2007) says blah. +Doe (2005) says blah. Doe (2005, 30) says blah. Doe +(2005; 2006, 30; see also Doe and Roe 2007) says blah. In a note.[^1] A citation group (see Doe 2005, 34-35; also Doe and Roe 2007, chap. 3). Another one @@ -26,12 +26,12 @@ Doe, John, and Jenny Roe. 2007. Why Water Is Wet. In Third Book, ed. Sam Smith. Oxford: Oxford University Press. [^1]: - A citation without locators [Doe and Roe (2007)]. + A citation without locators (Doe and Roe 2007). [^2]: - Some citations (see Doe 2006, chap. 3; Doe and Roe 2007; Doe - 2005). + Some citations + (see Doe 2006, chap. 3; Doe and Roe 2007; Doe 2005). [^3]: - Like a citation without author: (2005), and now Doe with a - locator (2006, 44). + Like a citation without author: (2005), and now Doe with a locator + (2006, 44). -- cgit v1.2.3 From d73a531d899600ab9999b798129dc0fa6185ef7f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Nov 2010 07:15:30 -0800 Subject: Biblio: don't add footnote if empty. --- src/Text/Pandoc/Biblio.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index bca24d815..05cc296c1 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -65,11 +65,11 @@ processCite s cs (i:is) | Cite t _ <- i = process t ++ processCite s cs is | otherwise = i : processCite s cs is where + addNt t x = if null x then [] else [Cite t $ renderPandoc s x] process t = case M.lookup t cs of Just x -> if isTextualCitation t && x /= [] - then renderPandoc s [head x] ++ [Space] ++ - [Cite t $ renderPandoc s $ tail x] - else [Cite t $ renderPandoc s x] + then renderPandoc s [head x] ++ [Space] ++ addNt t (tail x) + else [Cite t $ renderPandoc s x] Nothing -> [Str ("Error processing " ++ show t)] isTextualCitation :: [Citation] -> Bool -- cgit v1.2.3 From ce9fc2a37d51e65bbfb25eed01cd400e183fb8d9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Nov 2010 20:31:22 -0800 Subject: Updated for changes in Citaiton type. citationPrefix now [Inline] rather than String; citationSuffix added. This change presupposes no changes in citeproc-hs. It passes a string for these values to citeproc-hs. Eventually, citeproc-hs should use an [Inline] for these as well. --- src/Text/Pandoc/Biblio.hs | 29 ++++++++++++++++++++--------- src/Text/Pandoc/Readers/Markdown.hs | 7 ++++--- 2 files changed, 24 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 05cc296c1..60e059175 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -99,8 +99,8 @@ getNoteCitations needNote in queryWith getCitation . getCits setHash :: Citation -> IO Citation -setHash (Citation i p l cm nn _) - = hashUnique `fmap` newUnique >>= return . Citation i p l cm nn +setHash (Citation i p s l cm nn _) + = hashUnique `fmap` newUnique >>= return . Citation i p s l cm nn generateNotes :: [Inline] -> Pandoc -> Pandoc generateNotes needNote = processWith (mvCiteInNote needNote) @@ -150,19 +150,30 @@ setCiteNoteNum _ _ = [] setCitationNoteNum :: Int -> [Citation] -> [Citation] setCitationNoteNum i = map $ \c -> c { citationNoteNum = i} +-- a temporary function to tide us over until citeproc is +-- changed to use Inline lists for prefixes and suffixes... +stringify :: [Inline] -> String +stringify = queryWith go + where go :: Inline -> [Char] + go Space = " " + go (Str x) = x + go (Code x) = x + go _ = "" + toCslCite :: Citation -> CSL.Cite -toCslCite (Citation i p l cm nn h) - = let (la,lo) = parseLocator l - citMode = case cm of +toCslCite c + = let (la,lo) = parseLocator $ citationLocator c + citMode = case citationMode c of AuthorInText -> (True, False) SuppressAuthor -> (False,True ) NormalCitation -> (False,False) - in emptyCite { CSL.citeId = i - , CSL.citePrefix = p + in emptyCite { CSL.citeId = citationId c + , CSL.citePrefix = stringify $ citationPrefix c + , CSL.citeSuffix = stringify $ citationSuffix c , CSL.citeLabel = la , CSL.citeLocator = lo - , CSL.citeNoteNumber = show nn + , CSL.citeNoteNumber = show $ citationNoteNum c , CSL.authorInText = fst citMode , CSL.suppressAuthor = snd citMode - , CSL.citeHash = h + , CSL.citeHash = citationHash c } diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 0d0e850bc..8101d3098 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1322,7 +1322,8 @@ textualCite = try $ do unless (key `elem` stateCitations st) $ fail "not a citation" let first = Citation{ citationId = key - , citationPrefix = "" + , citationPrefix = [] + , citationSuffix = [] , citationLocator = "" , citationMode = AuthorInText , citationNoteNum = 0 @@ -1361,7 +1362,6 @@ locator :: GenParser Char st String locator = try $ do optional $ char ',' spnl - -- TODO should eventually be list of inlines many1 $ (char '\\' >> oneOf "];\n") <|> noneOf "];\n" <|> (char '\n' >> notFollowedBy blankline >> return ' ') @@ -1392,7 +1392,8 @@ citation = try $ do key <- citeKey loc <- option "" locator return $ Citation{ citationId = key - , citationPrefix = pref + , citationPrefix = [Str pref] + , citationSuffix = [] , citationLocator = loc , citationMode = if suppress_auth then SuppressAuthor -- cgit v1.2.3 From 47c64d4fc4be8733615f3378e9a68c513c5710c2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 17 Nov 2010 15:35:53 -0800 Subject: Don't pass a [Str ""] as citationPrefix. --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 8101d3098..4975ee02f 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1392,7 +1392,7 @@ citation = try $ do key <- citeKey loc <- option "" locator return $ Citation{ citationId = key - , citationPrefix = [Str pref] + , citationPrefix = if pref /= [] then [Str pref] else [] , citationSuffix = [] , citationLocator = loc , citationMode = if suppress_auth -- cgit v1.2.3 From dbe0cefc9a63af4333b17c06ad6308a9e0d85799 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 17 Nov 2010 15:36:17 -0800 Subject: Biblio: Removed stringify; pass inline list to citeproc. --- src/Text/Pandoc/Biblio.hs | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 60e059175..dde822da8 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -150,16 +150,6 @@ setCiteNoteNum _ _ = [] setCitationNoteNum :: Int -> [Citation] -> [Citation] setCitationNoteNum i = map $ \c -> c { citationNoteNum = i} --- a temporary function to tide us over until citeproc is --- changed to use Inline lists for prefixes and suffixes... -stringify :: [Inline] -> String -stringify = queryWith go - where go :: Inline -> [Char] - go Space = " " - go (Str x) = x - go (Code x) = x - go _ = "" - toCslCite :: Citation -> CSL.Cite toCslCite c = let (la,lo) = parseLocator $ citationLocator c @@ -168,8 +158,8 @@ toCslCite c SuppressAuthor -> (False,True ) NormalCitation -> (False,False) in emptyCite { CSL.citeId = citationId c - , CSL.citePrefix = stringify $ citationPrefix c - , CSL.citeSuffix = stringify $ citationSuffix c + , CSL.citePrefix = PandocText $ citationPrefix c + , CSL.citeSuffix = PandocText $ citationSuffix c , CSL.citeLabel = la , CSL.citeLocator = lo , CSL.citeNoteNumber = show $ citationNoteNum c -- cgit v1.2.3 From aaf7de0ddaea292ba4e869a6f0fa5adaaf02b813 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Nov 2010 12:38:45 -0800 Subject: Markdown reader: Revised parser for new citation syntax. Suffixes and prefixes are now [Inline]. The locator is separated from the citation key by a blank space. The locator consists of one introductory word and any number of words containing at least one digit. The suffix, if any, is separated from the locator by a comma, and continues til the end of the citation. --- src/Text/Pandoc/Readers/Markdown.hs | 129 ++++++++++++++++++++---------------- tests/markdown-citations.txt | 10 +-- 2 files changed, 78 insertions(+), 61 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4975ee02f..b0aab9c70 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -34,7 +34,7 @@ module Text.Pandoc.Readers.Markdown ( import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) import qualified Data.Map as M import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum ) +import Data.Char ( isAlphaNum, isDigit ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Shared @@ -1309,15 +1309,25 @@ rawHtmlInline' = do cite :: GenParser Char ParserState Inline cite = do failIfStrict - textualCite <|> normalCite + citations <- textualCite <|> normalCite + return $ Cite citations [] spnl :: GenParser Char st () -spnl = try $ skipSpaces >> optional newline >> skipSpaces >> - notFollowedBy (char '\n') +spnl = try $ do + skipSpaces + optional newline + skipSpaces + notFollowedBy (char '\n') -textualCite :: GenParser Char ParserState Inline +blankSpace :: GenParser Char st () +blankSpace = try $ do + res <- many1 $ oneOf " \t\n" + guard $ length res > 0 + guard $ length (filter (=='\n') res) <= 1 + +textualCite :: GenParser Char ParserState [Citation] textualCite = try $ do - key <- citeKey + (_, key) <- citeKey st <- getState unless (key `elem` stateCitations st) $ fail "not a citation" @@ -1329,73 +1339,80 @@ textualCite = try $ do , citationNoteNum = 0 , citationHash = 0 } - option (Cite [first] []) $ try $ do - spnl - char '[' - spnl - bareloc <- option "" $ notFollowedBy (oneOf "-@") >> locator - rest <- many $ try $ do - optional $ char ';' - spnl - citation - spnl - char ']' - let first' = if null bareloc - then first - else first{ citationLocator = bareloc - , citationMode = AuthorInText } - return $ Cite (first' : rest) [] - -normalCite :: GenParser Char ParserState Inline + rest <- option [] $ try $ spnl >> normalCite + if null rest + then option [first] $ bareloc first + else return $ first : rest + +bareloc :: Citation -> GenParser Char ParserState [Citation] +bareloc c = try $ do + spnl + char '[' + spnl + loc <- locator + spnl + rest <- option [] $ try $ char ';' >> citeList + spnl + char ']' + return $ c{ citationLocator = loc } : rest + +normalCite :: GenParser Char ParserState [Citation] normalCite = try $ do - cites <- citeList - return $ Cite cites [] + char '[' + spnl + citations <- citeList + spnl + char ']' + return citations -citeKey :: GenParser Char st String +citeKey :: GenParser Char st (Bool, String) citeKey = try $ do + suppress_author <- option False (char '-' >> return True) char '@' first <- letter - rest <- many $ noneOf ",;]@ \t\n" - return (first:rest) + rest <- many $ (noneOf ",;]@ \t\n") + return (suppress_author, first:rest) locator :: GenParser Char st String locator = try $ do - optional $ char ',' spnl - many1 $ (char '\\' >> oneOf "];\n") <|> noneOf "];\n" <|> - (char '\n' >> notFollowedBy blankline >> return ' ') - -prefix :: GenParser Char st String -prefix = liftM removeLeadingTrailingSpace $ - many $ (char '\\' >> anyChar) <|> noneOf "-@]\n" <|> - (try $ char '-' >> notFollowedBy (char '@') >> return '-') <|> - (try $ char '\n' >> notFollowedBy blankline >> return ' ') - -citeList :: GenParser Char st [Citation] -citeList = try $ do - char '[' + w <- many1 (noneOf " \t\n;]") spnl - first <- citation + ws <- many locatorWord + return $ unwords $ w:ws + +locatorWord :: GenParser Char st String +locatorWord = try $ do + wd <- many1 $ (try $ char '\\' >> oneOf "]; \t\n") <|> noneOf "]; \t\n" spnl - rest <- many $ try $ do - char ';' - spnl - citation + if any isDigit wd + then return wd + else pzero + +suffix :: GenParser Char ParserState [Inline] +suffix = try $ do + char ',' spnl - char ']' - return (first:rest) + liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline + +prefix :: GenParser Char ParserState [Inline] +prefix = liftM normalizeSpaces $ + manyTill inline (lookAhead citeKey) + +citeList :: GenParser Char ParserState [Citation] +citeList = sepBy1 citation (try $ char ';' >> spnl) -citation :: GenParser Char st Citation +citation :: GenParser Char ParserState Citation citation = try $ do pref <- prefix - suppress_auth <- option False (char '-' >> return True) - key <- citeKey - loc <- option "" locator + (suppress_author, key) <- citeKey + loc <- option "" $ try $ blankSpace >> locator + suff <- option [] suffix return $ Citation{ citationId = key - , citationPrefix = if pref /= [] then [Str pref] else [] - , citationSuffix = [] + , citationPrefix = pref + , citationSuffix = suff , citationLocator = loc - , citationMode = if suppress_auth + , citationMode = if suppress_author then SuppressAuthor else NormalCitation , citationNoteNum = 0 diff --git a/tests/markdown-citations.txt b/tests/markdown-citations.txt index 59206f0a4..9840832ce 100644 --- a/tests/markdown-citations.txt +++ b/tests/markdown-citations.txt @@ -6,11 +6,11 @@ @item1 says blah. @item1 [p. 30] says blah. -@item1 [-@item2, p. 30; see also @item3] says blah. +@item1 [-@item2 p. 30; see also @item3] says blah. In a note.[^1] A citation group [see -@item1, p. 34-35; also @item3, chap. 3]. Another one [see -@item1, p. 34-35]. And another one in a note.[^2] +@item1 p. 34-35; also @item3 chap. 3]. Another one [see +@item1 p. 34-35]. And another one in a note.[^2] Now some modifiers.[^3] @@ -18,11 +18,11 @@ Now some modifiers.[^3] A citation without locators [@item3]. [^2]: - Some citations [see @item2, chap. 3; @item3; @item1]. + Some citations [see @item2 chap. 3; @item3; @item1]. [^3]: Like a citation without author: [-@item1], and now Doe with a - locator [-@item2, p. 44]. + locator [-@item2 p. 44]. # References -- cgit v1.2.3 From f3bb3c1ff1c85ea3bc9132b4c890905a9af20c3a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Nov 2010 13:22:20 -0800 Subject: Markdown citation parser improvements and test updates. Now we handle a suffix after a bare locator, e.g. @item1 [p. 30, suffix] The suffix now includes any punctuation that introduces it. A few tests fail because of problems with citeproc (extra space before the suffix, missing space after comma separating multiple page ranges in the locator). --- src/Text/Pandoc/Readers/Markdown.hs | 28 +++++++++++++++------------- tests/markdown-citations.plain | 8 +++++--- tests/markdown-citations.txt | 4 ++++ 3 files changed, 24 insertions(+), 16 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b0aab9c70..851cf25e7 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1348,13 +1348,12 @@ bareloc :: Citation -> GenParser Char ParserState [Citation] bareloc c = try $ do spnl char '[' - spnl loc <- locator - spnl + suff <- suffix rest <- option [] $ try $ char ';' >> citeList spnl char ']' - return $ c{ citationLocator = loc } : rest + return $ c{ citationLocator = loc, citationSuffix = suff } : rest normalCite :: GenParser Char ParserState [Citation] normalCite = try $ do @@ -1376,28 +1375,31 @@ citeKey = try $ do locator :: GenParser Char st String locator = try $ do spnl - w <- many1 (noneOf " \t\n;]") - spnl - ws <- many locatorWord + w <- many1 (noneOf " \t\n;,]") + ws <- many (locatorWord <|> locatorComma) return $ unwords $ w:ws locatorWord :: GenParser Char st String locatorWord = try $ do - wd <- many1 $ (try $ char '\\' >> oneOf "]; \t\n") <|> noneOf "]; \t\n" spnl - if any isDigit wd - then return wd - else pzero + wd <- many1 $ (try $ char '\\' >> oneOf "];, \t\n") <|> noneOf "];, \t\n" + guard $ any isDigit wd + return wd + +locatorComma :: GenParser Char st String +locatorComma = try $ do + char ',' + lookAhead $ locatorWord + return "," suffix :: GenParser Char ParserState [Inline] suffix = try $ do - char ',' spnl liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline prefix :: GenParser Char ParserState [Inline] prefix = liftM normalizeSpaces $ - manyTill inline (lookAhead citeKey) + manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) citeList :: GenParser Char ParserState [Citation] citeList = sepBy1 citation (try $ char ';' >> spnl) @@ -1407,7 +1409,7 @@ citation = try $ do pref <- prefix (suppress_author, key) <- citeKey loc <- option "" $ try $ blankSpace >> locator - suff <- option [] suffix + suff <- suffix return $ Citation{ citationId = key , citationPrefix = pref , citationSuffix = suff diff --git a/tests/markdown-citations.plain b/tests/markdown-citations.plain index b809842be..dd5d23efc 100644 --- a/tests/markdown-citations.plain +++ b/tests/markdown-citations.plain @@ -5,12 +5,14 @@ Pandoc with citeproc-hs @nonexistent -Doe (2005) says blah. Doe (2005, 30) says blah. Doe -(2005; 2006, 30; see also Doe and Roe 2007) says blah. +Doe (2005) says blah. Doe (2005, 30) says blah. Doe (2005, 30, suffix) +says blah. Doe (2005; 2006, 30; see also Doe and Roe 2007) says blah. In a note.[^1] A citation group (see Doe 2005, 34-35; also Doe and Roe 2007, chap. 3). Another one -(see Doe 2005, 34-35). And another one in a note.[^2] +(see Doe 2005, 34-35). And another one in a note.[^2] Citation with +a suffix and locator (Doe 2005, 33, 35-37, and nowhere else). +Citation with suffix only (Doe 2005, and nowhere else). Now some modifiers.[^3] diff --git a/tests/markdown-citations.txt b/tests/markdown-citations.txt index 9840832ce..c54a41304 100644 --- a/tests/markdown-citations.txt +++ b/tests/markdown-citations.txt @@ -6,11 +6,15 @@ @item1 says blah. @item1 [p. 30] says blah. +@item1 [p. 30, with suffix] says blah. @item1 [-@item2 p. 30; see also @item3] says blah. In a note.[^1] A citation group [see @item1 p. 34-35; also @item3 chap. 3]. Another one [see @item1 p. 34-35]. And another one in a note.[^2] +Citation with a suffix and locator [@item1 pp. 33, 35-37, +and nowhere else]. Citation with suffix only +[@item1, and nowhere else]. Now some modifiers.[^3] -- cgit v1.2.3 From bbb60a2586f29785fd9ba592770bc2f7842deba4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Nov 2010 14:15:26 -0800 Subject: If --csl not specified, read from data files or default. Thus --csl behaves like --reference-odt, --template, etc. --- src/Text/Pandoc/Biblio.hs | 7 ++----- src/pandoc.hs | 29 +++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index dde822da8..12911e1ee 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -29,7 +29,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA module Text.Pandoc.Biblio ( processBiblio ) where -import Control.Monad ( when ) import Data.List import Data.Unique import qualified Data.Map as M @@ -39,12 +38,10 @@ import Text.Pandoc.Definition -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style, using 'citeproc' from citeproc-hs. -processBiblio :: String -> [Reference] -> Pandoc -> IO Pandoc -processBiblio cf r p +processBiblio :: Style -> [Reference] -> Pandoc -> IO Pandoc +processBiblio csl r p = if null r then return p else do - when (null cf) $ error "Missing the needed citation style file" - csl <- readCSLFile cf p' <- processWithM setHash p let (nts,grps) = if styleClass csl /= "note" then (,) [] $ queryWith getCitation p' diff --git a/src/pandoc.hs b/src/pandoc.hs index 5fcec5005..7822a0b67 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -42,7 +42,7 @@ import System.FilePath import System.Console.GetOpt import Data.Char ( toLower, isDigit ) import Data.List ( intercalate, isSuffixOf ) -import System.Directory ( getAppUserDataDirectory ) +import System.Directory ( getAppUserDataDirectory, doesFileExist ) import System.IO ( stdout, stderr ) import qualified Text.Pandoc.UTF8 as UTF8 #ifdef _CITEPROC @@ -55,6 +55,7 @@ import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 (toString, fromString) import Codec.Binary.UTF8.String (decodeString, encodeString) +import Paths_pandoc (getDataFileName) copyrightMessage :: String copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++ @@ -164,7 +165,7 @@ data Opt = Opt , optDataDir :: Maybe FilePath #ifdef _CITEPROC , optBibliography :: [Reference] - , optCslFile :: String + , optCslFile :: FilePath #endif } @@ -205,7 +206,7 @@ defaultOpts = Opt , optDataDir = Nothing #ifdef _CITEPROC , optBibliography = [] - , optCslFile = [] + , optCslFile = "" #endif } @@ -532,7 +533,7 @@ options = "" , Option "" ["csl"] (ReqArg - (\arg opt -> return opt { optCslFile = arg} ) + (\arg opt -> return opt { optCslFile = arg }) "FILENAME") "" #endif @@ -685,7 +686,7 @@ main = do , optDataDir = mbDataDir #ifdef _CITEPROC , optBibliography = refs - , optCslFile = cslFile + , optCslFile = cslfile #endif } = opts @@ -838,13 +839,29 @@ main = do let convertTabs = tabFilter (if preserveTabs then 0 else tabStop) +#ifdef _CITEPROC + cslfile' <- if null cslfile + then do + let defaultcsl = "default.csl" + csldatafile <- getDataFileName defaultcsl + case datadir of + Nothing -> return csldatafile + Just u -> do + ex <- doesFileExist $ u defaultcsl + if ex + then return $ u defaultcsl + else return csldatafile + else return cslfile + csl <- readCSLFile cslfile' +#endif + doc <- fmap (reader startParserState . convertTabs . intercalate "\n") (readSources sources) let doc' = foldr ($) doc transforms doc'' <- do #ifdef _CITEPROC - processBiblio cslFile refs doc' + processBiblio csl refs doc' #else return doc' #endif -- cgit v1.2.3 From 6390103509caba5930c8ac45d31364b244607547 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Nov 2010 14:16:18 -0800 Subject: Markdown citation parser: small refactoring for clarity. --- src/Text/Pandoc/Readers/Markdown.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 851cf25e7..d59f8a71a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1325,6 +1325,10 @@ blankSpace = try $ do guard $ length res > 0 guard $ length (filter (=='\n') res) <= 1 +noneOfUnlessEscaped :: [Char] -> GenParser Char st Char +noneOfUnlessEscaped cs = + try (char '\\' >> oneOf cs) <|> noneOf cs + textualCite :: GenParser Char ParserState [Citation] textualCite = try $ do (_, key) <- citeKey @@ -1382,7 +1386,7 @@ locator = try $ do locatorWord :: GenParser Char st String locatorWord = try $ do spnl - wd <- many1 $ (try $ char '\\' >> oneOf "];, \t\n") <|> noneOf "];, \t\n" + wd <- many1 $ noneOfUnlessEscaped "];, \t\n" guard $ any isDigit wd return wd -- cgit v1.2.3 From 9cb0581de6b485ddfbd37e66414990339cd44b72 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 19 Nov 2010 22:13:30 -0800 Subject: Shared: Added findFirstFile, findDataFile, refactored readDataFile. --- src/Text/Pandoc/Shared.hs | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 0fdaf42f3..67c5153c7 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -71,6 +71,8 @@ module Text.Pandoc.Shared ( defaultWriterOptions, -- * File handling inDirectory, + findFirstFile, + findDataFile, readDataFile ) where @@ -538,11 +540,28 @@ inDirectory path action = do setCurrentDirectory oldDir return result +-- | Get full file path for the first of a list of files found in the +-- specified directory. +findFirstFile :: (Maybe FilePath) -> [FilePath] -> IO (Maybe FilePath) +findFirstFile Nothing _ = return Nothing +findFirstFile (Just _) [] = return Nothing +findFirstFile (Just dir) (f:fs) = do + ex <- doesFileExist (dir f) + if ex + then return $ Just (dir f) + else findFirstFile (Just dir) fs + +-- | Get file path for data file, either from specified user data directory, +-- or, if not found there, from Cabal data directory. +findDataFile :: Maybe FilePath -> FilePath -> IO FilePath +findDataFile Nothing f = getDataFileName f +findDataFile (Just u) f = do + ex <- doesFileExist (u f) + if ex + then return (u f) + else getDataFileName f + -- | Read file from specified user data directory or, if not found there, from -- Cabal data directory. readDataFile :: Maybe FilePath -> FilePath -> IO String -readDataFile userDir fname = - case userDir of - Nothing -> getDataFileName fname >>= UTF8.readFile - Just u -> catch (UTF8.readFile $ u fname) - (\_ -> getDataFileName fname >>= UTF8.readFile) +readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile -- cgit v1.2.3 From 3eef887dfa4e47095c4be9b2bdbf67c002e29f90 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Nov 2010 08:11:30 -0800 Subject: Citation related changes. * Don't look for bibliography in ~/.pandoc. Reason: doing this requires a read + parse of the bibliography even when the document doesn't use citations. This is a big performance drag on regular pandoc invocations. * Only look for default.csl if the document contains references. Reason: avoids the need to read and parse csl file when the document contains no references anyway. * Removed findFirstFile from Shared. --- src/Text/Pandoc/Biblio.hs | 5 +++-- src/Text/Pandoc/Shared.hs | 12 ------------ src/pandoc.hs | 31 +++++++++---------------------- 3 files changed, 12 insertions(+), 36 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 12911e1ee..a60909e19 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -38,10 +38,11 @@ import Text.Pandoc.Definition -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style, using 'citeproc' from citeproc-hs. -processBiblio :: Style -> [Reference] -> Pandoc -> IO Pandoc -processBiblio csl r p +processBiblio :: FilePath -> [Reference] -> Pandoc -> IO Pandoc +processBiblio cslfile r p = if null r then return p else do + csl <- readCSLFile cslfile p' <- processWithM setHash p let (nts,grps) = if styleClass csl /= "note" then (,) [] $ queryWith getCitation p' diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 67c5153c7..6cc48b88c 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -71,7 +71,6 @@ module Text.Pandoc.Shared ( defaultWriterOptions, -- * File handling inDirectory, - findFirstFile, findDataFile, readDataFile ) where @@ -540,17 +539,6 @@ inDirectory path action = do setCurrentDirectory oldDir return result --- | Get full file path for the first of a list of files found in the --- specified directory. -findFirstFile :: (Maybe FilePath) -> [FilePath] -> IO (Maybe FilePath) -findFirstFile Nothing _ = return Nothing -findFirstFile (Just _) [] = return Nothing -findFirstFile (Just dir) (f:fs) = do - ex <- doesFileExist (dir f) - if ex - then return $ Just (dir f) - else findFirstFile (Just dir) fs - -- | Get file path for data file, either from specified user data directory, -- or, if not found there, from Cabal data directory. findDataFile :: Maybe FilePath -> FilePath -> IO FilePath diff --git a/src/pandoc.hs b/src/pandoc.hs index ab4110b42..66a09b309 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -32,7 +32,7 @@ module Main where import Text.Pandoc import Text.Pandoc.S5 (s5HeaderIncludes) import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile, - headerShift, findDataFile, findFirstFile ) + headerShift, findDataFile ) #ifdef _HIGHLIGHTING import Text.Pandoc.Highlighting ( languages ) #endif @@ -767,25 +767,6 @@ main = do return $ ("mathml-script", s) : variables' _ -> return variables' -#ifdef _CITEPROC - cslfile' <- if null cslfile - then findDataFile datadir "default.csl" - else return cslfile - csl <- readCSLFile cslfile' - refs' <- if null refs - then do - f <- findFirstFile datadir - ["biblio.xml","biblio.json","biblio.bib"] - case f of - Just x -> catch (readBiblioFile x) $ \e -> do - UTF8.hPutStrLn stderr $ - "Error reading bibliography `" ++ x ++ "'" - UTF8.hPutStrLn stderr $ show e - exitWith (ExitFailure 23) >> return [] - Nothing -> return [] - else return refs -#endif - let sourceDir = if null sources then "." else takeDirectory (head sources) @@ -803,7 +784,7 @@ main = do lhsExtension sources, stateStandalone = standalone', #ifdef _CITEPROC - stateCitations = map refId refs', + stateCitations = map refId refs, #endif stateSmart = smart || writerName' `elem` ["latex", "context", "latex+lhs", "man"], @@ -863,7 +844,13 @@ main = do doc'' <- do #ifdef _CITEPROC - processBiblio csl refs' doc' + if null refs + then return doc' + else do + cslfile' <- if null cslfile + then findDataFile datadir "default.csl" + else return cslfile + processBiblio cslfile' refs doc' #else return doc' #endif -- cgit v1.2.3 From 05f5766abedbc7643cbefb94325a2d77830fa9db Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Nov 2010 22:00:17 -0800 Subject: Biblio: Check for == rather than /=. This is more perspicuous. --- src/Text/Pandoc/Biblio.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index a60909e19..921cf54c5 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -44,12 +44,12 @@ processBiblio cslfile r p else do csl <- readCSLFile cslfile p' <- processWithM setHash p - let (nts,grps) = if styleClass csl /= "note" - then (,) [] $ queryWith getCitation p' - else let cits = queryWith getCite p' + let (nts,grps) = if styleClass csl == "note" + then let cits = queryWith getCite p' ncits = map (queryWith getCite) $ queryWith getNote p' needNt = cits \\ concat ncits in (,) needNt $ getNoteCitations needNt p' + else (,) [] $ queryWith getCitation p' result = citeproc csl r (setNearNote csl $ map (map toCslCite) grps) cits_map = M.fromList $ zip grps (citations result) biblioList = map (renderPandoc' csl) (bibliography result) -- cgit v1.2.3 From 7ef7d85b3fafd9175d16d1c6ef9548a4d043464b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Nov 2010 22:10:16 -0800 Subject: HTML reader: Export htmlTag. --- src/Text/Pandoc/Readers/HTML.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 5ccbc4fb1..462267d89 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -35,6 +35,7 @@ module Text.Pandoc.Readers.HTML ( anyHtmlInlineTag, anyHtmlTag, anyHtmlEndTag, + htmlTag, htmlEndTag, extractTagType, htmlBlockElement, -- cgit v1.2.3 From b48fa0ea59da6b32924d2f042c8a7411a03f89d4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 22 Nov 2010 23:09:30 -0800 Subject: Check biblio for all citations, not just textual. --- src/Text/Pandoc/Readers/Markdown.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d59f8a71a..2d3ad1199 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1332,9 +1332,6 @@ noneOfUnlessEscaped cs = textualCite :: GenParser Char ParserState [Citation] textualCite = try $ do (_, key) <- citeKey - st <- getState - unless (key `elem` stateCitations st) $ - fail "not a citation" let first = Citation{ citationId = key , citationPrefix = [] , citationSuffix = [] @@ -1368,13 +1365,16 @@ normalCite = try $ do char ']' return citations -citeKey :: GenParser Char st (Bool, String) +citeKey :: GenParser Char ParserState (Bool, String) citeKey = try $ do suppress_author <- option False (char '-' >> return True) char '@' first <- letter rest <- many $ (noneOf ",;]@ \t\n") - return (suppress_author, first:rest) + let key = first:rest + st <- getState + guard $ key `elem` stateCitations st + return (suppress_author, key) locator :: GenParser Char st String locator = try $ do -- cgit v1.2.3 From 0871a512d7f5cc71f130a7ac56dbe8dfa9d75051 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Nov 2010 12:06:56 -0800 Subject: Split locator and suffix in Biblio rather than Markdown parser. Patch from Nathan Gass. --- src/Text/Pandoc/Biblio.hs | 38 +++++++++++++++++++++++++++++++++---- src/Text/Pandoc/Readers/Markdown.hs | 38 ++----------------------------------- 2 files changed, 36 insertions(+), 40 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 921cf54c5..bf1624bb4 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -31,6 +31,7 @@ module Text.Pandoc.Biblio ( processBiblio ) where import Data.List import Data.Unique +import Data.Char ( isDigit ) import qualified Data.Map as M import Text.CSL hiding ( Cite(..), Citation(..) ) import qualified Text.CSL as CSL ( Cite(..) ) @@ -97,8 +98,8 @@ getNoteCitations needNote in queryWith getCitation . getCits setHash :: Citation -> IO Citation -setHash (Citation i p s l cm nn _) - = hashUnique `fmap` newUnique >>= return . Citation i p s l cm nn +setHash (Citation i p s cm nn _) + = hashUnique `fmap` newUnique >>= return . Citation i p s cm nn generateNotes :: [Inline] -> Pandoc -> Pandoc generateNotes needNote = processWith (mvCiteInNote needNote) @@ -150,14 +151,15 @@ setCitationNoteNum i = map $ \c -> c { citationNoteNum = i} toCslCite :: Citation -> CSL.Cite toCslCite c - = let (la,lo) = parseLocator $ citationLocator c + = let (l, s) = locatorWords $ citationSuffix c + (la,lo) = parseLocator $ unwords l citMode = case citationMode c of AuthorInText -> (True, False) SuppressAuthor -> (False,True ) NormalCitation -> (False,False) in emptyCite { CSL.citeId = citationId c , CSL.citePrefix = PandocText $ citationPrefix c - , CSL.citeSuffix = PandocText $ citationSuffix c + , CSL.citeSuffix = PandocText $ s , CSL.citeLabel = la , CSL.citeLocator = lo , CSL.citeNoteNumber = show $ citationNoteNum c @@ -165,3 +167,31 @@ toCslCite c , CSL.suppressAuthor = snd citMode , CSL.citeHash = citationHash c } + +locatorWords :: [Inline] -> ([String], [Inline]) +locatorWords (Space : t) = locatorWords t +locatorWords (Str "" : t) = locatorWords t +locatorWords a@(Str (',' : s) : t) + = if ws /= [] then (ws, t') else ([], a) + where + (ws, t') = locatorWords (Str s:t) +locatorWords i + = if any isDigit w then (w':ws, s'') else ([], i) + where + (w, s') = locatorWord i + (ws, s'') = locatorWords s' + w' = if ws == [] then w else w ++ "," + +locatorWord :: [Inline] -> (String, [Inline]) +locatorWord (Space : r) = (" " ++ ts, r') + where + (ts, r') = locatorWord r +locatorWord (Str t : r) + | t' /= "" = (w , Str t' : r) + | otherwise = (t ++ ts, r' ) + where + w = takeWhile (/= ',') t + t' = dropWhile (/= ',') t + (ts, r') = locatorWord r +locatorWord i = ("", i) + diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2d3ad1199..1b3900798 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -34,7 +34,7 @@ module Text.Pandoc.Readers.Markdown ( import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) import qualified Data.Map as M import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum, isDigit ) +import Data.Char ( isAlphaNum ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Shared @@ -1319,23 +1319,12 @@ spnl = try $ do skipSpaces notFollowedBy (char '\n') -blankSpace :: GenParser Char st () -blankSpace = try $ do - res <- many1 $ oneOf " \t\n" - guard $ length res > 0 - guard $ length (filter (=='\n') res) <= 1 - -noneOfUnlessEscaped :: [Char] -> GenParser Char st Char -noneOfUnlessEscaped cs = - try (char '\\' >> oneOf cs) <|> noneOf cs - textualCite :: GenParser Char ParserState [Citation] textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key , citationPrefix = [] , citationSuffix = [] - , citationLocator = "" , citationMode = AuthorInText , citationNoteNum = 0 , citationHash = 0 @@ -1349,12 +1338,11 @@ bareloc :: Citation -> GenParser Char ParserState [Citation] bareloc c = try $ do spnl char '[' - loc <- locator suff <- suffix rest <- option [] $ try $ char ';' >> citeList spnl char ']' - return $ c{ citationLocator = loc, citationSuffix = suff } : rest + return $ c{ citationSuffix = suff } : rest normalCite :: GenParser Char ParserState [Citation] normalCite = try $ do @@ -1376,26 +1364,6 @@ citeKey = try $ do guard $ key `elem` stateCitations st return (suppress_author, key) -locator :: GenParser Char st String -locator = try $ do - spnl - w <- many1 (noneOf " \t\n;,]") - ws <- many (locatorWord <|> locatorComma) - return $ unwords $ w:ws - -locatorWord :: GenParser Char st String -locatorWord = try $ do - spnl - wd <- many1 $ noneOfUnlessEscaped "];, \t\n" - guard $ any isDigit wd - return wd - -locatorComma :: GenParser Char st String -locatorComma = try $ do - char ',' - lookAhead $ locatorWord - return "," - suffix :: GenParser Char ParserState [Inline] suffix = try $ do spnl @@ -1412,12 +1380,10 @@ citation :: GenParser Char ParserState Citation citation = try $ do pref <- prefix (suppress_author, key) <- citeKey - loc <- option "" $ try $ blankSpace >> locator suff <- suffix return $ Citation{ citationId = key , citationPrefix = pref , citationSuffix = suff - , citationLocator = loc , citationMode = if suppress_author then SuppressAuthor else NormalCitation -- cgit v1.2.3 From 0ca84f0d381639ca996eb037c9c4057b2de4ed2f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Nov 2010 22:09:17 -0800 Subject: Markdown suffix parser fix. If suffix doesn't begin with punctuation, include opening comma and space in result. Previously, @item [only a suffix] would result in something like Doe (2002only a suffix) because there was no opening delimiter. --- src/Text/Pandoc/Readers/Markdown.hs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 1b3900798..d39050243 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -34,7 +34,7 @@ module Text.Pandoc.Readers.Markdown ( import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) import qualified Data.Map as M import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum ) +import Data.Char ( isAlphaNum, isPunctuation ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Shared @@ -1367,7 +1367,12 @@ citeKey = try $ do suffix :: GenParser Char ParserState [Inline] suffix = try $ do spnl - liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline + res <- many $ notFollowedBy (oneOf ";]") >> inline + return $ case res of + [] -> [] + (Str (y:_) : _) | isPunctuation y + -> res + _ -> Str "," : Space : res prefix :: GenParser Char ParserState [Inline] prefix = liftM normalizeSpaces $ -- cgit v1.2.3 From 044a9a61574ff1414b44e2f92307996cba00a2e1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 27 Nov 2010 07:08:06 -0800 Subject: Added 'stringify' to Text.Pandoc.Shared. --- src/Text/Pandoc/Shared.hs | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 6cc48b88c..f2f38519b 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -57,6 +57,7 @@ module Text.Pandoc.Shared ( -- * Pandoc block and inline list processing orderedListMarkers, normalizeSpaces, + stringify, compactify, Element (..), hierarchicalize, @@ -340,6 +341,15 @@ normalizeSpaces list = else lst in removeLeading $ removeTrailing $ removeDoubles list +-- | Convert list of inlines to a string with formatting removed. +stringify :: [Inline] -> String +stringify = queryWith go + where go :: Inline -> [Char] + go Space = " " + go (Str x) = x + go (Code x) = x + go _ = "" + -- | Change final list item from @Para@ to @Plain@ if the list contains -- no other @Para@ blocks. compactify :: [[Block]] -- ^ List of list items (each a list of blocks) -- cgit v1.2.3 From 283f1e60ccd2cdfe48a056687565aedade5ceb6d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 27 Nov 2010 07:08:32 -0800 Subject: Use parsec parsers to split locator. This is easier to read and maintain. Also, formatting is now stripped from the locator prefix, so you can write e.g. '*p.* 33'. --- src/Text/Pandoc/Biblio.hs | 61 ++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 27 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index bf1624bb4..4a8cea4da 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -36,6 +36,9 @@ import qualified Data.Map as M import Text.CSL hiding ( Cite(..), Citation(..) ) import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition +import Text.Pandoc.Shared (stringify) +import Text.ParserCombinators.Parsec +import Control.Monad -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style, using 'citeproc' from citeproc-hs. @@ -152,7 +155,7 @@ setCitationNoteNum i = map $ \c -> c { citationNoteNum = i} toCslCite :: Citation -> CSL.Cite toCslCite c = let (l, s) = locatorWords $ citationSuffix c - (la,lo) = parseLocator $ unwords l + (la,lo) = parseLocator l citMode = case citationMode c of AuthorInText -> (True, False) SuppressAuthor -> (False,True ) @@ -168,30 +171,34 @@ toCslCite c , CSL.citeHash = citationHash c } -locatorWords :: [Inline] -> ([String], [Inline]) -locatorWords (Space : t) = locatorWords t -locatorWords (Str "" : t) = locatorWords t -locatorWords a@(Str (',' : s) : t) - = if ws /= [] then (ws, t') else ([], a) - where - (ws, t') = locatorWords (Str s:t) -locatorWords i - = if any isDigit w then (w':ws, s'') else ([], i) - where - (w, s') = locatorWord i - (ws, s'') = locatorWords s' - w' = if ws == [] then w else w ++ "," - -locatorWord :: [Inline] -> (String, [Inline]) -locatorWord (Space : r) = (" " ++ ts, r') - where - (ts, r') = locatorWord r -locatorWord (Str t : r) - | t' /= "" = (w , Str t' : r) - | otherwise = (t ++ ts, r' ) - where - w = takeWhile (/= ',') t - t' = dropWhile (/= ',') t - (ts, r') = locatorWord r -locatorWord i = ("", i) +locatorWords :: [Inline] -> (String, [Inline]) +locatorWords inp = + case parse (liftM2 (,) pLocator getInput) "suffix" inp of + Right r -> r + Left _ -> ("",inp) + +pMatch :: (Inline -> Bool) -> GenParser Inline st Inline +pMatch condition = try $ do + t <- anyToken + guard $ condition t + return t + +pSpace :: GenParser Inline st Inline +pSpace = pMatch (== Space) + +pLocator :: GenParser Inline st String +pLocator = try $ do + optional $ pMatch (== Str ",") + optional pSpace + f <- many1 (notFollowedBy pSpace >> anyToken) + gs <- many1 pWordWithDigits + return $ stringify f ++ (' ' : unwords gs) + +pWordWithDigits :: GenParser Inline st String +pWordWithDigits = try $ do + pSpace + r <- many1 (notFollowedBy pSpace >> anyToken) + let s = stringify r + guard $ any isDigit s + return s -- cgit v1.2.3 From 219853b05e37be8cda8527eff80ec4f505203c5d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 27 Nov 2010 11:28:11 -0800 Subject: Added procOpts parameter to citeproc call. --- src/Text/Pandoc/Biblio.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 4a8cea4da..717084ca7 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -54,7 +54,8 @@ processBiblio cslfile r p needNt = cits \\ concat ncits in (,) needNt $ getNoteCitations needNt p' else (,) [] $ queryWith getCitation p' - result = citeproc csl r (setNearNote csl $ map (map toCslCite) grps) + result = citeproc procOpts csl r (setNearNote csl $ + map (map toCslCite) grps) cits_map = M.fromList $ zip grps (citations result) biblioList = map (renderPandoc' csl) (bibliography result) Pandoc m b = processWith (procInlines $ processCite csl cits_map) p' -- cgit v1.2.3 From eac4abe36f705b852726eeceee928eb9ead6ceb0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 27 Nov 2010 11:28:28 -0800 Subject: Biblio: If locator ends with ",", add it to the suffix. --- src/Text/Pandoc/Biblio.hs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 717084ca7..efaafd77d 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -174,10 +174,18 @@ toCslCite c locatorWords :: [Inline] -> (String, [Inline]) locatorWords inp = - case parse (liftM2 (,) pLocator getInput) "suffix" inp of + case parse pLocatorWords "suffix" inp of Right r -> r Left _ -> ("",inp) +pLocatorWords :: GenParser Inline st (String, [Inline]) +pLocatorWords = do + l <- pLocator + s <- getInput -- rest is suffix + if length l > 0 && last l == ',' + then return (init l, Str "," : s) + else return (l, s) + pMatch :: (Inline -> Bool) -> GenParser Inline st Inline pMatch condition = try $ do t <- anyToken -- cgit v1.2.3 From b10e82c9fa9a2284a3eb9ff3bc1334ed15d45a26 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 28 Nov 2010 07:55:33 -0800 Subject: Fixed spacing bug for reference-style citations. --- src/Text/Pandoc/Biblio.hs | 5 ++++- tests/markdown-citations.ieee.html | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index efaafd77d..0241b2d6d 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -71,7 +71,10 @@ processCite s cs (i:is) addNt t x = if null x then [] else [Cite t $ renderPandoc s x] process t = case M.lookup t cs of Just x -> if isTextualCitation t && x /= [] - then renderPandoc s [head x] ++ [Space] ++ addNt t (tail x) + then renderPandoc s [head x] ++ + if tail x /= [] + then Space : addNt t (tail x) + else [] else [Cite t $ renderPandoc s x] Nothing -> [Str ("Error processing " ++ show t)] diff --git a/tests/markdown-citations.ieee.html b/tests/markdown-citations.ieee.html index bf8955940..d25a60ca0 100644 --- a/tests/markdown-citations.ieee.html +++ b/tests/markdown-citations.ieee.html @@ -53,4 +53,4 @@ > +> \ No newline at end of file -- cgit v1.2.3