diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Biblio.hs | 134 | ||||
-rw-r--r-- | src/Text/Pandoc/Definition.hs | 151 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 132 |
4 files changed, 214 insertions, 207 deletions
diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 436eadd68..bca24d815 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Copyright : Copyright (C) 2008 Andrea Rossato License : GNU GPL, version 2 or above - Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it> + Maintainer : Andrea Rossato <andrea.rossato@unitn.it> Stability : alpha Portability : portable -} @@ -31,7 +31,10 @@ module Text.Pandoc.Biblio ( processBiblio ) where import Control.Monad ( when ) import Data.List -import Text.CSL +import Data.Unique +import qualified Data.Map as M +import Text.CSL hiding ( Cite(..), Citation(..) ) +import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition -- | Process a 'Pandoc' document by adding citations formatted @@ -42,25 +45,124 @@ processBiblio cf r p else do when (null cf) $ error "Missing the needed citation style file" csl <- readCSLFile cf - let groups = queryWith getCite p - result = citeproc csl r groups - cits_map = zip groups (citations result) - biblioList = map (read . renderPandoc' csl) (bibliography result) - Pandoc m b = processWith (processCite csl cits_map) p - return $ Pandoc m $ b ++ biblioList + p' <- processWithM setHash p + let (nts,grps) = if styleClass csl /= "note" + then (,) [] $ queryWith getCitation p' + else let cits = queryWith getCite p' + ncits = map (queryWith getCite) $ queryWith getNote p' + needNt = cits \\ concat ncits + in (,) needNt $ getNoteCitations needNt p' + result = citeproc csl r (setNearNote csl $ map (map toCslCite) grps) + cits_map = M.fromList $ zip grps (citations result) + biblioList = map (renderPandoc' csl) (bibliography result) + Pandoc m b = processWith (procInlines $ processCite csl cits_map) p' + return . generateNotes nts . Pandoc m $ b ++ biblioList -- | Substitute 'Cite' elements with formatted citations. -processCite :: Style -> [([Target],[FormattedOutput])] -> Inline -> Inline -processCite s cs il - | Cite t _ <- il = Cite t (process t) - | otherwise = il +processCite :: Style -> M.Map [Citation] [FormattedOutput] -> [Inline] -> [Inline] +processCite _ _ [] = [] +processCite s cs (i:is) + | Cite t _ <- i = process t ++ processCite s cs is + | otherwise = i : processCite s cs is where - process t = case elemIndex t (map fst cs) of - Just i -> read . renderPandoc s $ snd (cs !! i) + process t = case M.lookup t cs of + Just x -> if isTextualCitation t && x /= [] + then renderPandoc s [head x] ++ [Space] ++ + [Cite t $ renderPandoc s $ tail x] + else [Cite t $ renderPandoc s x] Nothing -> [Str ("Error processing " ++ show t)] +isTextualCitation :: [Citation] -> Bool +isTextualCitation (c:_) = citationMode c == AuthorInText +isTextualCitation _ = False + -- | Retrieve all citations from a 'Pandoc' docuument. To be used with -- 'queryWith'. -getCite :: Inline -> [[(String,String)]] -getCite i | Cite t _ <- i = [t] +getCitation :: Inline -> [[Citation]] +getCitation i | Cite t _ <- i = [t] + | otherwise = [] + +getNote :: Inline -> [Inline] +getNote i | Note _ <- i = [i] + | otherwise = [] + +getCite :: Inline -> [Inline] +getCite i | Cite _ _ <- i = [i] | otherwise = [] + +getNoteCitations :: [Inline] -> Pandoc -> [[Citation]] +getNoteCitations needNote + = let mvCite i = if i `elem` needNote then Note [Para [i]] else i + setNote = processWith mvCite + getCits = concat . flip (zipWith $ setCiteNoteNum) [1..] . + map (queryWith getCite) . queryWith getNote . setNote + in queryWith getCitation . getCits + +setHash :: Citation -> IO Citation +setHash (Citation i p l cm nn _) + = hashUnique `fmap` newUnique >>= return . Citation i p l cm nn + +generateNotes :: [Inline] -> Pandoc -> Pandoc +generateNotes needNote = processWith (mvCiteInNote needNote) + +procInlines :: ([Inline] -> [Inline]) -> Block -> Block +procInlines f b + | Plain inls <- b = Plain $ f inls + | Para inls <- b = Para $ f inls + | Header i inls <- b = Header i $ f inls + | otherwise = b + +mvCiteInNote :: [Inline] -> Block -> Block +mvCiteInNote is = procInlines mvCite + where + mvCite :: [Inline] -> [Inline] + mvCite inls + | x:i:xs <- inls, startWithPunct xs + , x == Space, i `elem_` is = switch i xs ++ mvCite (tailFirstInlineStr xs) + | x:i:xs <- inls + , x == Space, i `elem_` is = mvInNote i : mvCite xs + | i:xs <- inls, i `elem_` is + , startWithPunct xs = switch i xs ++ mvCite (tailFirstInlineStr xs) + | i:xs <- inls, Note _ <- i = checkNt i : mvCite xs + | i:xs <- inls = i : mvCite xs + | otherwise = [] + elem_ x xs = case x of Cite cs _ -> (Cite cs []) `elem` xs; _ -> False + switch i xs = Str (headInline xs) : mvInNote i : [] + mvInNote i + | Cite t o <- i = Note [Para [Cite t $ sanitize o]] + | otherwise = Note [Para [i ]] + sanitize i + | endWithPunct i = toCapital i + | otherwise = toCapital (i ++ [Str "."]) + + checkPt i + | Cite c o : xs <- i + , endWithPunct o, startWithPunct xs + , endWithPunct o = Cite c (initInline o) : checkPt xs + | x:xs <- i = x : checkPt xs + | otherwise = [] + checkNt = processWith $ procInlines checkPt + +setCiteNoteNum :: [Inline] -> Int -> [Inline] +setCiteNoteNum ((Cite cs o):xs) n = Cite (setCitationNoteNum n cs) o : setCiteNoteNum xs n +setCiteNoteNum _ _ = [] + +setCitationNoteNum :: Int -> [Citation] -> [Citation] +setCitationNoteNum i = map $ \c -> c { citationNoteNum = i} + +toCslCite :: Citation -> CSL.Cite +toCslCite (Citation i p l cm nn h) + = let (la,lo) = parseLocator l + citMode = case cm of + AuthorInText -> (True, False) + SuppressAuthor -> (False,True ) + NormalCitation -> (False,False) + in emptyCite { CSL.citeId = i + , CSL.citePrefix = p + , CSL.citeLabel = la + , CSL.citeLocator = lo + , CSL.citeNoteNumber = show nn + , CSL.authorInText = fst citMode + , CSL.suppressAuthor = snd citMode + , CSL.citeHash = h + } diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs deleted file mode 100644 index fffca3b2e..000000000 --- a/src/Text/Pandoc/Definition.hs +++ /dev/null @@ -1,151 +0,0 @@ -{-# OPTIONS_GHC -fglasgow-exts #-} -- for deriving Typeable -{- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Definition - Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Definition of 'Pandoc' data structure for format-neutral representation -of documents. --} -module Text.Pandoc.Definition where - -import Data.Generics - -data Pandoc = Pandoc Meta [Block] deriving (Eq, Ord, Read, Show, Typeable, Data) - --- | Bibliographic information for the document: title, authors, date. -data Meta = Meta { docTitle :: [Inline] - , docAuthors :: [[Inline]] - , docDate :: [Inline] } - deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | Alignment of a table column. -data Alignment = AlignLeft - | AlignRight - | AlignCenter - | AlignDefault deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | List attributes. -type ListAttributes = (Int, ListNumberStyle, ListNumberDelim) - --- | Style of list numbers. -data ListNumberStyle = DefaultStyle - | Example - | Decimal - | LowerRoman - | UpperRoman - | LowerAlpha - | UpperAlpha deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | Delimiter of list numbers. -data ListNumberDelim = DefaultDelim - | Period - | OneParen - | TwoParens deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | Attributes: identifier, classes, key-value pairs -type Attr = (String, [String], [(String, String)]) - --- | Block element. -data Block - = Plain [Inline] -- ^ Plain text, not a paragraph - | Para [Inline] -- ^ Paragraph - | CodeBlock Attr String -- ^ Code block (literal) with attributes - | RawHtml String -- ^ Raw HTML block (literal) - | BlockQuote [Block] -- ^ Block quote (list of blocks) - | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes - -- and a list of items, each a list of blocks) - | BulletList [[Block]] -- ^ Bullet list (list of items, each - -- a list of blocks) - | DefinitionList [([Inline],[[Block]])] -- ^ Definition list - -- Each list item is a pair consisting of a - -- term (a list of inlines) and one or more - -- definitions (each a list of blocks) - | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) - | HorizontalRule -- ^ Horizontal rule - | Table [Inline] [Alignment] [Double] [[Block]] [[[Block]]] -- ^ Table, - -- with caption, column alignments, - -- relative column widths (0 = default), - -- column headers (each a list of blocks), and - -- rows (each a list of lists of blocks) - | Null -- ^ Nothing - deriving (Eq, Ord, Read, Show, Typeable, Data) - --- | Type of quotation marks to use in Quoted inline. -data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Ord, Read, Typeable, Data) - --- | Link target (URL, title). -type Target = (String, String) - --- | Type of math element (display or inline). -data MathType = DisplayMath | InlineMath deriving (Show, Eq, Ord, Read, Typeable, Data) - --- | Inline elements. -data Inline - = Str String -- ^ Text (string) - | Emph [Inline] -- ^ Emphasized text (list of inlines) - | Strong [Inline] -- ^ Strongly emphasized text (list of inlines) - | Strikeout [Inline] -- ^ Strikeout text (list of inlines) - | Superscript [Inline] -- ^ Superscripted text (list of inlines) - | Subscript [Inline] -- ^ Subscripted text (list of inlines) - | SmallCaps [Inline] -- ^ Small caps text (list of inlines) - | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines) - | Cite [Target] [Inline] -- ^ Citation (list of inlines) - | Code String -- ^ Inline code (literal) - | Space -- ^ Inter-word space - | EmDash -- ^ Em dash - | EnDash -- ^ En dash - | Apostrophe -- ^ Apostrophe - | Ellipses -- ^ Ellipses - | LineBreak -- ^ Hard line break - | Math MathType String -- ^ TeX math (literal) - | TeX String -- ^ LaTeX code (literal) - | HtmlInline String -- ^ HTML code (literal) - | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target - | Image [Inline] Target -- ^ Image: alt text (list of inlines), target - -- and target - | Note [Block] -- ^ Footnote or endnote - deriving (Show, Eq, Ord, Read, Typeable, Data) - --- | Applies a transformation on @a@s to matching elements in a @b@. -processWith :: (Data a, Data b) => (a -> a) -> b -> b -processWith f = everywhere (mkT f) - --- | Like 'processWith', but with monadic transformations. -processWithM :: (Monad m, Data a, Data b) => (a -> m a) -> b -> m b -processWithM f = everywhereM (mkM f) - --- | Runs a query on matching @a@ elements in a @c@. -queryWith :: (Data a, Data c) => (a -> [b]) -> c -> [b] -queryWith f = everything (++) ([] `mkQ` f) - -{-# DEPRECATED processPandoc "Use processWith instead" #-} -processPandoc :: Data a => (a -> a) -> Pandoc -> Pandoc -processPandoc = processWith - -{-# DEPRECATED queryPandoc "Use queryWith instead" #-} -queryPandoc :: Data a => (a -> [b]) -> Pandoc -> [b] -queryPandoc = queryWith - diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index dce99fd75..47e97c7cc 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -586,9 +586,7 @@ data ParserState = ParserState stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? stateKeys :: KeyTable, -- ^ List of reference keys -#ifdef _CITEPROC stateCitations :: [String], -- ^ List of available citations -#endif stateNotes :: NoteTable, -- ^ List of notes stateTabStop :: Int, -- ^ Tab stop stateStandalone :: Bool, -- ^ Parse bibliographic info? @@ -616,9 +614,7 @@ defaultParserState = stateQuoteContext = NoQuote, stateSanitizeHTML = False, stateKeys = M.empty, -#ifdef _CITEPROC stateCitations = [], -#endif stateNotes = [], stateTabStop = 4, stateStandalone = False, diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b655ea1a9..eb9646df2 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -373,6 +373,7 @@ attributes = try $ do attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) attribute = identifierAttr <|> classAttr <|> keyValAttr + identifier :: GenParser Char st [Char] identifier = do first <- letter @@ -912,9 +913,7 @@ inlineParsers = [ str , note , inlineNote , link -#ifdef _CITEPROC - , inlineCitation -#endif + , cite , image , math , strikeout @@ -1305,38 +1304,99 @@ rawHtmlInline' = do else choice [htmlComment, anyHtmlInlineTag] return $ HtmlInline result -#ifdef _CITEPROC -inlineCitation :: GenParser Char ParserState Inline -inlineCitation = try $ do +-- Citations + +cite :: GenParser Char ParserState Inline +cite = do failIfStrict - cit <- citeMarker - let citations = readWith parseCitation defaultParserState cit - mr <- mapM chkCit citations - if catMaybes mr /= [] - then return $ Cite citations [] - else fail "no citation found" - -chkCit :: Target -> GenParser Char ParserState (Maybe Target) -chkCit t = do + textualCite <|> normalCite + +spnl :: GenParser Char st () +spnl = try $ skipSpaces >> optional newline >> skipSpaces >> + notFollowedBy (char '\n') + +textualCite :: GenParser Char ParserState Inline +textualCite = try $ do + key <- citeKey st <- getState - case lookupKeySrc (stateKeys st) (Key [Str $ fst t]) of - Just _ -> fail "This is a link" - Nothing -> if elem (fst t) $ stateCitations st - then return $ Just t - else return $ Nothing - -citeMarker :: GenParser Char ParserState String -citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']') - -parseCitation :: GenParser Char ParserState [(String,String)] -parseCitation = try $ sepBy (parseLabel) (oneOf ";") - -parseLabel :: GenParser Char ParserState (String,String) -parseLabel = try $ do - res <- sepBy (skipSpaces >> optional newline >> skipSpaces >> many1 (noneOf "@;")) (oneOf "@") - case res of - [lab,loc] -> return (lab, loc) - [lab] -> return (lab, "" ) - _ -> return ("" , "" ) - -#endif + unless (key `elem` stateCitations st) $ + fail "not a citation" + let first = Citation{ citationId = key + , citationPrefix = "" + , citationLocator = "" + , citationMode = AuthorInText + , citationNoteNum = 0 + , citationHash = 0 + } + option (Cite [first] []) $ try $ do + spnl + char '[' + spnl + bareloc <- option "" locator + rest <- many $ try $ do + char ';' + spnl + citation + spnl + char ']' + let first' = if null bareloc + then first + else first{ citationLocator = bareloc + , citationMode = AuthorInText } + return $ Cite (first' : rest) [] + +normalCite :: GenParser Char ParserState Inline +normalCite = try $ do + cites <- citeList + return $ Cite cites [] + +citeKey :: GenParser Char st String +citeKey = try $ do + char '@' + first <- letter + rest <- many $ noneOf ",;]@ \t\n" + return (first:rest) + +locator :: GenParser Char st String +locator = try $ do + optional $ char ',' + spnl + -- TODO should eventually be list of inlines + many1 $ (char '\\' >> oneOf "];\n") <|> noneOf "];\n" <|> + (char '\n' >> notFollowedBy blankline >> return ' ') + +prefix :: GenParser Char st String +prefix = try $ liftM removeLeadingTrailingSpace $ + many $ (char '\\' >> anyChar) <|> noneOf "@]\n" <|> + (char '-' >> notFollowedBy (char '@') >> return '-') <|> + (char '\n' >> notFollowedBy blankline >> return ' ') + +citeList :: GenParser Char st [Citation] +citeList = try $ do + char '[' + spnl + first <- citation + spnl + rest <- many $ try $ do + char ';' + spnl + citation + spnl + char ']' + return (first:rest) + +citation :: GenParser Char st Citation +citation = try $ do + suppress_auth <- option False (char '-' >> return True) + pref <- prefix + key <- citeKey + loc <- locator + return $ Citation{ citationId = key + , citationPrefix = pref + , citationLocator = loc + , citationMode = if suppress_auth + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } |