From 9328f4cd3d5d5b96e7783b419214bd8599c17ebc Mon Sep 17 00:00:00 2001 From: mb21 Date: Sat, 17 Oct 2015 14:48:31 +0200 Subject: LaTeX and ConTeXt writers: support lang attribute on divs and spans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For LaTeX, also collect lang and dir attributes on spans and divs to set the lang, otherlangs and dir variables if they aren’t set already. See #895. --- src/Text/Pandoc/Writers/ConTeXt.hs | 43 +++++++++++------ src/Text/Pandoc/Writers/LaTeX.hs | 99 ++++++++++++++++++++++++++++++++------ 2 files changed, 110 insertions(+), 32 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 7d3830a60..61e62aa17 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -157,17 +157,21 @@ blockToConTeXt (CodeBlock _ str) = blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline blockToConTeXt (RawBlock _ _ ) = return empty blockToConTeXt (Div (ident,_,kvs) bs) = do - contents <- blockListToConTeXt bs - let contents' = if null ident - then contents - else ("\\reference" <> brackets (text $ toLabel ident) <> - braces empty <> "%") $$ contents - let align dir = blankline <> "\\startalignment[" <> dir <> "]" - $$ contents' $$ "\\stopalignment" <> blankline - return $ case lookup "dir" kvs of - Just "rtl" -> align "righttoleft" - Just "ltr" -> align "lefttoright" - _ -> contents' + let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment" + let wrapRef txt = if null ident + then txt + else ("\\reference" <> brackets (text $ toLabel ident) <> + braces empty <> "%") $$ txt + wrapDir = case lookup "dir" kvs of + Just "rtl" -> align "righttoleft" + Just "ltr" -> align "lefttoright" + _ -> id + wrapLang txt = case lookup "lang" kvs of + Just lng -> "\\start\\language[" + <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop" + Nothing -> txt + wrapBlank txt = blankline <> txt <> blankline + fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs blockToConTeXt (BulletList lst) = do contents <- mapM listItemToConTeXt lst return $ ("\\startitemize" <> if isTightList lst @@ -346,11 +350,15 @@ inlineToConTeXt (Note contents) = do else text "\\startbuffer " <> nest 2 contents' <> text "\\stopbuffer\\footnote{\\getbuffer}" inlineToConTeXt (Span (_,_,kvs) ils) = do - contents <- inlineListToConTeXt ils - return $ case lookup "dir" kvs of - Just "rtl" -> braces $ "\\righttoleft " <> contents - Just "ltr" -> braces $ "\\lefttoright " <> contents - _ -> contents + let wrapDir txt = case lookup "dir" kvs of + Just "rtl" -> braces $ "\\righttoleft " <> txt + Just "ltr" -> braces $ "\\lefttoright " <> txt + _ -> txt + wrapLang txt = case lookup "lang" kvs of + Just lng -> "\\start\\language[" <> text (fromBcp47' lng) + <> "]" <> txt <> "\\stop " + Nothing -> txt + fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: Attr @@ -377,6 +385,9 @@ sectionHeader (ident,classes,_) hdrLevel lst = do then char '\\' <> chapter <> braces contents else contents <> blankline +fromBcp47' :: String -> String +fromBcp47' = fromBcp47 . splitBy (=='-') + -- Takes a list of the constituents of a BCP 47 language code -- and irons out ConTeXt's exceptions -- https://tools.ietf.org/html/bcp47#section-2.1 diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 770a674b7..b31497a22 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Templates import Text.Printf ( printf ) import Network.URI ( isURI, unEscapeString ) import Data.Aeson (object, (.=)) -import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse ) +import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, nub, nubBy ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) import Data.Maybe ( fromMaybe ) import qualified Data.Text as T @@ -145,6 +145,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do st <- get titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta + let docLangs = nub $ query (extract "lang") blocks let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if stBook st @@ -179,18 +180,48 @@ pandocToLaTeX options (Pandoc meta blocks) = do Biblatex -> defField "biblio-title" biblioTitle . defField "biblatex" True _ -> id) $ + -- set lang to something so polyglossia/babel is included + defField "lang" (if null docLangs then ""::String else "en") $ + defField "otherlangs" docLangs $ + defField "dir" (if (null $ query (extract "dir") blocks) + then ""::String + else "ltr") $ metadata let toPolyObj lang = object [ "name" .= T.pack name , "options" .= T.pack opts ] where (name, opts) = toPolyglossia lang let lang = maybe [] (splitBy (=='-')) $ getField "lang" context + otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context let context' = defField "babel-lang" (toBabel lang) + $ defField "babel-otherlangs" (map toBabel otherlangs) + $ defField "babel-newcommands" (concatMap (\(poly, babel) -> + -- \textspanish and \textgalician are already used by babel + -- save them as \oritext... and let babel use that + if poly `elem` ["spanish", "galician"] + then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++ + "\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++ + "{\\renewcommand{\\text" ++ poly ++ "}{\\oritext" + ++ poly ++ "}}\n" ++ + "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++ + "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" + ++ poly ++ "}{##2}}}\n" + else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" + ++ babel ++ "}{#2}}\n" ++ + "\\newenvironment{" ++ poly ++ "}[1]{\\begin{otherlanguage}{" + ++ babel ++ "}}{\\end{otherlanguage}}\n" + ) + -- eliminate duplicates that have same polyglossia name + $ nubBy (\a b -> fst a == fst b) + -- find polyglossia and babel names of languages used in the document + $ map (\l -> + let lng = splitBy (=='-') l + in (fst $ toPolyglossia lng, toBabel lng) + ) + docLangs ) $ defField "polyglossia-lang" (toPolyObj lang) - $ defField "polyglossia-otherlangs" - (maybe [] (map $ toPolyObj . splitBy (=='-')) $ - getField "otherlangs" context) + $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs) $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of Just "rtl" -> True _ -> False) @@ -340,15 +371,24 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do then empty else "\\hyperdef{}" <> braces (text ref) <> braces ("\\label" <> braces (text ref)) - contents' <- blockListToLaTeX bs - let align dir = inCmd "begin" dir $$ contents' $$ inCmd "end" dir - let contents = case lookup "dir" kvs of - Just "rtl" -> align "RTL" - Just "ltr" -> align "LTR" - _ -> contents' - if beamer && "notes" `elem` classes -- speaker notes - then return $ "\\note" <> braces contents - else return (linkAnchor $$ contents) + let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir + let wrapDir = case lookup "dir" kvs of + Just "rtl" -> align "RTL" + Just "ltr" -> align "LTR" + _ -> id + wrapLang txt = case lookup "lang" kvs of + Just lng -> let (l, o) = toPolyglossiaEnv lng + ops = if null o + then "" + else brackets $ text o + in inCmd "begin" (text l) <> ops + $$ blankline <> txt <> blankline + $$ inCmd "end" (text l) + Nothing -> txt + wrapNotes txt = if beamer && "notes" `elem` classes + then "\\note" <> braces txt -- speaker notes + else linkAnchor $$ txt + fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure @@ -759,9 +799,12 @@ inlineToLaTeX (Span (id',classes,kvs) ils) = do (if noSmallCaps then inCmd "textnormal" else id) . (if rtl then inCmd "RL" else id) . (if ltr then inCmd "LR" else id) . - (if not (noEmph || noStrong || noSmallCaps || rtl || ltr) - then braces - else id)) `fmap` inlineListToLaTeX ils + (case lookup "lang" kvs of + Just lng -> let (l, o) = toPolyglossiaEnv lng + ops = if null o then "" else brackets (text o) + in \c -> char '\\' <> "text" <> text l <> ops <> braces c + Nothing -> id) + ) `fmap` inlineListToLaTeX ils inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = @@ -1002,6 +1045,30 @@ getListingsLanguage :: [String] -> Maybe String getListingsLanguage [] = Nothing getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs +-- Extract a key from divs and spans +extract :: String -> Block -> [String] +extract key (Div attr _) = lookKey key attr +extract key (Plain ils) = concatMap (extractInline key) ils +extract key (Para ils) = concatMap (extractInline key) ils +extract key (Header _ _ ils) = concatMap (extractInline key) ils +extract _ _ = [] + +-- Extract a key from spans +extractInline :: String -> Inline -> [String] +extractInline key (Span attr _) = lookKey key attr +extractInline _ _ = [] + +-- Look up a key in an attribute and give a list of its values +lookKey :: String -> Attr -> [String] +lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs + +-- In environments \Arabic instead of \arabic is used +toPolyglossiaEnv :: String -> (String, String) +toPolyglossiaEnv l = + case toPolyglossia $ (splitBy (=='-')) l of + ("arabic", o) -> ("Arabic", o) + x -> x + -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Polyglossia (language, options) tuple -- http://mirrors.concertpass.com/tex-archive/macros/latex/contrib/polyglossia/polyglossia.pdf -- cgit v1.2.3