From ea479bf28a4031f408af12ea92d3e19f9a838820 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 4 Jan 2021 14:05:03 -0800
Subject: LaTeX reader: handle filecontents environment.

Closes #7003.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 563d32883..e92ed387c 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -152,6 +152,7 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sHasChapters   :: Bool
                             , sToggles       :: M.Map Text Bool
                             , sExpanded      :: Bool
+                            , sFileContents  :: M.Map Text Text
                             }
      deriving Show
 
@@ -177,6 +178,7 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sHasChapters   = False
                               , sToggles       = M.empty
                               , sExpanded      = False
+                              , sFileContents  = M.empty
                               }
 
 instance PandocMonad m => HasQuoteContext LaTeXState m where
-- 
cgit v1.2.3


From 4f3434586743afb69f00ca91fe6ec9b68b39ae7e Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 8 Jan 2021 18:38:20 +0100
Subject: Update copyright notices for 2021 (#7012)

---
 COPYRIGHT                                       | 26 ++++++++++++-------------
 MANUAL.txt                                      |  2 +-
 README.md                                       |  2 +-
 README.template                                 |  2 +-
 app/pandoc.hs                                   |  2 +-
 benchmark/benchmark-pandoc.hs                   |  2 +-
 benchmark/weigh-pandoc.hs                       |  2 +-
 pandoc.cabal                                    |  2 +-
 src/Text/Pandoc.hs                              |  2 +-
 src/Text/Pandoc/App.hs                          |  2 +-
 src/Text/Pandoc/App/CommandLineOptions.hs       |  4 ++--
 src/Text/Pandoc/App/FormatHeuristics.hs         |  2 +-
 src/Text/Pandoc/App/Opt.hs                      |  2 +-
 src/Text/Pandoc/App/OutputSettings.hs           |  2 +-
 src/Text/Pandoc/Asciify.hs                      |  2 +-
 src/Text/Pandoc/BCP47.hs                        |  2 +-
 src/Text/Pandoc/CSS.hs                          |  2 +-
 src/Text/Pandoc/CSV.hs                          |  2 +-
 src/Text/Pandoc/Data.hs                         |  2 +-
 src/Text/Pandoc/Error.hs                        |  2 +-
 src/Text/Pandoc/Extensions.hs                   |  2 +-
 src/Text/Pandoc/Filter.hs                       |  2 +-
 src/Text/Pandoc/Filter/JSON.hs                  |  2 +-
 src/Text/Pandoc/Filter/Lua.hs                   |  2 +-
 src/Text/Pandoc/Filter/Path.hs                  |  2 +-
 src/Text/Pandoc/Highlighting.hs                 |  2 +-
 src/Text/Pandoc/Image.hs                        |  2 +-
 src/Text/Pandoc/ImageSize.hs                    |  2 +-
 src/Text/Pandoc/Logging.hs                      |  2 +-
 src/Text/Pandoc/Lua.hs                          |  2 +-
 src/Text/Pandoc/Lua/ErrorConversion.hs          |  2 +-
 src/Text/Pandoc/Lua/Filter.hs                   |  4 ++--
 src/Text/Pandoc/Lua/Global.hs                   |  2 +-
 src/Text/Pandoc/Lua/Init.hs                     |  2 +-
 src/Text/Pandoc/Lua/Marshaling.hs               |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/AST.hs           |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/AnyValue.hs      |  2 +-
 src/Text/Pandoc/Lua/Marshaling/CommonState.hs   |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/Context.hs       |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/List.hs          |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/MediaBag.hs      |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/PandocError.hs   |  2 +-
 src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs   |  2 +-
 src/Text/Pandoc/Lua/Marshaling/Version.hs       |  2 +-
 src/Text/Pandoc/Lua/Module/MediaBag.hs          |  2 +-
 src/Text/Pandoc/Lua/Module/Pandoc.hs            |  2 +-
 src/Text/Pandoc/Lua/Module/System.hs            |  2 +-
 src/Text/Pandoc/Lua/Module/Types.hs             |  2 +-
 src/Text/Pandoc/Lua/Module/Utils.hs             |  2 +-
 src/Text/Pandoc/Lua/Packages.hs                 |  2 +-
 src/Text/Pandoc/Lua/PandocLua.hs                |  2 +-
 src/Text/Pandoc/Lua/Util.hs                     |  4 ++--
 src/Text/Pandoc/Lua/Walk.hs                     |  4 ++--
 src/Text/Pandoc/MIME.hs                         |  2 +-
 src/Text/Pandoc/MediaBag.hs                     |  2 +-
 src/Text/Pandoc/Options.hs                      |  2 +-
 src/Text/Pandoc/PDF.hs                          |  2 +-
 src/Text/Pandoc/Parsing.hs                      |  2 +-
 src/Text/Pandoc/Process.hs                      |  2 +-
 src/Text/Pandoc/Readers.hs                      |  2 +-
 src/Text/Pandoc/Readers/BibTeX.hs               |  2 +-
 src/Text/Pandoc/Readers/CSV.hs                  |  2 +-
 src/Text/Pandoc/Readers/CommonMark.hs           |  2 +-
 src/Text/Pandoc/Readers/CslJson.hs              |  2 +-
 src/Text/Pandoc/Readers/DocBook.hs              |  2 +-
 src/Text/Pandoc/Readers/Docx/Combine.hs         |  2 +-
 src/Text/Pandoc/Readers/Docx/Util.hs            |  2 +-
 src/Text/Pandoc/Readers/HTML.hs                 |  2 +-
 src/Text/Pandoc/Readers/HTML/Parsing.hs         |  2 +-
 src/Text/Pandoc/Readers/HTML/Table.hs           |  4 ++--
 src/Text/Pandoc/Readers/HTML/TagCategories.hs   |  2 +-
 src/Text/Pandoc/Readers/HTML/Types.hs           |  2 +-
 src/Text/Pandoc/Readers/Ipynb.hs                |  2 +-
 src/Text/Pandoc/Readers/Jira.hs                 |  2 +-
 src/Text/Pandoc/Readers/LaTeX.hs                |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Lang.hs           |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs        |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Types.hs          |  2 +-
 src/Text/Pandoc/Readers/Markdown.hs             |  2 +-
 src/Text/Pandoc/Readers/MediaWiki.hs            |  2 +-
 src/Text/Pandoc/Readers/Metadata.hs             |  2 +-
 src/Text/Pandoc/Readers/Native.hs               |  2 +-
 src/Text/Pandoc/Readers/OPML.hs                 |  2 +-
 src/Text/Pandoc/Readers/Org.hs                  |  2 +-
 src/Text/Pandoc/Readers/Org/BlockStarts.hs      |  2 +-
 src/Text/Pandoc/Readers/Org/Blocks.hs           |  2 +-
 src/Text/Pandoc/Readers/Org/DocumentTree.hs     |  2 +-
 src/Text/Pandoc/Readers/Org/ExportSettings.hs   |  2 +-
 src/Text/Pandoc/Readers/Org/Inlines.hs          |  2 +-
 src/Text/Pandoc/Readers/Org/Meta.hs             |  2 +-
 src/Text/Pandoc/Readers/Org/ParserState.hs      |  2 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs          |  2 +-
 src/Text/Pandoc/Readers/Org/Shared.hs           |  2 +-
 src/Text/Pandoc/Readers/RST.hs                  |  2 +-
 src/Text/Pandoc/Readers/Textile.hs              |  2 +-
 src/Text/Pandoc/RoffChar.hs                     |  2 +-
 src/Text/Pandoc/SelfContained.hs                |  2 +-
 src/Text/Pandoc/Shared.hs                       |  2 +-
 src/Text/Pandoc/Slides.hs                       |  2 +-
 src/Text/Pandoc/Templates.hs                    |  2 +-
 src/Text/Pandoc/Translations.hs                 |  2 +-
 src/Text/Pandoc/UTF8.hs                         |  2 +-
 src/Text/Pandoc/UUID.hs                         |  2 +-
 src/Text/Pandoc/Writers.hs                      |  2 +-
 src/Text/Pandoc/Writers/AsciiDoc.hs             |  2 +-
 src/Text/Pandoc/Writers/CommonMark.hs           |  2 +-
 src/Text/Pandoc/Writers/ConTeXt.hs              |  2 +-
 src/Text/Pandoc/Writers/CslJson.hs              |  2 +-
 src/Text/Pandoc/Writers/Custom.hs               |  2 +-
 src/Text/Pandoc/Writers/Docbook.hs              |  2 +-
 src/Text/Pandoc/Writers/Docx.hs                 |  2 +-
 src/Text/Pandoc/Writers/Docx/StyleMap.hs        |  2 +-
 src/Text/Pandoc/Writers/DokuWiki.hs             |  2 +-
 src/Text/Pandoc/Writers/EPUB.hs                 |  2 +-
 src/Text/Pandoc/Writers/FB2.hs                  |  2 +-
 src/Text/Pandoc/Writers/HTML.hs                 |  2 +-
 src/Text/Pandoc/Writers/Ipynb.hs                |  2 +-
 src/Text/Pandoc/Writers/JATS.hs                 |  2 +-
 src/Text/Pandoc/Writers/JATS/Table.hs           |  2 +-
 src/Text/Pandoc/Writers/JATS/Types.hs           |  2 +-
 src/Text/Pandoc/Writers/Jira.hs                 |  2 +-
 src/Text/Pandoc/Writers/LaTeX.hs                |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Caption.hs        |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Notes.hs          |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Table.hs          |  2 +-
 src/Text/Pandoc/Writers/Man.hs                  |  2 +-
 src/Text/Pandoc/Writers/Markdown.hs             |  2 +-
 src/Text/Pandoc/Writers/MediaWiki.hs            |  2 +-
 src/Text/Pandoc/Writers/Ms.hs                   |  2 +-
 src/Text/Pandoc/Writers/Native.hs               |  2 +-
 src/Text/Pandoc/Writers/ODT.hs                  |  2 +-
 src/Text/Pandoc/Writers/OOXML.hs                |  2 +-
 src/Text/Pandoc/Writers/OPML.hs                 |  2 +-
 src/Text/Pandoc/Writers/Org.hs                  |  4 ++--
 src/Text/Pandoc/Writers/RST.hs                  |  2 +-
 src/Text/Pandoc/Writers/RTF.hs                  |  2 +-
 src/Text/Pandoc/Writers/Roff.hs                 |  2 +-
 src/Text/Pandoc/Writers/Shared.hs               |  2 +-
 src/Text/Pandoc/Writers/TEI.hs                  |  2 +-
 src/Text/Pandoc/Writers/Texinfo.hs              |  2 +-
 src/Text/Pandoc/Writers/Textile.hs              |  2 +-
 src/Text/Pandoc/Writers/ZimWiki.hs              |  2 +-
 src/Text/Pandoc/XML.hs                          |  2 +-
 test/Tests/Command.hs                           |  2 +-
 test/Tests/Helpers.hs                           |  2 +-
 test/Tests/Lua.hs                               |  2 +-
 test/Tests/Lua/Module.hs                        |  2 +-
 test/Tests/Old.hs                               |  2 +-
 test/Tests/Readers/Creole.hs                    |  2 +-
 test/Tests/Readers/EPUB.hs                      |  2 +-
 test/Tests/Readers/HTML.hs                      |  2 +-
 test/Tests/Readers/Jira.hs                      |  2 +-
 test/Tests/Readers/LaTeX.hs                     |  2 +-
 test/Tests/Readers/Man.hs                       |  2 +-
 test/Tests/Readers/Markdown.hs                  |  2 +-
 test/Tests/Readers/Odt.hs                       |  2 +-
 test/Tests/Readers/Org.hs                       |  2 +-
 test/Tests/Readers/Org/Block.hs                 |  2 +-
 test/Tests/Readers/Org/Block/CodeBlock.hs       |  2 +-
 test/Tests/Readers/Org/Block/Figure.hs          |  2 +-
 test/Tests/Readers/Org/Block/Header.hs          |  2 +-
 test/Tests/Readers/Org/Block/List.hs            |  2 +-
 test/Tests/Readers/Org/Block/Table.hs           |  2 +-
 test/Tests/Readers/Org/Directive.hs             |  2 +-
 test/Tests/Readers/Org/Inline.hs                |  2 +-
 test/Tests/Readers/Org/Inline/Citation.hs       |  2 +-
 test/Tests/Readers/Org/Inline/Note.hs           |  2 +-
 test/Tests/Readers/Org/Inline/Smart.hs          |  2 +-
 test/Tests/Readers/Org/Meta.hs                  |  2 +-
 test/Tests/Readers/Org/Shared.hs                |  2 +-
 test/Tests/Readers/RST.hs                       |  2 +-
 test/Tests/Readers/Txt2Tags.hs                  |  2 +-
 test/Tests/Shared.hs                            |  2 +-
 trypandoc/trypandoc.hs                          |  2 +-
 175 files changed, 200 insertions(+), 200 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/COPYRIGHT b/COPYRIGHT
index eb916cc54..9992e5680 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -1,5 +1,5 @@
 Pandoc
-Copyright (C) 2006-2020 John MacFarlane <jgm at berkeley dot edu>
+Copyright (C) 2006-2021 John MacFarlane <jgm at berkeley dot edu>
 
 With the exceptions noted below, this code is released under the [GPL],
 version 2 or later:
@@ -37,7 +37,7 @@ The modules in the `pandoc-types` repository (Text.Pandoc.Definition,
 Text.Pandoc.Builder, Text.Pandoc.Generics, Text.Pandoc.JSON,
 Text.Pandoc.Walk) are licensed under the BSD 3-clause license:
 
-Copyright (c) 2006-2020, John MacFarlane
+Copyright (c) 2006-2021, John MacFarlane
 
 All rights reserved.
 
@@ -73,7 +73,7 @@ Pandoc's templates (in `data/templates`) are dual-licensed as either
 GPL (v2 or higher, same as pandoc) or (at your option) the BSD
 3-clause license.
 
-Copyright (c) 2014--2020, John MacFarlane
+Copyright (c) 2014--2021, John MacFarlane
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/Muse.hs
@@ -83,19 +83,19 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/Texinfo.hs
-Copyright (C) 2008-2020 John MacFarlane and Peter Wang
+Copyright (C) 2008-2021 John MacFarlane and Peter Wang
 
 Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/OpenDocument.hs
-Copyright (C) 2008-2020 Andrea Rossato and John MacFarlane
+Copyright (C) 2008-2021 Andrea Rossato and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/Org.hs
-Copyright (C) 2010-2020 Puneeth Chaganti, John MacFarlane, and
+Copyright (C) 2010-2021 Puneeth Chaganti, John MacFarlane, and
                         Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
@@ -115,7 +115,7 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Readers/Textile.hs
-Copyright (C) 2010-2020 Paul Rivier and John MacFarlane
+Copyright (C) 2010-2021 Paul Rivier and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
@@ -133,7 +133,7 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Readers/EPUB.hs
-Copyright (C) 2014-2020 Matthew Pickering and John MacFarlane
+Copyright (C) 2014-2021 Matthew Pickering and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
@@ -141,7 +141,7 @@ Released under the GNU General Public License version 2 or later.
 src/Text/Pandoc/Readers/Org.hs
 src/Text/Pandoc/Readers/Org/*
 test/Tests/Readers/Org/*
-Copyright (C) 2014-2020 Albert Krewinkel
+Copyright (C) 2014-2021 Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
 
@@ -149,7 +149,7 @@ Released under the GNU General Public License version 2 or later.
 src/Text/Pandoc/Lua.hs
 src/Text/Pandoc/Lua/*
 test/lua/*
-Copyright (C) 2017--2020 Albert Krewinkel and John MacFarlane
+Copyright (C) 2017--2021 Albert Krewinkel and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
@@ -157,7 +157,7 @@ Released under the GNU General Public License version 2 or later.
 src/Text/Pandoc/Readers/Jira.hs
 src/Text/Pandoc/Writers/Jira.hs
 test/Tests/Readers/Jira.hs
-Copyright (C) 2019--2020 Albert Krewinkel
+Copyright (C) 2019--2021 Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
 
@@ -169,7 +169,7 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 data/pandoc.lua
-Copyright (C) 2017-2020 Albert Krewinkel
+Copyright (C) 2017-2021 Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
 
@@ -183,7 +183,7 @@ Released under the Do What the Fuck You Want To Public License.
 ------------------------------------------------------------------------
 Pandoc embeds a lua interpreter (via hslua).
 
-Copyright © 1994–2019 Lua.org, PUC-Rio.
+Copyright © 1994–2020 Lua.org, PUC-Rio.
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/MANUAL.txt b/MANUAL.txt
index 831ffb0b0..aa75a32a9 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -6329,7 +6329,7 @@ application, here are some things to keep in mind:
 
 # Authors
 
-Copyright 2006--2020 John MacFarlane (jgm@berkeley.edu). Released
+Copyright 2006--2021 John MacFarlane (jgm@berkeley.edu). Released
 under the [GPL], version 2 or greater.  This software carries no
 warranty of any kind.  (See COPYRIGHT for full copyright and
 warranty notices.) For a full list of contributors, see the file
diff --git a/README.md b/README.md
index aea55b0c4..9317438fc 100644
--- a/README.md
+++ b/README.md
@@ -236,7 +236,7 @@ opening a new issue.
 
 ## License
 
-© 2006-2020 John MacFarlane (jgm@berkeley.edu). Released under the
+© 2006-2021 John MacFarlane (jgm@berkeley.edu). Released under the
 [GPL](http://www.gnu.org/copyleft/gpl.html "GNU General Public License"),
 version 2 or greater. This software carries no warranty of any kind.
 (See COPYRIGHT for full copyright and warranty notices.)
diff --git a/README.template b/README.template
index 5eeafb9e3..53d14b584 100644
--- a/README.template
+++ b/README.template
@@ -81,7 +81,7 @@ new issue.
 License
 -------
 
-© 2006-2020 John MacFarlane (jgm@berkeley.edu). Released under the
+© 2006-2021 John MacFarlane (jgm@berkeley.edu). Released under the
 [GPL], version 2 or greater.  This software carries no warranty of
 any kind.  (See COPYRIGHT for full copyright and warranty notices.)
 
diff --git a/app/pandoc.hs b/app/pandoc.hs
index 9ed3b9e9f..162570f18 100644
--- a/app/pandoc.hs
+++ b/app/pandoc.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Main
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs
index 3e7b663b4..b3e67bb14 100644
--- a/benchmark/benchmark-pandoc.hs
+++ b/benchmark/benchmark-pandoc.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2012-2019 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2021 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
diff --git a/benchmark/weigh-pandoc.hs b/benchmark/weigh-pandoc.hs
index 4ba6feb03..ad4c83ad7 100644
--- a/benchmark/weigh-pandoc.hs
+++ b/benchmark/weigh-pandoc.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Main
-   Copyright   : © 2016-2019 John MacFarlane <jgm@berkeley.edu>
+   Copyright   : © 2016-2021 John MacFarlane <jgm@berkeley.edu>
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/pandoc.cabal b/pandoc.cabal
index cce3c1a58..bb68ea402 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -4,7 +4,7 @@ version:         2.11.3.2
 build-type:      Simple
 license:         GPL-2.0-or-later
 license-file:    COPYING.md
-copyright:       (c) 2006-2020 John MacFarlane
+copyright:       (c) 2006-2021 John MacFarlane
 author:          John MacFarlane <jgm@berkeley.edu>
 maintainer:      John MacFarlane <jgm@berkeley.edu>
 bug-reports:     https://github.com/jgm/pandoc/issues
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 549aeddfb..f09dfd8c7 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index e6d5c93d4..725c76424 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.App
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index 21ee47b7b..307f28b5c 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE FlexibleContexts    #-}
 {- |
    Module      : Text.Pandoc.App.CommandLineOptions
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
@@ -968,7 +968,7 @@ usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]")
 
 copyrightMessage :: String
 copyrightMessage = intercalate "\n" [
- "Copyright (C) 2006-2020 John MacFarlane. Web:  https://pandoc.org",
+ "Copyright (C) 2006-2021 John MacFarlane. Web:  https://pandoc.org",
  "This is free software; see the source for copying conditions. There is no",
  "warranty, not even for merchantability or fitness for a particular purpose." ]
 
diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs
index 155b7e586..17ed30fe9 100644
--- a/src/Text/Pandoc/App/FormatHeuristics.hs
+++ b/src/Text/Pandoc/App/FormatHeuristics.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.App.FormatHeuristics
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index 6dd19758e..0d96ab67c 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -7,7 +7,7 @@
 {-# LANGUAGE FlexibleContexts    #-}
 {- |
    Module      : Text.Pandoc.App.Opt
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 139b408cb..53c7d82ef 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE TupleSections       #-}
 {- |
    Module      : Text.Pandoc.App
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
index 9e9cc8d9b..01a7b624a 100644
--- a/src/Text/Pandoc/Asciify.hs
+++ b/src/Text/Pandoc/Asciify.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Asciify
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
index b41e93125..69824aa57 100644
--- a/src/Text/Pandoc/BCP47.hs
+++ b/src/Text/Pandoc/BCP47.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.BCP47
-   Copyright   : Copyright (C) 2017–2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
index d98c85147..625feadbb 100644
--- a/src/Text/Pandoc/CSS.hs
+++ b/src/Text/Pandoc/CSS.hs
@@ -1,6 +1,6 @@
 {- |
 Module      : Text.Pandoc.CSS
-Copyright   : © 2006-2020 John MacFarlane <jgm@berkeley.edu>,
+Copyright   : © 2006-2021 John MacFarlane <jgm@berkeley.edu>,
                 2015-2016 Mauro Bieg,
                 2015      Ophir Lifshitz <hangfromthefloor@gmail.com>
 License     : GNU GPL, version 2 or above
diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs
index 10812644f..ec212fa9a 100644
--- a/src/Text/Pandoc/CSV.hs
+++ b/src/Text/Pandoc/CSV.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.CSV
-   Copyright   : Copyright (C) 2017–2020 John MacFarlane <jgm@berkeley.edu>
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane <jgm@berkeley.edu>
    License     : GNU GPL, version 2 or above
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Data.hs b/src/Text/Pandoc/Data.hs
index 38682b9f9..55ed3f5bf 100644
--- a/src/Text/Pandoc/Data.hs
+++ b/src/Text/Pandoc/Data.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE TemplateHaskell #-}
 {- |
 Module      : Text.Pandoc.Data
-Copyright   : Copyright (C) 2013-2020 John MacFarlane
+Copyright   : Copyright (C) 2013-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 2c311bb49..204cf15ca 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Error
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 9865f897b..39c2a0489 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Extensions
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs
index 6d4846f98..1209ceeb7 100644
--- a/src/Text/Pandoc/Filter.hs
+++ b/src/Text/Pandoc/Filter.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Filter
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/JSON.hs b/src/Text/Pandoc/Filter/JSON.hs
index 83ec9a97c..d2323fac4 100644
--- a/src/Text/Pandoc/Filter/JSON.hs
+++ b/src/Text/Pandoc/Filter/JSON.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Filter
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs
index a76c8da2f..c238e53d9 100644
--- a/src/Text/Pandoc/Filter/Lua.hs
+++ b/src/Text/Pandoc/Filter/Lua.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Filter.Lua
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/Path.hs b/src/Text/Pandoc/Filter/Path.hs
index 9e5e8fa77..1d16c53b9 100644
--- a/src/Text/Pandoc/Filter/Path.hs
+++ b/src/Text/Pandoc/Filter/Path.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Filter.Path
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index ce8880f84..0bb6ed319 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Highlighting
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Image.hs b/src/Text/Pandoc/Image.hs
index e37de4e00..e0c938938 100644
--- a/src/Text/Pandoc/Image.hs
+++ b/src/Text/Pandoc/Image.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-}
 {- |
 Module      : Text.Pandoc.Image
-Copyright   : Copyright (C) 2020 John MacFarlane
+Copyright   : Copyright (C) 2020-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 9ce5c668d..098c16721 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -3,7 +3,7 @@
 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
 {- |
 Module      : Text.Pandoc.ImageSize
-Copyright   : Copyright (C) 2011-2020 John MacFarlane
+Copyright   : Copyright (C) 2011-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index 825fdaadb..3a3acedc7 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Logging
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs
index 39db0074a..f0e9e076b 100644
--- a/src/Text/Pandoc/Lua.hs
+++ b/src/Text/Pandoc/Lua.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017–2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/ErrorConversion.hs b/src/Text/Pandoc/Lua/ErrorConversion.hs
index 59c962723..4e6880722 100644
--- a/src/Text/Pandoc/Lua/ErrorConversion.hs
+++ b/src/Text/Pandoc/Lua/ErrorConversion.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.ErrorConversion
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
index 94d7adeb2..bffe01a34 100644
--- a/src/Text/Pandoc/Lua/Filter.hs
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -1,8 +1,8 @@
 {-# LANGUAGE FlexibleContexts  #-}
 {- |
 Module      : Text.Pandoc.Lua.Filter
-Copyright   : © 2012–2020 John MacFarlane,
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane,
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Global.hs b/src/Text/Pandoc/Lua/Global.hs
index 4285be662..29b788f04 100644
--- a/src/Text/Pandoc/Lua/Global.hs
+++ b/src/Text/Pandoc/Lua/Global.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE DeriveDataTypeable #-}
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs
index e89e9d6e0..0a5ce85cb 100644
--- a/src/Text/Pandoc/Lua/Init.hs
+++ b/src/Text/Pandoc/Lua/Init.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling.hs b/src/Text/Pandoc/Lua/Marshaling.hs
index 1254402b6..f517c7c27 100644
--- a/src/Text/Pandoc/Lua/Marshaling.hs
+++ b/src/Text/Pandoc/Lua/Marshaling.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/AST.hs b/src/Text/Pandoc/Lua/Marshaling/AST.hs
index c889618c4..6485da661 100644
--- a/src/Text/Pandoc/Lua/Marshaling/AST.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/AST.hs
@@ -2,8 +2,8 @@
 {-# LANGUAGE LambdaCase           #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.AST
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
index c4720aedf..82e26b963 100644
--- a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.AnyValue
-   Copyright   : © 2017-2020 Albert Krewinkel
+   Copyright   : © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
index 636650af3..147197c5d 100644
--- a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE OverloadedStrings    #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.CommonState
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/Context.hs b/src/Text/Pandoc/Lua/Marshaling/Context.hs
index c0e7aef60..606bdcfb2 100644
--- a/src/Text/Pandoc/Lua/Marshaling/Context.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/Context.hs
@@ -1,8 +1,8 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.Context
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/List.hs b/src/Text/Pandoc/Lua/Marshaling/List.hs
index e6614400d..0446302a1 100644
--- a/src/Text/Pandoc/Lua/Marshaling/List.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/List.hs
@@ -4,8 +4,8 @@
 {-# LANGUAGE UndecidableInstances #-}
 {- |
 Module      : Text.Pandoc.Lua.Marshaling.List
-Copyright   : © 2012-2020 John MacFarlane
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
index 2cf5b8893..70bd010a0 100644
--- a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.MediaBag
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
index 74537a1dd..f698704e0 100644
--- a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables  #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.PandocError
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
index 2e45affe4..dd7bf2e61 100644
--- a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
@@ -4,8 +4,8 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.ReaderOptions
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
index 98fa1efa4..6d43039fa 100644
--- a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.SimpleTable
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/Version.hs b/src/Text/Pandoc/Lua/Marshaling/Version.hs
index 9adb1b763..4f4ffac51 100644
--- a/src/Text/Pandoc/Lua/Marshaling/Version.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/Version.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables  #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.Version
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs
index e5a10217a..715e53885 100644
--- a/src/Text/Pandoc/Lua/Module/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.MediaBag
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
index 3886568b7..a9ce3866d 100644
--- a/src/Text/Pandoc/Lua/Module/Pandoc.hs
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.Pandoc
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/System.hs b/src/Text/Pandoc/Lua/Module/System.hs
index 04508e461..bd35babaf 100644
--- a/src/Text/Pandoc/Lua/Module/System.hs
+++ b/src/Text/Pandoc/Lua/Module/System.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Module.System
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Types.hs b/src/Text/Pandoc/Lua/Module/Types.hs
index 999f2e588..bb4f02c3c 100644
--- a/src/Text/Pandoc/Lua/Module/Types.hs
+++ b/src/Text/Pandoc/Lua/Module/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Module.Types
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs
index 7595b9c0f..1b04021a7 100644
--- a/src/Text/Pandoc/Lua/Module/Utils.hs
+++ b/src/Text/Pandoc/Lua/Module/Utils.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.Utils
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs
index 4c3b9d79d..d62fb725d 100644
--- a/src/Text/Pandoc/Lua/Packages.hs
+++ b/src/Text/Pandoc/Lua/Packages.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.Packages
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/PandocLua.hs b/src/Text/Pandoc/Lua/PandocLua.hs
index 6c3b410dd..4beac22b7 100644
--- a/src/Text/Pandoc/Lua/PandocLua.hs
+++ b/src/Text/Pandoc/Lua/PandocLua.hs
@@ -9,7 +9,7 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.PandocLua
-   Copyright   : Copyright © 2020 Albert Krewinkel
+   Copyright   : Copyright © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs
index fbd013801..70a8a6d47 100644
--- a/src/Text/Pandoc/Lua/Util.hs
+++ b/src/Text/Pandoc/Lua/Util.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Util
-   Copyright   : © 2012–2020 John MacFarlane,
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane,
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Walk.hs b/src/Text/Pandoc/Lua/Walk.hs
index 695c7b44e..d6d973496 100644
--- a/src/Text/Pandoc/Lua/Walk.hs
+++ b/src/Text/Pandoc/Lua/Walk.hs
@@ -4,8 +4,8 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {- |
 Module      : Text.Pandoc.Lua.Walk
-Copyright   : © 2012–2020 John MacFarlane,
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane,
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 4fe25ebe1..53c5cd018 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.MIME
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index 26f44cef0..3249bcdeb 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {- |
    Module      : Text.Pandoc.MediaBag
-   Copyright   : Copyright (C) 2014-2015, 2017–2020 John MacFarlane
+   Copyright   : Copyright (C) 2014-2015, 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index ecd65a54d..92bda36b2 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE TemplateHaskell    #-}
 {- |
    Module      : Text.Pandoc.Options
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index c4080a227..7c0082c29 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.PDF
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index fd14341ad..4c4dd531d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -9,7 +9,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
index 866972e3f..b896feb7e 100644
--- a/src/Text/Pandoc/Process.hs
+++ b/src/Text/Pandoc/Process.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Process
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 9a069f7d0..ac70f7d4c 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Readers
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index 6c96ab30a..956b9f1f7 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.BibTeX
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 45f4d88d4..2958d6180 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.CSV
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 9eef498e1..150a837e4 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.CommonMark
-   Copyright   : Copyright (C) 2015-2020 John MacFarlane
+   Copyright   : Copyright (C) 2015-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 377186b1e..30bb19483 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.CslJson
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index b0846e345..d3b2dd4d3 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.DocBook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index 46112af19..bcf26c4a3 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -2,7 +2,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Docx.Combine
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2020 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index a573344ff..f9c9a8e26 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Docx.StyleMaps
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2015 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 8e94a0812..afc7a3e25 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index e28ebe77b..4c069e2c3 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 91639fa4c..6179ea8e7 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Table
-   Copyright   : © 2006-2020 John MacFarlane,
-                   2020 Albert Krewinkel
+   Copyright   : © 2006-2021 John MacFarlane,
+                   2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index 4f82a1831..b7bd40fee 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.TagCategories
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Types.hs b/src/Text/Pandoc/Readers/HTML/Types.hs
index a94eeb828..12c519ad6 100644
--- a/src/Text/Pandoc/Readers/HTML/Types.hs
+++ b/src/Text/Pandoc/Readers/HTML/Types.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Types
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index a866e6ec3..70296bb6b 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.Ipynb
-   Copyright   : Copyright (C) 2019-2020 John MacFarlane
+   Copyright   : Copyright (C) 2019-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 9266ce10d..5a5d0ee1e 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 14a41a911..f49323996 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -8,7 +8,7 @@
 {-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 814b2fe79..5f634818e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Lang
-   Copyright   : Copyright (C) 2018-2020 John MacFarlane
+   Copyright   : Copyright (C) 2018-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index e92ed387c..12a3ba2f6 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ScopedTypeVariables   #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index a017a2afb..f8c214318 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Types
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5888bf095..5c3a21bb7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Readers.Markdown
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index d712b1120..cdb746c67 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.MediaWiki
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index a64b130e5..927291776 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.Metadata
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 42843381a..9c8bc0374 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Native
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 903cdf4a1..5b8996025 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.OPML
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 851aec103..afeb27a87 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index 8f7cac6ea..14233569c 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.BlockStarts
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 17e3ff986..6bd046e04 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Blocks
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index 3b363270c..2dcbecb1d 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE TupleSections     #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.DocumentTree
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index 9399ebd54..401e1bd8f 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.ExportSettings
-   Copyright   : © 2016–2020 Albert Krewinkel
+   Copyright   : © 2016-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index b819a34a3..68c2ba5e0 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Inlines
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 4864d9478..6621822a2 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Meta
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 1e4799e7b..abe8a9ebf 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.ParserState
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index bce71c24d..d33920d47 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Readers.Org.Parsing
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 7f72077a4..ad7c65060 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Shared
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index eeb3d1389..707af905f 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Readers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 6691d8381..4991c6308 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -3,7 +3,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Textile
    Copyright   : Copyright (C) 2010-2012 Paul Rivier
-                               2010-2020 John MacFarlane
+                               2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Paul Rivier <paul*rivier#demotera*com>
diff --git a/src/Text/Pandoc/RoffChar.hs b/src/Text/Pandoc/RoffChar.hs
index 67e8b9cd5..d1c38204f 100644
--- a/src/Text/Pandoc/RoffChar.hs
+++ b/src/Text/Pandoc/RoffChar.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.RoffChar
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 061361aba..c9e20cad0 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE TupleSections     #-}
 {- |
    Module      : Text.Pandoc.SelfContained
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 4853621c8..b908a0172 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -8,7 +8,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Shared
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
index 9ea0389c9..a3e550b1f 100644
--- a/src/Text/Pandoc/Slides.hs
+++ b/src/Text/Pandoc/Slides.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Slides
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 0c10b258d..e83f26329 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Templates
-   Copyright   : Copyright (C) 2009-2020 John MacFarlane
+   Copyright   : Copyright (C) 2009-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs
index 200d756f6..0c7d7ab23 100644
--- a/src/Text/Pandoc/Translations.hs
+++ b/src/Text/Pandoc/Translations.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Translations
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 4621e1765..b583dbbdb 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.UTF8
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
index ca0df2d0b..12579be90 100644
--- a/src/Text/Pandoc/UUID.hs
+++ b/src/Text/Pandoc/UUID.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.UUID
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index 0654c2d85..49531d924 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 0a312d1d1..b4ef7c8b9 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.AsciiDoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index 66ded218f..8733b7149 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Writers.CommonMark
-   Copyright   : Copyright (C) 2015-2020 John MacFarlane
+   Copyright   : Copyright (C) 2015-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 0a6313513..4d44842e2 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.ConTeXt
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
index 08310de65..13b95586b 100644
--- a/src/Text/Pandoc/Writers/CslJson.hs
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.CslJson
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 8da611b61..58c4bb5be 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Writers.Custom
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index affa0de04..a6776608d 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Docbook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 65946ec88..a99e13a85 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -7,7 +7,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.Docx
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docx/StyleMap.hs b/src/Text/Pandoc/Writers/Docx/StyleMap.hs
index c3c54c7e5..04868eaad 100644
--- a/src/Text/Pandoc/Writers/Docx/StyleMap.hs
+++ b/src/Text/Pandoc/Writers/Docx/StyleMap.hs
@@ -2,7 +2,7 @@
 {- |
    Module : Text.Pandoc.Writers.Docx.StyleMap
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2015-2019 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
index 90ec6824f..7df47c912 100644
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.DokuWiki
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Clare Macrae <clare.macrae@googlemail.com>
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index c8f5ff60d..3e3fd8fd6 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Writers.EPUB
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 701ff3d9b..25b1f28d1 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -3,7 +3,7 @@
 {- |
 Module      : Text.Pandoc.Writers.FB2
 Copyright   : Copyright (C) 2011-2012 Sergey Astanin
-                            2012-2020 John MacFarlane
+                            2012-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 0b051c8f0..26df0325e 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.HTML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs
index d01d5a7e5..2613851c5 100644
--- a/src/Text/Pandoc/Writers/Ipynb.hs
+++ b/src/Text/Pandoc/Writers/Ipynb.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Writers.Ipynb
-   Copyright   : Copyright (C) 2019-2020 John MacFarlane
+   Copyright   : Copyright (C) 2019-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index 7058a4557..e8d93b8d5 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns #-}
 {- |
    Module      : Text.Pandoc.Writers.JATS
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs
index a4d42832d..465480f59 100644
--- a/src/Text/Pandoc/Writers/JATS/Table.hs
+++ b/src/Text/Pandoc/Writers/JATS/Table.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE TupleSections #-}
 {- |
    Module      : Text.Pandoc.Writers.JATS.Table
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb@zeitkraut.de>
diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs
index 8162f3bc0..54ed4a8bd 100644
--- a/src/Text/Pandoc/Writers/JATS/Types.hs
+++ b/src/Text/Pandoc/Writers/JATS/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Writers.JATS.Types
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs
index 6bc048a61..c21085a4f 100644
--- a/src/Text/Pandoc/Writers/Jira.hs
+++ b/src/Text/Pandoc/Writers/Jira.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE PatternGuards #-}
 {- |
    Module      : Text.Pandoc.Writers.Jira
-   Copyright   : © 2010-2020 Albert Krewinkel, John MacFarlane
+   Copyright   : © 2010-2021 Albert Krewinkel, John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 2281290c0..df922e17b 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
index 61ca41fb1..7b9ce186f 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Caption.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX.Caption
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/LaTeX/Notes.hs b/src/Text/Pandoc/Writers/LaTeX/Notes.hs
index 216a7bfc3..f225ef0c5 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Notes.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Notes.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX.Notes
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
index 8411d9f80..6f8386937 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX.Table
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 4eb0db042..edb70f53e 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Man
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 1b5c00468..d62727d90 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.Markdown
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index d1912caa6..5029be69f 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.MediaWiki
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index 0fc333bc2..48395c420 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Ms
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
index 4d4dfca15..9c2ce805d 100644
--- a/src/Text/Pandoc/Writers/Native.hs
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Native
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index e41fb7176..05dfad5eb 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.ODT
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
index ac991b594..3ac007f4e 100644
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.OOXML
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
index 810a94775..8c9229fc0 100644
--- a/src/Text/Pandoc/Writers/OPML.hs
+++ b/src/Text/Pandoc/Writers/OPML.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE CPP               #-}
 {- |
    Module      : Text.Pandoc.Writers.OPML
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 2af93017d..43ebf1807 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -3,8 +3,8 @@
 {- |
    Module      : Text.Pandoc.Writers.Org
    Copyright   : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com>
-                   2010-2020 John MacFarlane <jgm@berkeley.edu>
-                   2016-2020 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+                   2010-2021 John MacFarlane <jgm@berkeley.edu>
+                   2016-2021 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 8beeef46a..a3be1d723 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index e3966ed07..cf27011c2 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.RTF
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Roff.hs b/src/Text/Pandoc/Writers/Roff.hs
index 00b027cc9..6af56242f 100644
--- a/src/Text/Pandoc/Writers/Roff.hs
+++ b/src/Text/Pandoc/Writers/Roff.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Roff
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 129e45e9d..fc3f8ff3a 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Shared
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
index a9ee5eece..b926c48a1 100644
--- a/src/Text/Pandoc/Writers/TEI.hs
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE PatternGuards     #-}
 {- |
    Module      : Text.Pandoc.Writers.Docbook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index c6debd9ce..53da70f84 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Texinfo
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
                                2012 Peter Wang
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 61ddb7497..03d030477 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Textile
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
index 902b093d3..9e45f0417 100644
--- a/src/Text/Pandoc/Writers/ZimWiki.hs
+++ b/src/Text/Pandoc/Writers/ZimWiki.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.ZimWiki
-   Copyright   : © 2008-2020 John MacFarlane,
+   Copyright   : © 2008-2021 John MacFarlane,
                    2017-2019 Alex Ivkin
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 4b71d7b69..c4e3ed1e7 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.XML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs
index 522c4b3a1..b3e2a0509 100644
--- a/test/Tests/Command.hs
+++ b/test/Tests/Command.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Command
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/test/Tests/Helpers.hs b/test/Tests/Helpers.hs
index c9ee6d206..21898d10e 100644
--- a/test/Tests/Helpers.hs
+++ b/test/Tests/Helpers.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE FlexibleInstances    #-}
 {- |
    Module      : Tests.Helpers
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs
index 853375327..1dfbbd053 100644
--- a/test/Tests/Lua.hs
+++ b/test/Tests/Lua.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Tests.Lua
-   Copyright   : © 2017-2020 Albert Krewinkel
+   Copyright   : © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Lua/Module.hs b/test/Tests/Lua/Module.hs
index e2570e87f..d88633cf8 100644
--- a/test/Tests/Lua/Module.hs
+++ b/test/Tests/Lua/Module.hs
@@ -1,6 +1,6 @@
 {- |
 Module      : Tests.Lua.Module
-Copyright   : © 2019-2020 Albert Krewinkel
+Copyright   : © 2019-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 
 Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs
index cf0396d0a..638620a36 100644
--- a/test/Tests/Old.hs
+++ b/test/Tests/Old.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Old
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/test/Tests/Readers/Creole.hs b/test/Tests/Readers/Creole.hs
index 15b826460..1fc0e62d7 100644
--- a/test/Tests/Readers/Creole.hs
+++ b/test/Tests/Readers/Creole.hs
@@ -3,7 +3,7 @@
 {- |
    Module      : Tests.Readers.Creole
    Copyright   : © 2017 Sascha Wilde
-                   2017-2020 John MacFarlane
+                   2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Sascha Wilde <wilde@sha-bang.de>
diff --git a/test/Tests/Readers/EPUB.hs b/test/Tests/Readers/EPUB.hs
index 700d6723d..3c75dd08d 100644
--- a/test/Tests/Readers/EPUB.hs
+++ b/test/Tests/Readers/EPUB.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Readers.EPUB
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.eu>
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index e4c681421..578c76860 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.HTML
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs
index 32b8ecb7c..bf78fe1fe 100644
--- a/test/Tests/Readers/Jira.hs
+++ b/test/Tests/Readers/Jira.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Tests.Readers.Jira
-   Copyright   : © 2019-2020 Albert Krewinel
+   Copyright   : © 2019-2021 Albert Krewinel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb@zeitkraut.de>
diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs
index c50c91ca1..2a52ffd18 100644
--- a/test/Tests/Readers/LaTeX.hs
+++ b/test/Tests/Readers/LaTeX.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.LaTeX
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs
index f591aa00d..4f3ab5a28 100644
--- a/test/Tests/Readers/Man.hs
+++ b/test/Tests/Readers/Man.hs
@@ -2,7 +2,7 @@
 {- |
    Module      : Tests.Readers.Man
    Copyright   : © 2018-2019 Yan Pas <yanp.bugz@gmail.com>,
-                   2018-2020 John MacFarlane
+                   2018-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 3af5e2a94..18f909583 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Markdown
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs
index 14062c884..f5e427ba2 100644
--- a/test/Tests/Readers/Odt.hs
+++ b/test/Tests/Readers/Odt.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Odt
-   Copyright   : © 2015-2020 John MacFarlane
+   Copyright   : © 2015-2021 John MacFarlane
                    2015 Martin Linnemann
    License     : GNU GPL, version 2 or above
 
diff --git a/test/Tests/Readers/Org.hs b/test/Tests/Readers/Org.hs
index d4f7bb6dc..290bb603e 100644
--- a/test/Tests/Readers/Org.hs
+++ b/test/Tests/Readers/Org.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Shared
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block.hs b/test/Tests/Readers/Org/Block.hs
index 995bd0316..2ce07c4bb 100644
--- a/test/Tests/Readers/Org/Block.hs
+++ b/test/Tests/Readers/Org/Block.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs
index 2648a6e1f..d40c3bc1d 100644
--- a/test/Tests/Readers/Org/Block/CodeBlock.hs
+++ b/test/Tests/Readers/Org/Block/CodeBlock.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.CodeBlock
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/Figure.hs b/test/Tests/Readers/Org/Block/Figure.hs
index 56ddde9d8..8822f5b03 100644
--- a/test/Tests/Readers/Org/Block/Figure.hs
+++ b/test/Tests/Readers/Org/Block/Figure.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Figure
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/Header.hs b/test/Tests/Readers/Org/Block/Header.hs
index d38d26efb..887055451 100644
--- a/test/Tests/Readers/Org/Block/Header.hs
+++ b/test/Tests/Readers/Org/Block/Header.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Header
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs
index 15373b3b3..ac03c583b 100644
--- a/test/Tests/Readers/Org/Block/List.hs
+++ b/test/Tests/Readers/Org/Block/List.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Header
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/Table.hs b/test/Tests/Readers/Org/Block/Table.hs
index 31c994d3f..cb38fcc12 100644
--- a/test/Tests/Readers/Org/Block/Table.hs
+++ b/test/Tests/Readers/Org/Block/Table.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Table
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Directive.hs b/test/Tests/Readers/Org/Directive.hs
index 727a29658..00cb9762b 100644
--- a/test/Tests/Readers/Org/Directive.hs
+++ b/test/Tests/Readers/Org/Directive.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Directive
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs
index b5d0caa26..13e9fef21 100644
--- a/test/Tests/Readers/Org/Inline.hs
+++ b/test/Tests/Readers/Org/Inline.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline/Citation.hs b/test/Tests/Readers/Org/Inline/Citation.hs
index 792e4559c..87bb3ca75 100644
--- a/test/Tests/Readers/Org/Inline/Citation.hs
+++ b/test/Tests/Readers/Org/Inline/Citation.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Citation
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline/Note.hs b/test/Tests/Readers/Org/Inline/Note.hs
index 5924e69cc..20157d2ae 100644
--- a/test/Tests/Readers/Org/Inline/Note.hs
+++ b/test/Tests/Readers/Org/Inline/Note.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Note
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline/Smart.hs b/test/Tests/Readers/Org/Inline/Smart.hs
index e9309108e..7fde380af 100644
--- a/test/Tests/Readers/Org/Inline/Smart.hs
+++ b/test/Tests/Readers/Org/Inline/Smart.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Smart
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs
index bc167f2a5..b30b8949a 100644
--- a/test/Tests/Readers/Org/Meta.hs
+++ b/test/Tests/Readers/Org/Meta.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Meta
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Shared.hs b/test/Tests/Readers/Org/Shared.hs
index 184adee44..4d0848575 100644
--- a/test/Tests/Readers/Org/Shared.hs
+++ b/test/Tests/Readers/Org/Shared.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Shared
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/RST.hs b/test/Tests/Readers/RST.hs
index bd9897ebc..68241b7f9 100644
--- a/test/Tests/Readers/RST.hs
+++ b/test/Tests/Readers/RST.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Tests.Readers.RST
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Txt2Tags.hs b/test/Tests/Readers/Txt2Tags.hs
index 989b7f673..62f336690 100644
--- a/test/Tests/Readers/Txt2Tags.hs
+++ b/test/Tests/Readers/Txt2Tags.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Txt2Tags
-   Copyright   : © 2014-2020 John MacFarlane,
+   Copyright   : © 2014-2021 John MacFarlane,
                  © 2014 Matthew Pickering
    License     : GNU GPL, version 2 or above
 
diff --git a/test/Tests/Shared.hs b/test/Tests/Shared.hs
index a23edf452..72a59fec0 100644
--- a/test/Tests/Shared.hs
+++ b/test/Tests/Shared.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Shared
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/trypandoc/trypandoc.hs b/trypandoc/trypandoc.hs
index 9383c92ce..9a53aa18c 100644
--- a/trypandoc/trypandoc.hs
+++ b/trypandoc/trypandoc.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Main
-   Copyright   : © 2014-2020 John MacFarlane <jgm@berkeley.edu>
+   Copyright   : © 2014-2021 John MacFarlane <jgm@berkeley.edu>
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
-- 
cgit v1.2.3


From d9322629a36ad50035912ee56df876c345039225 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 12 Feb 2021 17:45:05 -0800
Subject: LaTeX reader improvements.

* Rewrote `withRaw` so it doesn't rely on fragile assumptions
  about token positions (which break when macros are expanded).
  This requires the addition of `sEnableWithRaw` and `sRawTokens`
  in `LaTeXState`, and a new combinator `disablingWithRaw` to
  disable collecting of raw tokens in certain contexts.
* Add `parseFromToks` to T.P.Readers.LaTeX.Parsing.
* Fix parsing of single-character tokens so it doesn't mess
  up the new raw-token collecting.
* These changes slightly increase allocations and have a small
  performance impact, but it's minor.

Closes #7092.
---
 src/Text/Pandoc/Readers/LaTeX.hs         |  6 +--
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 84 +++++++++++++++++++++++++-------
 2 files changed, 68 insertions(+), 22 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 91c71c000..0a66b7f39 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1054,7 +1054,7 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
       symbol '}'
       return res
     parser = do
-      Tok _ Word s <- satisfyTok isWordTok
+      s <- untokenize <$> many1 (satisfyTok isWordTok)
       let (digits, rest) = T.span isDigit s
       unless (T.null rest) $
         Prelude.fail "Non-digits in argument to \\Rn or \\RN"
@@ -2208,9 +2208,7 @@ parseTableRow envname prefsufs = do
         option [] (count 1 amp)
         return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
   rawcells <- mapM celltoks prefsufs
-  oldInput <- getInput
-  cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
-  setInput oldInput
+  cells <- mapM (parseFromToks parseTableCell) rawcells
   spaces
   return $ Row nullAttr cells
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 12a3ba2f6..313aa6c51 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -32,6 +32,8 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , totoks
   , toksToString
   , satisfyTok
+  , parseFromToks
+  , disablingWithRaw
   , doMacros
   , doMacros'
   , setpos
@@ -87,13 +89,15 @@ import Control.Monad.Trans (lift)
 import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
 import Data.Default
 import Data.List (intercalate)
+import qualified Data.IntMap as IntMap
 import qualified Data.Map as M
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop))
+import Text.Pandoc.Error
+         (PandocError (PandocMacroLoop,PandocShouldNeverHappenError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
@@ -153,6 +157,8 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sToggles       :: M.Map Text Bool
                             , sExpanded      :: Bool
                             , sFileContents  :: M.Map Text Text
+                            , sEnableWithRaw :: Bool
+                            , sRawTokens     :: IntMap.IntMap [Tok]
                             }
      deriving Show
 
@@ -179,6 +185,8 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sToggles       = M.empty
                               , sExpanded      = False
                               , sFileContents  = M.empty
+                              , sEnableWithRaw = True
+                              , sRawTokens     = IntMap.empty
                               }
 
 instance PandocMonad m => HasQuoteContext LaTeXState m where
@@ -404,11 +412,31 @@ untoken t = untokenAccum t mempty
 toksToString :: [Tok] -> String
 toksToString = T.unpack . untokenize
 
+parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
+parseFromToks parser toks = do
+  oldInput <- getInput
+  setInput toks
+  result <- disablingWithRaw parser
+  setInput oldInput
+  return result
+
+disablingWithRaw :: PandocMonad m => LP m a -> LP m a
+disablingWithRaw parser = do
+  oldEnableWithRaw <- sEnableWithRaw <$> getState
+  updateState $ \st -> st{ sEnableWithRaw = False }
+  result <- parser
+  updateState $ \st -> st{ sEnableWithRaw = oldEnableWithRaw }
+  return result
+
 satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sExpanded = False }
+    updateState $ \st -> st{ sExpanded = False
+                           , sRawTokens =
+                              if sEnableWithRaw st
+                                 then IntMap.map (res:) $ sRawTokens st
+                                 else sRawTokens st }
     return res
   where matcher t | f t       = Just t
                   | otherwise = Nothing
@@ -594,18 +622,22 @@ isCommentTok _                 = False
 anyTok :: PandocMonad m => LP m Tok
 anyTok = satisfyTok (const True)
 
+singleCharTok :: PandocMonad m => LP m Tok
+singleCharTok =
+  satisfyTok $ \case
+     Tok _ Word  t   -> T.length t == 1
+     Tok _ Symbol t  -> not (T.any (`Set.member` specialChars) t)
+     _               -> False
+
 singleChar :: PandocMonad m => LP m Tok
-singleChar = try $ do
-  Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
-  guard $ not $ toktype == Symbol &&
-                T.any (`Set.member` specialChars) t
-  if T.length t > 1
-     then do
-       let (t1, t2) = (T.take 1 t, T.drop 1 t)
-       inp <- getInput
-       setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
-       return $ Tok pos toktype t1
-     else return $ Tok pos toktype t
+singleChar = singleCharTok <|> singleCharFromWord
+ where
+  singleCharFromWord = do
+    Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok
+    let (t1, t2) = (T.take 1 t, T.drop 1 t)
+    inp <- getInput
+    setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp
+    anyTok
 
 specialChars :: Set.Set Char
 specialChars = Set.fromList "#$%&~_^\\{}"
@@ -725,11 +757,23 @@ ignore raw = do
 
 withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
 withRaw parser = do
-  inp <- getInput
+  rawTokensMap <- sRawTokens <$> getState
+  let key = case IntMap.lookupMax rawTokensMap of
+               Nothing     -> 0
+               Just (n,_)  -> n + 1
+  -- insert empty list at key
+  updateState $ \st -> st{ sRawTokens =
+                             IntMap.insert key [] $ sRawTokens st }
   result <- parser
-  nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok)
-  let raw = takeWhile (\(Tok pos _ _) -> maybe True
-                  (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp
+  mbRevToks <- IntMap.lookup key . sRawTokens <$> getState
+  raw <- case mbRevToks of
+           Just revtoks -> do
+             updateState $ \st -> st{ sRawTokens =
+                                        IntMap.delete key $ sRawTokens st}
+             return $ reverse revtoks
+           Nothing      ->
+             throwError $ PandocShouldNeverHappenError $
+                "sRawTokens has nothing at key " <> T.pack (show key)
   return (result, raw)
 
 keyval :: PandocMonad m => LP m (Text, Text)
@@ -794,7 +838,7 @@ getRawCommand name txt = do
   (_, rawargs) <- withRaw $
       case name of
            "write" -> do
-             void $ satisfyTok isWordTok -- digits
+             void $ many $ satisfyTok isDigitTok -- digits
              void braced
            "titleformat" -> do
              void braced
@@ -809,6 +853,10 @@ getRawCommand name txt = do
                void $ many braced
   return $ txt <> untokenize rawargs
 
+isDigitTok :: Tok -> Bool
+isDigitTok (Tok _ Word t) = T.all isDigit t
+isDigitTok _              = False
+
 skipopts :: PandocMonad m => LP m ()
 skipopts = skipMany (void overlaySpecification <|> void rawopt)
 
-- 
cgit v1.2.3


From 0f955b10b455e9b3d326262d03261f17538a6943 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 18:57:21 -0800
Subject: T.P.Readers.LaTeX.Parsing: improve braced'.

Remove the `Int` nesting-depth parameter, have it parse the opening
brace itself, and make it more efficient.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 313aa6c51..dab4d334e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -680,28 +680,25 @@ grouped parser = try $ do
   -- {{a,b}} should be parsed the same as {a,b}
   try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
 
-braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
-braced' getTok n =
-  handleEgroup <|> handleBgroup <|> handleOther
-  where handleEgroup = do
-          t <- symbol '}'
-          if n == 1
-             then return []
-             else (t:) <$> braced' getTok (n - 1)
-        handleBgroup = do
-          t <- symbol '{'
-          (t:) <$> braced' getTok (n + 1)
-        handleOther = do
-          t <- getTok
-          (t:) <$> braced' getTok n
+braced' :: PandocMonad m => LP m Tok -> LP m [Tok]
+braced' getTok = symbol '{' *> go (1 :: Int)
+ where
+  go n = do
+    t <- getTok
+    case t of
+      Tok _ Symbol "}"
+        | n > 1     -> (t:) <$> go (n - 1)
+        | otherwise -> return []
+      Tok _ Symbol "{" -> (t:) <$> go (n + 1)
+      _ -> (t:) <$> go n
 
 braced :: PandocMonad m => LP m [Tok]
-braced = symbol '{' *> braced' anyTok 1
+braced = braced' anyTok
 
 -- URLs require special handling, because they can contain %
 -- characters.  So we retonenize comments as we go...
 bracedUrl :: PandocMonad m => LP m [Tok]
-bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
+bracedUrl = braced' (retokenizeComment >> anyTok)
 
 -- For handling URLs, which allow literal % characters...
 retokenizeComment :: PandocMonad m => LP m ()
-- 
cgit v1.2.3


From 31b8f60ea82d96b370cf4a765c46c18004ff6fa8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 22:03:29 -0800
Subject: LaTeX reader: avoid macro resolution code if no macros are defined.

---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 35 +++++++++++++++++---------------
 1 file changed, 19 insertions(+), 16 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index dab4d334e..fc8542894 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -453,20 +453,24 @@ doMacros = do
       updateState $ \st -> st{ sExpanded = True }
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
-doMacros' n inp =
-  case inp of
-     Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
-      Tok _ Word name : Tok _ Symbol "}" : ts
-        -> handleMacros n spos name ts
-     Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
-      Tok _ Word name : Tok _ Symbol "}" : ts
-        -> handleMacros n spos ("end" <> name) ts
-     Tok _ (CtrlSeq "expandafter") _ : t : ts
-        -> combineTok t <$> doMacros' n ts
-     Tok spos (CtrlSeq name) _ : ts
-        -> handleMacros n spos name ts
-     _ -> return inp
-   <|> return inp
+doMacros' n inp = do
+  macros <- sMacros <$> getState
+  if M.null macros
+     then return inp
+     else
+       case inp of
+          Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+           Tok _ Word name : Tok _ Symbol "}" : ts
+             -> handleMacros macros n spos name ts
+          Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+           Tok _ Word name : Tok _ Symbol "}" : ts
+             -> handleMacros macros n spos ("end" <> name) ts
+          Tok _ (CtrlSeq "expandafter") _ : t : ts
+             -> combineTok t <$> doMacros' n ts
+          Tok spos (CtrlSeq name) _ : ts
+             -> handleMacros macros n spos name ts
+          _ -> return inp
+        <|> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
@@ -507,10 +511,9 @@ doMacros' n inp =
         Tok spos (CtrlSeq x) (txt <> " ") : acc
     addTok _ _ spos t acc = setpos spos t : acc
 
-    handleMacros n' spos name ts = do
+    handleMacros macros n' spos name ts = do
       when (n' > 20)  -- detect macro expansion loops
         $ throwError $ PandocMacroLoop name
-      macros <- sMacros <$> getState
       case M.lookup name macros of
            Nothing -> mzero
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
-- 
cgit v1.2.3


From cec541e54cd947c8032f9148db18104cd1a48783 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 22:14:31 -0800
Subject: LaTeX reader: Another small improvement to macro handling.

---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index fc8542894..953747d2f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -461,16 +461,15 @@ doMacros' n inp = do
        case inp of
           Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
            Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos name ts
+             -> handleMacros macros n spos name ts <|> return inp
           Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
            Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos ("end" <> name) ts
+             -> handleMacros macros n spos ("end" <> name) ts <|> return inp
           Tok _ (CtrlSeq "expandafter") _ : t : ts
              -> combineTok t <$> doMacros' n ts
           Tok spos (CtrlSeq name) _ : ts
-             -> handleMacros macros n spos name ts
+             -> handleMacros macros n spos name ts <|> return inp
           _ -> return inp
-        <|> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
-- 
cgit v1.2.3


From f43cb5ddcf56ab9387b24ad55c2c30eceb606fad Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Feb 2021 10:26:48 -0800
Subject: LaTeX reader: further performance optimization.

Avoid unnecessary 'doMacros'.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 42 +++++++++++++++-----------------
 1 file changed, 19 insertions(+), 23 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 953747d2f..20311651b 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -446,30 +446,25 @@ satisfyTok f = do
 
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
-  expanded <- sExpanded <$> getState
-  verbatimMode <- sVerbatimMode <$> getState
-  unless (expanded || verbatimMode) $ do
-      getInput >>= doMacros' 1 >>= setInput
-      updateState $ \st -> st{ sExpanded = True }
+  st <- getState
+  unless (sExpanded st || sVerbatimMode st || M.null (sMacros st)) $ do
+    getInput >>= doMacros' 1 >>= setInput
+    updateState $ \s -> s{ sExpanded = True }
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
-doMacros' n inp = do
-  macros <- sMacros <$> getState
-  if M.null macros
-     then return inp
-     else
-       case inp of
-          Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
-           Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos name ts <|> return inp
-          Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
-           Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos ("end" <> name) ts <|> return inp
-          Tok _ (CtrlSeq "expandafter") _ : t : ts
-             -> combineTok t <$> doMacros' n ts
-          Tok spos (CtrlSeq name) _ : ts
-             -> handleMacros macros n spos name ts <|> return inp
-          _ -> return inp
+doMacros' n inp =
+  case inp of
+     Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+      Tok _ Word name : Tok _ Symbol "}" : ts
+        -> handleMacros n spos name ts <|> return inp
+     Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+      Tok _ Word name : Tok _ Symbol "}" : ts
+        -> handleMacros n spos ("end" <> name) ts <|> return inp
+     Tok _ (CtrlSeq "expandafter") _ : t : ts
+        -> combineTok t <$> doMacros' n ts
+     Tok spos (CtrlSeq name) _ : ts
+        -> handleMacros n spos name ts <|> return inp
+     _ -> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
@@ -510,9 +505,10 @@ doMacros' n inp = do
         Tok spos (CtrlSeq x) (txt <> " ") : acc
     addTok _ _ spos t acc = setpos spos t : acc
 
-    handleMacros macros n' spos name ts = do
+    handleMacros n' spos name ts = do
       when (n' > 20)  -- detect macro expansion loops
         $ throwError $ PandocMacroLoop name
+      macros <- sMacros <$> getState
       case M.lookup name macros of
            Nothing -> mzero
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
-- 
cgit v1.2.3


From db4f8823152578d199691f8084fddcc0f04f679b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Feb 2021 11:23:04 -0800
Subject: LaTeX reader: removed sExpanded in state.

This isn't actually needed and checking it doesn't change
anything.

Also remove an unnecessary `doMacros` before `satisfyTok`,
which does it anyway.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 20311651b..0e098d9d9 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -155,7 +155,6 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sLabels        :: M.Map Text [Inline]
                             , sHasChapters   :: Bool
                             , sToggles       :: M.Map Text Bool
-                            , sExpanded      :: Bool
                             , sFileContents  :: M.Map Text Text
                             , sEnableWithRaw :: Bool
                             , sRawTokens     :: IntMap.IntMap [Tok]
@@ -183,7 +182,6 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sLabels        = M.empty
                               , sHasChapters   = False
                               , sToggles       = M.empty
-                              , sExpanded      = False
                               , sFileContents  = M.empty
                               , sEnableWithRaw = True
                               , sRawTokens     = IntMap.empty
@@ -256,7 +254,6 @@ rawLaTeXParser toks retokenize parser valParser = do
        Right toks' -> do
          res <- lift $ runParserT (do when retokenize $ do
                                         -- retokenize, applying macros
-                                        doMacros
                                         ts <- many (satisfyTok (const True))
                                         setInput ts
                                       rawparser)
@@ -432,8 +429,7 @@ satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sExpanded = False
-                           , sRawTokens =
+    updateState $ \st -> st{ sRawTokens =
                               if sEnableWithRaw st
                                  then IntMap.map (res:) $ sRawTokens st
                                  else sRawTokens st }
@@ -447,9 +443,8 @@ satisfyTok f = do
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
   st <- getState
-  unless (sExpanded st || sVerbatimMode st || M.null (sMacros st)) $ do
+  unless (sVerbatimMode st || M.null (sMacros st)) $ do
     getInput >>= doMacros' 1 >>= setInput
-    updateState $ \s -> s{ sExpanded = True }
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
 doMacros' n inp =
-- 
cgit v1.2.3


From 2b37ed9f212b711a6381dc89ff7d3431d7c5d916 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Feb 2021 11:29:38 -0800
Subject: LaTeX reader: further optimizations in satisfyTok.

Benchmarks show 2/3 of the run time and 2/3 of the allocation
of the Feb. 10 benchmarks.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 0e098d9d9..c2e10570d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -429,11 +429,11 @@ satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sRawTokens =
-                              if sEnableWithRaw st
-                                 then IntMap.map (res:) $ sRawTokens st
-                                 else sRawTokens st }
-    return res
+    updateState $ \st ->
+      if sEnableWithRaw st
+         then st{ sRawTokens = IntMap.map (res:) $ sRawTokens st }
+         else st
+    return $! res
   where matcher t | f t       = Just t
                   | otherwise = Nothing
         updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
-- 
cgit v1.2.3


From 925815bb33b462e1a4c19a8e2c617d403dec0ce7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 27 Feb 2021 17:02:44 -0800
Subject: Split off T.P.Readers.LaTeX.Accent.

To help reduce memory demands compiling the main LaTeX reader.
---
 pandoc.cabal                            |  1 +
 src/Text/Pandoc/Readers/LaTeX.hs        | 68 ++++------------------------
 src/Text/Pandoc/Readers/LaTeX/Accent.hs | 78 +++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 60 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Accent.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/pandoc.cabal b/pandoc.cabal
index 83c5c0120..68edb2b64 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -630,6 +630,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Parsing,
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
+                   Text.Pandoc.Readers.LaTeX.Accent,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 7d8dfab0e..51c031f78 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -57,6 +57,7 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
 import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47)
 import Text.Pandoc.Readers.LaTeX.SIunitx
@@ -64,7 +65,6 @@ import Text.Pandoc.Shared
 import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
 import qualified Text.Pandoc.Builder as B
-import qualified Data.Text.Normalize as Normalize
 import Safe
 
 -- for debugging:
@@ -247,9 +247,6 @@ doxspace =
         startsWithLetter _ = False
 
 
-lit :: Text -> LP m Inlines
-lit = pure . str
-
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
   Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
@@ -296,6 +293,9 @@ quoted' f starter ender = do
                               cs   -> cs)
      else lit startchs
 
+lit :: Text -> LP m Inlines
+lit = pure . str
+
 enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
 enquote starred mblang = do
   skipopts
@@ -631,7 +631,10 @@ inlineEnvironments = M.fromList [
   ]
 
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineCommands = M.union inlineLanguageCommands $ M.fromList
+inlineCommands =
+  M.union inlineLanguageCommands $
+  M.union (accentCommands tok) $
+  M.fromList
   [ ("emph", extractSpaces emph <$> tok)
   , ("textit", extractSpaces emph <$> tok)
   , ("textsl", extractSpaces emph <$> tok)
@@ -703,48 +706,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("MakeTextLowercase", makeLowercase <$> tok)
   , ("lowercase", makeLowercase <$> tok)
   , ("/", pure mempty) -- italic correction
-  , ("aa", lit "å")
-  , ("AA", lit "Å")
-  , ("ss", lit "ß")
-  , ("o", lit "ø")
-  , ("O", lit "Ø")
-  , ("L", lit "Ł")
-  , ("l", lit "ł")
-  , ("ae", lit "æ")
-  , ("AE", lit "Æ")
-  , ("oe", lit "œ")
-  , ("OE", lit "Œ")
-  , ("pounds", lit "£")
-  , ("euro", lit "€")
-  , ("copyright", lit "©")
-  , ("textasciicircum", lit "^")
-  , ("textasciitilde", lit "~")
-  , ("H", accent '\779' Nothing) -- hungarumlaut
-  , ("`", accent '\768' (Just '`')) -- grave
-  , ("'", accent '\769' (Just '\'')) -- acute
-  , ("^", accent '\770' (Just '^')) -- circ
-  , ("~", accent '\771' (Just '~')) -- tilde
-  , ("\"", accent '\776' Nothing) -- umlaut
-  , (".", accent '\775' Nothing) -- dot
-  , ("=", accent '\772' Nothing) -- macron
-  , ("|", accent '\781' Nothing) -- vertical line above
-  , ("b", accent '\817' Nothing) -- macron below
-  , ("c", accent '\807' Nothing) -- cedilla
-  , ("G", accent '\783' Nothing) -- doublegrave
-  , ("h", accent '\777' Nothing) -- hookabove
-  , ("d", accent '\803' Nothing) -- dotbelow
-  , ("f", accent '\785' Nothing)  -- inverted breve
-  , ("r", accent '\778' Nothing)  -- ringabove
-  , ("t", accent '\865' Nothing)  -- double inverted breve
-  , ("U", accent '\782' Nothing)  -- double vertical line above
-  , ("v", accent '\780' Nothing) -- hacek
-  , ("u", accent '\774' Nothing) -- breve
-  , ("k", accent '\808' Nothing) -- ogonek
-  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
-  , ("i", lit "ı")  -- dotless i
-  , ("j", lit "ȷ")  -- dotless j
-  , ("newtie", accent '\785' Nothing) -- inverted breve
-  , ("textcircled", accent '\8413' Nothing) -- combining circle
   , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
                             guard $ not inTableCell
                             optional opt
@@ -960,19 +921,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("hyphen", pure (str "-"))
   ]
 
-accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
-accent combiningAccent fallBack = try $ do
-  ils <- tok
-  case toList ils of
-       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-         -- try to normalize to the combined character:
-         Str (Normalize.normalize Normalize.NFC
-               (T.pack [x, combiningAccent]) <> xs) : ys
-       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       _                 -> return ils
-
-
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
   optional opt
diff --git a/src/Text/Pandoc/Readers/LaTeX/Accent.hs b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
new file mode 100644
index 000000000..f8c53491c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
@@ -0,0 +1,78 @@
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Accent
+  ( accentCommands )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Parsing
+import qualified Data.Text as T
+import qualified Data.Text.Normalize as Normalize
+
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+  let accent = accentWith tok
+      lit = pure . str
+  in M.fromList
+  [ ("aa", lit "å")
+  , ("AA", lit "Å")
+  , ("ss", lit "ß")
+  , ("o", lit "ø")
+  , ("O", lit "Ø")
+  , ("L", lit "Ł")
+  , ("l", lit "ł")
+  , ("ae", lit "æ")
+  , ("AE", lit "Æ")
+  , ("oe", lit "œ")
+  , ("OE", lit "Œ")
+  , ("pounds", lit "£")
+  , ("euro", lit "€")
+  , ("copyright", lit "©")
+  , ("textasciicircum", lit "^")
+  , ("textasciitilde", lit "~")
+  , ("H", accent '\779' Nothing) -- hungarumlaut
+  , ("`", accent '\768' (Just '`')) -- grave
+  , ("'", accent '\769' (Just '\'')) -- acute
+  , ("^", accent '\770' (Just '^')) -- circ
+  , ("~", accent '\771' (Just '~')) -- tilde
+  , ("\"", accent '\776' Nothing) -- umlaut
+  , (".", accent '\775' Nothing) -- dot
+  , ("=", accent '\772' Nothing) -- macron
+  , ("|", accent '\781' Nothing) -- vertical line above
+  , ("b", accent '\817' Nothing) -- macron below
+  , ("c", accent '\807' Nothing) -- cedilla
+  , ("G", accent '\783' Nothing) -- doublegrave
+  , ("h", accent '\777' Nothing) -- hookabove
+  , ("d", accent '\803' Nothing) -- dotbelow
+  , ("f", accent '\785' Nothing)  -- inverted breve
+  , ("r", accent '\778' Nothing)  -- ringabove
+  , ("t", accent '\865' Nothing)  -- double inverted breve
+  , ("U", accent '\782' Nothing)  -- double vertical line above
+  , ("v", accent '\780' Nothing) -- hacek
+  , ("u", accent '\774' Nothing) -- breve
+  , ("k", accent '\808' Nothing) -- ogonek
+  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+  , ("i", lit "ı")  -- dotless i
+  , ("j", lit "ȷ")  -- dotless j
+  , ("newtie", accent '\785' Nothing) -- inverted breve
+  , ("textcircled", accent '\8413' Nothing) -- combining circle
+  ]
+
+accentWith :: PandocMonad m
+           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+  ils <- tok
+  case toList ils of
+       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+         -- try to normalize to the combined character:
+         Str (Normalize.normalize Normalize.NFC
+               (T.pack [x, combiningAccent]) <> xs) : ys
+       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
+       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
+       _                 -> return ils
+
-- 
cgit v1.2.3


From 08231f5cdd16e31d38d9d6bf59bc5ca12638b438 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 27 Feb 2021 21:40:56 -0800
Subject: Factor out T.P.Readers.LaTeX.Table.

---
 pandoc.cabal                             |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs         | 368 +-----------------------------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  33 +++
 src/Text/Pandoc/Readers/LaTeX/Table.hs   | 373 +++++++++++++++++++++++++++++++
 4 files changed, 412 insertions(+), 363 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Table.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/pandoc.cabal b/pandoc.cabal
index 68edb2b64..9149c4f8f 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -631,6 +631,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
+                   Text.Pandoc.Readers.LaTeX.Table,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 51c031f78..831c5df05 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE BangPatterns          #-}
 {-# LANGUAGE CPP                   #-}
 {-# LANGUAGE FlexibleInstances     #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
@@ -32,7 +31,6 @@ import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isDigit, isLetter, toUpper, chr)
 import Data.Default
-import Data.Functor (($>))
 import Data.List (intercalate)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, maybeToList)
@@ -58,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
+import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47)
 import Text.Pandoc.Readers.LaTeX.SIunitx
@@ -551,12 +550,8 @@ inlineCommand' = try $ do
          <|> ignore rawcommand
   lookupListDefault raw names inlineCommands
 
-
 tok :: PandocMonad m => LP m Inlines
-tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
-  where singleChar' = do
-          Tok _ _ t <- singleChar
-          return $ str t
+tok = tokWith inline
 
 opt :: PandocMonad m => LP m Inlines
 opt = do
@@ -1118,12 +1113,6 @@ treatAsInline = Set.fromList
   , "pagebreak"
   ]
 
-label :: PandocMonad m => LP m ()
-label = do
-  controlSeq "label"
-  t <- braced
-  updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
-
 dolabel :: PandocMonad m => LP m Inlines
 dolabel = do
   v <- braced
@@ -1421,13 +1410,6 @@ bracketedNum = do
        Just i -> return i
        _      -> return 0
 
-setCaption :: PandocMonad m => LP m ()
-setCaption = try $ do
-  skipopts
-  ils <- tok
-  optional $ try $ spaces *> label
-  updateState $ \st -> st{ sCaption = Just ils }
-
 looseItem :: PandocMonad m => LP m Blocks
 looseItem = do
   inListItem <- sInListItem <$> getState
@@ -1441,10 +1423,6 @@ epigraph = do
   p2 <- grouped block
   return $ divWith ("", ["epigraph"], []) (p1 <> p2)
 
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ sCaption   = Nothing
-                                      , sLastLabel = Nothing }
-
 section :: PandocMonad m => Attr -> Int -> LP m Blocks
 section (ident, classes, kvs) lvl = do
   skipopts
@@ -1585,7 +1563,7 @@ blockCommands = M.fromList
    , ("item", looseItem)
    , ("documentclass", skipopts *> braced *> preamble)
    , ("centerline", para . trimInlines <$> (skipopts *> tok))
-   , ("caption", mempty <$ setCaption)
+   , ("caption", mempty <$ setCaption inline)
    , ("bibliography", mempty <$ (skipopts *> braced >>=
          addMeta "bibliography" . splitBibs . untokenize))
    , ("addbibresource", mempty <$ (skipopts *> braced >>=
@@ -1623,7 +1601,8 @@ blockCommands = M.fromList
 
 
 environments :: PandocMonad m => M.Map Text (LP m Blocks)
-environments = M.fromList
+environments = M.union (tableEnvironments blocks inline) $
+   M.fromList
    [ ("document", env "document" blocks <* skipMany anyTok)
    , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
    , ("sloppypar", env "sloppypar" blocks)
@@ -1633,13 +1612,6 @@ environments = M.fromList
    , ("figure", env "figure" $ skipopts *> figure)
    , ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
    , ("center", divWith ("", ["center"], []) <$> env "center" blocks)
-   , ("longtable",  env "longtable" $
-          resetCaption *> simpTable "longtable" False >>= addTableCaption)
-   , ("table",  env "table" $
-          skipopts *> resetCaption *> blocks >>= addTableCaption)
-   , ("tabular*", env "tabular*" $ simpTable "tabular*" True)
-   , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
-   , ("tabular", env "tabular"  $ simpTable "tabular" False)
    , ("quote", blockQuote <$> env "quote" blocks)
    , ("quotation", blockQuote <$> env "quotation" blocks)
    , ("verse", blockQuote <$> env "verse" blocks)
@@ -1805,9 +1777,6 @@ italicize (Para ils) = Para [Emph ils]
 italicize (Plain ils) = Plain [Emph ils]
 italicize x = x
 
-env :: PandocMonad m => Text -> LP m a -> LP m a
-env name p = p <* end_ name
-
 rawEnv :: PandocMonad m => Text -> LP m Blocks
 rawEnv name = do
   exts <- getOption readerExtensions
@@ -2045,333 +2014,6 @@ orderedList' = try $ do
   bs <- listenv "enumerate" (many item)
   return $ orderedListWith (start, style, delim) bs
 
--- tables
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
-  spaces
-  controlSeq "hline" <|>
-    -- booktabs rules:
-    controlSeq "toprule" <|>
-    controlSeq "bottomrule" <|>
-    controlSeq "midrule" <|>
-    controlSeq "endhead" <|>
-    controlSeq "endfirsthead"
-  spaces
-  optional opt
-  return ()
-
-lbreak :: PandocMonad m => LP m Tok
-lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
-         <* skipopts <* spaces
-
-amp :: PandocMonad m => LP m Tok
-amp = symbol '&'
-
--- Split a Word into individual Symbols (for parseAligns)
-splitWordTok :: PandocMonad m => LP m ()
-splitWordTok = do
-  inp <- getInput
-  case inp of
-       (Tok spos Word t : rest) ->
-         setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
-       _ -> return ()
-
-parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
-parseAligns = try $ do
-  let maybeBar = skipMany
-        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
-  let cAlign = AlignCenter <$ symbol 'c'
-  let lAlign = AlignLeft <$ symbol 'l'
-  let rAlign = AlignRight <$ symbol 'r'
-  let parAlign = AlignLeft <$ symbol 'p'
-  -- aligns from tabularx
-  let xAlign = AlignLeft <$ symbol 'X'
-  let mAlign = AlignLeft <$ symbol 'm'
-  let bAlign = AlignLeft <$ symbol 'b'
-  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
-                                 <|> xAlign <|> mAlign <|> bAlign )
-  let alignPrefix = symbol '>' >> braced
-  let alignSuffix = symbol '<' >> braced
-  let colWidth = try $ do
-        symbol '{'
-        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
-        spaces
-        symbol '}'
-        return $ safeRead ds
-  let alignSpec = do
-        pref <- option [] alignPrefix
-        spaces
-        al <- alignChar
-        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
-                                                 pos <- getPosition
-                                                 report $ SkippedContent s pos
-                                                 return Nothing)
-        spaces
-        suff <- option [] alignSuffix
-        return (al, width, (pref, suff))
-  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
-        symbol '*'
-        spaces
-        ds <- trim . untokenize <$> braced
-        spaces
-        spec <- braced
-        case safeRead ds of
-             Just n  ->
-               getInput >>= setInput . (mconcat (replicate n spec) ++)
-             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
-  bgroup
-  spaces
-  maybeBar
-  aligns' <- many $ try $ spaces >> optional starAlign >>
-                            (alignSpec <* maybeBar)
-  spaces
-  egroup
-  spaces
-  return $ map toSpec aligns'
-  where
-    toColWidth (Just w) | w > 0 = ColWidth w
-    toColWidth _                = ColWidthDefault
-    toSpec (x, y, z) = (x, toColWidth y, z)
-
--- N.B. this parser returns a Row that may have erroneous empty cells
--- in it. See the note above fixTableHead for details.
-parseTableRow :: PandocMonad m
-              => Text   -- ^ table environment name
-              -> [([Tok], [Tok])] -- ^ pref/suffixes
-              -> LP m Row
-parseTableRow envname prefsufs = do
-  notFollowedBy (spaces *> end_ envname)
-  -- add prefixes and suffixes in token stream:
-  let celltoks (pref, suff) = do
-        prefpos <- getPosition
-        contents <- mconcat <$>
-            many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711
-                  <|>
-                   snd <$> withRaw (inlineEnvironment <|> dollarsMath)
-                  <|>
-                   (do notFollowedBy
-                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
-                       count 1 anyTok) )
-
-        suffpos <- getPosition
-        option [] (count 1 amp)
-        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
-  rawcells <- mapM celltoks prefsufs
-  cells <- mapM (parseFromToks parseTableCell) rawcells
-  spaces
-  return $ Row nullAttr cells
-
-parseTableCell :: PandocMonad m => LP m Cell
-parseTableCell = do
-  spaces
-  updateState $ \st -> st{ sInTableCell = True }
-  cell' <-   multicolumnCell
-         <|> multirowCell
-         <|> parseSimpleCell
-         <|> parseEmptyCell
-  updateState $ \st -> st{ sInTableCell = False }
-  spaces
-  return cell'
-  where
-    -- The parsing of empty cells is important in LaTeX, especially when dealing
-    -- with multirow/multicolumn. See #6603.
-    parseEmptyCell = spaces $> emptyCell
-
-cellAlignment :: PandocMonad m => LP m Alignment
-cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
-  where
-    alignment = do
-      c <- untoken <$> singleChar
-      return $ case c of
-        "l" -> AlignLeft
-        "r" -> AlignRight
-        "c" -> AlignCenter
-        "*" -> AlignDefault
-        _   -> AlignDefault
-
-plainify :: Blocks -> Blocks
-plainify bs = case toList bs of
-                [Para ils] -> plain (fromList ils)
-                _          -> bs
-
-multirowCell :: PandocMonad m => LP m Cell
-multirowCell = controlSeq "multirow" >> do
-  -- Full prototype for \multirow macro is:
-  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
-  -- However, everything except `nrows` and `text` make
-  -- sense in the context of the Pandoc AST
-  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
-  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
-  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
-  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
-  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
-  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
-  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
-
-multicolumnCell :: PandocMonad m => LP m Cell
-multicolumnCell = controlSeq "multicolumn" >> do
-  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
-  alignment <- symbol '{' *> cellAlignment <* symbol '}'
-
-  let singleCell = do
-        content <- plainify <$> blocks
-        return $ cell alignment (RowSpan 1) (ColSpan span') content
-
-  -- Two possible contents: either a \multirow cell, or content.
-  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
-  -- Note that a \multirow cell can be nested in a \multicolumn,
-  -- but not the other way around. See #6603
-  let nestedCell = do
-        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell
-        return $ cell
-                  alignment
-                  (RowSpan rs)
-                  (ColSpan span')
-                  (fromList bs)
-
-  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
-
--- Parse a simple cell, i.e. not multirow/multicol
-parseSimpleCell :: PandocMonad m => LP m Cell
-parseSimpleCell = simpleCell <$> (plainify <$> blocks)
-
--- LaTeX tables are stored with empty cells underneath multirow cells
--- denoting the grid spaces taken up by them. More specifically, if a
--- cell spans m rows, then it will overwrite all the cells in the
--- columns it spans for (m-1) rows underneath it, requiring padding
--- cells in these places. These padding cells need to be removed for
--- proper table reading. See #6603.
---
--- These fixTable functions do not otherwise fix up malformed
--- input tables: that is left to the table builder.
-fixTableHead :: TableHead -> TableHead
-fixTableHead (TableHead attr rows) = TableHead attr rows'
-  where
-    rows' = fixTableRows rows
-
-fixTableBody :: TableBody -> TableBody
-fixTableBody (TableBody attr rhc th tb)
-  = TableBody attr rhc th' tb'
-  where
-    th' = fixTableRows th
-    tb' = fixTableRows tb
-
-fixTableRows :: [Row] -> [Row]
-fixTableRows = fixTableRows' $ repeat Nothing
-  where
-    fixTableRows' oldHang (Row attr cells : rs)
-      = let (newHang, cells') = fixTableRow oldHang cells
-            rs'               = fixTableRows' newHang rs
-        in Row attr cells' : rs'
-    fixTableRows' _ [] = []
-
--- The overhang is represented as Just (relative cell dimensions) or
--- Nothing for an empty grid space.
-fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
-fixTableRow oldHang cells
-  -- If there's overhang, drop cells until their total width meets the
-  -- width of the occupied grid spaces (or we run out)
-  | (n, prefHang, restHang) <- splitHang oldHang
-  , n > 0
-  = let cells' = dropToWidth getCellW n cells
-        (restHang', cells'') = fixTableRow restHang cells'
-    in (prefHang restHang', cells'')
-  -- Otherwise record the overhang of a pending cell and fix the rest
-  -- of the row
-  | c@(Cell _ _ h w _):cells' <- cells
-  = let h' = max 1 h
-        w' = max 1 w
-        oldHang' = dropToWidth getHangW w' oldHang
-        (newHang, cells'') = fixTableRow oldHang' cells'
-    in (toHang w' h' <> newHang, c : cells'')
-  | otherwise
-  = (oldHang, [])
-  where
-    getCellW (Cell _ _ _ w _) = w
-    getHangW = maybe 1 fst
-    getCS (ColSpan n) = n
-
-    toHang c r
-      | r > 1     = [Just (c, r)]
-      | otherwise = replicate (getCS c) Nothing
-
-    -- Take the prefix of the overhang list representing filled grid
-    -- spaces. Also return the remainder and the length of this prefix.
-    splitHang = splitHang' 0 id
-
-    splitHang' !n l (Just (c, r):xs)
-      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
-    splitHang' n l xs = (n, l, xs)
-
-    -- Drop list items until the total width of the dropped items
-    -- exceeds the passed width.
-    dropToWidth _     n l | n < 1 = l
-    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
-    dropToWidth _     _ []        = []
-
-simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
-simpTable envname hasWidthParameter = try $ do
-  when hasWidthParameter $ () <$ (spaces >> tok)
-  skipopts
-  colspecs <- parseAligns
-  let (aligns, widths, prefsufs) = unzip3 colspecs
-  optional $ controlSeq "caption" *> setCaption
-  spaces
-  optional label
-  spaces
-  optional lbreak
-  spaces
-  skipMany hline
-  spaces
-  header' <- option [] . try . fmap (:[]) $
-             parseTableRow envname prefsufs <* lbreak <* many1 hline
-  spaces
-  rows <- sepEndBy (parseTableRow envname prefsufs)
-                    (lbreak <* optional (skipMany hline))
-  spaces
-  optional $ controlSeq "caption" *> setCaption
-  spaces
-  optional label
-  spaces
-  optional lbreak
-  spaces
-  lookAhead $ controlSeq "end" -- make sure we're at end
-  let th  = fixTableHead $ TableHead nullAttr header'
-  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
-  let tf  = TableFoot nullAttr []
-  return $ table emptyCaption (zip aligns widths) th tbs tf
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
-  where go (Table attr c spec th tb tf) = do
-          st <- getState
-          let mblabel = sLastLabel st
-          capt <- case (sCaption st, mblabel) of
-                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils)
-                   (Just ils, Just lab) -> do
-                     num <- getNextNumber sLastTableNum
-                     setState
-                       st{ sLastTableNum = num
-                         , sLabels = M.insert lab
-                                    [Str (renderDottedNum num)]
-                                    (sLabels st) }
-                     return $ caption Nothing (plain ils) -- add number??
-                   (Nothing, _)  -> return c
-          let attr' = case (attr, mblabel) of
-                        ((_,classes,kvs), Just ident) ->
-                           (ident,classes,kvs)
-                        _ -> attr
-          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
-        go x = return x
-
--- TODO: For now we add a Div to contain table attributes, since
--- most writers don't do anything yet with attributes on Table.
--- This can be removed when that changes.
-addAttrDiv :: Attr -> Block -> Block
-addAttrDiv ("",[],[]) b = b
-addAttrDiv attr b       = Div attr [b]
-
 block :: PandocMonad m => LP m Blocks
 block = do
   res <- (mempty <$ spaces1)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index c2e10570d..4a9fa03ad 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -54,6 +54,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , comment
   , anyTok
   , singleChar
+  , tokWith
   , specialChars
   , endline
   , blankline
@@ -80,6 +81,10 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawopt
   , overlaySpecification
   , getNextNumber
+  , label
+  , setCaption
+  , resetCaption
+  , env
   ) where
 
 import Control.Applicative (many, (<|>))
@@ -914,3 +919,31 @@ getNextNumber getCurrentNum = do
                Just n  -> [n, 1]
                Nothing -> [1]
 
+label :: PandocMonad m => LP m ()
+label = do
+  controlSeq "label"
+  t <- braced
+  updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
+
+setCaption :: PandocMonad m => LP m Inlines -> LP m ()
+setCaption inline = try $ do
+  skipopts
+  ils <- tokWith inline
+  optional $ try $ spaces *> label
+  updateState $ \st -> st{ sCaption = Just ils }
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption   = Nothing
+                                      , sLastLabel = Nothing }
+
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
+
+tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
+tokWith inlineParser = try $ spaces >>
+                                 grouped inlineParser
+                            <|> (lookAhead anyControlSeq >> inlineParser)
+                            <|> singleChar'
+  where singleChar' = do
+          Tok _ _ t <- singleChar
+          return $ str t
diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
new file mode 100644
index 000000000..2ea9caf58
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -0,0 +1,373 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Table
+  ( tableEnvironments )
+where
+
+import Data.Functor (($>))
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import qualified Data.Text as T
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (when, void)
+import Text.Pandoc.Shared (safeRead, trim)
+import Text.Pandoc.Logging (LogMessage(SkippedContent))
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+
+-- | Parsers for the table-like environments, keyed by environment name.
+-- The block and inline parsers are passed in by the caller.
+tableEnvironments :: PandocMonad m
+                  => LP m Blocks
+                  -> LP m Inlines
+                  -> M.Map Text (LP m Blocks)
+tableEnvironments blocks inline = M.fromList
+  [ ("longtable", env "longtable" $ captioned (tabularBody "longtable" False))
+  , ("table",     env "table" $ skipopts *> captioned blocks)
+  , plainTabular "tabular*" True
+  , plainTabular "tabularx" True
+  , plainTabular "tabular" False
+  ]
+  where
+    -- the table body parser for a given environment name
+    tabularBody name = simpTable blocks inline name
+    -- clear any stale caption/label, run the parser, then attach
+    -- whatever caption was recorded while it ran
+    captioned p = (resetCaption *> p) >>= addTableCaption
+    -- a tabular-style environment with no caption handling
+    plainTabular name hasWidth = (name, env name (tabularBody name hasWidth))
+
+-- | Skip one horizontal-rule style command (with surrounding spaces and
+-- an optional bracketed argument).
+hline :: PandocMonad m => LP m ()
+hline = try $ spaces *> ruleCommand *> spaces *> void (optional rawopt)
+  where
+    ruleCommand =
+          controlSeq "hline"
+      -- booktabs rules:
+      <|> controlSeq "toprule"
+      <|> controlSeq "bottomrule"
+      <|> controlSeq "midrule"
+      -- header terminators:
+      <|> controlSeq "endhead"
+      <|> controlSeq "endfirsthead"
+
+-- | Match a row terminator (@\\\\@ or @\\tabularnewline@), skipping any
+-- options and spaces after it. Returns the control-sequence token.
+lbreak :: PandocMonad m => LP m Tok
+lbreak = do
+  breakTok <- controlSeq "\\" <|> controlSeq "tabularnewline"
+  skipopts
+  spaces
+  return breakTok
+
+-- | Match the cell separator @&@.
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
+
+-- If the next token is a Word, replace it in the input stream by one
+-- Symbol token per character (all carrying the Word's position), so that
+-- parseAligns can read alignment letters one at a time. Any other input
+-- is left untouched.
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = getInput >>= explode
+  where
+    explode (Tok spos Word t : rest) =
+      setInput ([Tok spos Symbol (T.singleton c) | c <- T.unpack t] <> rest)
+    explode _ = return ()
+
+-- | Parse the braced column specification of a tabular-like environment.
+-- For every column this yields its alignment, its width, and the token
+-- lists given as @>{...}@ prefix and @<{...}@ suffix for that column.
+parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
+parseAligns = try $ do
+  -- vertical bars and @{...} inter-column material are skipped
+  let maybeBar = skipMany
+        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
+  let cAlign = AlignCenter <$ symbol 'c'
+  let lAlign = AlignLeft <$ symbol 'l'
+  let rAlign = AlignRight <$ symbol 'r'
+  let parAlign = AlignLeft <$ symbol 'p'
+  -- aligns from tabularx
+  let xAlign = AlignLeft <$ symbol 'X'
+  let mAlign = AlignLeft <$ symbol 'm'
+  let bAlign = AlignLeft <$ symbol 'b'
+  -- a Word token such as "lcr" is first split into single-character
+  -- Symbols so that each letter can be matched on its own
+  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
+                                 <|> xAlign <|> mAlign <|> bAlign )
+  let alignPrefix = symbol '>' >> braced
+  let alignSuffix = symbol '<' >> braced
+  -- a width of the form {<number>\linewidth}; the text before
+  -- \linewidth is read as a number (Nothing if it does not parse)
+  let colWidth = try $ do
+        symbol '{'
+        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
+        spaces
+        symbol '}'
+        return $ safeRead ds
+  let alignSpec = do
+        pref <- option [] alignPrefix
+        spaces
+        al <- alignChar
+        -- any other braced argument is consumed, reported as skipped
+        -- content, and treated as "no width"
+        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
+                                                 pos <- getPosition
+                                                 report $ SkippedContent s pos
+                                                 return Nothing)
+        spaces
+        suff <- option [] alignSuffix
+        return (al, width, (pref, suff))
+  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
+        symbol '*'
+        spaces
+        ds <- trim . untokenize <$> braced
+        spaces
+        spec <- braced
+        case safeRead ds of
+             Just n  ->
+               -- push n copies of the spec back onto the input stream
+               getInput >>= setInput . (mconcat (replicate n spec) ++)
+             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
+  bgroup
+  spaces
+  maybeBar
+  aligns' <- many $ try $ spaces >> optional starAlign >>
+                            (alignSpec <* maybeBar)
+  spaces
+  egroup
+  spaces
+  return $ map toSpec aligns'
+  where
+    -- only strictly positive widths are kept; everything else becomes
+    -- the default column width
+    toColWidth (Just w) | w > 0 = ColWidth w
+    toColWidth _                = ColWidthDefault
+    toSpec (x, y, z) = (x, toColWidth y, z)
+
+-- | Parse one table row: the raw tokens of each cell are collected
+-- first (one cell per entry of the prefix/suffix list), then each token
+-- list is parsed as a cell with 'parseTableCell'.
+--
+-- N.B. this parser returns a Row that may have erroneous empty cells
+-- in it. See the note above fixTableHead for details.
+parseTableRow :: PandocMonad m
+              => LP m Blocks -- ^ block parser
+              -> LP m Inlines -- ^ inline parser
+              -> Text   -- ^ table environment name
+              -> [([Tok], [Tok])] -- ^ pref/suffixes
+              -> LP m Row
+parseTableRow blocks inline envname prefsufs = do
+  -- there is no row to parse if the environment ends here
+  notFollowedBy (spaces *> end_ envname)
+  -- add prefixes and suffixes in token stream:
+  let celltoks (pref, suff) = do
+        prefpos <- getPosition
+        -- \parbox, \begin... and $...$ are run through the block/inline
+        -- parser and only their raw tokens are kept (presumably so that
+        -- separators inside them do not end the cell -- confirm);
+        -- otherwise tokens are taken one at a time until a cell
+        -- separator, a row terminator, or the end of the environment
+        contents <- mconcat <$>
+            many ( snd <$> withRaw
+                     ((lookAhead (controlSeq "parbox") >>
+                       void blocks) -- #5711
+                      <|>
+                      (lookAhead (controlSeq "begin") >> void inline)
+                      <|>
+                      (lookAhead (symbol '$') >> void inline))
+                  <|>
+                   (do notFollowedBy
+                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
+                       count 1 anyTok) )
+
+        suffpos <- getPosition
+        -- consume the separator that ended this cell, if there is one
+        option [] (count 1 amp)
+        -- prefix/suffix tokens are given the positions at which the
+        -- cell contents started and ended
+        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
+  rawcells <- mapM celltoks prefsufs
+  cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
+  spaces
+  return $ Row nullAttr cells
+
+-- | Parse the contents of a single table cell with the given block
+-- parser. The cell is tried, in order, as a @\\multicolumn@ cell, a
+-- @\\multirow@ cell, ordinary block content, and finally an empty cell.
+-- 'sInTableCell' is set while the cell is being parsed.
+parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
+parseTableCell blocks = do
+  spaces
+  -- Remember the enclosing value: when a table is nested inside a cell,
+  -- finishing an inner cell must not clear the flag for the remainder
+  -- of the outer cell.
+  wasInTableCell <- sInTableCell <$> getState
+  updateState $ \st -> st{ sInTableCell = True }
+  cell' <-   multicolumnCell blocks
+         <|> multirowCell blocks
+         <|> parseSimpleCell
+         <|> parseEmptyCell
+  updateState $ \st -> st{ sInTableCell = wasInTableCell }
+  spaces
+  return cell'
+  where
+    -- The parsing of empty cells is important in LaTeX, especially when dealing
+    -- with multirow/multicolumn. See #6603.
+    parseEmptyCell = spaces $> emptyCell
+    -- a lone paragraph is reduced to plain text (see plainify)
+    parseSimpleCell = simpleCell <$> (plainify <$> blocks)
+
+
+-- | Parse a single-character alignment specifier, ignoring any vertical
+-- bars around it. @l@, @r@ and @c@ map to the corresponding alignment;
+-- every other character (including @*@) gives the default alignment.
+cellAlignment :: PandocMonad m => LP m Alignment
+cellAlignment = bars *> (toAlignment . untoken <$> singleChar) <* bars
+  where
+    bars = skipMany (symbol '|')
+    toAlignment c
+      | c == "l"  = AlignLeft
+      | c == "r"  = AlignRight
+      | c == "c"  = AlignCenter
+      | otherwise = AlignDefault
+
+-- | Turn content consisting of exactly one paragraph into plain text;
+-- anything else is returned unchanged.
+plainify :: Blocks -> Blocks
+plainify bs
+  | [Para ils] <- toList bs = plain (fromList ils)
+  | otherwise               = bs
+
+-- | Parse a @\\multirow@ cell. The result has default alignment, the
+-- parsed row span (1 if the number cannot be read), and a column span
+-- of 1.
+multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multirowCell blocks = controlSeq "multirow" >> do
+  -- Full prototype for \multirow macro is:
+  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
+  -- However, only `nrows` and `text` make sense in the context of
+  -- the Pandoc AST; the other arguments are parsed and discarded.
+  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
+  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
+  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
+  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
+  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
+  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
+
+-- | Parse a @\\multicolumn{ncols}{align}{content}@ cell. The column span
+-- falls back to 1 if the number cannot be read. The content may itself
+-- be a @\\multirow@ cell, in which case its row span is kept.
+multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multicolumnCell blocks = controlSeq "multicolumn" >> do
+  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+  alignment <- symbol '{' *> cellAlignment <* symbol '}'
+
+  let singleCell = do
+        content <- plainify <$> blocks
+        return $ cell alignment (RowSpan 1) (ColSpan span') content
+
+  -- Two possible contents: either a \multirow cell, or content.
+  -- E.g. \multicolumn{1}{c}{\multirow{2}{1em}{content}}
+  -- Note that a \multirow cell can be nested in a \multicolumn,
+  -- but not the other way around. See #6603
+  let nestedCell = do
+        -- only the row span and the body of the inner cell are reused;
+        -- alignment and column span come from the \multicolumn
+        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
+        return $ cell
+                  alignment
+                  (RowSpan rs)
+                  (ColSpan span')
+                  (fromList bs)
+
+  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
+
+-- In LaTeX source, a multirow cell is followed by empty placeholder
+-- cells in the rows below it: a cell spanning m rows occupies its
+-- columns for the next (m-1) rows, and the source has to pad those
+-- grid positions. The padding cells must be dropped to read the table
+-- properly. See #6603.
+--
+-- The fixTable functions do nothing else to repair malformed input
+-- tables: that is left to the table builder.
+fixTableHead :: TableHead -> TableHead
+fixTableHead (TableHead attr rows) = TableHead attr (fixTableRows rows)
+
+-- | Remove multirow padding cells from a table body. The intermediate
+-- head rows and the body rows are fixed independently of each other.
+fixTableBody :: TableBody -> TableBody
+fixTableBody (TableBody attr rhc th tb) =
+  TableBody attr rhc (fixTableRows th) (fixTableRows tb)
+
+-- | Remove multirow padding cells from a list of rows, threading the
+-- overhang left by each row into the next one. The first row starts
+-- with no overhang at all.
+fixTableRows :: [Row] -> [Row]
+fixTableRows = go (repeat Nothing)
+  where
+    go _ [] = []
+    go hang (Row attr cells : rest) =
+      Row attr kept : go hang' rest
+      where
+        (hang', kept) = fixTableRow hang cells
+
+-- | Fix a single row given the overhang from the rows above it.
+-- Returns the overhang to pass to the next row together with the cells
+-- that remain after the padding cells have been dropped.
+--
+-- The overhang is represented as Just (relative cell dimensions) or
+-- Nothing for an empty grid space.
+fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
+fixTableRow oldHang cells
+  -- If there's overhang, drop cells until their total width meets the
+  -- width of the occupied grid spaces (or we run out)
+  | (n, prefHang, restHang) <- splitHang oldHang
+  , n > 0
+  = let cells' = dropToWidth getCellW n cells
+        (restHang', cells'') = fixTableRow restHang cells'
+    in (prefHang restHang', cells'')
+  -- Otherwise record the overhang of a pending cell and fix the rest
+  -- of the row
+  | c@(Cell _ _ h w _):cells' <- cells
+  = let h' = max 1 h
+        w' = max 1 w
+        oldHang' = dropToWidth getHangW w' oldHang
+        (newHang, cells'') = fixTableRow oldHang' cells'
+    in (toHang w' h' <> newHang, c : cells'')
+  -- No cells left: the remaining overhang is passed on unchanged
+  | otherwise
+  = (oldHang, [])
+  where
+    getCellW (Cell _ _ _ w _) = w
+    -- an empty grid space counts as one column wide
+    getHangW = maybe 1 fst
+    getCS (ColSpan n) = n
+
+    -- Overhang entries for a cell of column span c and row span r:
+    -- one Just entry while rows are still covered, otherwise c empty
+    -- grid spaces.
+    toHang c r
+      | r > 1     = [Just (c, r)]
+      | otherwise = replicate (getCS c) Nothing
+
+    -- Take the prefix of the overhang list representing filled grid
+    -- spaces. Also return the remainder and the length of this prefix.
+    -- The prefix is returned as a function that prepends the updated
+    -- entries (row span reduced by one) to a list.
+    splitHang = splitHang' 0 id
+
+    splitHang' !n l (Just (c, r):xs)
+      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
+    splitHang' n l xs = (n, l, xs)
+
+    -- Drop list items until the total width of the dropped items
+    -- meets or exceeds the passed width (or the list runs out).
+    dropToWidth _     n l | n < 1 = l
+    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
+    dropToWidth _     _ []        = []
+
+-- | Parse the body of a tabular-like environment into a table block.
+-- Arguments: the block parser, the inline parser, the environment name
+-- (used to detect the end of the rows), and whether the environment
+-- takes a leading width argument that must be skipped.
+-- The table is built with an empty caption; a caption or label seen
+-- here is only recorded in the parser state.
+simpTable :: PandocMonad m
+          => LP m Blocks
+          -> LP m Inlines
+          -> Text
+          -> Bool
+          -> LP m Blocks
+simpTable blocks inline envname hasWidthParameter = try $ do
+  -- the width argument is parsed and thrown away
+  when hasWidthParameter $ () <$ tokWith inline
+  skipopts
+  colspecs <- parseAligns
+  let (aligns, widths, prefsufs) = unzip3 colspecs
+  -- a caption and/or label may come before the rows ...
+  optional $ controlSeq "caption" *> setCaption inline
+  spaces
+  optional label
+  spaces
+  optional lbreak
+  spaces
+  skipMany hline
+  spaces
+  -- a first row counts as the header only if it is followed by a row
+  -- terminator and at least one rule
+  header' <- option [] . try . fmap (:[]) $
+             parseTableRow blocks inline envname prefsufs <*
+               lbreak <* many1 hline
+  spaces
+  rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
+                    (lbreak <* optional (skipMany hline))
+  spaces
+  -- ... or after them
+  optional $ controlSeq "caption" *> setCaption inline
+  spaces
+  optional label
+  spaces
+  optional lbreak
+  spaces
+  lookAhead $ controlSeq "end" -- make sure we're at end
+  -- drop the padding cells that sit underneath multirow cells
+  let th  = fixTableHead $ TableHead nullAttr header'
+  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
+  let tf  = TableFoot nullAttr []
+  return $ table emptyCaption (zip aligns widths) th tbs tf
+
+-- | Attach the caption and label recorded in the parser state to every
+-- table found in the given blocks. If a label is present, it becomes
+-- the identifier of the table's attributes; if a caption is present as
+-- well, the table is numbered and the number is stored under that
+-- label in 'sLabels'.
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+  where go (Table attr c spec th tb tf) = do
+          st <- getState
+          let mblabel = sLastLabel st
+          capt <- case (sCaption st, mblabel) of
+                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils)
+                   (Just ils, Just lab) -> do
+                     num <- getNextNumber sLastTableNum
+                     -- NOTE(review): this writes back the snapshot taken
+                     -- before getNextNumber ran; fine as long as
+                     -- getNextNumber does not modify the state -- confirm
+                     setState
+                       st{ sLastTableNum = num
+                         , sLabels = M.insert lab
+                                    [Str (renderDottedNum num)]
+                                    (sLabels st) }
+                     return $ caption Nothing (plain ils) -- add number??
+                   -- no caption recorded: keep the table's own caption
+                   (Nothing, _)  -> return c
+          let attr' = case (attr, mblabel) of
+                        ((_,classes,kvs), Just ident) ->
+                           (ident,classes,kvs)
+                        _ -> attr
+          -- the attributes go on a wrapping Div (see addAttrDiv); the
+          -- table itself is emitted with null attributes
+          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
+        go x = return x
+
+-- TODO: Table attributes are carried by a wrapping Div for now,
+-- because most writers don't do anything yet with attributes on Table.
+-- Remove this once that changes.
+--
+-- A block with null attributes is returned as is, without a Div.
+addAttrDiv :: Attr -> Block -> Block
+addAttrDiv attr b
+  | attr == nullAttr = b
+  | otherwise        = Div attr [b]
-- 
cgit v1.2.3


From 2faa57e8e96d9905676e30f62d34c06b074acf76 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 09:12:09 -0800
Subject: Factor out T.P.Readers.LaTeX.Citation.

---
 pandoc.cabal                              |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs          | 202 +++-------------------------
 src/Text/Pandoc/Readers/LaTeX/Citation.hs | 210 ++++++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs  |   5 +
 4 files changed, 232 insertions(+), 186 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Citation.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/pandoc.cabal b/pandoc.cabal
index 9149c4f8f..567b650a1 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -631,6 +631,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
+                   Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Table,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 831c5df05..2d1b83486 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -56,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
+import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47)
@@ -169,7 +170,7 @@ rawLaTeXInline = do
   let toks = tokenize "source" inp
   raw <- snd <$>
           (   rawLaTeXParser toks True
-              (mempty <$ (controlSeq "input" >> skipMany opt >> braced))
+              (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
               inlines
           <|> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
               inlines
@@ -311,7 +312,7 @@ blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
 blockquote cvariant mblang = do
   citepar <- if cvariant
                 then (\xs -> para (cite xs mempty))
-                       <$> cites NormalCitation False
+                       <$> cites inline NormalCitation False
                 else option mempty $ para <$> bracketed inline
   let lang = mblang >>= babelLangToBCP47
   let langdiv = case lang of
@@ -425,116 +426,6 @@ pDollarsMath n = do
                 else mzero
        _ -> (tk :) <$> pDollarsMath n
 
--- citations
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _      = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
-  let k = last ks
-  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-simpleCiteArgs :: PandocMonad m => LP m [Citation]
-simpleCiteArgs = try $ do
-  first  <- optionMaybe $ toList <$> opt
-  second <- optionMaybe $ toList <$> opt
-  keys <- try $ bgroup *> manyTill citationLabel egroup
-  let (pre, suf) = case (first  , second ) of
-        (Just s , Nothing) -> (mempty, s )
-        (Just s , Just t ) -> (s , t )
-        _                  -> (mempty, mempty)
-      conv k = Citation { citationId      = k
-                        , citationPrefix  = []
-                        , citationSuffix  = []
-                        , citationMode    = NormalCitation
-                        , citationHash    = 0
-                        , citationNoteNum = 0
-                        }
-  return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-citationLabel :: PandocMonad m => LP m Text
-citationLabel  = do
-  sp
-  untokenize <$>
-    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
-          <* sp
-          <* optional (symbol ',')
-          <* sp)
-  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
-
-cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
-cites mode multi = try $ do
-  cits <- if multi
-             then do
-               multiprenote <- optionMaybe $ toList <$> paropt
-               multipostnote <- optionMaybe $ toList <$> paropt
-               let (pre, suf) = case (multiprenote, multipostnote) of
-                     (Just s , Nothing) -> (mempty, s)
-                     (Nothing , Just t) -> (mempty, t)
-                     (Just s , Just t ) -> (s, t)
-                     _                  -> (mempty, mempty)
-               tempCits <- many1 simpleCiteArgs
-               case tempCits of
-                 (k:ks) -> case ks of
-                             (_:_) -> return $ (addMprenote pre k : init ks) ++
-                                                 [addMpostnote suf (last ks)]
-                             _ -> return [addMprenote pre (addMpostnote suf k)]
-                 _ -> return [[]]
-             else count 1 simpleCiteArgs
-  let cs = concat cits
-  return $ case mode of
-        AuthorInText -> case cs of
-                             (c:rest) -> c {citationMode = mode} : rest
-                             []       -> []
-        _            -> map (\a -> a {citationMode = mode}) cs
-  where mprenote (k:ks) = (k:ks) ++ [Space]
-        mprenote _ = mempty
-        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
-        mpostnote _ = mempty
-        addMprenote mpn (k:ks) =
-          let mpnfinal = case citationPrefix k of
-                           (_:_) -> mprenote mpn
-                           _ -> mpn
-          in addPrefix mpnfinal (k:ks)
-        addMprenote _ _ = []
-        addMpostnote = addSuffix . mpostnote
-
-citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines
-citation name mode multi = do
-  (c,raw) <- withRaw $ cites mode multi
-  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
-
-handleCitationPart :: Inlines -> [Citation]
-handleCitationPart ils =
-  let isCite Cite{} = True
-      isCite _      = False
-      (pref, rest) = break isCite (toList ils)
-  in case rest of
-          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
-          _                -> []
-
-complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
-complexNatbibCitation mode = try $ do
-  (cs, raw) <-
-    withRaw $ concat <$> do
-      bgroup
-      items <- mconcat <$>
-                many1 (notFollowedBy (symbol ';') >> inline)
-                  `sepBy1` symbol ';'
-      egroup
-      return $ map handleCitationPart items
-  case cs of
-       []       -> mzero
-       (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
-                      (rawInline "latex" $ "\\citetext" <> untokenize raw)
-
-inNote :: Inlines -> Inlines
-inNote ils =
-  note $ para $ ils <> str "."
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
@@ -553,19 +444,6 @@ inlineCommand' = try $ do
 tok :: PandocMonad m => LP m Inlines
 tok = tokWith inline
 
-opt :: PandocMonad m => LP m Inlines
-opt = do
-  toks <- try (sp *> bracketedToks <* sp)
-  -- now parse the toks as inlines
-  st <- getState
-  parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
-  case parsed of
-    Right result -> return result
-    Left e       -> throwError $ PandocParsecError (untokenize toks) e
-
-paropt :: PandocMonad m => LP m Inlines
-paropt = parenWrapped inline
-
 inBrackets :: Inlines -> Inlines
 inBrackets x = str "[" <> x <> str "]"
 
@@ -629,6 +507,7 @@ inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 inlineCommands =
   M.union inlineLanguageCommands $
   M.union (accentCommands tok) $
+  M.union (citationCommands inline) $
   M.fromList
   [ ("emph", extractSpaces emph <$> tok)
   , ("textit", extractSpaces emph <$> tok)
@@ -703,7 +582,7 @@ inlineCommands =
   , ("/", pure mempty) -- italic correction
   , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
                             guard $ not inTableCell
-                            optional opt
+                            optional rawopt
                             spaces))
   , (",", lit "\8198")
   , ("@", pure mempty)
@@ -761,61 +640,6 @@ inlineCommands =
   , ("proofname", doTerm Translations.Proof)
   , ("glossaryname", doTerm Translations.Glossary)
   , ("lstlistingname", doTerm Translations.Listing)
-  , ("cite", citation "cite" NormalCitation False)
-  , ("Cite", citation "Cite" NormalCitation False)
-  , ("citep", citation "citep" NormalCitation False)
-  , ("citep*", citation "citep*" NormalCitation False)
-  , ("citeal", citation "citeal" NormalCitation False)
-  , ("citealp", citation "citealp" NormalCitation False)
-  , ("citealp*", citation "citealp*" NormalCitation False)
-  , ("autocite", citation "autocite" NormalCitation False)
-  , ("smartcite", citation "smartcite" NormalCitation False)
-  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
-  , ("parencite", citation "parencite" NormalCitation False)
-  , ("supercite", citation "supercite" NormalCitation False)
-  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
-  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
-  , ("citeyear", citation "citeyear" SuppressAuthor False)
-  , ("autocite*", citation "autocite*" SuppressAuthor False)
-  , ("cite*", citation "cite*" SuppressAuthor False)
-  , ("parencite*", citation "parencite*" SuppressAuthor False)
-  , ("textcite", citation "textcite" AuthorInText False)
-  , ("citet", citation "citet" AuthorInText False)
-  , ("citet*", citation "citet*" AuthorInText False)
-  , ("citealt", citation "citealt" AuthorInText False)
-  , ("citealt*", citation "citealt*" AuthorInText False)
-  , ("textcites", citation "textcites" AuthorInText True)
-  , ("cites", citation "cites" NormalCitation True)
-  , ("autocites", citation "autocites" NormalCitation True)
-  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
-  , ("parencites", citation "parencites" NormalCitation True)
-  , ("supercites", citation "supercites" NormalCitation True)
-  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
-  , ("Autocite", citation "Autocite" NormalCitation False)
-  , ("Smartcite", citation "Smartcite" NormalCitation False)
-  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
-  , ("Parencite", citation "Parencite" NormalCitation False)
-  , ("Supercite", citation "Supercite" NormalCitation False)
-  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
-  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
-  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
-  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
-  , ("Cite*", citation "Cite*" SuppressAuthor False)
-  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
-  , ("Textcite", citation "Textcite" AuthorInText False)
-  , ("Textcites", citation "Textcites" AuthorInText True)
-  , ("Cites", citation "Cites" NormalCitation True)
-  , ("Autocites", citation "Autocites" NormalCitation True)
-  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
-  , ("Parencites", citation "Parencites" NormalCitation True)
-  , ("Supercites", citation "Supercites" NormalCitation True)
-  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
-  , ("citetext", complexNatbibCitation NormalCitation)
-  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
-                        complexNatbibCitation AuthorInText)
-                   <|> citation "citeauthor" AuthorInText False)
-  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
-                          addMeta "nocite"))
   , ("hyperlink", hyperlink)
   , ("hypertarget", hypertargetInline)
   -- glossaries package
@@ -918,7 +742,7 @@ inlineCommands =
 
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
-  optional opt
+  optional rawopt
   x <- tok
   y <- tok
   return $ extractSpaces (spanWith ("",["lettrine"],[])) x <> smallcaps y
@@ -1168,6 +992,16 @@ inline = (mempty <$ comment)
 inlines :: PandocMonad m => LP m Inlines
 inlines = mconcat <$> many inline
 
+-- | Parse a bracketed optional argument (with surrounding spaces) and
+-- interpret its tokens as inlines. A parse failure inside the brackets
+-- is raised as a 'PandocParsecError'.
+opt :: PandocMonad m => LP m Inlines
+opt = do
+  optToks <- try (sp *> bracketedToks <* sp)
+  -- run the inline parser over just the bracketed tokens, starting
+  -- from the current parser state
+  curState <- getState
+  outcome <- runParserT (mconcat <$> many inline) curState "bracketed option" optToks
+  either (throwError . PandocParsecError (untokenize optToks)) return outcome
+
 -- block elements:
 
 preamble :: PandocMonad m => LP m Blocks
@@ -1261,10 +1095,6 @@ insertIncluded defaultExtension f' = do
   getInput >>= setInput . (tokenize f contents ++)
   updateState dropLatestIncludeFile
 
-addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
-addMeta field val = updateState $ \st ->
-   st{ sMeta = addMetaField field val $ sMeta st }
-
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
new file mode 100644
index 000000000..655823dab
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -0,0 +1,210 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Citation
+  ( citationCommands
+  , cites
+  )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (mzero)
+import Control.Monad.Trans (lift)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(PandocParsecError))
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+
+citationCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+citationCommands inline =
+  let citation = citationWith inline
+      tok = spaces *> grouped inline
+   in M.fromList
+  [ ("cite", citation "cite" NormalCitation False)
+  , ("Cite", citation "Cite" NormalCitation False)
+  , ("citep", citation "citep" NormalCitation False)
+  , ("citep*", citation "citep*" NormalCitation False)
+  , ("citeal", citation "citeal" NormalCitation False)
+  , ("citealp", citation "citealp" NormalCitation False)
+  , ("citealp*", citation "citealp*" NormalCitation False)
+  , ("autocite", citation "autocite" NormalCitation False)
+  , ("smartcite", citation "smartcite" NormalCitation False)
+  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+  , ("parencite", citation "parencite" NormalCitation False)
+  , ("supercite", citation "supercite" NormalCitation False)
+  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+  , ("citeyear", citation "citeyear" SuppressAuthor False)
+  , ("autocite*", citation "autocite*" SuppressAuthor False)
+  , ("cite*", citation "cite*" SuppressAuthor False)
+  , ("parencite*", citation "parencite*" SuppressAuthor False)
+  , ("textcite", citation "textcite" AuthorInText False)
+  , ("citet", citation "citet" AuthorInText False)
+  , ("citet*", citation "citet*" AuthorInText False)
+  , ("citealt", citation "citealt" AuthorInText False)
+  , ("citealt*", citation "citealt*" AuthorInText False)
+  , ("textcites", citation "textcites" AuthorInText True)
+  , ("cites", citation "cites" NormalCitation True)
+  , ("autocites", citation "autocites" NormalCitation True)
+  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+  , ("parencites", citation "parencites" NormalCitation True)
+  , ("supercites", citation "supercites" NormalCitation True)
+  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+  , ("Autocite", citation "Autocite" NormalCitation False)
+  , ("Smartcite", citation "Smartcite" NormalCitation False)
+  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
+  , ("Parencite", citation "Parencite" NormalCitation False)
+  , ("Supercite", citation "Supercite" NormalCitation False)
+  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+  , ("Cite*", citation "Cite*" SuppressAuthor False)
+  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+  , ("Textcite", citation "Textcite" AuthorInText False)
+  , ("Textcites", citation "Textcites" AuthorInText True)
+  , ("Cites", citation "Cites" NormalCitation True)
+  , ("Autocites", citation "Autocites" NormalCitation True)
+  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
+  , ("Parencites", citation "Parencites" NormalCitation True)
+  , ("Supercites", citation "Supercites" NormalCitation True)
+  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+  , ("citetext", complexNatbibCitation inline NormalCitation)
+  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
+                        complexNatbibCitation inline AuthorInText)
+                   <|> citation "citeauthor" AuthorInText False)
+  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+                          addMeta "nocite"))
+  ]
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _      = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+  let k = last ks
+  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: forall m . PandocMonad m => LP m Inlines -> LP m [Citation]
+simpleCiteArgs inline = try $ do
+  first  <- optionMaybe $ toList <$> opt
+  second <- optionMaybe $ toList <$> opt
+  keys <- try $ bgroup *> manyTill citationLabel egroup
+  let (pre, suf) = case (first  , second ) of
+        (Just s , Nothing) -> (mempty, s )
+        (Just s , Just t ) -> (s , t )
+        _                  -> (mempty, mempty)
+      conv k = Citation { citationId      = k
+                        , citationPrefix  = []
+                        , citationSuffix  = []
+                        , citationMode    = NormalCitation
+                        , citationHash    = 0
+                        , citationNoteNum = 0
+                        }
+  return $ addPrefix pre $ addSuffix suf $ map conv keys
+ where
+  opt :: PandocMonad m => LP m Inlines
+  opt = do
+    toks <- try (sp *> bracketedToks <* sp)
+    -- now parse the toks as inlines
+    st <- getState
+    parsed <- lift $
+      runParserT (mconcat <$> many inline) st "bracketed option" toks
+    case parsed of
+      Right result -> return result
+      Left e       -> throwError $ PandocParsecError (untokenize toks) e
+
+
+
+citationLabel :: PandocMonad m => LP m Text
+citationLabel  = do
+  sp
+  untokenize <$>
+    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+          <* sp
+          <* optional (symbol ',')
+          <* sp)
+  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
+
+cites :: PandocMonad m
+      => LP m Inlines -> CitationMode -> Bool -> LP m [Citation]
+cites inline mode multi = try $ do
+  let paropt = parenWrapped inline
+  cits <- if multi
+             then do
+               multiprenote <- optionMaybe $ toList <$> paropt
+               multipostnote <- optionMaybe $ toList <$> paropt
+               let (pre, suf) = case (multiprenote, multipostnote) of
+                     (Just s , Nothing) -> (mempty, s)
+                     (Nothing , Just t) -> (mempty, t)
+                     (Just s , Just t ) -> (s, t)
+                     _                  -> (mempty, mempty)
+               tempCits <- many1 $ simpleCiteArgs inline
+               case tempCits of
+                 (k:ks) -> case ks of
+                             (_:_) -> return $ (addMprenote pre k : init ks) ++
+                                                 [addMpostnote suf (last ks)]
+                             _ -> return [addMprenote pre (addMpostnote suf k)]
+                 _ -> return [[]]
+             else count 1 $ simpleCiteArgs inline
+  let cs = concat cits
+  return $ case mode of
+        AuthorInText -> case cs of
+                             (c:rest) -> c {citationMode = mode} : rest
+                             []       -> []
+        _            -> map (\a -> a {citationMode = mode}) cs
+  where mprenote (k:ks) = (k:ks) ++ [Space]
+        mprenote _ = mempty
+        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
+        mpostnote _ = mempty
+        addMprenote mpn (k:ks) =
+          let mpnfinal = case citationPrefix k of
+                           (_:_) -> mprenote mpn
+                           _ -> mpn
+          in addPrefix mpnfinal (k:ks)
+        addMprenote _ _ = []
+        addMpostnote = addSuffix . mpostnote
+
+citationWith :: PandocMonad m
+             => LP m Inlines -> Text -> CitationMode -> Bool -> LP m Inlines
+citationWith inline name mode multi = do
+  (c,raw) <- withRaw $ cites inline mode multi
+  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+  let isCite Cite{} = True
+      isCite _      = False
+      (pref, rest) = break isCite (toList ils)
+  in case rest of
+          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+          _                -> []
+
+complexNatbibCitation :: PandocMonad m
+                      => LP m Inlines -> CitationMode -> LP m Inlines
+complexNatbibCitation inline mode = try $ do
+  (cs, raw) <-
+    withRaw $ concat <$> do
+      bgroup
+      items <- mconcat <$>
+                many1 (notFollowedBy (symbol ';') >> inline)
+                  `sepBy1` symbol ';'
+      egroup
+      return $ map handleCitationPart items
+  case cs of
+       []       -> mzero
+       (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+                      (rawInline "latex" $ "\\citetext" <> untokenize raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+  note $ para $ ils <> str "."
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 4a9fa03ad..a5a39d3c9 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -85,6 +85,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , setCaption
   , resetCaption
   , env
+  , addMeta
   ) where
 
 import Control.Applicative (many, (<|>))
@@ -947,3 +948,7 @@ tokWith inlineParser = try $ spaces >>
   where singleChar' = do
           Tok _ _ t <- singleChar
           return $ str t
+
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+   st{ sMeta = addMetaField field val $ sMeta st }
-- 
cgit v1.2.3


From 564c39beef36bf008fa5d2c840560ef064152e7d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 09:49:34 -0800
Subject: Move setDefaultLanguage to T.P.Readers.LaTeX.Lang.

---
 src/Text/Pandoc/Readers/LaTeX.hs      | 16 ++--------------
 src/Text/Pandoc/Readers/LaTeX/Lang.hs | 22 ++++++++++++++++++++--
 2 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 3935c92ef..2155379db 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -44,7 +44,7 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
                                       readFileFromDirs, report, setResourcePath,
-                                      setTranslations, translateTerm)
+                                      translateTerm)
 import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
 import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
@@ -59,7 +59,7 @@ import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
-                                       babelLangToBCP47)
+                                       babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.Translations as Translations
@@ -1856,15 +1856,3 @@ block = do
 blocks :: PandocMonad m => LP m Blocks
 blocks = mconcat <$> many block
 
-setDefaultLanguage :: PandocMonad m => LP m Blocks
-setDefaultLanguage = do
-  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
-                <$> rawopt
-  polylang <- untokenize <$> braced
-  case M.lookup polylang polyglossiaLangToBCP47 of
-       Nothing -> return mempty -- TODO mzero? warning?
-       Just langFunc -> do
-         let l = langFunc o
-         setTranslations l
-         updateState $ setMeta "lang" $ str (renderLang l)
-         return mempty
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 5f634818e..adbeaa6d4 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -12,13 +12,31 @@ Functions for parsing polyglossia and babel language specifiers to
 BCP47 'Lang'.
 -}
 module Text.Pandoc.Readers.LaTeX.Lang
-  ( polyglossiaLangToBCP47
+  ( setDefaultLanguage
+  , polyglossiaLangToBCP47
   , babelLangToBCP47
   )
 where
 import qualified Data.Map as M
 import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang(..))
+import Text.Pandoc.BCP47 (Lang(..), renderLang)
+import Text.Pandoc.Class (PandocMonad(..), setTranslations)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, option)
+import Text.Pandoc.Builder (Blocks, setMeta, str)
+
+setDefaultLanguage :: PandocMonad m => LP m Blocks
+setDefaultLanguage = do
+  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+                <$> rawopt
+  polylang <- untokenize <$> braced
+  case M.lookup polylang polyglossiaLangToBCP47 of
+       Nothing -> return mempty -- TODO mzero? warning?
+       Just langFunc -> do
+         let l = langFunc o
+         setTranslations l
+         updateState $ setMeta "lang" $ str (renderLang l)
+         return mempty
 
 polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
 polyglossiaLangToBCP47 = M.fromList
-- 
cgit v1.2.3


From d2bb0c7c8d599e6cd2aaef787b207bbfa66d4b9e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 21:05:25 -0800
Subject: Factor out T.P.Readers.LaTeX.Math.

---
 pandoc.cabal                          |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs      | 201 ++-----------------------------
 src/Text/Pandoc/Readers/LaTeX/Math.hs | 221 ++++++++++++++++++++++++++++++++++
 3 files changed, 230 insertions(+), 193 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Math.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/pandoc.cabal b/pandoc.cabal
index 61b1de0dd..56a9491f2 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -632,6 +632,7 @@ library
                    Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
                    Text.Pandoc.Readers.LaTeX.Citation,
+                   Text.Pandoc.Readers.LaTeX.Math,
                    Text.Pandoc.Readers.LaTeX.Table,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4062e8a53..772263578 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,7 +35,6 @@ import Data.List (intercalate)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, maybeToList)
 import qualified Data.Set as Set
-import qualified Data.Sequence as Seq
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
@@ -57,6 +56,11 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
+import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
+                                       inlineEnvironment,
+                                       mathDisplay, mathInline,
+                                       newtheorem, theoremstyle, proof,
+                                       theoremEnvironment)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
@@ -387,39 +391,6 @@ nlToSpace :: Char -> Char
 nlToSpace '\n' = ' '
 nlToSpace x    = x
 
-mathDisplay :: Text -> Inlines
-mathDisplay = displayMath . trimMath
-
-mathInline :: Text -> Inlines
-mathInline = math . trimMath
-
-dollarsMath :: PandocMonad m => LP m Inlines
-dollarsMath = do
-  symbol '$'
-  display <- option False (True <$ symbol '$')
-  (do contents <- try $ untokenize <$> pDollarsMath 0
-      if display
-         then mathDisplay contents <$ symbol '$'
-         else return $ mathInline contents)
-   <|> (guard display >> return (mathInline ""))
-
--- Int is number of embedded groupings
-pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
-pDollarsMath n = do
-  tk@(Tok _ toktype t) <- anyTok
-  case toktype of
-       Symbol | t == "$"
-              , n == 0 -> return []
-              | t == "\\" -> do
-                  tk' <- anyTok
-                  (tk :) . (tk' :) <$> pDollarsMath n
-              | t == "{" -> (tk :) <$> pDollarsMath (n+1)
-              | t == "}" ->
-                if n > 0
-                then (tk :) <$> pDollarsMath (n-1)
-                else mzero
-       _ -> (tk :) <$> pDollarsMath n
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
@@ -452,51 +423,6 @@ unescapeURL = T.concat . go . T.splitOn "\\"
       , isEscapable c = t
       | otherwise = "\\" <> t
 
-mathEnvWith :: PandocMonad m
-            => (Inlines -> a) -> Maybe Text -> Text -> LP m a
-mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
-   where inner x = case innerEnv of
-                        Nothing -> x
-                        Just y  -> "\\begin{" <> y <> "}\n" <> x <>
-                                   "\\end{" <> y <> "}"
-
-mathEnv :: PandocMonad m => Text -> LP m Text
-mathEnv name = do
-  skipopts
-  optional blankline
-  res <- manyTill anyTok (end_ name)
-  return $ stripTrailingNewlines $ untokenize res
-
-inlineEnvironment :: PandocMonad m => LP m Inlines
-inlineEnvironment = try $ do
-  controlSeq "begin"
-  name <- untokenize <$> braced
-  M.findWithDefault mzero name inlineEnvironments
-
-inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineEnvironments = M.fromList [
-    ("displaymath", mathEnvWith id Nothing "displaymath")
-  , ("math", math <$> mathEnv "math")
-  , ("equation", mathEnvWith id Nothing "equation")
-  , ("equation*", mathEnvWith id Nothing "equation*")
-  , ("gather", mathEnvWith id (Just "gathered") "gather")
-  , ("gather*", mathEnvWith id (Just "gathered") "gather*")
-  , ("multline", mathEnvWith id (Just "gathered") "multline")
-  , ("multline*", mathEnvWith id (Just "gathered") "multline*")
-  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
-  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
-  , ("align", mathEnvWith id (Just "aligned") "align")
-  , ("align*", mathEnvWith id (Just "aligned") "align*")
-  , ("alignat", mathEnvWith id (Just "aligned") "alignat")
-  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
-  , ("dmath", mathEnvWith id Nothing "dmath")
-  , ("dmath*", mathEnvWith id Nothing "dmath*")
-  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
-  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
-  , ("darray", mathEnvWith id (Just "aligned") "darray")
-  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
-  ]
-
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 inlineCommands =
   M.union inlineLanguageCommands $
@@ -1354,7 +1280,7 @@ blockCommands = M.fromList
    , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
    , ("signature", mempty <$ (skipopts *> authors))
    , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
-   , ("newtheorem", newtheorem)
+   , ("newtheorem", newtheorem inline)
    , ("theoremstyle", theoremstyle)
    -- KOMA-Script metadata commands
    , ("extratitle", mempty <$ (skipopts *> tok >>= addMeta "extratitle"))
@@ -1473,7 +1399,7 @@ environments = M.union (tableEnvironments blocks inline) $
    , ("lilypond", rawVerbEnv "lilypond")
    , ("ly", rawVerbEnv "ly")
    -- amsthm
-   , ("proof", proof)
+   , ("proof", proof blocks opt)
    -- etoolbox
    , ("ifstrequal", ifstrequal)
    , ("newtoggle", braced >>= newToggle)
@@ -1494,128 +1420,17 @@ filecontents = try $ do
     st{ sFileContents = M.insert fp txt (sFileContents st) }
   return mempty
 
-theoremstyle :: PandocMonad m => LP m Blocks
-theoremstyle = do
-  stylename <- untokenize <$> braced
-  let mbstyle = case stylename of
-                  "plain"      -> Just PlainStyle
-                  "definition" -> Just DefinitionStyle
-                  "remark"     -> Just RemarkStyle
-                  _            -> Nothing
-  case mbstyle of
-    Nothing  -> return ()
-    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
-  return mempty
-
-newtheorem :: PandocMonad m => LP m Blocks
-newtheorem = do
-  number <- option True (False <$ symbol '*' <* sp)
-  name <- untokenize <$> braced
-  sp
-  series <- option Nothing $ Just . untokenize <$> bracketedToks
-  sp
-  showName <- tok
-  sp
-  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
-  sty <- sLastTheoremStyle <$> getState
-  let spec = TheoremSpec { theoremName = showName
-                         , theoremStyle = sty
-                         , theoremSeries = series
-                         , theoremSyncTo = syncTo
-                         , theoremNumber = number
-                         , theoremLastNum = DottedNum [0] }
-  tmap <- sTheoremMap <$> getState
-  updateState $ \s -> s{ sTheoremMap =
-                            M.insert name spec tmap }
-  return mempty
-
-proof :: PandocMonad m => LP m Blocks
-proof = do
-  title <- option (B.text "Proof") opt
-  bs <- env "proof" blocks
-  return $
-    B.divWith ("", ["proof"], []) $
-      addQed $ addTitle (B.emph (title <> ".")) bs
-
-addTitle :: Inlines -> Blocks -> Blocks
-addTitle ils bs =
-  case B.toList bs of
-    (Para xs : rest)
-      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
-    _ -> B.para ils <> bs
-
-addQed :: Blocks -> Blocks
-addQed bs =
-  case Seq.viewr (B.unMany bs) of
-    s Seq.:> Para ils
-      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
-    _ -> bs <> B.para qedSign
- where
-  qedSign = B.str "\xa0\x25FB"
-
 environment :: PandocMonad m => LP m Blocks
 environment = try $ do
   controlSeq "begin"
   name <- untokenize <$> braced
   M.findWithDefault mzero name environments <|>
-    theoremEnvironment name <|>
+    theoremEnvironment blocks opt name <|>
     if M.member name (inlineEnvironments
                        :: M.Map Text (LP PandocPure Inlines))
        then mzero
        else try (rawEnv name) <|> rawVerbEnv name
 
-theoremEnvironment :: PandocMonad m => Text -> LP m Blocks
-theoremEnvironment name = do
-  tmap <- sTheoremMap <$> getState
-  case M.lookup name tmap of
-    Nothing -> mzero
-    Just tspec -> do
-       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
-       mblabel <- option Nothing $ Just . untokenize <$>
-                   try (spaces >> controlSeq "label" >> spaces >> braced)
-       bs <- env name blocks
-       number <-
-         if theoremNumber tspec
-            then do
-               let name' = fromMaybe name $ theoremSeries tspec
-               num <- getNextNumber
-                   (maybe (DottedNum [0]) theoremLastNum .
-                    M.lookup name' . sTheoremMap)
-               updateState $ \s ->
-                 s{ sTheoremMap =
-                       M.adjust
-                       (\spec -> spec{ theoremLastNum = num })
-                       name'
-                       (sTheoremMap s)
-                  }
-
-               case mblabel of
-                 Just ident ->
-                   updateState $ \s ->
-                     s{ sLabels = M.insert ident
-                         (B.toList $
-                           theoremName tspec <> "\160" <>
-                           str (renderDottedNum num)) (sLabels s) }
-                 Nothing -> return ()
-               return $ space <> B.text (renderDottedNum num)
-            else return mempty
-       let titleEmph = case theoremStyle tspec of
-                         PlainStyle      -> B.strong
-                         DefinitionStyle -> B.strong
-                         RemarkStyle     -> B.emph
-       let title = titleEmph (theoremName tspec <> number)
-                      <> optTitle <> "." <> space
-       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
-              $ case theoremStyle tspec of
-                  PlainStyle -> walk italicize bs
-                  _          -> bs
-
-italicize :: Block -> Block
-italicize x@(Para [Image{}]) = x -- see #6925
-italicize (Para ils) = Para [Emph ils]
-italicize (Plain ils) = Plain [Emph ils]
-italicize x = x
-
 rawEnv :: PandocMonad m => Text -> LP m Blocks
 rawEnv name = do
   exts <- getOption readerExtensions
diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs
new file mode 100644
index 000000000..5b49a0376
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs
@@ -0,0 +1,221 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Math
+  ( dollarsMath
+  , inlineEnvironments
+  , inlineEnvironment
+  , mathInline
+  , mathDisplay
+  , theoremstyle
+  , theoremEnvironment
+  , newtheorem
+  , proof
+  )
+where
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Builder as B
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (trimMath, stripTrailingNewlines)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import Control.Monad (guard, mzero)
+import qualified Data.Map as M
+import Data.Text (Text)
+
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+  symbol '$'
+  display <- option False (True <$ symbol '$')
+  (do contents <- try $ untokenize <$> pDollarsMath 0
+      if display
+         then mathDisplay contents <$ symbol '$'
+         else return $ mathInline contents)
+   <|> (guard display >> return (mathInline ""))
+
+-- Int is number of embedded groupings
+pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
+pDollarsMath n = do
+  tk@(Tok _ toktype t) <- anyTok
+  case toktype of
+       Symbol | t == "$"
+              , n == 0 -> return []
+              | t == "\\" -> do
+                  tk' <- anyTok
+                  (tk :) . (tk' :) <$> pDollarsMath n
+              | t == "{" -> (tk :) <$> pDollarsMath (n+1)
+              | t == "}" ->
+                if n > 0
+                then (tk :) <$> pDollarsMath (n-1)
+                else mzero
+       _ -> (tk :) <$> pDollarsMath n
+
+mathDisplay :: Text -> Inlines
+mathDisplay = displayMath . trimMath
+
+mathInline :: Text -> Inlines
+mathInline = math . trimMath
+
+mathEnvWith :: PandocMonad m
+            => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+   where inner x = case innerEnv of
+                        Nothing -> x
+                        Just y  -> "\\begin{" <> y <> "}\n" <> x <>
+                                   "\\end{" <> y <> "}"
+
+mathEnv :: PandocMonad m => Text -> LP m Text
+mathEnv name = do
+  skipopts
+  optional blankline
+  res <- manyTill anyTok (end_ name)
+  return $ stripTrailingNewlines $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+  controlSeq "begin"
+  name <- untokenize <$> braced
+  M.findWithDefault mzero name inlineEnvironments
+
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+    ("displaymath", mathEnvWith id Nothing "displaymath")
+  , ("math", math <$> mathEnv "math")
+  , ("equation", mathEnvWith id Nothing "equation")
+  , ("equation*", mathEnvWith id Nothing "equation*")
+  , ("gather", mathEnvWith id (Just "gathered") "gather")
+  , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+  , ("multline", mathEnvWith id (Just "gathered") "multline")
+  , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+  , ("align", mathEnvWith id (Just "aligned") "align")
+  , ("align*", mathEnvWith id (Just "aligned") "align*")
+  , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+  , ("dmath", mathEnvWith id Nothing "dmath")
+  , ("dmath*", mathEnvWith id Nothing "dmath*")
+  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
+  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
+  , ("darray", mathEnvWith id (Just "aligned") "darray")
+  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
+  ]
+
+theoremstyle :: PandocMonad m => LP m Blocks
+theoremstyle = do
+  stylename <- untokenize <$> braced
+  let mbstyle = case stylename of
+                  "plain"      -> Just PlainStyle
+                  "definition" -> Just DefinitionStyle
+                  "remark"     -> Just RemarkStyle
+                  _            -> Nothing
+  case mbstyle of
+    Nothing  -> return ()
+    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
+  return mempty
+
+newtheorem :: PandocMonad m => LP m Inlines -> LP m Blocks
+newtheorem inline = do
+  number <- option True (False <$ symbol '*' <* sp)
+  name <- untokenize <$> braced
+  sp
+  series <- option Nothing $ Just . untokenize <$> bracketedToks
+  sp
+  showName <- tokWith inline
+  sp
+  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
+  sty <- sLastTheoremStyle <$> getState
+  let spec = TheoremSpec { theoremName = showName
+                         , theoremStyle = sty
+                         , theoremSeries = series
+                         , theoremSyncTo = syncTo
+                         , theoremNumber = number
+                         , theoremLastNum = DottedNum [0] }
+  tmap <- sTheoremMap <$> getState
+  updateState $ \s -> s{ sTheoremMap =
+                            M.insert name spec tmap }
+  return mempty
+
+theoremEnvironment :: PandocMonad m
+                   => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks
+theoremEnvironment blocks opt name = do
+  tmap <- sTheoremMap <$> getState
+  case M.lookup name tmap of
+    Nothing -> mzero
+    Just tspec -> do
+       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
+       mblabel <- option Nothing $ Just . untokenize <$>
+                   try (spaces >> controlSeq "label" >> spaces >> braced)
+       bs <- env name blocks
+       number <-
+         if theoremNumber tspec
+            then do
+               let name' = fromMaybe name $ theoremSeries tspec
+               num <- getNextNumber
+                   (maybe (DottedNum [0]) theoremLastNum .
+                    M.lookup name' . sTheoremMap)
+               updateState $ \s ->
+                 s{ sTheoremMap =
+                       M.adjust
+                       (\spec -> spec{ theoremLastNum = num })
+                       name'
+                       (sTheoremMap s)
+                  }
+
+               case mblabel of
+                 Just ident ->
+                   updateState $ \s ->
+                     s{ sLabels = M.insert ident
+                         (B.toList $
+                           theoremName tspec <> "\160" <>
+                           str (renderDottedNum num)) (sLabels s) }
+                 Nothing -> return ()
+               return $ space <> B.text (renderDottedNum num)
+            else return mempty
+       let titleEmph = case theoremStyle tspec of
+                         PlainStyle      -> B.strong
+                         DefinitionStyle -> B.strong
+                         RemarkStyle     -> B.emph
+       let title = titleEmph (theoremName tspec <> number)
+                      <> optTitle <> "." <> space
+       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
+              $ case theoremStyle tspec of
+                  PlainStyle -> walk italicize bs
+                  _          -> bs
+
+
+
+proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks
+proof blocks opt = do
+  title <- option (B.text "Proof") opt
+  bs <- env "proof" blocks
+  return $
+    B.divWith ("", ["proof"], []) $
+      addQed $ addTitle (B.emph (title <> ".")) bs
+
+addTitle :: Inlines -> Blocks -> Blocks
+addTitle ils bs =
+  case B.toList bs of
+    (Para xs : rest)
+      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
+    _ -> B.para ils <> bs
+
+addQed :: Blocks -> Blocks
+addQed bs =
+  case Seq.viewr (B.unMany bs) of
+    s Seq.:> Para ils
+      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
+    _ -> bs <> B.para qedSign
+ where
+  qedSign = B.str "\xa0\x25FB"
+
+italicize :: Block -> Block
+italicize x@(Para [Image{}]) = x -- see #6925
+italicize (Para ils) = Para [Emph ils]
+italicize (Plain ils) = Plain [Emph ils]
+italicize x = x
+
+
-- 
cgit v1.2.3


From 7e38b8e55a49f027b6aaa690d84225b0374eb057 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 22:51:26 -0800
Subject: T.P.Readers.LaTeX: Don't export tokenize, untokenize.

[API change]

These were only exported for testing, which seems the
wrong thing to do.  They don't belong in the public
API and are not really usable as they are, without access
to the Tok type which is not exported.

Removed the tokenize/untokenize roundtrip test.

We put a QuickCheck property in the comments which
may be used when this code is touched (if it is).
---
 src/Text/Pandoc/Readers/LaTeX.hs         |  2 --
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  9 +++++++++
 test/Tests/Readers/LaTeX.hs              | 17 +----------------
 3 files changed, 10 insertions(+), 18 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 772263578..e63fbc185 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -22,8 +22,6 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                    rawLaTeXInline,
                                    rawLaTeXBlock,
                                    inlineCommand,
-                                   tokenize,
-                                   untokenize
                                  ) where
 
 import Control.Applicative (many, optional, (<|>))
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index a5a39d3c9..db58b333d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -292,6 +292,15 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
            Left e   -> Prelude.fail (show e)
            Right s' -> return s'
 
+{-
+When tokenize or untokenize change, test with this
+QuickCheck property:
+
+> tokUntokRoundtrip :: String -> Bool
+> tokUntokRoundtrip s =
+>   let t = T.pack s in untokenize (tokenize "random" t) == t
+-}
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs
index 8385b751e..9388fd040 100644
--- a/test/Tests/Readers/LaTeX.hs
+++ b/test/Tests/Readers/LaTeX.hs
@@ -15,10 +15,8 @@ module Tests.Readers.LaTeX (tests) where
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Pandoc.Readers.LaTeX (tokenize, untokenize)
 import Test.Tasty
 import Test.Tasty.HUnit
-import Test.Tasty.QuickCheck
 import Tests.Helpers
 import Text.Pandoc
 import Text.Pandoc.Arbitrary ()
@@ -47,21 +45,8 @@ simpleTable' aligns rows
   where
     toRow = Row nullAttr . map simpleCell
 
-tokUntokRt :: String -> Bool
-tokUntokRt s = untokenize (tokenize "random" t) == t
-  where t = T.pack s
-
 tests :: [TestTree]
-tests = [ testGroup "tokenization"
-          [ testCase "tokenizer round trip on test case" $ do
-                 orig <- UTF8.readFile "../test/latex-reader.latex"
-                 let new = untokenize $ tokenize "../test/latex-reader.latex"
-                             orig
-                 assertEqual "untokenize . tokenize is identity" orig new
-          , testProperty "untokenize . tokenize is identity" tokUntokRt
-          ]
-
-        , testGroup "basic"
+tests = [ testGroup "basic"
           [ "simple" =:
             "word" =?> para "word"
           , "space" =:
-- 
cgit v1.2.3


From 6a6291d9e3c1ce9c3453318d87239b6f9260d924 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 23:05:35 -0800
Subject: Change T.P.Readers.LaTeX.SIunitx to export a command map...

instead of individual commands.
---
 src/Text/Pandoc/Readers/LaTeX.hs         | 11 ++---------
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 21 ++++++++++++++-------
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index e63fbc185..304584072 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -62,7 +62,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
-import Text.Pandoc.Readers.LaTeX.SIunitx
+import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
@@ -426,6 +426,7 @@ inlineCommands =
   M.union inlineLanguageCommands $
   M.union (accentCommands tok) $
   M.union (citationCommands inline) $
+  M.union (siunitxCommands tok) $
   M.fromList
   [ ("emph", extractSpaces emph <$> tok)
   , ("textit", extractSpaces emph <$> tok)
@@ -594,14 +595,6 @@ inlineCommands =
   , ("Acfp", doAcronymPlural "full")
   , ("Acsp", doAcronymPlural "abbrv")
   , ("Aclp", doAcronymPlural "long")
-  -- siuntix
-  , ("si", skipopts *> dosi tok)
-  , ("SI", doSI tok)
-  , ("SIrange", doSIrange True tok)
-  , ("numrange", doSIrange False tok)
-  , ("numlist", doSInumlist)
-  , ("num", doSInum)
-  , ("ang", doSIang)
   -- hyphenat
   , ("bshyp", lit "\\\173")
   , ("fshyp", lit "/\173")
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index db9c276e7..1952f4e1a 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -1,12 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Readers.LaTeX.SIunitx
-  ( dosi
-  , doSI
-  , doSIrange
-  , doSInum
-  , doSInumlist
-  , doSIang
-  )
+  ( siunitxCommands )
 where
 import Text.Pandoc.Builder
 import Text.Pandoc.Readers.LaTeX.Parsing
@@ -21,6 +15,19 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import Data.List (intersperse)
 
+
+siunitxCommands :: PandocMonad m
+                 => LP m Inlines -> M.Map Text (LP m Inlines)
+siunitxCommands tok = M.fromList  -- command name (no backslash) -> inline parser
+  [ ("si", skipopts *> dosi tok)
+  , ("SI", doSI tok)
+  , ("SIrange", doSIrange True tok)
+  , ("numrange", doSIrange False tok)  -- same parser as SIrange, flag False
+  , ("numlist", doSInumlist)
+  , ("num", doSInum)
+  , ("ang", doSIang)
+  ]
+
 dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
 dosi tok = grouped (siUnit tok) <|> siUnit tok
 
-- 
cgit v1.2.3


From 382f0e23d22b15aaa9fe2aeb6117ef0a102e379d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 1 Mar 2021 08:55:42 -0800
Subject: Factor out T.P.Readers.LaTeX.Macro.

---
 pandoc.cabal                           |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs       | 141 +-----------------------------
 src/Text/Pandoc/Readers/LaTeX/Macro.hs | 153 +++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+), 139 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Macro.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/pandoc.cabal b/pandoc.cabal
index 56a9491f2..c3317f24b 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -634,6 +634,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Math,
                    Text.Pandoc.Readers.LaTeX.Table,
+                   Text.Pandoc.Readers.LaTeX.Macro,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9ad94e417..fa77595b9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -47,8 +47,7 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
-import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
-                                        ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
@@ -58,6 +57,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
                                        newtheorem, theoremstyle, proof,
                                        theoremEnvironment)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
+import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
@@ -1027,143 +1027,6 @@ authors = try $ do
   egroup
   addMeta "author" (map trimInlines auths)
 
-macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
-macroDef constructor = do
-    (_, s) <- withRaw (commandDef <|> environmentDef)
-    (constructor (untokenize s) <$
-      guardDisabled Ext_latex_macros)
-     <|> return mempty
-  where commandDef = do
-          (name, macro') <- newcommand <|> letmacro <|> defmacro
-          guardDisabled Ext_latex_macros <|>
-           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
-        environmentDef = do
-          mbenv <- newenvironment
-          case mbenv of
-            Nothing -> return ()
-            Just (name, macro1, macro2) ->
-              guardDisabled Ext_latex_macros <|>
-                do updateState $ \s -> s{ sMacros =
-                    M.insert name macro1 (sMacros s) }
-                   updateState $ \s -> s{ sMacros =
-                    M.insert ("end" <> name) macro2 (sMacros s) }
-        -- @\newenvironment{envname}[n-args][default]{begin}{end}@
-        -- is equivalent to
-        -- @\newcommand{\envname}[n-args][default]{begin}@
-        -- @\newcommand{\endenvname}@
-
-letmacro :: PandocMonad m => LP m (Text, Macro)
-letmacro = do
-  controlSeq "let"
-  (name, contents) <- withVerbatimMode $ do
-    Tok _ (CtrlSeq name) _ <- anyControlSeq
-    optional $ symbol '='
-    spaces
-    -- we first parse in verbatim mode, and then expand macros,
-    -- because we don't want \let\foo\bar to turn into
-    -- \let\foo hello if we have previously \def\bar{hello}
-    contents <- bracedOrToken
-    return (name, contents)
-  contents' <- doMacros' 0 contents
-  return (name, Macro ExpandWhenDefined [] Nothing contents')
-
-defmacro :: PandocMonad m => LP m (Text, Macro)
-defmacro = try $
-  -- we use withVerbatimMode, because macros are to be expanded
-  -- at point of use, not point of definition
-  withVerbatimMode $ do
-    controlSeq "def"
-    Tok _ (CtrlSeq name) _ <- anyControlSeq
-    argspecs <- many (argspecArg <|> argspecPattern)
-    contents <- bracedOrToken
-    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
-
-argspecArg :: PandocMonad m => LP m ArgSpec
-argspecArg = do
-  Tok _ (Arg i) _ <- satisfyTok isArgTok
-  return $ ArgNum i
-
-argspecPattern :: PandocMonad m => LP m ArgSpec
-argspecPattern =
-  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
-                              (toktype' == Symbol || toktype' == Word) &&
-                              (txt /= "{" && txt /= "\\" && txt /= "}")))
-
-newcommand :: PandocMonad m => LP m (Text, Macro)
-newcommand = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
-                             controlSeq "renewcommand" <|>
-                             controlSeq "providecommand" <|>
-                             controlSeq "DeclareMathOperator" <|>
-                             controlSeq "DeclareRobustCommand"
-  withVerbatimMode $ do
-    Tok _ (CtrlSeq name) txt <- do
-      optional (symbol '*')
-      anyControlSeq <|>
-        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
-    spaces
-    numargs <- option 0 $ try bracketedNum
-    let argspecs = map ArgNum [1..numargs]
-    spaces
-    optarg <- option Nothing $ Just <$> try bracketedToks
-    spaces
-    contents' <- bracedOrToken
-    let contents =
-         case mtype of
-              "DeclareMathOperator" ->
-                 Tok pos (CtrlSeq "mathop") "\\mathop"
-                 : Tok pos Symbol "{"
-                 : Tok pos (CtrlSeq "mathrm") "\\mathrm"
-                 : Tok pos Symbol "{"
-                 : (contents' ++
-                   [ Tok pos Symbol "}", Tok pos Symbol "}" ])
-              _                     -> contents'
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-        Just macro
-          | mtype == "newcommand" -> do
-              report $ MacroAlreadyDefined txt pos
-              return (name, macro)
-          | mtype == "providecommand" -> return (name, macro)
-        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
-
-newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
-newenvironment = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
-                             controlSeq "renewenvironment" <|>
-                             controlSeq "provideenvironment"
-  withVerbatimMode $ do
-    optional $ symbol '*'
-    spaces
-    name <- untokenize <$> braced
-    spaces
-    numargs <- option 0 $ try bracketedNum
-    spaces
-    optarg <- option Nothing $ Just <$> try bracketedToks
-    let argspecs = map (\i -> ArgNum i) [1..numargs]
-    startcontents <- spaces >> bracedOrToken
-    endcontents <- spaces >> bracedOrToken
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-         Just _
-           | mtype == "newenvironment" -> do
-               report $ MacroAlreadyDefined name pos
-               return Nothing
-           | mtype == "provideenvironment" ->
-               return Nothing
-         _ -> return $ Just (name,
-                      Macro ExpandWhenUsed argspecs optarg startcontents,
-                      Macro ExpandWhenUsed [] Nothing endcontents)
-
-bracketedNum :: PandocMonad m => LP m Int
-bracketedNum = do
-  ds <- untokenize <$> bracketedToks
-  case safeRead ds of
-       Just i -> return i
-       _      -> return 0
-
 looseItem :: PandocMonad m => LP m Blocks
 looseItem = do
   inListItem <- sInListItem <$> getState
diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
new file mode 100644
index 000000000..607f5438c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -0,0 +1,153 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Macro
+  ( macroDef
+  )
+where
+import Text.Pandoc.Extensions (Extension(..))
+import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined))
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import qualified Data.Map as M
+import Data.Text (Text)
+
+macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
+macroDef constructor = do  -- parse one macro or environment definition
+    (_, s) <- withRaw (commandDef <|> environmentDef)  -- s: presumably raw toks
+    (constructor (untokenize s) <$
+      guardDisabled Ext_latex_macros)  -- NOTE(review): presumably "ext is off"
+     <|> return mempty  -- otherwise the result is empty
+  where commandDef = do
+          (name, macro') <- newcommand <|> letmacro <|> defmacro
+          guardDisabled Ext_latex_macros <|>  -- else record macro in sMacros
+           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
+        environmentDef = do
+          mbenv <- newenvironment
+          case mbenv of
+            Nothing -> return ()  -- newenvironment declined to (re)define
+            Just (name, macro1, macro2) ->
+              guardDisabled Ext_latex_macros <|>
+                do updateState $ \s -> s{ sMacros =
+                    M.insert name macro1 (sMacros s) }
+                   updateState $ \s -> s{ sMacros =
+                    M.insert ("end" <> name) macro2 (sMacros s) }
+        -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+        -- is stored as if it were the pair
+        -- @\newcommand{\envname}[n-args][default]{begin}@
+        -- @\newcommand{\endenvname}{end}@
+
+letmacro :: PandocMonad m => LP m (Text, Macro)
+letmacro = do  -- parse a \let assignment into (macro name, Macro)
+  controlSeq "let"
+  (name, contents) <- withVerbatimMode $ do
+    Tok _ (CtrlSeq name) _ <- anyControlSeq  -- the name being defined
+    optional $ symbol '='  -- the '=' may be omitted
+    spaces
+    -- we first parse in verbatim mode, and then expand macros,
+    -- because we don't want \let\foo\bar to turn into
+    -- \let\foo hello if we have previously \def\bar{hello}
+    contents <- bracedOrToken
+    return (name, contents)
+  contents' <- doMacros' 0 contents  -- expansion happens here, at definition
+  return (name, Macro ExpandWhenDefined [] Nothing contents')
+
+defmacro :: PandocMonad m => LP m (Text, Macro)
+defmacro = try $  -- parse a \def definition into (macro name, Macro)
+  -- we use withVerbatimMode, because macros are to be expanded
+  -- at point of use, not point of definition
+  withVerbatimMode $ do
+    controlSeq "def"
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    argspecs <- many (argspecArg <|> argspecPattern)  -- zero or more arg specs
+    contents <- bracedOrToken  -- the replacement body
+    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
+
+argspecArg :: PandocMonad m => LP m ArgSpec
+argspecArg = do  -- one token accepted by isArgTok, as an ArgNum
+  Tok _ (Arg i) _ <- satisfyTok isArgTok
+  return $ ArgNum i  -- i is the number carried by the Arg token
+
+argspecPattern :: PandocMonad m => LP m ArgSpec
+argspecPattern =  -- one or more literal Symbol/Word tokens, as a Pattern
+  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
+                              (toktype' == Symbol || toktype' == Word) &&
+                              (txt /= "{" && txt /= "\\" && txt /= "}")))
+
+newcommand :: PandocMonad m => LP m (Text, Macro)
+newcommand = do  -- parse a \newcommand-family definition into (name, Macro)
+  pos <- getPosition
+  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+                             controlSeq "renewcommand" <|>
+                             controlSeq "providecommand" <|>
+                             controlSeq "DeclareMathOperator" <|>
+                             controlSeq "DeclareRobustCommand"
+  withVerbatimMode $ do
+    Tok _ (CtrlSeq name) txt <- do
+      optional (symbol '*')  -- a star is accepted and then ignored
+      anyControlSeq <|>  -- name given bare, or wrapped in braces
+        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+    spaces
+    numargs <- option 0 $ try bracketedNum  -- argument count, 0 if absent
+    let argspecs = map ArgNum [1..numargs]
+    spaces
+    optarg <- option Nothing $ Just <$> try bracketedToks  -- second [..] if any
+    spaces
+    contents' <- bracedOrToken
+    let contents =
+         case mtype of
+              "DeclareMathOperator" ->  -- body becomes \mathop{\mathrm{body}}
+                 Tok pos (CtrlSeq "mathop") "\\mathop"
+                 : Tok pos Symbol "{"
+                 : Tok pos (CtrlSeq "mathrm") "\\mathrm"
+                 : Tok pos Symbol "{"
+                 : (contents' ++
+                   [ Tok pos Symbol "}", Tok pos Symbol "}" ])
+              _                     -> contents'
+    macros <- sMacros <$> getState
+    case M.lookup name macros of
+        Just macro  -- name already has a definition
+          | mtype == "newcommand" -> do  -- report it and keep the old macro
+              report $ MacroAlreadyDefined txt pos
+              return (name, macro)
+          | mtype == "providecommand" -> return (name, macro)  -- keep old one
+        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
+
+newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
+newenvironment = do  -- parse an environment definition: (name, begin, end)
+  pos <- getPosition
+  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+                             controlSeq "renewenvironment" <|>
+                             controlSeq "provideenvironment"
+  withVerbatimMode $ do
+    optional $ symbol '*'  -- a star is accepted and then ignored
+    spaces
+    name <- untokenize <$> braced  -- environment name, taken from the braces
+    spaces
+    numargs <- option 0 $ try bracketedNum  -- argument count, 0 if absent
+    spaces
+    optarg <- option Nothing $ Just <$> try bracketedToks  -- second [..] if any
+    let argspecs = map ArgNum [1..numargs]  -- same form as in newcommand
+    startcontents <- spaces >> bracedOrToken  -- begin code
+    endcontents <- spaces >> bracedOrToken  -- end code
+    macros <- sMacros <$> getState
+    case M.lookup name macros of
+         Just _  -- name already has a definition
+           | mtype == "newenvironment" -> do  -- report it, define nothing
+               report $ MacroAlreadyDefined name pos
+               return Nothing
+           | mtype == "provideenvironment" ->  -- silently define nothing
+               return Nothing
+         _ -> return $ Just (name,  -- only the begin macro takes arguments
+                      Macro ExpandWhenUsed argspecs optarg startcontents,
+                      Macro ExpandWhenUsed [] Nothing endcontents)
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do  -- read the text from bracketedToks as an Int
+  ds <- untokenize <$> bracketedToks
+  case safeRead ds of
+       Just i -> return i
+       _      -> return 0  -- text that safeRead rejects counts as 0
-- 
cgit v1.2.3


From bbcc1501a5fa6b40ded88f6738d35ce7a8079313 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 10:05:46 -0800
Subject: Split out T.P.Readers.LaTeX.Inline.

---
 pandoc.cabal                            |  11 +-
 src/Text/Pandoc/Readers/LaTeX.hs        | 474 ++++++++++----------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs | 275 ++++++++++++++++++
 3 files changed, 419 insertions(+), 341 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Inline.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/pandoc.cabal b/pandoc.cabal
index d457a0620..211327642 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,15 +626,16 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
-                   Text.Pandoc.Readers.LaTeX.Types,
-                   Text.Pandoc.Readers.LaTeX.Parsing,
-                   Text.Pandoc.Readers.LaTeX.Lang,
-                   Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
+                   Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
+                   Text.Pandoc.Readers.LaTeX.Lang,
+                   Text.Pandoc.Readers.LaTeX.Macro,
                    Text.Pandoc.Readers.LaTeX.Math,
+                   Text.Pandoc.Readers.LaTeX.Parsing,
+                   Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Table,
-                   Text.Pandoc.Readers.LaTeX.Macro,
+                   Text.Pandoc.Readers.LaTeX.Types,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index fc85f0545..a27135fd2 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,13 +35,13 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
 import Text.Pandoc.BCP47 (Lang (..), renderLang)
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      readFileFromDirs, report, setResourcePath,
-                                      translateTerm)
+                                      readFileFromDirs, report,
+                                      setResourcePath)
 import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
-import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
+import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
@@ -61,10 +61,12 @@ import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
+import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
+                                         nameCommands, charCommands,
+                                         verbCommands, rawInlineOr,
+                                         listingsLanguage)
 import Text.Pandoc.Shared
-import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
-import qualified Text.Pandoc.Builder as B
 import Safe
 
 -- for debugging:
@@ -317,76 +319,6 @@ blockquote cvariant mblang = do
   optional $ symbolIn (".:;?!" :: [Char])  -- currently ignored
   return $ blockQuote . langdiv $ (bs <> citepar)
 
-doAcronym :: PandocMonad m => Text -> LP m Inlines
-doAcronym form = do
-  acro <- braced
-  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
-    ("acronym-form", "singular+" <> form)])
-    $ str $ untokenize acro]
-
-doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
-doAcronymPlural form = do
-  acro <- braced
-  plural <- lit "s"
-  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
-    ("acronym-form", "plural+" <> form)]) $
-   mconcat [str $ untokenize acro, plural]]
-
-doverb :: PandocMonad m => LP m Inlines
-doverb = do
-  Tok _ Symbol t <- anySymbol
-  marker <- case T.uncons t of
-              Just (c, ts) | T.null ts -> return c
-              _            -> mzero
-  withVerbatimMode $
-    code . untokenize <$>
-      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
-
-verbTok :: PandocMonad m => Char -> LP m Tok
-verbTok stopchar = do
-  t@(Tok pos toktype txt) <- anyTok
-  case T.findIndex (== stopchar) txt of
-       Nothing -> return t
-       Just i  -> do
-         let (t1, t2) = T.splitAt i txt
-         inp <- getInput
-         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
-                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
-         return $ Tok pos toktype t1
-
-listingsLanguage :: [(Text, Text)] -> Maybe Text
-listingsLanguage opts =
-  case lookup "language" opts of
-    Nothing  -> Nothing
-    Just l   -> fromListingsLanguage l `mplus` Just l
-
-dolstinline :: PandocMonad m => LP m Inlines
-dolstinline = do
-  options <- option [] keyvals
-  let classes = maybeToList $ listingsLanguage options
-  doinlinecode classes
-
-domintinline :: PandocMonad m => LP m Inlines
-domintinline = do
-  skipopts
-  cls <- untokenize <$> braced
-  doinlinecode [cls]
-
-doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
-doinlinecode classes = do
-  Tok _ Symbol t <- anySymbol
-  marker <- case T.uncons t of
-              Just (c, ts) | T.null ts -> return c
-              _            -> mzero
-  let stopchar = if marker == '{' then '}' else marker
-  withVerbatimMode $
-    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
-      manyTill (verbTok stopchar) (symbol stopchar)
-
-nlToSpace :: Char -> Char
-nlToSpace '\n' = ' '
-nlToSpace x    = x
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
@@ -405,9 +337,6 @@ inlineCommand' = try $ do
 tok :: PandocMonad m => LP m Inlines
 tok = tokWith inline
 
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
-
 unescapeURL :: Text -> Text
 unescapeURL = T.concat . go . T.splitOn "\\"
   where
@@ -420,234 +349,136 @@ unescapeURL = T.concat . go . T.splitOn "\\"
       | otherwise = "\\" <> t
 
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineCommands =
-  M.union inlineLanguageCommands $
-  M.union (accentCommands tok) $
-  M.union (citationCommands inline) $
-  M.union (siunitxCommands tok) $
-  M.fromList
-  [ ("emph", extractSpaces emph <$> tok)
-  , ("textit", extractSpaces emph <$> tok)
-  , ("textsl", extractSpaces emph <$> tok)
-  , ("textsc", extractSpaces smallcaps <$> tok)
-  , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
-  , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
-  , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
-  , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
-  , ("texttt", ttfamily)
-  , ("sout", extractSpaces strikeout <$> tok)
-  , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
-  , ("lq", return (str "‘"))
-  , ("rq", return (str "’"))
-  , ("textquoteleft", return (str "‘"))
-  , ("textquoteright", return (str "’"))
-  , ("textquotedblleft", return (str "“"))
-  , ("textquotedblright", return (str "”"))
-  , ("textsuperscript", extractSpaces superscript <$> tok)
-  , ("textsubscript", extractSpaces subscript <$> tok)
-  , ("textbackslash", lit "\\")
-  , ("backslash", lit "\\")
-  , ("slash", lit "/")
-  , ("textbf", extractSpaces strong <$> tok)
-  , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
-  , ("underline", underline <$> tok)
-  , ("ldots", lit "…")
-  , ("vdots", lit "\8942")
-  , ("dots", lit "…")
-  , ("mdots", lit "…")
-  , ("sim", lit "~")
-  , ("sep", lit ",")
-  , ("label", rawInlineOr "label" dolabel)
-  , ("ref", rawInlineOr "ref" $ doref "ref")
-  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
-  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
-  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
-  , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
-  , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
-  , ("lettrine", rawInlineOr "lettrine" lettrine)
-  , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
-  , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
-  , ("ensuremath", mathInline . untokenize <$> braced)
-  , ("texorpdfstring", const <$> tok <*> tok)
-  , ("P", lit "¶")
-  , ("S", lit "§")
-  , ("$", lit "$")
-  , ("%", lit "%")
-  , ("&", lit "&")
-  , ("#", lit "#")
-  , ("_", lit "_")
-  , ("{", lit "{")
-  , ("}", lit "}")
-  , ("qed", lit "\a0\x25FB")
-  -- old TeX commands
-  , ("em", extractSpaces emph <$> inlines)
-  , ("it", extractSpaces emph <$> inlines)
-  , ("sl", extractSpaces emph <$> inlines)
-  , ("bf", extractSpaces strong <$> inlines)
-  , ("tt", code . stringify . toList <$> inlines)
-  , ("rm", inlines)
-  , ("itshape", extractSpaces emph <$> inlines)
-  , ("slshape", extractSpaces emph <$> inlines)
-  , ("scshape", extractSpaces smallcaps <$> inlines)
-  , ("bfseries", extractSpaces strong <$> inlines)
-  , ("MakeUppercase", makeUppercase <$> tok)
-  , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
-  , ("uppercase", makeUppercase <$> tok)
-  , ("MakeLowercase", makeLowercase <$> tok)
-  , ("MakeTextLowercase", makeLowercase <$> tok)
-  , ("lowercase", makeLowercase <$> tok)
-  , ("/", pure mempty) -- italic correction
-  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
-                            guard $ not inTableCell
-                            optional rawopt
-                            spaces))
-  , (",", lit "\8198")
-  , ("@", pure mempty)
-  , (" ", lit "\160")
-  , ("ps", pure $ str "PS." <> space)
-  , ("TeX", lit "TeX")
-  , ("LaTeX", lit "LaTeX")
-  , ("bar", lit "|")
-  , ("textless", lit "<")
-  , ("textgreater", lit ">")
-  , ("thanks", skipopts >> note <$> grouped block)
-  , ("footnote", skipopts >> note <$> grouped block)
-  , ("passthrough", tok) -- \passthrough macro used by latex writer
-                         -- for listings
-  , ("verb", doverb)
-  , ("lstinline", dolstinline)
-  , ("mintinline", domintinline)
-  , ("Verb", doverb)
-  , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
-                  bracedUrl)
-  , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
-  , ("href", do url <- bracedUrl
-                sp
-                link (unescapeURL $ untokenize url) "" <$> tok)
-  , ("includegraphics", do options <- option [] keyvals
-                           src <- braced
-                           mkImage options . unescapeURL . removeDoubleQuotes $
-                               untokenize src)
-  , ("enquote*", enquote True Nothing)
-  , ("enquote", enquote False Nothing)
-  -- foreignquote is supposed to use native quote marks
-  , ("foreignquote*", braced >>= enquote True . Just . untokenize)
-  , ("foreignquote", braced >>= enquote False . Just . untokenize)
-  -- hypehnquote uses regular quotes
-  , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
-  , ("hyphenquote", braced >>= enquote False . Just . untokenize)
-  , ("figurename", doTerm Translations.Figure)
-  , ("prefacename", doTerm Translations.Preface)
-  , ("refname", doTerm Translations.References)
-  , ("bibname", doTerm Translations.Bibliography)
-  , ("chaptername", doTerm Translations.Chapter)
-  , ("partname", doTerm Translations.Part)
-  , ("contentsname", doTerm Translations.Contents)
-  , ("listfigurename", doTerm Translations.ListOfFigures)
-  , ("listtablename", doTerm Translations.ListOfTables)
-  , ("indexname", doTerm Translations.Index)
-  , ("abstractname", doTerm Translations.Abstract)
-  , ("tablename", doTerm Translations.Table)
-  , ("enclname", doTerm Translations.Encl)
-  , ("ccname", doTerm Translations.Cc)
-  , ("headtoname", doTerm Translations.To)
-  , ("pagename", doTerm Translations.Page)
-  , ("seename", doTerm Translations.See)
-  , ("seealsoname", doTerm Translations.SeeAlso)
-  , ("proofname", doTerm Translations.Proof)
-  , ("glossaryname", doTerm Translations.Glossary)
-  , ("lstlistingname", doTerm Translations.Listing)
-  , ("hyperlink", hyperlink)
-  , ("hypertarget", hypertargetInline)
-  -- glossaries package
-  , ("gls", doAcronym "short")
-  , ("Gls", doAcronym "short")
-  , ("glsdesc", doAcronym "long")
-  , ("Glsdesc", doAcronym "long")
-  , ("GLSdesc", doAcronym "long")
-  , ("acrlong", doAcronym "long")
-  , ("Acrlong", doAcronym "long")
-  , ("acrfull", doAcronym "full")
-  , ("Acrfull", doAcronym "full")
-  , ("acrshort", doAcronym "abbrv")
-  , ("Acrshort", doAcronym "abbrv")
-  , ("glspl", doAcronymPlural "short")
-  , ("Glspl", doAcronymPlural "short")
-  , ("glsdescplural", doAcronymPlural "long")
-  , ("Glsdescplural", doAcronymPlural "long")
-  , ("GLSdescplural", doAcronymPlural "long")
-  -- acronyms package
-  , ("ac", doAcronym "short")
-  , ("acf", doAcronym "full")
-  , ("acs", doAcronym "abbrv")
-  , ("acl", doAcronym "long")
-  , ("acp", doAcronymPlural "short")
-  , ("acfp", doAcronymPlural "full")
-  , ("acsp", doAcronymPlural "abbrv")
-  , ("aclp", doAcronymPlural "long")
-  , ("Ac", doAcronym "short")
-  , ("Acf", doAcronym "full")
-  , ("Acs", doAcronym "abbrv")
-  , ("Acl", doAcronym "long")
-  , ("Acp", doAcronymPlural "short")
-  , ("Acfp", doAcronymPlural "full")
-  , ("Acsp", doAcronymPlural "abbrv")
-  , ("Aclp", doAcronymPlural "long")
-  -- hyphenat
-  , ("bshyp", lit "\\\173")
-  , ("fshyp", lit "/\173")
-  , ("dothyp", lit ".\173")
-  , ("colonhyp", lit ":\173")
-  , ("hyp", lit "-")
-  , ("nohyphens", tok)
-  , ("textnhtt", ttfamily)
-  , ("nhttfamily", ttfamily)
-  -- LaTeX colors
-  , ("textcolor", coloredInline "color")
-  , ("colorbox", coloredInline "background-color")
-  -- fontawesome
-  , ("faCheck", lit "\10003")
-  , ("faClose", lit "\10007")
-  -- xspace
-  , ("xspace", doxspace)
-  -- etoolbox
-  , ("ifstrequal", ifstrequal)
-  , ("newtoggle", braced >>= newToggle)
-  , ("toggletrue", braced >>= setToggle True)
-  , ("togglefalse", braced >>= setToggle False)
-  , ("iftoggle", try $ ifToggle >> inline)
-  -- biblatex misc
-  , ("RN", romanNumeralUpper)
-  , ("Rn", romanNumeralLower)
-  -- babel
-  , ("foreignlanguage", foreignlanguage)
-  -- include
-  , ("input", rawInlineOr "input" $ include "input")
-  -- soul package
-  , ("ul", underline <$> tok)
-  -- ulem package
-  , ("uline", underline <$> tok)
-  -- plain tex stuff that should just be passed through as raw tex
-  , ("ifdim", ifdim)
-  -- bibtex
-  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
-  , ("mkbibemph", spanWith nullAttr . emph <$> tok)
-  , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
-  , ("mkbibbold", spanWith nullAttr . strong <$> tok)
-  , ("mkbibparens",
-       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
-  , ("mkbibbrackets",
-       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
-  , ("autocap", spanWith nullAttr <$> tok)
-  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
-  , ("bibstring",
-       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
-         <$> braced)
-  , ("adddot", pure (str "."))
-  , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
-  , ("addabbrvspace", pure space)
-  , ("hyphen", pure (str "-"))
-  ]
+inlineCommands = M.unions
+  [ inlineLanguageCommands
+  , accentCommands tok
+  , citationCommands inline
+  , siunitxCommands tok
+  , acronymCommands
+  , refCommands
+  , nameCommands
+  , verbCommands
+  , charCommands
+  , rest ]
+ where
+  rest = M.fromList
+    [ ("emph", extractSpaces emph <$> tok)
+    , ("textit", extractSpaces emph <$> tok)
+    , ("textsl", extractSpaces emph <$> tok)
+    , ("textsc", extractSpaces smallcaps <$> tok)
+    , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
+    , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
+    , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
+    , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
+    , ("texttt", ttfamily)
+    , ("sout", extractSpaces strikeout <$> tok)
+    , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
+    , ("lq", return (str "‘"))
+    , ("rq", return (str "’"))
+    , ("textquoteleft", return (str "‘"))
+    , ("textquoteright", return (str "’"))
+    , ("textquotedblleft", return (str "“"))
+    , ("textquotedblright", return (str "”"))
+    , ("textsuperscript", extractSpaces superscript <$> tok)
+    , ("textsubscript", extractSpaces subscript <$> tok)
+    , ("textbf", extractSpaces strong <$> tok)
+    , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+    , ("underline", underline <$> tok)
+    , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
+    , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
+    , ("lettrine", rawInlineOr "lettrine" lettrine)
+    , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
+    , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
+    , ("ensuremath", mathInline . untokenize <$> braced)
+    , ("texorpdfstring", const <$> tok <*> tok)
+    -- old TeX commands
+    , ("em", extractSpaces emph <$> inlines)
+    , ("it", extractSpaces emph <$> inlines)
+    , ("sl", extractSpaces emph <$> inlines)
+    , ("bf", extractSpaces strong <$> inlines)
+    , ("tt", code . stringify . toList <$> inlines)
+    , ("rm", inlines)
+    , ("itshape", extractSpaces emph <$> inlines)
+    , ("slshape", extractSpaces emph <$> inlines)
+    , ("scshape", extractSpaces smallcaps <$> inlines)
+    , ("bfseries", extractSpaces strong <$> inlines)
+    , ("MakeUppercase", makeUppercase <$> tok)
+    , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
+    , ("uppercase", makeUppercase <$> tok)
+    , ("MakeLowercase", makeLowercase <$> tok)
+    , ("MakeTextLowercase", makeLowercase <$> tok)
+    , ("lowercase", makeLowercase <$> tok)
+    , ("thanks", skipopts >> note <$> grouped block)
+    , ("footnote", skipopts >> note <$> grouped block)
+    , ("passthrough", tok) -- \passthrough macro used by latex writer
+                           -- for listings
+    , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
+                    bracedUrl)
+    , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
+    , ("href", do url <- bracedUrl
+                  sp
+                  link (unescapeURL $ untokenize url) "" <$> tok)
+    , ("includegraphics", do options <- option [] keyvals
+                             src <- braced
+                             mkImage options . unescapeURL . removeDoubleQuotes $
+                                 untokenize src)
+    , ("enquote*", enquote True Nothing)
+    , ("enquote", enquote False Nothing)
+    -- foreignquote is supposed to use native quote marks
+    , ("foreignquote*", braced >>= enquote True . Just . untokenize)
+    , ("foreignquote", braced >>= enquote False . Just . untokenize)
+    -- hypehnquote uses regular quotes
+    , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
+    , ("hyphenquote", braced >>= enquote False . Just . untokenize)
+    , ("hyperlink", hyperlink)
+    , ("hypertarget", hypertargetInline)
+    -- hyphenat
+    , ("nohyphens", tok)
+    , ("textnhtt", ttfamily)
+    , ("nhttfamily", ttfamily)
+    -- LaTeX colors
+    , ("textcolor", coloredInline "color")
+    , ("colorbox", coloredInline "background-color")
+    -- xspace
+    , ("xspace", doxspace)
+    -- etoolbox
+    , ("ifstrequal", ifstrequal)
+    , ("newtoggle", braced >>= newToggle)
+    , ("toggletrue", braced >>= setToggle True)
+    , ("togglefalse", braced >>= setToggle False)
+    , ("iftoggle", try $ ifToggle >> inline)
+    -- biblatex misc
+    , ("RN", romanNumeralUpper)
+    , ("Rn", romanNumeralLower)
+    -- babel
+    , ("foreignlanguage", foreignlanguage)
+    -- include
+    , ("input", rawInlineOr "input" $ include "input")
+    -- soul package
+    , ("ul", underline <$> tok)
+    -- ulem package
+    , ("uline", underline <$> tok)
+    -- plain tex stuff that should just be passed through as raw tex
+    , ("ifdim", ifdim)
+    -- bibtex
+    , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+    , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+    , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+    , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+    , ("mkbibparens",
+         spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+    , ("mkbibbrackets",
+         spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+    , ("autocap", spanWith nullAttr <$> tok)
+    , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+    , ("bibstring",
+         (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+           <$> braced)
+    , ("adddot", pure (str "."))
+    , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+    , ("addabbrvspace", pure space)
+    , ("hyphen", pure (str "-"))
+    ]
 
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
@@ -766,9 +597,6 @@ ifToggle = do
                   report $ UndefinedToggle name' pos
   return ()
 
-doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
-doTerm term = str <$> translateTerm term
-
 ifstrequal :: (PandocMonad m, Monoid a) => LP m a
 ifstrequal = do
   str1 <- tok
@@ -789,13 +617,6 @@ coloredInline stylename = do
 ttfamily :: PandocMonad m => LP m Inlines
 ttfamily = code . stringify . toList <$> tok
 
-rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
-rawInlineOr name' fallback = do
-  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
-  if parseRaw
-     then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
-     else fallback
-
 processHBox :: Inlines -> Inlines
 processHBox = walk convert
   where
@@ -846,25 +667,6 @@ treatAsInline = Set.fromList
   , "pagebreak"
   ]
 
-dolabel :: PandocMonad m => LP m Inlines
-dolabel = do
-  v <- braced
-  let refstr = untokenize v
-  updateState $ \st ->
-    st{ sLastLabel = Just refstr }
-  return $ spanWith (refstr,[],[("label", refstr)])
-    $ inBrackets $ str $ untokenize v
-
-doref :: PandocMonad m => Text -> LP m Inlines
-doref cls = do
-  v <- braced
-  let refstr = untokenize v
-  return $ linkWith ("",[],[ ("reference-type", cls)
-                           , ("reference", refstr)])
-                    ("#" <> refstr)
-                    ""
-                    (inBrackets $ str refstr)
-
 lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
 lookupListDefault d = (fromMaybe d .) . lookupList
   where lookupList l m = msum $ map (`M.lookup` m) l
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
new file mode 100644
index 000000000..66014a77f
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -0,0 +1,275 @@
+{-# LANGUAGE OverloadedStrings     #-}
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX.Inline
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Readers.LaTeX.Inline
+  ( acronymCommands
+  , verbCommands
+  , charCommands
+  , nameCommands
+  , refCommands
+  , rawInlineOr
+  , listingsLanguage
+  )
+where
+
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
+import Control.Applicative (optional)
+import Control.Monad (guard, mzero, mplus)
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
+import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
+                            manyTill, getInput, setInput, incSourceColumn,
+                            option)
+import Text.Pandoc.Highlighting (fromListingsLanguage,)
+import Data.Maybe (maybeToList)
+import Text.Pandoc.Options (ReaderOptions(..))
+import qualified Text.Pandoc.Translations as Translations
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+  if parseRaw
+     then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
+     else fallback
+
+dolabel :: PandocMonad m => LP m Inlines
+dolabel = do
+  v <- braced
+  let refstr = untokenize v
+  updateState $ \st ->
+    st{ sLastLabel = Just refstr }
+  return $ spanWith (refstr,[],[("label", refstr)])
+    $ inBrackets $ str $ untokenize v
+
+doref :: PandocMonad m => Text -> LP m Inlines
+doref cls = do
+  v <- braced
+  let refstr = untokenize v
+  return $ linkWith ("",[],[ ("reference-type", cls)
+                           , ("reference", refstr)])
+                    ("#" <> refstr)
+                    ""
+                    (inBrackets $ str refstr)
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
+doTerm term = str <$> translateTerm term
+
+lit :: Text -> LP m Inlines
+lit = pure . str
+
+doverb :: PandocMonad m => LP m Inlines
+doverb = do
+  Tok _ Symbol t <- anySymbol
+  marker <- case T.uncons t of
+              Just (c, ts) | T.null ts -> return c
+              _            -> mzero
+  withVerbatimMode $
+    code . untokenize <$>
+      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+  t@(Tok pos toktype txt) <- anyTok
+  case T.findIndex (== stopchar) txt of
+       Nothing -> return t
+       Just i  -> do
+         let (t1, t2) = T.splitAt i txt
+         inp <- getInput
+         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
+                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
+         return $ Tok pos toktype t1
+
+listingsLanguage :: [(Text, Text)] -> Maybe Text
+listingsLanguage opts =
+  case lookup "language" opts of
+    Nothing  -> Nothing
+    Just l   -> fromListingsLanguage l `mplus` Just l
+
+dolstinline :: PandocMonad m => LP m Inlines
+dolstinline = do
+  options <- option [] keyvals
+  let classes = maybeToList $ listingsLanguage options
+  doinlinecode classes
+
+domintinline :: PandocMonad m => LP m Inlines
+domintinline = do
+  skipopts
+  cls <- untokenize <$> braced
+  doinlinecode [cls]
+
+doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
+doinlinecode classes = do
+  Tok _ Symbol t <- anySymbol
+  marker <- case T.uncons t of
+              Just (c, ts) | T.null ts -> return c
+              _            -> mzero
+  let stopchar = if marker == '{' then '}' else marker
+  withVerbatimMode $
+    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
+      manyTill (verbTok stopchar) (symbol stopchar)
+
+nlToSpace :: Char -> Char
+nlToSpace '\n' = ' '
+nlToSpace x    = x
+
+
+
+verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+verbCommands = M.fromList
+  [ ("verb", doverb)
+  , ("lstinline", dolstinline)
+  , ("mintinline", domintinline)
+  , ("Verb", doverb)
+  ]
+
+
+
+charCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+charCommands = M.fromList
+  [ ("ldots", lit "…")
+  , ("vdots", lit "\8942")
+  , ("dots", lit "…")
+  , ("mdots", lit "…")
+  , ("sim", lit "~")
+  , ("sep", lit ",")
+  , ("P", lit "¶")
+  , ("S", lit "§")
+  , ("$", lit "$")
+  , ("%", lit "%")
+  , ("&", lit "&")
+  , ("#", lit "#")
+  , ("_", lit "_")
+  , ("{", lit "{")
+  , ("}", lit "}")
+  , ("qed", lit "\a0\x25FB")
+  , ("/", pure mempty) -- italic correction
+  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+                            guard $ not inTableCell
+                            optional rawopt
+                            spaces))
+  , (",", lit "\8198")
+  , ("@", pure mempty)
+  , (" ", lit "\160")
+  , ("ps", pure $ str "PS." <> space)
+  , ("TeX", lit "TeX")
+  , ("LaTeX", lit "LaTeX")
+  , ("bar", lit "|")
+  , ("textless", lit "<")
+  , ("textgreater", lit ">")
+  , ("textbackslash", lit "\\")
+  , ("backslash", lit "\\")
+  , ("slash", lit "/")
+  -- fontawesome
+  , ("faCheck", lit "\10003")
+  , ("faClose", lit "\10007")
+  -- hyphenat
+  , ("bshyp", lit "\\\173")
+  , ("fshyp", lit "/\173")
+  , ("dothyp", lit ".\173")
+  , ("colonhyp", lit ":\173")
+  , ("hyp", lit "-")
+  ]
+
+nameCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+nameCommands = M.fromList
+  [ ("figurename", doTerm Translations.Figure)
+  , ("prefacename", doTerm Translations.Preface)
+  , ("refname", doTerm Translations.References)
+  , ("bibname", doTerm Translations.Bibliography)
+  , ("chaptername", doTerm Translations.Chapter)
+  , ("partname", doTerm Translations.Part)
+  , ("contentsname", doTerm Translations.Contents)
+  , ("listfigurename", doTerm Translations.ListOfFigures)
+  , ("listtablename", doTerm Translations.ListOfTables)
+  , ("indexname", doTerm Translations.Index)
+  , ("abstractname", doTerm Translations.Abstract)
+  , ("tablename", doTerm Translations.Table)
+  , ("enclname", doTerm Translations.Encl)
+  , ("ccname", doTerm Translations.Cc)
+  , ("headtoname", doTerm Translations.To)
+  , ("pagename", doTerm Translations.Page)
+  , ("seename", doTerm Translations.See)
+  , ("seealsoname", doTerm Translations.SeeAlso)
+  , ("proofname", doTerm Translations.Proof)
+  , ("glossaryname", doTerm Translations.Glossary)
+  , ("lstlistingname", doTerm Translations.Listing)
+  ]
+
+refCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+refCommands = M.fromList
+  [ ("label", rawInlineOr "label" dolabel)
+  , ("ref", rawInlineOr "ref" $ doref "ref")
+  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
+  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
+  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
+  ]
+
+acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+acronymCommands = M.fromList
+  -- glossaries package
+  [ ("gls", doAcronym "short")
+  , ("Gls", doAcronym "short")
+  , ("glsdesc", doAcronym "long")
+  , ("Glsdesc", doAcronym "long")
+  , ("GLSdesc", doAcronym "long")
+  , ("acrlong", doAcronym "long")
+  , ("Acrlong", doAcronym "long")
+  , ("acrfull", doAcronym "full")
+  , ("Acrfull", doAcronym "full")
+  , ("acrshort", doAcronym "abbrv")
+  , ("Acrshort", doAcronym "abbrv")
+  , ("glspl", doAcronymPlural "short")
+  , ("Glspl", doAcronymPlural "short")
+  , ("glsdescplural", doAcronymPlural "long")
+  , ("Glsdescplural", doAcronymPlural "long")
+  , ("GLSdescplural", doAcronymPlural "long")
+  -- acronyms package
+  , ("ac", doAcronym "short")
+  , ("acf", doAcronym "full")
+  , ("acs", doAcronym "abbrv")
+  , ("acl", doAcronym "long")
+  , ("acp", doAcronymPlural "short")
+  , ("acfp", doAcronymPlural "full")
+  , ("acsp", doAcronymPlural "abbrv")
+  , ("aclp", doAcronymPlural "long")
+  , ("Ac", doAcronym "short")
+  , ("Acf", doAcronym "full")
+  , ("Acs", doAcronym "abbrv")
+  , ("Acl", doAcronym "long")
+  , ("Acp", doAcronymPlural "short")
+  , ("Acfp", doAcronymPlural "full")
+  , ("Acsp", doAcronymPlural "abbrv")
+  , ("Aclp", doAcronymPlural "long")
+  ]
+
+doAcronym :: PandocMonad m => Text -> LP m Inlines
+doAcronym form = do
+  acro <- braced
+  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+    ("acronym-form", "singular+" <> form)])
+    $ str $ untokenize acro]
+
+doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
+doAcronymPlural form = do
+  acro <- braced
+  let plural = str "s"
+  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+    ("acronym-form", "plural+" <> form)]) $
+   mconcat [str $ untokenize acro, plural]]
+
+
-- 
cgit v1.2.3


From 044bc44fc621e421b74367765022f108494b4e2e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 11:08:02 -0800
Subject: Moved more into T.P.Readers.LaTeX.Lang.

---
 src/Text/Pandoc/Readers/LaTeX.hs        | 84 +++------------------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs | 65 +++++++++++++++++++++++--
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   | 30 +++++++++++-
 3 files changed, 97 insertions(+), 82 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index a27135fd2..4ec038b94 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -34,7 +34,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import Text.Pandoc.BCP47 (Lang (..), renderLang)
+import Text.Pandoc.BCP47 (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
@@ -58,11 +58,12 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
                                        theoremEnvironment)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
-import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
+import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
+                                         biblatexInlineCommands,
                                          verbCommands, rawInlineOr,
                                          listingsLanguage)
 import Text.Pandoc.Shared
@@ -350,8 +351,7 @@ unescapeURL = T.concat . go . T.splitOn "\\"
 
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 inlineCommands = M.unions
-  [ inlineLanguageCommands
-  , accentCommands tok
+  [ accentCommands tok
   , citationCommands inline
   , siunitxCommands tok
   , acronymCommands
@@ -359,6 +359,8 @@ inlineCommands = M.unions
   , nameCommands
   , verbCommands
   , charCommands
+  , inlineLanguageCommands tok
+  , biblatexInlineCommands tok
   , rest ]
  where
   rest = M.fromList
@@ -373,12 +375,6 @@ inlineCommands = M.unions
     , ("texttt", ttfamily)
     , ("sout", extractSpaces strikeout <$> tok)
     , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
-    , ("lq", return (str "‘"))
-    , ("rq", return (str "’"))
-    , ("textquoteleft", return (str "‘"))
-    , ("textquoteright", return (str "’"))
-    , ("textquotedblleft", return (str "“"))
-    , ("textquotedblright", return (str "”"))
     , ("textsuperscript", extractSpaces superscript <$> tok)
     , ("textsubscript", extractSpaces subscript <$> tok)
     , ("textbf", extractSpaces strong <$> tok)
@@ -447,11 +443,6 @@ inlineCommands = M.unions
     , ("toggletrue", braced >>= setToggle True)
     , ("togglefalse", braced >>= setToggle False)
     , ("iftoggle", try $ ifToggle >> inline)
-    -- biblatex misc
-    , ("RN", romanNumeralUpper)
-    , ("Rn", romanNumeralLower)
-    -- babel
-    , ("foreignlanguage", foreignlanguage)
     -- include
     , ("input", rawInlineOr "input" $ include "input")
     -- soul package
@@ -460,24 +451,6 @@ inlineCommands = M.unions
     , ("uline", underline <$> tok)
     -- plain tex stuff that should just be passed through as raw tex
     , ("ifdim", ifdim)
-    -- bibtex
-    , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
-    , ("mkbibemph", spanWith nullAttr . emph <$> tok)
-    , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
-    , ("mkbibbold", spanWith nullAttr . strong <$> tok)
-    , ("mkbibparens",
-         spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
-    , ("mkbibbrackets",
-         spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
-    , ("autocap", spanWith nullAttr <$> tok)
-    , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
-    , ("bibstring",
-         (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
-           <$> braced)
-    , ("adddot", pure (str "."))
-    , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
-    , ("addabbrvspace", pure space)
-    , ("hyphen", pure (str "-"))
     ]
 
 lettrine :: PandocMonad m => LP m Inlines
@@ -502,26 +475,6 @@ alterStr :: (Text -> Text) -> Inline -> Inline
 alterStr f (Str xs) = Str (f xs)
 alterStr _ x = x
 
-foreignlanguage :: PandocMonad m => LP m Inlines
-foreignlanguage = do
-  babelLang <- untokenize <$> braced
-  case babelLangToBCP47 babelLang of
-       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok
-       _ -> tok
-
-inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47
-  where
-    mk (polyglossia, bcp47Func) =
-      ("text" <> polyglossia, inlineLanguage bcp47Func)
-
-inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines
-inlineLanguage bcp47Func = do
-  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
-                <$> rawopt
-  let lang = renderLang $ bcp47Func o
-  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
-
 hyperlink :: PandocMonad m => LP m Inlines
 hyperlink = try $ do
   src <- untokenize <$> braced
@@ -542,31 +495,6 @@ hypertargetInline = try $ do
   ils <- grouped inline
   return $ spanWith (ref, [], []) ils
 
-romanNumeralUpper :: (PandocMonad m) => LP m Inlines
-romanNumeralUpper =
-  str . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralLower :: (PandocMonad m) => LP m Inlines
-romanNumeralLower =
-  str . T.toLower . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralArg :: (PandocMonad m) => LP m Int
-romanNumeralArg = spaces *> (parser <|> inBraces)
-  where
-    inBraces = do
-      symbol '{'
-      spaces
-      res <- parser
-      spaces
-      symbol '}'
-      return res
-    parser = do
-      s <- untokenize <$> many1 (satisfyTok isWordTok)
-      let (digits, rest) = T.span isDigit s
-      unless (T.null rest) $
-        Prelude.fail "Non-digits in argument to \\Rn or \\RN"
-      safeRead digits
-
 newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a
 newToggle name = do
   updateState $ \st ->
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 66014a77f..37c29188e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -13,6 +13,7 @@ module Text.Pandoc.Readers.LaTeX.Inline
   , verbCommands
   , charCommands
   , nameCommands
+  , biblatexInlineCommands
   , refCommands
   , rawInlineOr
   , listingsLanguage
@@ -23,15 +24,17 @@ import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder
+import Text.Pandoc.Shared (toRomanNumeral, safeRead)
 import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
-import Control.Applicative (optional)
-import Control.Monad (guard, mzero, mplus)
+import Control.Applicative (optional, (<|>))
+import Control.Monad (guard, mzero, mplus, unless)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option)
+                            option, many1)
+import Data.Char (isDigit)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
 import Data.Maybe (maybeToList)
 import Text.Pandoc.Options (ReaderOptions(..))
@@ -127,6 +130,31 @@ nlToSpace :: Char -> Char
 nlToSpace '\n' = ' '
 nlToSpace x    = x
 
+romanNumeralUpper :: (PandocMonad m) => LP m Inlines
+romanNumeralUpper =
+  str . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralLower :: (PandocMonad m) => LP m Inlines
+romanNumeralLower =
+  str . T.toLower . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralArg :: (PandocMonad m) => LP m Int
+romanNumeralArg = spaces *> (parser <|> inBraces)
+  where
+    inBraces = do
+      symbol '{'
+      spaces
+      res <- parser
+      spaces
+      symbol '}'
+      return res
+    parser = do
+      s <- untokenize <$> many1 (satisfyTok isWordTok)
+      let (digits, rest) = T.span isDigit s
+      unless (T.null rest) $
+        Prelude.fail "Non-digits in argument to \\Rn or \\RN"
+      safeRead digits
+
 
 
 verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
@@ -157,6 +185,12 @@ charCommands = M.fromList
   , ("{", lit "{")
   , ("}", lit "}")
   , ("qed", lit "\a0\x25FB")
+  , ("lq", return (str "‘"))
+  , ("rq", return (str "’"))
+  , ("textquoteleft", return (str "‘"))
+  , ("textquoteright", return (str "’"))
+  , ("textquotedblleft", return (str "“"))
+  , ("textquotedblright", return (str "”"))
   , ("/", pure mempty) -- italic correction
   , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
                             guard $ not inTableCell
@@ -185,6 +219,31 @@ charCommands = M.fromList
   , ("hyp", lit "-")
   ]
 
+biblatexInlineCommands :: PandocMonad m
+                       => LP m Inlines -> M.Map Text (LP m Inlines)
+biblatexInlineCommands tok = M.fromList
+  -- biblatex misc
+  [ ("RN", romanNumeralUpper)
+  , ("Rn", romanNumeralLower)
+  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+  , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+  , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+  , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+  , ("mkbibparens",
+       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+  , ("mkbibbrackets",
+       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+  , ("autocap", spanWith nullAttr <$> tok)
+  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+  , ("bibstring",
+       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+         <$> braced)
+  , ("adddot", pure (str "."))
+  , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+  , ("addabbrvspace", pure space)
+  , ("hyphen", pure (str "-"))
+  ]
+
 nameCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 nameCommands = M.fromList
   [ ("figurename", doTerm Translations.Figure)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index adbeaa6d4..24acbdbe4 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -15,15 +15,43 @@ module Text.Pandoc.Readers.LaTeX.Lang
   ( setDefaultLanguage
   , polyglossiaLangToBCP47
   , babelLangToBCP47
+  , inlineLanguageCommands
   )
 where
 import qualified Data.Map as M
+import Data.Text (Text)
 import qualified Data.Text as T
+import Text.Pandoc.Shared (extractSpaces)
 import Text.Pandoc.BCP47 (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Parsing (updateState, option)
-import Text.Pandoc.Builder (Blocks, setMeta, str)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith)
+
+foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
+foreignlanguage tok = do
+  babelLang <- untokenize <$> braced
+  case babelLangToBCP47 babelLang of
+       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok
+       _ -> tok
+
+inlineLanguageCommands :: PandocMonad m
+                       => LP m Inlines -> M.Map Text (LP m Inlines)
+inlineLanguageCommands tok =
+  M.fromList $
+    ("foreignlanguage", foreignlanguage tok) :
+    (mk <$> M.toList polyglossiaLangToBCP47)
+  where
+    mk (polyglossia, bcp47Func) =
+      ("text" <> polyglossia, inlineLanguage tok bcp47Func)
+
+inlineLanguage :: PandocMonad m
+               => LP m Inlines -> (Text -> Lang) -> LP m Inlines
+inlineLanguage tok bcp47Func = do
+  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+                <$> rawopt
+  let lang = renderLang $ bcp47Func o
+  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
 
 setDefaultLanguage :: PandocMonad m => LP m Blocks
 setDefaultLanguage = do
-- 
cgit v1.2.3


From da5e9e5956aae3ac83edef7831939553360b8964 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 11:22:42 -0800
Subject: Move enquote commands to T.P.LaTeX.Lang.

---
 src/Text/Pandoc/Readers/LaTeX.hs        | 22 ++-------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs |  2 --
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   | 34 +++++++++++++++++++++++++++++++--
 3 files changed, 34 insertions(+), 24 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4ec038b94..a4261bbeb 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -59,6 +59,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
 import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
+                                       enquoteCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
@@ -293,18 +294,6 @@ quoted' f starter ender = do
 lit :: Text -> LP m Inlines
 lit = pure . str
 
-enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
-enquote starred mblang = do
-  skipopts
-  let lang = mblang >>= babelLangToBCP47
-  let langspan = case lang of
-                      Nothing -> id
-                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
-  quoteContext <- sQuoteContext <$> getState
-  if starred || quoteContext == InDoubleQuote
-     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
-     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
-
 blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
 blockquote cvariant mblang = do
   citepar <- if cvariant
@@ -359,6 +348,7 @@ inlineCommands = M.unions
   , nameCommands
   , verbCommands
   , charCommands
+  , enquoteCommands tok
   , inlineLanguageCommands tok
   , biblatexInlineCommands tok
   , rest ]
@@ -418,14 +408,6 @@ inlineCommands = M.unions
                              src <- braced
                              mkImage options . unescapeURL . removeDoubleQuotes $
                                  untokenize src)
-    , ("enquote*", enquote True Nothing)
-    , ("enquote", enquote False Nothing)
-    -- foreignquote is supposed to use native quote marks
-    , ("foreignquote*", braced >>= enquote True . Just . untokenize)
-    , ("foreignquote", braced >>= enquote False . Just . untokenize)
-    -- hypehnquote uses regular quotes
-    , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
-    , ("hyphenquote", braced >>= enquote False . Just . untokenize)
     , ("hyperlink", hyperlink)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 37c29188e..8bdff58f7 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -155,8 +155,6 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
         Prelude.fail "Non-digits in argument to \\Rn or \\RN"
       safeRead digits
 
-
-
 verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 verbCommands = M.fromList
   [ ("verb", doverb)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 24acbdbe4..08e217bdb 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -15,6 +15,7 @@ module Text.Pandoc.Readers.LaTeX.Lang
   ( setDefaultLanguage
   , polyglossiaLangToBCP47
   , babelLangToBCP47
+  , enquoteCommands
   , inlineLanguageCommands
   )
 where
@@ -25,8 +26,37 @@ import Text.Pandoc.Shared (extractSpaces)
 import Text.Pandoc.BCP47 (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Parsing (updateState, option)
-import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith)
+import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
+                            withQuoteContext)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith,
+                            singleQuoted, doubleQuoted)
+
+enquote :: PandocMonad m
+        => LP m Inlines
+        -> Bool -> Maybe Text -> LP m Inlines
+enquote tok starred mblang = do
+  skipopts
+  let lang = mblang >>= babelLangToBCP47
+  let langspan = case lang of
+                      Nothing -> id
+                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
+  quoteContext <- sQuoteContext <$> getState
+  if starred || quoteContext == InDoubleQuote
+     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+enquoteCommands :: PandocMonad m
+                => LP m Inlines -> M.Map Text (LP m Inlines)
+enquoteCommands tok = M.fromList
+  [ ("enquote*", enquote tok True Nothing)
+  , ("enquote", enquote tok False Nothing)
+  -- foreignquote is supposed to use native quote marks
+  , ("foreignquote*", braced >>= enquote tok True . Just . untokenize)
+  , ("foreignquote", braced >>= enquote tok False . Just . untokenize)
+  -- hyphenquote uses regular quotes
+  , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize)
+  , ("hyphenquote", braced >>= enquote tok False . Just . untokenize)
+  ]
 
 foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
 foreignlanguage tok = do
-- 
cgit v1.2.3


From 33e4c8dd6c2bbc8109880f43b379d074ceb38391 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 18:21:32 -0800
Subject: Remove T.P.Readers.LaTeX.Accent.

Incorporate accentCommands into T.P.Readers.LaTeX.Inline.
---
 pandoc.cabal                            |  1 -
 src/Text/Pandoc/Readers/LaTeX.hs        |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Accent.hs | 78 ---------------------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs | 71 ++++++++++++++++++++++++++++--
 4 files changed, 69 insertions(+), 83 deletions(-)
 delete mode 100644 src/Text/Pandoc/Readers/LaTeX/Accent.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/pandoc.cabal b/pandoc.cabal
index 211327642..090c28287 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,7 +626,6 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
-                   Text.Pandoc.Readers.LaTeX.Accent,
                    Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Lang,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index a4261bbeb..552411db8 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -49,7 +49,6 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
 import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
 import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
                                        inlineEnvironment,
@@ -64,6 +63,7 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
+                                         accentCommands,
                                          biblatexInlineCommands,
                                          verbCommands, rawInlineOr,
                                          listingsLanguage)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Accent.hs b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
deleted file mode 100644
index f8c53491c..000000000
--- a/src/Text/Pandoc/Readers/LaTeX/Accent.hs
+++ /dev/null
@@ -1,78 +0,0 @@
-{-# LANGUAGE ViewPatterns #-}
-{-# LANGUAGE OverloadedStrings #-}
-module Text.Pandoc.Readers.LaTeX.Accent
-  ( accentCommands )
-where
-
-import Text.Pandoc.Class
-import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Builder as B
-import qualified Data.Map as M
-import Data.Text (Text)
-import Data.Maybe (fromMaybe)
-import Text.Pandoc.Parsing
-import qualified Data.Text as T
-import qualified Data.Text.Normalize as Normalize
-
-accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
-accentCommands tok =
-  let accent = accentWith tok
-      lit = pure . str
-  in M.fromList
-  [ ("aa", lit "å")
-  , ("AA", lit "Å")
-  , ("ss", lit "ß")
-  , ("o", lit "ø")
-  , ("O", lit "Ø")
-  , ("L", lit "Ł")
-  , ("l", lit "ł")
-  , ("ae", lit "æ")
-  , ("AE", lit "Æ")
-  , ("oe", lit "œ")
-  , ("OE", lit "Œ")
-  , ("pounds", lit "£")
-  , ("euro", lit "€")
-  , ("copyright", lit "©")
-  , ("textasciicircum", lit "^")
-  , ("textasciitilde", lit "~")
-  , ("H", accent '\779' Nothing) -- hungarumlaut
-  , ("`", accent '\768' (Just '`')) -- grave
-  , ("'", accent '\769' (Just '\'')) -- acute
-  , ("^", accent '\770' (Just '^')) -- circ
-  , ("~", accent '\771' (Just '~')) -- tilde
-  , ("\"", accent '\776' Nothing) -- umlaut
-  , (".", accent '\775' Nothing) -- dot
-  , ("=", accent '\772' Nothing) -- macron
-  , ("|", accent '\781' Nothing) -- vertical line above
-  , ("b", accent '\817' Nothing) -- macron below
-  , ("c", accent '\807' Nothing) -- cedilla
-  , ("G", accent '\783' Nothing) -- doublegrave
-  , ("h", accent '\777' Nothing) -- hookabove
-  , ("d", accent '\803' Nothing) -- dotbelow
-  , ("f", accent '\785' Nothing)  -- inverted breve
-  , ("r", accent '\778' Nothing)  -- ringabove
-  , ("t", accent '\865' Nothing)  -- double inverted breve
-  , ("U", accent '\782' Nothing)  -- double vertical line above
-  , ("v", accent '\780' Nothing) -- hacek
-  , ("u", accent '\774' Nothing) -- breve
-  , ("k", accent '\808' Nothing) -- ogonek
-  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
-  , ("i", lit "ı")  -- dotless i
-  , ("j", lit "ȷ")  -- dotless j
-  , ("newtie", accent '\785' Nothing) -- inverted breve
-  , ("textcircled", accent '\8413' Nothing) -- combining circle
-  ]
-
-accentWith :: PandocMonad m
-           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
-accentWith tok combiningAccent fallBack = try $ do
-  ils <- tok
-  case toList ils of
-       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-         -- try to normalize to the combined character:
-         Str (Normalize.normalize Normalize.NFC
-               (T.pack [x, combiningAccent]) <> xs) : ys
-       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       _                 -> return ils
-
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 8bdff58f7..7b8bca4af 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings     #-}
+{-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Inline
    Copyright   : Copyright (C) 2006-2021 John MacFarlane
@@ -12,6 +13,7 @@ module Text.Pandoc.Readers.LaTeX.Inline
   ( acronymCommands
   , verbCommands
   , charCommands
+  , accentCommands
   , nameCommands
   , biblatexInlineCommands
   , refCommands
@@ -33,11 +35,12 @@ import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option, many1)
+                            option, many1, try)
 import Data.Char (isDigit)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
-import Data.Maybe (maybeToList)
+import Data.Maybe (maybeToList, fromMaybe)
 import Text.Pandoc.Options (ReaderOptions(..))
+import qualified Data.Text.Normalize as Normalize
 import qualified Text.Pandoc.Translations as Translations
 
 rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
@@ -155,6 +158,22 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
         Prelude.fail "Non-digits in argument to \\Rn or \\RN"
       safeRead digits
 
+accentWith :: PandocMonad m
+           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+  ils <- tok
+  case toList ils of
+       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+         -- try to normalize to the combined character:
+         Str (Normalize.normalize Normalize.NFC
+               (T.pack [x, combiningAccent]) <> xs) : ys
+       [Space] -> return $ str $ T.singleton
+                         $ fromMaybe combiningAccent fallBack
+       []      -> return $ str $ T.singleton
+                         $ fromMaybe combiningAccent fallBack
+       _       -> return ils
+
+
 verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 verbCommands = M.fromList
   [ ("verb", doverb)
@@ -163,7 +182,53 @@ verbCommands = M.fromList
   , ("Verb", doverb)
   ]
 
-
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+  let accent = accentWith tok
+  in  M.fromList
+  [ ("aa", lit "å")
+  , ("AA", lit "Å")
+  , ("ss", lit "ß")
+  , ("o", lit "ø")
+  , ("O", lit "Ø")
+  , ("L", lit "Ł")
+  , ("l", lit "ł")
+  , ("ae", lit "æ")
+  , ("AE", lit "Æ")
+  , ("oe", lit "œ")
+  , ("OE", lit "Œ")
+  , ("pounds", lit "£")
+  , ("euro", lit "€")
+  , ("copyright", lit "©")
+  , ("textasciicircum", lit "^")
+  , ("textasciitilde", lit "~")
+  , ("H", accent '\779' Nothing) -- hungarumlaut
+  , ("`", accent '\768' (Just '`')) -- grave
+  , ("'", accent '\769' (Just '\'')) -- acute
+  , ("^", accent '\770' (Just '^')) -- circ
+  , ("~", accent '\771' (Just '~')) -- tilde
+  , ("\"", accent '\776' Nothing) -- umlaut
+  , (".", accent '\775' Nothing) -- dot
+  , ("=", accent '\772' Nothing) -- macron
+  , ("|", accent '\781' Nothing) -- vertical line above
+  , ("b", accent '\817' Nothing) -- macron below
+  , ("c", accent '\807' Nothing) -- cedilla
+  , ("G", accent '\783' Nothing) -- doublegrave
+  , ("h", accent '\777' Nothing) -- hookabove
+  , ("d", accent '\803' Nothing) -- dotbelow
+  , ("f", accent '\785' Nothing)  -- inverted breve
+  , ("r", accent '\778' Nothing)  -- ringabove
+  , ("t", accent '\865' Nothing)  -- double inverted breve
+  , ("U", accent '\782' Nothing)  -- double vertical line above
+  , ("v", accent '\780' Nothing) -- hacek
+  , ("u", accent '\774' Nothing) -- breve
+  , ("k", accent '\808' Nothing) -- ogonek
+  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+  , ("i", lit "ı")  -- dotless i
+  , ("j", lit "ȷ")  -- dotless j
+  , ("newtie", accent '\785' Nothing) -- inverted breve
+  , ("textcircled", accent '\8413' Nothing) -- combining circle
+  ]
 
 charCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 charCommands = M.fromList
-- 
cgit v1.2.3


From b569b0226d4bd5e0699077089d54fb03d4394b7d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 18:47:17 -0800
Subject: Add T.P.Readers.LaTeX.Include.

---
 pandoc.cabal                             |  1 +
 src/Text/Pandoc/Readers/LaTeX.hs         | 57 ++++-----------------------
 src/Text/Pandoc/Readers/LaTeX/Include.hs | 66 ++++++++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/LaTeX/Inline.hs  | 15 +++++++-
 4 files changed, 87 insertions(+), 52 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Include.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/pandoc.cabal b/pandoc.cabal
index 090c28287..11d34a19a 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,6 +626,7 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
+                   Text.Pandoc.Readers.LaTeX.Include,
                    Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Lang,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 552411db8..dd6c2a1fa 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,9 +38,8 @@ import Text.Pandoc.BCP47 (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      readFileFromDirs, report,
-                                      setResourcePath)
-import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
+                                      report, setResourcePath)
+import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
@@ -61,6 +60,8 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
                                        enquoteCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
+import Text.Pandoc.Readers.LaTeX.Include (insertIncluded,
+                                          readFileFromTexinputs)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
                                          accentCommands,
@@ -235,19 +236,10 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
 
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
-  Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
+  fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
 
 doubleQuote :: PandocMonad m => LP m Inlines
 doubleQuote =
@@ -406,8 +398,8 @@ inlineCommands = M.unions
                   link (unescapeURL $ untokenize url) "" <$> tok)
     , ("includegraphics", do options <- option [] keyvals
                              src <- braced
-                             mkImage options . unescapeURL . removeDoubleQuotes $
-                                 untokenize src)
+                             mkImage options . unescapeURL .
+                               removeDoubleQuotes $ untokenize src)
     , ("hyperlink", hyperlink)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
@@ -417,8 +409,6 @@ inlineCommands = M.unions
     -- LaTeX colors
     , ("textcolor", coloredInline "color")
     , ("colorbox", coloredInline "background-color")
-    -- xspace
-    , ("xspace", doxspace)
     -- etoolbox
     , ("ifstrequal", ifstrequal)
     , ("newtoggle", braced >>= newToggle)
@@ -698,39 +688,6 @@ include name = do
   mapM_ (insertIncluded defaultExt) fs
   return mempty
 
-readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
-readFileFromTexinputs fp = do
-  fileContentsMap <- sFileContents <$> getState
-  case M.lookup (T.pack fp) fileContentsMap of
-    Just t -> return (Just t)
-    Nothing -> do
-      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
-               <$> lookupEnv "TEXINPUTS"
-      readFileFromDirs dirs fp
-
-insertIncluded :: PandocMonad m
-               => FilePath
-               -> FilePath
-               -> LP m ()
-insertIncluded defaultExtension f' = do
-  let f = case takeExtension f' of
-                ".tex" -> f'
-                ".sty" -> f'
-                _      -> addExtension f' defaultExtension
-  pos <- getPosition
-  containers <- getIncludeFiles <$> getState
-  when (T.pack f `elem` containers) $
-    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
-  updateState $ addIncludeFile $ T.pack f
-  mbcontents <- readFileFromTexinputs f
-  contents <- case mbcontents of
-                   Just s -> return s
-                   Nothing -> do
-                     report $ CouldNotLoadIncludeFile (T.pack f) pos
-                     return ""
-  getInput >>= setInput . (tokenize f contents ++)
-  updateState dropLatestIncludeFile
-
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
diff --git a/src/Text/Pandoc/Readers/LaTeX/Include.hs b/src/Text/Pandoc/Readers/LaTeX/Include.hs
new file mode 100644
index 000000000..618a89284
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Include.hs
@@ -0,0 +1,66 @@
+{-# LANGUAGE OverloadedStrings     #-}
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX.Include
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Readers.LaTeX.Include
+  ( readFileFromTexinputs
+  , insertIncluded
+  )
+where
+
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Shared (splitTextBy)
+import System.FilePath (takeExtension, addExtension)
+import Control.Monad (when)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(PandocParseError))
+import Text.Pandoc.Logging (LogMessage(CouldNotLoadIncludeFile))
+import Text.Pandoc.Class (PandocMonad (..), readFileFromDirs, report)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, getState, getInput, setInput,
+                            getPosition, addIncludeFile, getIncludeFiles,
+                            dropLatestIncludeFile)
+import Data.Maybe (fromMaybe)
+
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+  fileContentsMap <- sFileContents <$> getState
+  case M.lookup (T.pack fp) fileContentsMap of
+    Just t -> return (Just t)
+    Nothing -> do
+      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+               <$> lookupEnv "TEXINPUTS"
+      readFileFromDirs dirs fp
+
+insertIncluded :: PandocMonad m
+               => FilePath
+               -> FilePath
+               -> LP m ()
+insertIncluded defaultExtension f' = do
+  let f = case takeExtension f' of
+                ".tex" -> f'
+                ".sty" -> f'
+                _      -> addExtension f' defaultExtension
+  pos <- getPosition
+  containers <- getIncludeFiles <$> getState
+  when (T.pack f `elem` containers) $
+    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
+  updateState $ addIncludeFile $ T.pack f
+  mbcontents <- readFileFromTexinputs f
+  contents <- case mbcontents of
+                   Just s -> return s
+                   Nothing -> do
+                     report $ CouldNotLoadIncludeFile (T.pack f) pos
+                     return ""
+  getInput >>= setInput . (tokenize f contents ++)
+  updateState dropLatestIncludeFile
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 7b8bca4af..3b37ee50e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -35,8 +35,8 @@ import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option, many1, try)
-import Data.Char (isDigit)
+                            option, many1, try, lookAhead)
+import Data.Char (isDigit, isLetter)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
 import Data.Maybe (maybeToList, fromMaybe)
 import Text.Pandoc.Options (ReaderOptions(..))
@@ -50,6 +50,15 @@ rawInlineOr name' fallback = do
      then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
      else fallback
 
+doxspace :: PandocMonad m => LP m Inlines
+doxspace =
+  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
+  where startsWithLetter (Tok _ Word t) =
+          case T.uncons t of
+               Just (c, _) | isLetter c -> True
+               _           -> False
+        startsWithLetter _ = False
+
 dolabel :: PandocMonad m => LP m Inlines
 dolabel = do
   v <- braced
@@ -280,6 +289,8 @@ charCommands = M.fromList
   , ("dothyp", lit ".\173")
   , ("colonhyp", lit ":\173")
   , ("hyp", lit "-")
+  -- xspace
+  , ("xspace", doxspace)
   ]
 
 biblatexInlineCommands :: PandocMonad m
-- 
cgit v1.2.3


From 92ea8a0cb63241dbc8f89e73a359ac5efca2ab87 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 19:07:16 -0800
Subject: Revert "Add T.P.Readers.LaTeX.Include."

This reverts commit b569b0226d4bd5e0699077089d54fb03d4394b7d.

Memory usage improvement in compilation wasn't very significant.
---
 pandoc.cabal                             |  1 -
 src/Text/Pandoc/Readers/LaTeX.hs         | 57 +++++++++++++++++++++++----
 src/Text/Pandoc/Readers/LaTeX/Include.hs | 66 --------------------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs  | 15 +-------
 4 files changed, 52 insertions(+), 87 deletions(-)
 delete mode 100644 src/Text/Pandoc/Readers/LaTeX/Include.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/pandoc.cabal b/pandoc.cabal
index 11d34a19a..090c28287 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,7 +626,6 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
-                   Text.Pandoc.Readers.LaTeX.Include,
                    Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Lang,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index dd6c2a1fa..552411db8 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,8 +38,9 @@ import Text.Pandoc.BCP47 (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      report, setResourcePath)
-import Text.Pandoc.Error (PandocError (PandocParsecError))
+                                      readFileFromDirs, report,
+                                      setResourcePath)
+import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
 import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
@@ -60,8 +61,6 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
                                        enquoteCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
-import Text.Pandoc.Readers.LaTeX.Include (insertIncluded,
-                                          readFileFromTexinputs)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
                                          accentCommands,
@@ -236,10 +235,19 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
+doxspace :: PandocMonad m => LP m Inlines
+doxspace =
+  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
+  where startsWithLetter (Tok _ Word t) =
+          case T.uncons t of
+               Just (c, _) | isLetter c -> True
+               _           -> False
+        startsWithLetter _ = False
+
 
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
-  fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
+  Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
 
 doubleQuote :: PandocMonad m => LP m Inlines
 doubleQuote =
@@ -398,8 +406,8 @@ inlineCommands = M.unions
                   link (unescapeURL $ untokenize url) "" <$> tok)
     , ("includegraphics", do options <- option [] keyvals
                              src <- braced
-                             mkImage options . unescapeURL .
-                               removeDoubleQuotes $ untokenize src)
+                             mkImage options . unescapeURL . removeDoubleQuotes $
+                                 untokenize src)
     , ("hyperlink", hyperlink)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
@@ -409,6 +417,8 @@ inlineCommands = M.unions
     -- LaTeX colors
     , ("textcolor", coloredInline "color")
     , ("colorbox", coloredInline "background-color")
+    -- xspace
+    , ("xspace", doxspace)
     -- etoolbox
     , ("ifstrequal", ifstrequal)
     , ("newtoggle", braced >>= newToggle)
@@ -688,6 +698,39 @@ include name = do
   mapM_ (insertIncluded defaultExt) fs
   return mempty
 
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+  fileContentsMap <- sFileContents <$> getState
+  case M.lookup (T.pack fp) fileContentsMap of
+    Just t -> return (Just t)
+    Nothing -> do
+      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+               <$> lookupEnv "TEXINPUTS"
+      readFileFromDirs dirs fp
+
+insertIncluded :: PandocMonad m
+               => FilePath
+               -> FilePath
+               -> LP m ()
+insertIncluded defaultExtension f' = do
+  let f = case takeExtension f' of
+                ".tex" -> f'
+                ".sty" -> f'
+                _      -> addExtension f' defaultExtension
+  pos <- getPosition
+  containers <- getIncludeFiles <$> getState
+  when (T.pack f `elem` containers) $
+    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
+  updateState $ addIncludeFile $ T.pack f
+  mbcontents <- readFileFromTexinputs f
+  contents <- case mbcontents of
+                   Just s -> return s
+                   Nothing -> do
+                     report $ CouldNotLoadIncludeFile (T.pack f) pos
+                     return ""
+  getInput >>= setInput . (tokenize f contents ++)
+  updateState dropLatestIncludeFile
+
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
diff --git a/src/Text/Pandoc/Readers/LaTeX/Include.hs b/src/Text/Pandoc/Readers/LaTeX/Include.hs
deleted file mode 100644
index 618a89284..000000000
--- a/src/Text/Pandoc/Readers/LaTeX/Include.hs
+++ /dev/null
@@ -1,66 +0,0 @@
-{-# LANGUAGE OverloadedStrings     #-}
-{- |
-   Module      : Text.Pandoc.Readers.LaTeX.Include
-   Copyright   : Copyright (C) 2006-2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
--}
-module Text.Pandoc.Readers.LaTeX.Include
-  ( readFileFromTexinputs
-  , insertIncluded
-  )
-where
-
-import qualified Data.Map as M
-import Data.Text (Text)
-import qualified Data.Text as T
-import Text.Pandoc.Shared (splitTextBy)
-import System.FilePath (takeExtension, addExtension)
-import Control.Monad (when)
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Error (PandocError(PandocParseError))
-import Text.Pandoc.Logging (LogMessage(CouldNotLoadIncludeFile))
-import Text.Pandoc.Class (PandocMonad (..), readFileFromDirs, report)
-import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Parsing (updateState, getState, getInput, setInput,
-                            getPosition, addIncludeFile, getIncludeFiles,
-                            dropLatestIncludeFile)
-import Data.Maybe (fromMaybe)
-
-readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
-readFileFromTexinputs fp = do
-  fileContentsMap <- sFileContents <$> getState
-  case M.lookup (T.pack fp) fileContentsMap of
-    Just t -> return (Just t)
-    Nothing -> do
-      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
-               <$> lookupEnv "TEXINPUTS"
-      readFileFromDirs dirs fp
-
-insertIncluded :: PandocMonad m
-               => FilePath
-               -> FilePath
-               -> LP m ()
-insertIncluded defaultExtension f' = do
-  let f = case takeExtension f' of
-                ".tex" -> f'
-                ".sty" -> f'
-                _      -> addExtension f' defaultExtension
-  pos <- getPosition
-  containers <- getIncludeFiles <$> getState
-  when (T.pack f `elem` containers) $
-    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
-  updateState $ addIncludeFile $ T.pack f
-  mbcontents <- readFileFromTexinputs f
-  contents <- case mbcontents of
-                   Just s -> return s
-                   Nothing -> do
-                     report $ CouldNotLoadIncludeFile (T.pack f) pos
-                     return ""
-  getInput >>= setInput . (tokenize f contents ++)
-  updateState dropLatestIncludeFile
-
-
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 3b37ee50e..7b8bca4af 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -35,8 +35,8 @@ import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option, many1, try, lookAhead)
-import Data.Char (isDigit, isLetter)
+                            option, many1, try)
+import Data.Char (isDigit)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
 import Data.Maybe (maybeToList, fromMaybe)
 import Text.Pandoc.Options (ReaderOptions(..))
@@ -50,15 +50,6 @@ rawInlineOr name' fallback = do
      then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
      else fallback
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
 dolabel :: PandocMonad m => LP m Inlines
 dolabel = do
   v <- braced
@@ -289,8 +280,6 @@ charCommands = M.fromList
   , ("dothyp", lit ".\173")
   , ("colonhyp", lit ":\173")
   , ("hyp", lit "-")
-  -- xspace
-  , ("xspace", doxspace)
   ]
 
 biblatexInlineCommands :: PandocMonad m
-- 
cgit v1.2.3


From 5aa73bd0a2820a0c89b5990dbe53abfdd5ade32d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 7 Mar 2021 15:49:02 -0800
Subject: LaTeX reader: handle table cells containing `&` in `\verb`.

Closes #7129.
---
 src/Text/Pandoc/Readers/LaTeX/Table.hs |  7 ++++++-
 test/command/7129.md                   | 27 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7129.md

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
index 2ea9caf58..7833da081 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -134,6 +134,11 @@ parseTableRow :: PandocMonad m
               -> LP m Row
 parseTableRow blocks inline envname prefsufs = do
   notFollowedBy (spaces *> end_ envname)
+  -- contexts that can contain & that is not colsep:
+  let canContainAmp (Tok _ (CtrlSeq "begin") _) = True
+      canContainAmp (Tok _ (CtrlSeq "verb") _)  = True
+      canContainAmp (Tok _ (CtrlSeq "Verb") _)  = True
+      canContainAmp _       = False
   -- add prefixes and suffixes in token stream:
   let celltoks (pref, suff) = do
         prefpos <- getPosition
@@ -142,7 +147,7 @@ parseTableRow blocks inline envname prefsufs = do
                      ((lookAhead (controlSeq "parbox") >>
                        void blocks) -- #5711
                       <|>
-                      (lookAhead (controlSeq "begin") >> void inline)
+                      (lookAhead (satisfyTok canContainAmp) >> void inline)
                       <|>
                       (lookAhead (symbol '$') >> void inline))
                   <|>
diff --git a/test/command/7129.md b/test/command/7129.md
new file mode 100644
index 000000000..fef4ca2c3
--- /dev/null
+++ b/test/command/7129.md
@@ -0,0 +1,27 @@
+```
+% pandoc -f latex -t native
+\begin{tabular}{ll} \hline
+  FOO & BAR         \\ \hline
+  foo & \verb|b&r|  \\ \hline
+\end{tabular}
+^D
+[Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignLeft,ColWidthDefault)
+ ,(AlignLeft,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [Row ("",[],[])
+  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "FOO"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "BAR"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "foo"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Code ("",[],[]) "b&r"]]]])]
+ (TableFoot ("",[],[])
+ [])]
+```
-- 
cgit v1.2.3


From aecbf8156eb7c36c4b41de27797e262c23728db5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 11 Apr 2021 21:28:48 -0700
Subject: Remove Text.Pandoc.BCP47 module.

[API change]

Use Lang from UnicodeCollation.Lang instead.
This is a richer implementation of BCP 47.
---
 pandoc.cabal                            |   1 -
 src/Text/Pandoc/App.hs                  |   6 +-
 src/Text/Pandoc/BCP47.hs                |  99 -------------
 src/Text/Pandoc/Citeproc.hs             |  10 +-
 src/Text/Pandoc/Citeproc/BibTeX.hs      |  20 +--
 src/Text/Pandoc/Citeproc/Data.hs        |  12 +-
 src/Text/Pandoc/Class/CommonState.hs    |   2 +-
 src/Text/Pandoc/Class/PandocMonad.hs    |   4 +-
 src/Text/Pandoc/Readers/BibTeX.hs       |   9 +-
 src/Text/Pandoc/Readers/LaTeX.hs        |   2 +-
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   | 241 ++++++++++++++++----------------
 src/Text/Pandoc/Writers/BibTeX.hs       |   2 +-
 src/Text/Pandoc/Writers/ConTeXt.hs      |  48 +++----
 src/Text/Pandoc/Writers/CslJson.hs      |   7 +-
 src/Text/Pandoc/Writers/Docx.hs         |   2 +-
 src/Text/Pandoc/Writers/LaTeX.hs        |   2 +-
 src/Text/Pandoc/Writers/LaTeX/Lang.hs   |   8 +-
 src/Text/Pandoc/Writers/ODT.hs          |   7 +-
 src/Text/Pandoc/Writers/OpenDocument.hs |   8 +-
 src/Text/Pandoc/Writers/Shared.hs       |   2 +-
 20 files changed, 198 insertions(+), 294 deletions(-)
 delete mode 100644 src/Text/Pandoc/BCP47.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/pandoc.cabal b/pandoc.cabal
index b6cbb0d7a..8816767e9 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -601,7 +601,6 @@ library
                    Text.Pandoc.Asciify,
                    Text.Pandoc.Emoji,
                    Text.Pandoc.ImageSize,
-                   Text.Pandoc.BCP47,
                    Text.Pandoc.Class,
                    Text.Pandoc.Citeproc
   other-modules:   Text.Pandoc.App.CommandLineOptions,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 6b45e5418..67d3cce7d 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
 import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
                                            options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
-import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import UnicodeCollation.Lang (Lang (..), parseLang)
 import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
 import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.SelfContained (makeSelfContained)
@@ -200,8 +200,8 @@ convertWithOpts opts = do
                     Just f  -> readFileStrict f
 
     case lookupMetaString "lang" (optMetadata opts) of
-           ""      -> setTranslations $ Lang "en" "" "US" []
-           l       -> case parseBCP47 l of
+           ""      -> setTranslations $ Lang "en" Nothing (Just "US") [] [] []
+           l       -> case parseLang l of
                            Left _   -> report $ InvalidLang l
                            Right l' -> setTranslations l'
 
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
deleted file mode 100644
index 1ecf0bf73..000000000
--- a/src/Text/Pandoc/BCP47.hs
+++ /dev/null
@@ -1,99 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{- |
-   Module      : Text.Pandoc.BCP47
-   Copyright   : Copyright (C) 2017-2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Functions for parsing and rendering BCP47 language identifiers.
--}
-module Text.Pandoc.BCP47 (
-                       getLang
-                     , parseBCP47
-                     , Lang(..)
-                     , renderLang
-                     )
-where
-import Control.Monad (guard)
-import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.DocTemplates (FromContext(..))
-import qualified Data.Text as T
-import qualified Text.Parsec as P
-
--- | Represents BCP 47 language/country code.
-data Lang = Lang{ langLanguage :: T.Text
-                , langScript   :: T.Text
-                , langRegion   :: T.Text
-                , langVariants :: [T.Text] }
-                deriving (Eq, Ord, Show)
-
--- | Render a Lang as BCP 47.
-renderLang :: Lang -> T.Text
-renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null)
-                    ([langScript lang, langRegion lang] ++ langVariants lang))
-
--- | Parse a BCP 47 string as a Lang.  Currently we parse
--- extensions and private-use fields as "variants," even
--- though officially they aren't.
-parseBCP47 :: T.Text -> Either T.Text Lang
-parseBCP47 lang =
-  case P.parse bcp47 "lang" lang of
-       Right r -> Right r
-       Left e  -> Left $ T.pack $ show e
-  where bcp47 = do
-          language <- pLanguage
-          script <- P.option "" pScript
-          region <- P.option "" pRegion
-          variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
-          P.eof
-          return Lang{   langLanguage = language
-                       , langScript = script
-                       , langRegion = region
-                       , langVariants = variants }
-        asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
-        pLanguage = do
-          cs <- P.many1 asciiLetter
-          let lcs = length cs
-          guard $ lcs == 2 || lcs == 3
-          return $ T.toLower $ T.pack cs
-        pScript = P.try $ do
-          P.char '-'
-          x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
-          xs <- P.count 3
-                 (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
-          return $ T.toLower $ T.pack (x:xs)
-        pRegion = P.try $ do
-          P.char '-'
-          cs <- P.many1 asciiLetter
-          let lcs = length cs
-          guard $ lcs == 2 || lcs == 3
-          return $ T.toUpper $ T.pack cs
-        pVariant = P.try $ do
-          P.char '-'
-          ds <- P.option "" (P.count 1 P.digit)
-          cs <- P.many1 asciiLetter
-          let var = ds ++ cs
-              lv = length var
-          guard $ if null ds
-                     then lv >= 5 && lv <= 8
-                     else lv == 4
-          return $ T.toLower $ T.pack var
-        pExtension = P.try $ do
-          P.char '-'
-          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
-          let lcs = length cs
-          guard $ lcs >= 2 && lcs <= 8
-          return $ T.toLower $ T.pack cs
-        pPrivateUse = P.try $ do
-          P.char '-'
-          P.char 'x'
-          P.char '-'
-          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
-          guard $ not (null cs) && length cs <= 8
-          let var = "x-" ++ cs
-          return $ T.toLower $ T.pack var
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index af302f782..c9f1806e4 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -18,7 +18,6 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
 import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
 import Text.Pandoc.Readers.Markdown (yamlToRefs)
-import qualified Text.Pandoc.BCP47 as BCP47
 import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Definition as Pandoc
@@ -630,13 +629,8 @@ removeFinalPeriod ils =
 
 bcp47LangToIETF :: PandocMonad m => Text -> m (Maybe Lang)
 bcp47LangToIETF bcplang =
-  case BCP47.parseBCP47 bcplang of
+  case parseLang bcplang of
     Left _ -> do
       report $ InvalidLang bcplang
       return Nothing
-    Right lang ->
-      return $ Just
-             $ Lang (BCP47.langLanguage lang)
-                    (if T.null (BCP47.langRegion lang)
-                        then Nothing
-                        else Just (BCP47.langRegion lang))
+    Right lang -> return $ Just lang
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index c0752dadc..510e56f9c 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -205,10 +205,13 @@ writeBibtexString opts variant mblang ref =
                    [ (", " <>) <$> nameGiven name,
                      nameDroppingParticle name ]
 
-  mblang' = (parseLang <$> getVariableAsText "language") <|> mblang
+  mblang' = case getVariableAsText "language" of
+              Just l  -> either (const Nothing) Just $ parseLang l
+              Nothing -> mblang
 
   titlecase = case mblang' of
-                Just (Lang "en" _) -> titlecase'
+                Just lang | langLanguage lang == "en"
+                                   -> titlecase'
                 Nothing            -> titlecase'
                 _                  ->
                   case variant of
@@ -331,7 +334,7 @@ writeBibtexString opts variant mblang ref =
   renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField
 
 defaultLang :: Lang
-defaultLang = Lang "en" (Just "US")
+defaultLang = Lang "en" Nothing (Just "US") [] [] []
 
 -- a map of bibtex "string" macros
 type StringMap = Map.Map Text Text
@@ -351,9 +354,7 @@ itemToReference locale variant item = do
   bib item $ do
     let lang = fromMaybe defaultLang $ localeLanguage locale
     modify $ \st -> st{ localeLang = lang,
-                        untitlecase = case lang of
-                                           (Lang "en" _) -> True
-                                           _             -> False }
+                        untitlecase = langLanguage lang == "en" }
 
     id' <- asks identifier
     otherIds <- (Just <$> getRawField "ids")
@@ -711,7 +712,7 @@ itemToReference locale variant item = do
 
 
 bib :: Item -> Bib a -> BibParser a
-bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US")))
+bib entry m = fst <$> evalRWST m entry (BibState True defaultLang)
 
 resolveCrossRefs :: Variant -> [Item] -> [Item]
 resolveCrossRefs variant entries =
@@ -1456,8 +1457,9 @@ resolveKey lang ils = Walk.walk go ils
         go x       = x
 
 resolveKey' :: Lang -> Text -> Text
-resolveKey' lang@(Lang l _) k =
-  case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of
+resolveKey' lang k =
+  case Map.lookup (langLanguage lang) biblatexStringMap >>=
+        Map.lookup (T.toLower k) of
     Nothing     -> k
     Just (x, _) -> either (const k) stringify $ parseLaTeX lang x
 
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
index 40430b0f5..388b9ba62 100644
--- a/src/Text/Pandoc/Citeproc/Data.hs
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -21,12 +21,12 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text))
 biblatexStringMap = foldr go mempty biblatexLocalizations
  where
   go (fp, bs) =
-    let Lang lang _ _ _ _ _ = parseLang
-                                (toIETF $ T.takeWhile (/= '.') $ T.pack fp)
-        ls = T.lines $ TE.decodeUtf8 bs
-     in if length ls > 4
-           then M.insert lang (toStringMap $ map (T.splitOn "|") ls)
-           else id
+    let ls = T.lines $ TE.decodeUtf8 bs
+     in case parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) of
+          Right lang | length ls > 4
+            -> M.insert (langLanguage lang)
+                        (toStringMap $ map (T.splitOn "|") ls)
+          _ -> id
   toStringMap = foldr go' mempty
   go' [term, x, y] = M.insert term (x, y)
   go' _ = id
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
index 7e1735c2b..0fd094d99 100644
--- a/src/Text/Pandoc/Class/CommonState.hs
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -19,7 +19,7 @@ where
 
 import Data.Default (Default (def))
 import Data.Text (Text)
-import Text.Pandoc.BCP47 (Lang)
+import UnicodeCollation.Lang (Lang)
 import Text.Pandoc.MediaBag (MediaBag)
 import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
 import Text.Pandoc.Translations (Translations)
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 293a822a0..76f1fa32b 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo,
 import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
                         isRelative, splitDirectories)
 import System.Random (StdGen)
-import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import UnicodeCollation.Lang (Lang(..), parseLang, renderLang)
 import Text.Pandoc.Class.CommonState (CommonState (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -285,7 +285,7 @@ readFileFromDirs (d:ds) f = catchError
 toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang)
 toLang Nothing = return Nothing
 toLang (Just s) =
-  case parseBCP47 s of
+  case parseLang s of
        Left _ -> do
          report $ InvalidLang s
          return Nothing
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index 956b9f1f7..b82a81350 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -48,11 +48,14 @@ readBibLaTeX = readBibTeX' BibTeX.Biblatex
 
 readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
 readBibTeX' variant _opts t = do
-  lang <- maybe (Lang "en" (Just "US")) parseLang
-             <$> lookupEnv "LANG"
+  mblangEnv <- lookupEnv "LANG"
+  let defaultLang = Lang "en" Nothing (Just "US") [] [] []
+  let lang = case mblangEnv of
+              Nothing  -> defaultLang
+              Just l   -> either (const defaultLang) id $ parseLang l
   locale <- case getLocale lang of
                Left e  ->
-                 case getLocale (Lang "en" (Just "US")) of
+                 case getLocale (Lang "en" Nothing (Just "US") [] [] []) of
                    Right l -> return l
                    Left _  -> throwError $ PandocCiteprocError e
                Right l -> return l
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 851756065..83caf742a 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -33,7 +33,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import Text.Pandoc.BCP47 (renderLang)
+import UnicodeCollation.Lang (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 08e217bdb..b92e6ab57 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -23,7 +23,7 @@ import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Shared (extractSpaces)
-import Text.Pandoc.BCP47 (Lang(..), renderLang)
+import UnicodeCollation.Lang (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
@@ -99,133 +99,136 @@ setDefaultLanguage = do
 polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
 polyglossiaLangToBCP47 = M.fromList
   [ ("arabic", \o -> case T.filter (/=' ') o of
-       "locale=algeria"    -> Lang "ar" "" "DZ" []
-       "locale=mashriq"    -> Lang "ar" "" "SY" []
-       "locale=libya"      -> Lang "ar" "" "LY" []
-       "locale=morocco"    -> Lang "ar" "" "MA" []
-       "locale=mauritania" -> Lang "ar" "" "MR" []
-       "locale=tunisia"    -> Lang "ar" "" "TN" []
-       _                   -> Lang "ar" "" "" [])
+       "locale=algeria"    -> Lang "ar" Nothing (Just "DZ") [] [] []
+       "locale=mashriq"    -> Lang "ar" Nothing (Just "SY") [] [] []
+       "locale=libya"      -> Lang "ar" Nothing (Just "LY") [] [] []
+       "locale=morocco"    -> Lang "ar" Nothing (Just "MA") [] [] []
+       "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] []
+       "locale=tunisia"    -> Lang "ar" Nothing (Just "TN") [] [] []
+       _                   -> Lang "ar" Nothing Nothing     [] [] [])
   , ("german", \o -> case T.filter (/=' ') o of
-       "spelling=old" -> Lang "de" "" "DE" ["1901"]
+       "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] []
        "variant=austrian,spelling=old"
-                       -> Lang "de" "" "AT" ["1901"]
-       "variant=austrian" -> Lang "de" "" "AT" []
+                       -> Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] []
        "variant=swiss,spelling=old"
-                       -> Lang "de" "" "CH" ["1901"]
-       "variant=swiss" -> Lang "de" "" "CH" []
-       _ -> Lang "de" "" "" [])
-  , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+                       -> Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] []
+       _ -> Lang "de" Nothing Nothing [] [] [])
+  , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] [])
   , ("greek", \o -> case T.filter (/=' ') o of
-       "variant=poly"    -> Lang "el" "" "polyton" []
-       "variant=ancient" -> Lang "grc" "" "" []
-       _                 -> Lang "el" "" "" [])
+       "variant=poly"    -> Lang "el" Nothing Nothing ["polyton"] [] []
+       "variant=ancient" -> Lang "grc" Nothing Nothing [] [] []
+       _                 -> Lang "el" Nothing Nothing [] [] [])
   , ("english", \o -> case T.filter (/=' ') o of
-       "variant=australian" -> Lang "en" "" "AU" []
-       "variant=canadian"   -> Lang "en" "" "CA" []
-       "variant=british"    -> Lang "en" "" "GB" []
-       "variant=newzealand" -> Lang "en" "" "NZ" []
-       "variant=american"   -> Lang "en" "" "US" []
-       _                    -> Lang "en" "" "" [])
+       "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] []
+       "variant=canadian"   -> Lang "en" Nothing (Just "CA") [] [] []
+       "variant=british"    -> Lang "en" Nothing (Just "GB") [] [] []
+       "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] []
+       "variant=american"   -> Lang "en" Nothing (Just "US") [] [] []
+       _                    -> Lang "en" Nothing Nothing     [] [] [])
+  , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] [])
   , ("latin", \o -> case T.filter (/=' ') o of
-       "variant=classic" -> Lang "la" "" "" ["x-classic"]
-       _                 -> Lang "la" "" "" [])
-  , ("slovenian", \_ -> Lang "sl" "" "" [])
-  , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
-  , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
-  , ("afrikaans", \_ -> Lang "af" "" "" [])
-  , ("amharic", \_ -> Lang "am" "" "" [])
-  , ("assamese", \_ -> Lang "as" "" "" [])
-  , ("asturian", \_ -> Lang "ast" "" "" [])
-  , ("bulgarian", \_ -> Lang "bg" "" "" [])
-  , ("bengali", \_ -> Lang "bn" "" "" [])
-  , ("tibetan", \_ -> Lang "bo" "" "" [])
-  , ("breton", \_ -> Lang "br" "" "" [])
-  , ("catalan", \_ -> Lang "ca" "" "" [])
-  , ("welsh", \_ -> Lang "cy" "" "" [])
-  , ("czech", \_ -> Lang "cs" "" "" [])
-  , ("coptic", \_ -> Lang "cop" "" "" [])
-  , ("danish", \_ -> Lang "da" "" "" [])
-  , ("divehi", \_ -> Lang "dv" "" "" [])
-  , ("esperanto", \_ -> Lang "eo" "" "" [])
-  , ("spanish", \_ -> Lang "es" "" "" [])
-  , ("estonian", \_ -> Lang "et" "" "" [])
-  , ("basque", \_ -> Lang "eu" "" "" [])
-  , ("farsi", \_ -> Lang "fa" "" "" [])
-  , ("finnish", \_ -> Lang "fi" "" "" [])
-  , ("french", \_ -> Lang "fr" "" "" [])
-  , ("friulan", \_ -> Lang "fur" "" "" [])
-  , ("irish", \_ -> Lang "ga" "" "" [])
-  , ("scottish", \_ -> Lang "gd" "" "" [])
-  , ("ethiopic", \_ -> Lang "gez" "" "" [])
-  , ("galician", \_ -> Lang "gl" "" "" [])
-  , ("hebrew", \_ -> Lang "he" "" "" [])
-  , ("hindi", \_ -> Lang "hi" "" "" [])
-  , ("croatian", \_ -> Lang "hr" "" "" [])
-  , ("magyar", \_ -> Lang "hu" "" "" [])
-  , ("armenian", \_ -> Lang "hy" "" "" [])
-  , ("interlingua", \_ -> Lang "ia" "" "" [])
-  , ("indonesian", \_ -> Lang "id" "" "" [])
-  , ("icelandic", \_ -> Lang "is" "" "" [])
-  , ("italian", \_ -> Lang "it" "" "" [])
-  , ("japanese", \_ -> Lang "jp" "" "" [])
-  , ("khmer", \_ -> Lang "km" "" "" [])
-  , ("kurmanji", \_ -> Lang "kmr" "" "" [])
-  , ("kannada", \_ -> Lang "kn" "" "" [])
-  , ("korean", \_ -> Lang "ko" "" "" [])
-  , ("lao", \_ -> Lang "lo" "" "" [])
-  , ("lithuanian", \_ -> Lang "lt" "" "" [])
-  , ("latvian", \_ -> Lang "lv" "" "" [])
-  , ("malayalam", \_ -> Lang "ml" "" "" [])
-  , ("mongolian", \_ -> Lang "mn" "" "" [])
-  , ("marathi", \_ -> Lang "mr" "" "" [])
-  , ("dutch", \_ -> Lang "nl" "" "" [])
-  , ("nynorsk", \_ -> Lang "nn" "" "" [])
-  , ("norsk", \_ -> Lang "no" "" "" [])
-  , ("nko", \_ -> Lang "nqo" "" "" [])
-  , ("occitan", \_ -> Lang "oc" "" "" [])
-  , ("panjabi", \_ -> Lang "pa" "" "" [])
-  , ("polish", \_ -> Lang "pl" "" "" [])
-  , ("piedmontese", \_ -> Lang "pms" "" "" [])
-  , ("portuguese", \_ -> Lang "pt" "" "" [])
-  , ("romansh", \_ -> Lang "rm" "" "" [])
-  , ("romanian", \_ -> Lang "ro" "" "" [])
-  , ("russian", \_ -> Lang "ru" "" "" [])
-  , ("sanskrit", \_ -> Lang "sa" "" "" [])
-  , ("samin", \_ -> Lang "se" "" "" [])
-  , ("slovak", \_ -> Lang "sk" "" "" [])
-  , ("albanian", \_ -> Lang "sq" "" "" [])
-  , ("serbian", \_ -> Lang "sr" "" "" [])
-  , ("swedish", \_ -> Lang "sv" "" "" [])
-  , ("syriac", \_ -> Lang "syr" "" "" [])
-  , ("tamil", \_ -> Lang "ta" "" "" [])
-  , ("telugu", \_ -> Lang "te" "" "" [])
-  , ("thai", \_ -> Lang "th" "" "" [])
-  , ("turkmen", \_ -> Lang "tk" "" "" [])
-  , ("turkish", \_ -> Lang "tr" "" "" [])
-  , ("ukrainian", \_ -> Lang "uk" "" "" [])
-  , ("urdu", \_ -> Lang "ur" "" "" [])
-  , ("vietnamese", \_ -> Lang "vi" "" "" [])
+       "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] []
+       _                 -> Lang "la" Nothing Nothing [] [] [])
+  , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] [])
+  , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] [])
+  , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] [])
+  , ("afrikaans", \_ -> simpleLang "af")
+  , ("amharic", \_ -> simpleLang "am")
+  , ("assamese", \_ -> simpleLang "as")
+  , ("asturian", \_ -> simpleLang "ast")
+  , ("bulgarian", \_ -> simpleLang "bg")
+  , ("bengali", \_ -> simpleLang "bn")
+  , ("tibetan", \_ -> simpleLang "bo")
+  , ("breton", \_ -> simpleLang "br")
+  , ("catalan", \_ -> simpleLang "ca")
+  , ("welsh", \_ -> simpleLang "cy")
+  , ("czech", \_ -> simpleLang "cs")
+  , ("coptic", \_ -> simpleLang "cop")
+  , ("danish", \_ -> simpleLang "da")
+  , ("divehi", \_ -> simpleLang "dv")
+  , ("esperanto", \_ -> simpleLang "eo")
+  , ("spanish", \_ -> simpleLang "es")
+  , ("estonian", \_ -> simpleLang "et")
+  , ("basque", \_ -> simpleLang "eu")
+  , ("farsi", \_ -> simpleLang "fa")
+  , ("finnish", \_ -> simpleLang "fi")
+  , ("french", \_ -> simpleLang "fr")
+  , ("friulan", \_ -> simpleLang "fur")
+  , ("irish", \_ -> simpleLang "ga")
+  , ("scottish", \_ -> simpleLang "gd")
+  , ("ethiopic", \_ -> simpleLang "gez")
+  , ("galician", \_ -> simpleLang "gl")
+  , ("hebrew", \_ -> simpleLang "he")
+  , ("hindi", \_ -> simpleLang "hi")
+  , ("croatian", \_ -> simpleLang "hr")
+  , ("magyar", \_ -> simpleLang "hu")
+  , ("armenian", \_ -> simpleLang "hy")
+  , ("interlingua", \_ -> simpleLang "ia")
+  , ("indonesian", \_ -> simpleLang "id")
+  , ("icelandic", \_ -> simpleLang "is")
+  , ("italian", \_ -> simpleLang "it")
+  , ("japanese", \_ -> simpleLang "jp")
+  , ("khmer", \_ -> simpleLang "km")
+  , ("kurmanji", \_ -> simpleLang "kmr")
+  , ("kannada", \_ -> simpleLang "kn")
+  , ("korean", \_ -> simpleLang "ko")
+  , ("lao", \_ -> simpleLang "lo")
+  , ("lithuanian", \_ -> simpleLang "lt")
+  , ("latvian", \_ -> simpleLang "lv")
+  , ("malayalam", \_ -> simpleLang "ml")
+  , ("mongolian", \_ -> simpleLang "mn")
+  , ("marathi", \_ -> simpleLang "mr")
+  , ("dutch", \_ -> simpleLang "nl")
+  , ("nynorsk", \_ -> simpleLang "nn")
+  , ("norsk", \_ -> simpleLang "no")
+  , ("nko", \_ -> simpleLang "nqo")
+  , ("occitan", \_ -> simpleLang "oc")
+  , ("panjabi", \_ -> simpleLang "pa")
+  , ("polish", \_ -> simpleLang "pl")
+  , ("piedmontese", \_ -> simpleLang "pms")
+  , ("portuguese", \_ -> simpleLang "pt")
+  , ("romansh", \_ -> simpleLang "rm")
+  , ("romanian", \_ -> simpleLang "ro")
+  , ("russian", \_ -> simpleLang "ru")
+  , ("sanskrit", \_ -> simpleLang "sa")
+  , ("samin", \_ -> simpleLang "se")
+  , ("slovak", \_ -> simpleLang "sk")
+  , ("albanian", \_ -> simpleLang "sq")
+  , ("serbian", \_ -> simpleLang "sr")
+  , ("swedish", \_ -> simpleLang "sv")
+  , ("syriac", \_ -> simpleLang "syr")
+  , ("tamil", \_ -> simpleLang "ta")
+  , ("telugu", \_ -> simpleLang "te")
+  , ("thai", \_ -> simpleLang "th")
+  , ("turkmen", \_ -> simpleLang "tk")
+  , ("turkish", \_ -> simpleLang "tr")
+  , ("ukrainian", \_ -> simpleLang "uk")
+  , ("urdu", \_ -> simpleLang "ur")
+  , ("vietnamese", \_ -> simpleLang "vi")
   ]
 
+simpleLang :: Text -> Lang
+simpleLang l = Lang l Nothing Nothing [] [] []
+
 babelLangToBCP47 :: T.Text -> Maybe Lang
 babelLangToBCP47 s =
   case s of
-       "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
-       "naustrian" -> Just $ Lang "de" "" "AT" []
-       "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
-       "nswissgerman" -> Just $ Lang "de" "" "CH" []
-       "german" -> Just $ Lang "de" "" "DE" ["1901"]
-       "ngerman" -> Just $ Lang "de" "" "DE" []
-       "lowersorbian" -> Just $ Lang "dsb" "" "" []
-       "uppersorbian" -> Just $ Lang "hsb" "" "" []
-       "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
-       "slovene" -> Just $ Lang "sl" "" "" []
-       "australian" -> Just $ Lang "en" "" "AU" []
-       "canadian" -> Just $ Lang "en" "" "CA" []
-       "british" -> Just $ Lang "en" "" "GB" []
-       "newzealand" -> Just $ Lang "en" "" "NZ" []
-       "american" -> Just $ Lang "en" "" "US" []
-       "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+       "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] []
+       "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] []
+       "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] []
+       "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] []
+       "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] []
+       "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] []
+       "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] []
+       "slovene" -> Just $ simpleLang "sl"
+       "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] []
+       "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] []
+       "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] []
+       "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] []
+       "american" -> Just $ Lang "en" Nothing (Just "US") [] [] []
+       "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] []
        _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs
index b9ae0c13a..95de6b71f 100644
--- a/src/Text/Pandoc/Writers/BibTeX.hs
+++ b/src/Text/Pandoc/Writers/BibTeX.hs
@@ -43,7 +43,7 @@ writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text
 writeBibTeX' variant opts (Pandoc meta _) = do
   let mblang = case lookupMetaString "lang" meta of
                  "" -> Nothing
-                 t  -> Just $ parseLang t
+                 t  -> either (const Nothing) Just $ parseLang t
   let refs = case lookupMeta "references" meta of
                Just (MetaList xs) -> mapMaybe metaValueToReference xs
                _ -> []
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 3c9975be8..f352c84bc 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Network.URI (unEscapeString)
-import Text.Pandoc.BCP47
+import UnicodeCollation.Lang (Lang(..))
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
@@ -555,26 +555,26 @@ fromBCP47 mbs = fromBCP47' <$> toLang mbs
 -- https://tools.ietf.org/html/bcp47#section-2.1
 -- http://wiki.contextgarden.net/Language_Codes
 fromBCP47' :: Maybe Lang -> Maybe Text
-fromBCP47' (Just (Lang "ar" _ "SY" _)     ) = Just "ar-sy"
-fromBCP47' (Just (Lang "ar" _ "IQ" _)     ) = Just "ar-iq"
-fromBCP47' (Just (Lang "ar" _ "JO" _)     ) = Just "ar-jo"
-fromBCP47' (Just (Lang "ar" _ "LB" _)     ) = Just "ar-lb"
-fromBCP47' (Just (Lang "ar" _ "DZ" _)     ) = Just "ar-dz"
-fromBCP47' (Just (Lang "ar" _ "MA" _)     ) = Just "ar-ma"
-fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo"
-fromBCP47' (Just (Lang "de" _ "DE" _)     ) = Just "de-de"
-fromBCP47' (Just (Lang "de" _ "AT" _)     ) = Just "de-at"
-fromBCP47' (Just (Lang "de" _ "CH" _)     ) = Just "de-ch"
-fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr"
-fromBCP47' (Just (Lang "en" _ "US" _)     ) = Just "en-us"
-fromBCP47' (Just (Lang "en" _ "GB" _)     ) = Just "en-gb"
-fromBCP47' (Just (Lang "grc"_  _ _)       ) = Just "agr"
-fromBCP47' (Just (Lang "el" _ _ _)        ) = Just "gr"
-fromBCP47' (Just (Lang "eu" _ _ _)        ) = Just "ba"
-fromBCP47' (Just (Lang "he" _ _ _)        ) = Just "il"
-fromBCP47' (Just (Lang "jp" _ _ _)        ) = Just "ja"
-fromBCP47' (Just (Lang "uk" _ _ _)        ) = Just "ua"
-fromBCP47' (Just (Lang "vi" _ _ _)        ) = Just "vn"
-fromBCP47' (Just (Lang "zh" _ _ _)        ) = Just "cn"
-fromBCP47' (Just (Lang l _ _ _)           ) = Just l
-fromBCP47' Nothing                          = Nothing
+fromBCP47' (Just (Lang "ar" _ (Just "SY") _ _ _)) = Just "ar-sy"
+fromBCP47' (Just (Lang "ar" _ (Just "IQ") _ _ _)) = Just "ar-iq"
+fromBCP47' (Just (Lang "ar" _ (Just "JO") _ _ _)) = Just "ar-jo"
+fromBCP47' (Just (Lang "ar" _ (Just "LB") _ _ _)) = Just "ar-lb"
+fromBCP47' (Just (Lang "ar" _ (Just "DZ") _ _ _)) = Just "ar-dz"
+fromBCP47' (Just (Lang "ar" _ (Just "MA") _ _ _)) = Just "ar-ma"
+fromBCP47' (Just (Lang "de" _ _ ["1901"] _ _))    = Just "deo"
+fromBCP47' (Just (Lang "de" _ (Just "DE") _ _ _)) = Just "de-de"
+fromBCP47' (Just (Lang "de" _ (Just "AT") _ _ _)) = Just "de-at"
+fromBCP47' (Just (Lang "de" _ (Just "CH") _ _ _)) = Just "de-ch"
+fromBCP47' (Just (Lang "el" _ _ ["polyton"] _ _)) = Just "agr"
+fromBCP47' (Just (Lang "en" _ (Just "US") _ _ _)) = Just "en-us"
+fromBCP47' (Just (Lang "en" _ (Just "GB") _ _ _)) = Just "en-gb"
+fromBCP47' (Just (Lang "grc"_ _ _ _ _))           = Just "agr"
+fromBCP47' (Just (Lang "el" _ _ _ _ _))           = Just "gr"
+fromBCP47' (Just (Lang "eu" _ _ _ _ _))           = Just "ba"
+fromBCP47' (Just (Lang "he" _ _ _ _ _))           = Just "il"
+fromBCP47' (Just (Lang "jp" _ _ _ _ _))           = Just "ja"
+fromBCP47' (Just (Lang "uk" _ _ _ _ _))           = Just "ua"
+fromBCP47' (Just (Lang "vi" _ _ _ _ _))           = Just "vn"
+fromBCP47' (Just (Lang "zh" _ _ _ _ _))           = Just "cn"
+fromBCP47' (Just (Lang l _ _ _ _ _))              = Just l
+fromBCP47' Nothing                                = Nothing
diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
index a10def95e..395335667 100644
--- a/src/Text/Pandoc/Writers/CslJson.hs
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -34,15 +34,16 @@ import Control.Monad.Identity
 import Citeproc.Locale (getLocale)
 import Citeproc.CslJson
 import Text.Pandoc.Options (WriterOptions)
-import Data.Maybe (mapMaybe)
+import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Aeson.Encode.Pretty         (Config (..), Indent (Spaces),
                                          NumberFormat (Generic),
                                          defConfig, encodePretty')
 
 writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeCslJson _opts (Pandoc meta _) = do
-  let lang = maybe (Lang "en" (Just "US")) parseLang
-               (lookupMeta "lang" meta >>= metaValueToText)
+  let lang = fromMaybe (Lang "en" Nothing (Just "US") [] [] [])
+               (lookupMeta "lang" meta >>= metaValueToText >>=
+                  either (const Nothing) Just . parseLang)
   locale <- case getLocale lang of
                Left e  -> throwError $ PandocCiteprocError e
                Right l -> return l
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 20bcd0324..7781df8e7 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
-import Text.Pandoc.BCP47 (getLang, renderLang)
+import UnicodeCollation.Lang (renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Data.Time
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 1c970e6ad..e99bad738 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -29,7 +29,7 @@ import qualified Data.Text as T
 import Network.URI (unEscapeString)
 import Text.DocTemplates (FromContext(lookupContext), renderTemplate,
                           Val(..), Context(..))
-import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import UnicodeCollation.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
index 871b2692a..437b84120 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -46,7 +46,7 @@ toPolyglossia (Lang "de" _ (Just "AT") vars _ _)
 toPolyglossia (Lang "de" _ (Just "AT") _ _ _)  = ("german", "variant=austrian")
 toPolyglossia (Lang "de" _ (Just "CH") vars _ _)
   | "1901" `elem` vars        = ("german", "variant=swiss, spelling=old")
-toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss")
+toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss")
 toPolyglossia (Lang "de" _ _ _ _ _)           = ("german", "")
 toPolyglossia (Lang "dsb" _ _ _ _ _)          = ("lsorbian", "")
 toPolyglossia (Lang "el" _ _ vars _ _)
@@ -61,9 +61,9 @@ toPolyglossia (Lang "grc" _ _ _ _ _)          = ("greek",   "variant=ancient")
 toPolyglossia (Lang "hsb" _ _ _ _ _)          = ("usorbian", "")
 toPolyglossia (Lang "la" _ _ vars _ _)
   | "x-classic" `elem` vars                   = ("latin", "variant=classic")
-toPolyglossia (Lang "pt" _ "BR" _ _ _)        = ("portuguese", "variant=brazilian")
+toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian")
 toPolyglossia (Lang "sl" _ _ _ _ _)           = ("slovenian", "")
-toPolyglossia x                           = (commonFromBcp47 x, "")
+toPolyglossia x                               = (commonFromBcp47 x, "")
 
 -- Takes a list of the constituents of a BCP47 language code and
 -- converts it to a Babel language string.
@@ -81,7 +81,7 @@ toBabel (Lang "de" _ _ vars _ _)
   | "1901" `elem` vars                  = "german"
   | otherwise                           = "ngerman"
 toBabel (Lang "dsb" _ _ _ _ _)          = "lowersorbian"
-toBabel (Lang "el" _ _ vars)
+toBabel (Lang "el" _ _ vars _ _)
   | "polyton" `elem` vars               = "polutonikogreek"
 toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian"
 toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian"
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 101b236aa..6fd4cdeb4 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -16,6 +16,7 @@ import Codec.Archive.Zip
 import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict
 import qualified Data.ByteString.Lazy as B
+import Data.Maybe (fromMaybe)
 import Data.Generics (everywhere', mkT)
 import Data.List (isPrefixOf)
 import qualified Data.Map as Map
@@ -23,7 +24,7 @@ import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import Data.Time
 import System.FilePath (takeDirectory, takeExtension, (<.>))
-import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import UnicodeCollation.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
@@ -35,7 +36,7 @@ import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
 import Text.DocLayout
 import Text.Pandoc.Shared (stringify, pandocVersion, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
-                                   fixDisplayMath)
+                                   fixDisplayMath, getLang)
 import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
@@ -194,7 +195,7 @@ addLang lang = everywhere' (mkT updateLangAttr)
     where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
                            = Attr n (langLanguage lang)
           updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
-                           = Attr n (langRegion lang)
+                           = Attr n (fromMaybe "" $ langRegion lang)
           updateLangAttr x = x
 
 -- | transform both Image and Math elements
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index cf42f2228..6c265090c 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -25,7 +25,7 @@ import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import UnicodeCollation.Lang (Lang (..), parseLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm,
                                       setTranslations, toLang)
 import Text.Pandoc.Definition
@@ -236,7 +236,7 @@ handleSpaces s = case T.uncons s of
 -- | Convert Pandoc document to string in OpenDocument format.
 writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeOpenDocument opts (Pandoc meta blocks) = do
-  let defLang = Lang "en" "US" "" []
+  let defLang = Lang "en" Nothing (Just "US") [] [] []
   lang <- case lookupMetaString "lang" meta of
             "" -> pure defLang
             s  -> fromMaybe defLang <$> toLang (Just s)
@@ -893,7 +893,7 @@ textStyleAttr m s
                     Map.insert "style:font-name-complex" "Courier New" $ m
     | Language lang <- s
                   = Map.insert "fo:language" (langLanguage lang) .
-                    Map.insert "fo:country" (langRegion lang) $ m
+                    maybe id (Map.insert "fo:country") (langRegion lang) $ m
     | otherwise   = m
 
 withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a
@@ -901,7 +901,7 @@ withLangFromAttr (_,_,kvs) action =
   case lookup "lang" kvs of
        Nothing -> action
        Just l  ->
-         case parseBCP47 l of
+         case parseLang l of
               Right lang -> withTextStyle (Language lang) action
               Left _ -> do
                 report $ InvalidLang l
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index fcb47bd5a..a09d18571 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -149,7 +149,7 @@ defField field val (Context m) =
     f _newval oldval = oldval
 
 -- | Get the contents of the `lang` metadata field or variable.
-getLang :: WriterOptions -> Meta -> Maybe Text
+getLang :: WriterOptions -> Meta -> Maybe T.Text
 getLang opts meta =
   case lookupContext "lang" (writerVariables opts) of
         Just s -> Just s
-- 
cgit v1.2.3


From a478a5c4c8753fd0bf272cd540ca197ae146a196 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 17 Apr 2021 11:47:54 -0700
Subject: Update to released unicode-collation, latest citeproc dev version.

Update citeproc test.
---
 cabal.project                           | 11 +----------
 src/Text/Pandoc/App.hs                  |  2 +-
 src/Text/Pandoc/Citeproc/Data.hs        |  2 +-
 src/Text/Pandoc/Class/CommonState.hs    |  2 +-
 src/Text/Pandoc/Class/PandocMonad.hs    |  2 +-
 src/Text/Pandoc/Readers/LaTeX.hs        |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   |  2 +-
 src/Text/Pandoc/Writers/ConTeXt.hs      |  2 +-
 src/Text/Pandoc/Writers/Docbook.hs      |  2 +-
 src/Text/Pandoc/Writers/Docx.hs         |  2 +-
 src/Text/Pandoc/Writers/LaTeX.hs        |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Lang.hs   |  2 +-
 src/Text/Pandoc/Writers/ODT.hs          |  2 +-
 src/Text/Pandoc/Writers/OpenDocument.hs |  2 +-
 stack.yaml                              |  5 ++---
 test/command/pandoc-citeproc-320a.md    |  8 ++++----
 16 files changed, 20 insertions(+), 30 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/cabal.project b/cabal.project
index fa17a20a6..77bc8ef88 100644
--- a/cabal.project
+++ b/cabal.project
@@ -5,14 +5,5 @@ flags: +embed_data_files
 source-repository-package
   type: git
   location: https://github.com/jgm/citeproc
-  tag: b42857be658b8f2649e989e061978e304986f853
+  tag: f9439e07e9271c7c2674a51efcad2fb8c663b2c8
 
-source-repository-package
-  type: git
-  location: https://github.com/jgm/unicode-collation
-  tag: 9d229a5c6bcbaf53d7022575234eb223cfa90d55
-
--- source-repository-package
---  type: git
---  location: https://github.com/jgm/citeproc
---  tag: d44e24696ab444090d0e63e321c3a573f68b2e74
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 67d3cce7d..4e8c9f2ab 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
 import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
                                            options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
-import UnicodeCollation.Lang (Lang (..), parseLang)
+import Text.Collate.Lang (Lang (..), parseLang)
 import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
 import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.SelfContained (makeSelfContained)
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
index 388b9ba62..848a83a1e 100644
--- a/src/Text/Pandoc/Citeproc/Data.hs
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE
 import qualified Data.Text as T
 import Data.Text (Text)
 import Text.Pandoc.Citeproc.Util (toIETF)
-import UnicodeCollation.Lang (Lang(..), parseLang)
+import Text.Collate.Lang (Lang(..), parseLang)
 
 biblatexLocalizations :: [(FilePath, ByteString)]
 biblatexLocalizations = $(embedDir "citeproc/biblatex-localization")
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
index 0fd094d99..796a4afd5 100644
--- a/src/Text/Pandoc/Class/CommonState.hs
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -19,7 +19,7 @@ where
 
 import Data.Default (Default (def))
 import Data.Text (Text)
-import UnicodeCollation.Lang (Lang)
+import Text.Collate.Lang (Lang)
 import Text.Pandoc.MediaBag (MediaBag)
 import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
 import Text.Pandoc.Translations (Translations)
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 76f1fa32b..7559cd7cd 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo,
 import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
                         isRelative, splitDirectories)
 import System.Random (StdGen)
-import UnicodeCollation.Lang (Lang(..), parseLang, renderLang)
+import Text.Collate.Lang (Lang(..), parseLang, renderLang)
 import Text.Pandoc.Class.CommonState (CommonState (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 83caf742a..203dab83c 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -33,7 +33,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import UnicodeCollation.Lang (renderLang)
+import Text.Collate.Lang (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index b92e6ab57..6a8327904 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -23,7 +23,7 @@ import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Shared (extractSpaces)
-import UnicodeCollation.Lang (Lang(..), renderLang)
+import Text.Collate.Lang (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index f352c84bc..f14b1d894 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Network.URI (unEscapeString)
-import UnicodeCollation.Lang (Lang(..))
+import Text.Collate.Lang (Lang(..))
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 1f10c9d04..02b141250 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -493,4 +493,4 @@ isSectionAttr DocBook4 ("os",_) = True
 isSectionAttr DocBook4 ("revision",_) = True
 isSectionAttr DocBook4 ("security",_) = True
 isSectionAttr DocBook4 ("vendor",_) = True
-isSectionAttr _ (_,_) = False
\ No newline at end of file
+isSectionAttr _ (_,_) = False
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 7781df8e7..749ad9a21 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
-import UnicodeCollation.Lang (renderLang)
+import Text.Collate.Lang (renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Data.Time
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index e99bad738..8b1f3df1d 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -29,7 +29,7 @@ import qualified Data.Text as T
 import Network.URI (unEscapeString)
 import Text.DocTemplates (FromContext(lookupContext), renderTemplate,
                           Val(..), Context(..))
-import UnicodeCollation.Lang (Lang (..), renderLang)
+import Text.Collate.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
index 437b84120..0ba68b74e 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -15,7 +15,7 @@ module Text.Pandoc.Writers.LaTeX.Lang
     toBabel
   ) where
 import Data.Text (Text)
-import UnicodeCollation.Lang (Lang(..))
+import Text.Collate.Lang (Lang(..))
 
 
 -- In environments \Arabic instead of \arabic is used
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 6fd4cdeb4..e4eb4fd25 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -24,7 +24,7 @@ import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import Data.Time
 import System.FilePath (takeDirectory, takeExtension, (<.>))
-import UnicodeCollation.Lang (Lang (..), renderLang)
+import Text.Collate.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 6c265090c..34a3a4aa5 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -25,7 +25,7 @@ import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import UnicodeCollation.Lang (Lang (..), parseLang)
+import Text.Collate.Lang (Lang (..), parseLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm,
                                       setTranslations, toLang)
 import Text.Pandoc.Definition
diff --git a/stack.yaml b/stack.yaml
index 159bf74b9..7bc33fa43 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -16,11 +16,10 @@ extra-deps:
 - texmath-0.12.2
 - random-1.2.0
 - xml-conduit-1.9.1.1
+- unicode-collation-0.1
 # - citeproc-0.3.0.9
 - git: https://github.com/jgm/citeproc
-  commit: b42857be658b8f2649e989e061978e304986f853
-- git: https://github.com/jgm/unicode-collation
-  commit: 9d229a5c6bcbaf53d7022575234eb223cfa90d55
+  commit: f9439e07e9271c7c2674a51efcad2fb8c663b2c8
 ghc-options:
    "$locals": -fhide-source-paths -Wno-missing-home-modules
 resolver: lts-17.5
diff --git a/test/command/pandoc-citeproc-320a.md b/test/command/pandoc-citeproc-320a.md
index 1c3b47de0..79dacfa10 100644
--- a/test/command/pandoc-citeproc-320a.md
+++ b/test/command/pandoc-citeproc-320a.md
@@ -56,6 +56,10 @@ n.d.; al-'Udhrī, n.d.; Uch, n.d.; Uebel, n.d.; Zzz, n.d.).
 Uch, Ann. n.d.
 :::
 
+::: {#ref-item4 .csl-entry}
+'Udhrī, Jamīl al-. n.d.
+:::
+
 ::: {#ref-item1 .csl-entry}
 ʾUdhrī, Jamīl al-. n.d.
 :::
@@ -68,10 +72,6 @@ Uch, Ann. n.d.
 \'Udhrī, Jamīl al-. n.d.
 :::
 
-::: {#ref-item4 .csl-entry}
-'Udhrī, Jamīl al-. n.d.
-:::
-
 ::: {#ref-item5 .csl-entry}
 'Udhrī, Jamīl al-. n.d.
 :::
-- 
cgit v1.2.3


From 6e45607f9948f45b2e94f54b4825b667ca0d5441 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 1 May 2021 13:17:45 -0700
Subject: Change reader types, allowing better tracking of source positions.

Previously, when multiple file arguments were provided, pandoc
simply concatenated them and passed the contents to the readers,
which took a Text argument.

As a result, the readers had no way of knowing which file
was the source of any particular bit of text.  This meant that
we couldn't report accurate source positions on errors or
include accurate source positions as attributes in the AST.
More seriously, it meant that we couldn't resolve resource
paths relative to the files containing them
(see e.g. #5501, #6632, #6384, #3752).

Add Text.Pandoc.Sources (exported module), with a `Sources` type
and a `ToSources` class.  A `Sources` wraps a list of `(SourcePos,
Text)` pairs. [API change] A parsec `Stream` instance is provided for
`Sources`.  The module also exports versions of parsec's `satisfy` and
other Char parsers that track source positions accurately from a
`Sources` stream (or any instance of the new `UpdateSourcePos` class).

Text.Pandoc.Parsing now exports these modified Char parsers instead of
the ones parsec provides.  Modified parsers to use a `Sources` as stream
[API change].

The readers that previously took a `Text` argument have been
modified to take any instance of `ToSources`. So, they may still
be used with a `Text`, but they can also be used with a `Sources`
object.

In Text.Pandoc.Error, modified the constructor PandocParsecError
to take a `Sources` rather than a `Text` as first argument,
so parse error locations can be accurately reported.

T.P.Logging: showPos, do not print "-" as source name.
---
 .hlint.yaml                               |   1 +
 pandoc.cabal                              |   1 +
 src/Text/Pandoc/App.hs                    |   8 +-
 src/Text/Pandoc/App/Opt.hs                |   2 +-
 src/Text/Pandoc/Citeproc/BibTeX.hs        |  14 +-
 src/Text/Pandoc/Error.hs                  |  39 ++-
 src/Text/Pandoc/Logging.hs                |   8 +-
 src/Text/Pandoc/Parsing.hs                | 474 ++++++++++++++++++------------
 src/Text/Pandoc/Readers.hs                |  97 +++---
 src/Text/Pandoc/Readers/BibTeX.hs         |  13 +-
 src/Text/Pandoc/Readers/CSV.hs            |  14 +-
 src/Text/Pandoc/Readers/CommonMark.hs     |  70 +++--
 src/Text/Pandoc/Readers/Creole.hs         |  11 +-
 src/Text/Pandoc/Readers/CslJson.hs        |   9 +-
 src/Text/Pandoc/Readers/DocBook.hs        |  11 +-
 src/Text/Pandoc/Readers/DokuWiki.hs       |  17 +-
 src/Text/Pandoc/Readers/FB2.hs            |   9 +-
 src/Text/Pandoc/Readers/HTML.hs           |  33 ++-
 src/Text/Pandoc/Readers/Haddock.hs        |  12 +-
 src/Text/Pandoc/Readers/Ipynb.hs          |   8 +-
 src/Text/Pandoc/Readers/JATS.hs           |  11 +-
 src/Text/Pandoc/Readers/Jira.hs           |  16 +-
 src/Text/Pandoc/Readers/LaTeX.hs          |  23 +-
 src/Text/Pandoc/Readers/LaTeX/Citation.hs |   2 +-
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs  |  12 +-
 src/Text/Pandoc/Readers/LaTeX/Types.hs    |  15 +-
 src/Text/Pandoc/Readers/Man.hs            |  29 +-
 src/Text/Pandoc/Readers/Markdown.hs       |  49 +--
 src/Text/Pandoc/Readers/MediaWiki.hs      |  13 +-
 src/Text/Pandoc/Readers/Metadata.hs       |  26 +-
 src/Text/Pandoc/Readers/Muse.hs           |  17 +-
 src/Text/Pandoc/Readers/Native.hs         |  12 +-
 src/Text/Pandoc/Readers/OPML.hs           |  13 +-
 src/Text/Pandoc/Readers/Org.hs            |  11 +-
 src/Text/Pandoc/Readers/Org/Inlines.hs    |   3 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs    |   2 +-
 src/Text/Pandoc/Readers/RST.hs            |  76 ++---
 src/Text/Pandoc/Readers/Roff.hs           |  34 +--
 src/Text/Pandoc/Readers/TWiki.hs          |  12 +-
 src/Text/Pandoc/Readers/Textile.hs        | 172 +++++------
 src/Text/Pandoc/Readers/TikiWiki.hs       |  12 +-
 src/Text/Pandoc/Readers/Txt2Tags.hs       |  12 +-
 src/Text/Pandoc/Readers/Vimwiki.hs        |  19 +-
 src/Text/Pandoc/Shared.hs                 |   1 +
 src/Text/Pandoc/Sources.hs                | 195 ++++++++++++
 test/Tests/Readers/Markdown.hs            |   4 +-
 46 files changed, 1025 insertions(+), 617 deletions(-)
 create mode 100644 src/Text/Pandoc/Sources.hs

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/.hlint.yaml b/.hlint.yaml
index 350794803..ad0f7ddb9 100644
--- a/.hlint.yaml
+++ b/.hlint.yaml
@@ -9,6 +9,7 @@
 # Ignore some builtin hints
 #
 - ignore: {name: "Avoid lambda"}
+- ignore: {name: "Use bimap"}
 - ignore: {name: "Eta reduce"}
 - ignore: {name: "Evaluate"}
 - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained
diff --git a/pandoc.cabal b/pandoc.cabal
index 8ea3aa681..de7951c54 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -513,6 +513,7 @@ library
                    Text.Pandoc.Options,
                    Text.Pandoc.Extensions,
                    Text.Pandoc.Shared,
+                   Text.Pandoc.Sources,
                    Text.Pandoc.MediaBag,
                    Text.Pandoc.Error,
                    Text.Pandoc.Filter,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 96e4b5f47..98b072ffb 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -160,9 +160,11 @@ convertWithOpts opts = do
                                     else optTabStop opts)
 
 
-    let readSources :: [FilePath] -> PandocIO Text
-        readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
-                              mapM readSource srcs
+    let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
+        readSources srcs =
+          mapM (\fp -> do
+                   t <- readSource fp
+                   return (if fp == "-" then "" else fp, convertTabs t)) srcs
 
 
     outputSettings <- optToOutputSettings opts
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index c72f63464..d54d932b7 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -687,7 +687,7 @@ yamlToMeta (Mapping _ _ m) =
   where
     pMetaString = pure . MetaString <$> P.manyChar P.anyChar
     runEverything p =
-      runPure (P.readWithM p (def :: P.ParserState) "")
+      runPure (P.readWithM p (def :: P.ParserState) ("" :: Text))
       >>= fmap (Meta . flip P.runF def)
 yamlToMeta _ = return mempty
 
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 510e56f9c..f6833000c 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -59,10 +59,11 @@ data Variant = Bibtex | Biblatex
   deriving (Show, Eq, Ord)
 
 -- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
-readBibtexString :: Variant           -- ^ bibtex or biblatex
+readBibtexString :: ToSources a
+                 => Variant           -- ^ bibtex or biblatex
                  -> Locale            -- ^ Locale
                  -> (Text -> Bool)    -- ^ Filter on citation ids
-                 -> Text              -- ^ bibtex/biblatex text
+                 -> a                 -- ^ bibtex/biblatex text
                  -> Either ParseError [Reference Inlines]
 readBibtexString variant locale idpred contents = do
   case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
@@ -70,7 +71,7 @@ readBibtexString variant locale idpred contents = do
                       filter (\item -> idpred (identifier item) &&
                                         entryType item /= "xdata"))
            (fromMaybe defaultLang $ localeLanguage locale, Map.empty)
-           "" contents of
+           "" (toSources contents) of
           Left err -> Left err
           Right xs -> return xs
 
@@ -339,7 +340,7 @@ defaultLang = Lang "en" Nothing (Just "US") [] [] []
 -- a map of bibtex "string" macros
 type StringMap = Map.Map Text Text
 
-type BibParser = Parser Text (Lang, StringMap)
+type BibParser = Parser Sources (Lang, StringMap)
 
 data Item = Item{ identifier :: Text
                 , sourcePos  :: SourcePos
@@ -804,7 +805,7 @@ bibEntries = do
                        (bibComment <|> bibPreamble <|> bibString))
 
 bibSkip :: BibParser ()
-bibSkip = () <$ take1WhileP (/='@')
+bibSkip = skipMany1 (satisfy (/='@'))
 
 bibComment :: BibParser ()
 bibComment = do
@@ -829,6 +830,9 @@ bibString = do
   updateState (\(l,m) -> (l, Map.insert k v m))
   return ()
 
+take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text
+take1WhileP f = T.pack <$> many1 (satisfy f)
+
 inBraces :: BibParser Text
 inBraces = do
   char '{'
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 8102f04cc..81eb41f85 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -23,26 +23,27 @@ import Control.Exception (Exception, displayException)
 import Data.Typeable (Typeable)
 import Data.Word (Word8)
 import Data.Text (Text)
+import Data.List (sortOn)
 import qualified Data.Text as T
+import Data.Ord (Down(..))
 import GHC.Generics (Generic)
 import Network.HTTP.Client (HttpException)
 import System.Exit (ExitCode (..), exitWith)
 import System.IO (stderr)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (Sources(..))
 import Text.Printf (printf)
 import Text.Parsec.Error
 import Text.Parsec.Pos hiding (Line)
 import Text.Pandoc.Shared (tshow)
 import Citeproc (CiteprocError, prettyCiteprocError)
 
-type Input = Text
-
 data PandocError = PandocIOError Text IOError
                  | PandocHttpError Text HttpException
                  | PandocShouldNeverHappenError Text
                  | PandocSomeError Text
                  | PandocParseError Text
-                 | PandocParsecError Input ParseError
+                 | PandocParsecError Sources ParseError
                  | PandocMakePDFError Text
                  | PandocOptionError Text
                  | PandocSyntaxMapError Text
@@ -81,22 +82,28 @@ renderError e =
       "Please report this to pandoc's developers: " <> s
     PandocSomeError s -> s
     PandocParseError s -> s
-    PandocParsecError input err' ->
+    PandocParsecError (Sources inputs) err' ->
         let errPos = errorPos err'
             errLine = sourceLine errPos
             errColumn = sourceColumn errPos
-            ls = T.lines input <> [""]
-            errorInFile = if length ls > errLine - 1
-                            then T.concat ["\n", ls !! (errLine - 1)
-                                          ,"\n", T.replicate (errColumn - 1) " "
-                                          ,"^"]
-                        else ""
-        in  "\nError at " <> tshow  err' <>
-                     -- if error comes from a chunk or included file,
-                     -- then we won't get the right text this way:
-                     if sourceName errPos == "source"
-                        then errorInFile
-                        else ""
+            errFile = sourceName errPos
+            errorInFile =
+              case sortOn (Down . sourceLine . fst)
+                      [ (pos,t)
+                        | (pos,t) <- inputs
+                        , sourceName pos == errFile
+                        , sourceLine pos <= errLine
+                      ] of
+                []  -> ""
+                ((pos,txt):_) ->
+                  let ls = T.lines txt <> [""]
+                      ln = errLine - sourceLine pos
+                   in if length ls > ln - 1
+                         then T.concat ["\n", ls !! (ln - 1)
+                                       ,"\n", T.replicate (errColumn - 1) " "
+                                       ,"^"]
+                         else ""
+        in  "\nError at " <> tshow  err' <> errorInFile
     PandocMakePDFError s -> s
     PandocOptionError s -> s
     PandocSyntaxMapError s -> s
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index efd2188f1..8c7292b69 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -241,9 +241,11 @@ instance ToJSON LogMessage where
 showPos :: SourcePos -> Text
 showPos pos = Text.pack $ sn ++ "line " ++
      show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
-  where sn = if sourceName pos == "source" || sourceName pos == ""
-                then ""
-                else sourceName pos ++ " "
+  where
+    sn' = sourceName pos
+    sn = if sn' == "source" || sn' == "" || sn' == "-"
+            then ""
+            else sn' ++ " "
 
 encodeLogMessages :: [LogMessage] -> BL.ByteString
 encodeLogMessages ms =
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 37ab0adaa..11c4c7a62 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -5,7 +5,6 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE IncoherentInstances        #-}
 {-# LANGUAGE MultiParamTypeClasses      #-}
-{-# LANGUAGE ViewPatterns               #-}
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Parsing
@@ -19,8 +18,7 @@
 A utility library with parsers used in pandoc readers.
 -}
 
-module Text.Pandoc.Parsing ( take1WhileP,
-                             takeP,
+module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
                              countChar,
                              textStr,
                              anyLine,
@@ -134,22 +132,10 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              getInput,
                              setInput,
                              unexpected,
-                             char,
-                             letter,
-                             digit,
-                             alphaNum,
                              skipMany,
                              skipMany1,
-                             spaces,
-                             space,
-                             anyChar,
-                             satisfy,
-                             newline,
-                             string,
                              count,
                              eof,
-                             noneOf,
-                             oneOf,
                              lookAhead,
                              notFollowedBy,
                              many,
@@ -174,6 +160,8 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              SourcePos,
                              getPosition,
                              setPosition,
+                             sourceName,
+                             setSourceName,
                              sourceColumn,
                              sourceLine,
                              setSourceColumn,
@@ -189,16 +177,25 @@ module Text.Pandoc.Parsing ( take1WhileP,
 where
 
 import Control.Monad.Identity
+    ( guard,
+      join,
+      unless,
+      when,
+      void,
+      liftM2,
+      liftM,
+      Identity(..),
+      MonadPlus(mzero) )
 import Control.Monad.Reader
+    ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
 import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
                   isPunctuation, isSpace, ord, toLower, toUpper)
-import Data.Default
+import Data.Default ( Default(..) )
 import Data.Functor (($>))
 import Data.List (intercalate, transpose)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe)
 import qualified Data.Set as Set
-import Data.String
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -207,22 +204,108 @@ import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
 import Text.Pandoc.Definition
+    ( Target,
+      nullMeta,
+      nullAttr,
+      Meta,
+      ColWidth(ColWidthDefault, ColWidth),
+      TableFoot(TableFoot),
+      TableBody(TableBody),
+      Attr,
+      TableHead(TableHead),
+      Row(..),
+      Alignment(..),
+      Inline(Str),
+      ListNumberDelim(..),
+      ListAttributes,
+      ListNumberStyle(..) )
 import Text.Pandoc.Logging
+    ( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) )
 import Text.Pandoc.Options
+    ( extensionEnabled,
+      Extension(Ext_old_dashes, Ext_tex_math_dollars,
+                Ext_tex_math_single_backslash, Ext_tex_math_double_backslash,
+                Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart),
+      ReaderOptions(readerTabStop, readerColumns, readerExtensions) )
 import Text.Pandoc.Readers.LaTeX.Types (Macro)
 import Text.Pandoc.Shared
+    ( uniqueIdent,
+      tshow,
+      mapLeft,
+      compactify,
+      trim,
+      trimr,
+      splitTextByIndices,
+      safeRead,
+      trimMath,
+      schemes,
+      escapeURI )
+import Text.Pandoc.Sources
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.Pandoc.XML (fromEntities)
-import Text.Parsec hiding (token)
-import Text.Parsec.Pos (initialPos, newPos, updatePosString)
-
-import Control.Monad.Except
+import Text.Parsec
+    ( between,
+      setSourceName,
+      Parsec,
+      Column,
+      Line,
+      incSourceLine,
+      incSourceColumn,
+      setSourceLine,
+      setSourceColumn,
+      sourceLine,
+      sourceColumn,
+      sourceName,
+      setSourceName,
+      setPosition,
+      getPosition,
+      updateState,
+      setState,
+      getState,
+      optionMaybe,
+      optional,
+      option,
+      endBy1,
+      endBy,
+      sepEndBy1,
+      sepEndBy,
+      sepBy1,
+      sepBy,
+      try,
+      choice,
+      (<?>),
+      (<|>),
+      manyTill,
+      many1,
+      many,
+      notFollowedBy,
+      lookAhead,
+      eof,
+      count,
+      skipMany1,
+      skipMany,
+      unexpected,
+      setInput,
+      getInput,
+      anyToken,
+      tokenPrim,
+      parse,
+      runParserT,
+      runParser,
+      ParseError,
+      ParsecT,
+      SourcePos,
+      Stream(..) )
+import Text.Parsec.Pos (initialPos, newPos)
+import Control.Monad.Except ( MonadError(throwError) )
 import Text.Pandoc.Error
+    ( PandocError(PandocParseError, PandocParsecError) )
 
 type Parser t s = Parsec t s
 
 type ParserT = ParsecT
 
+
 -- | Reader monad wrapping the parser state. This is used to possibly delay
 -- evaluation until all relevant information has been parsed and made available
 -- in the parser state.
@@ -251,70 +334,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where
   mappend = (<>)
 
 -- | Like @count@, but packs its result
-countChar :: (Stream s m Char, Monad m)
+countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m)
           => Int
           -> ParsecT s st m Char
           -> ParsecT s st m Text
 countChar n = fmap T.pack . count n
 
 -- | Like @string@, but uses @Text@.
-textStr :: Stream s m Char => Text -> ParsecT s u m Text
+textStr :: (Stream s m Char, UpdateSourcePos s Char)
+        => Text -> ParsecT s u m Text
 textStr t = string (T.unpack t) $> t
 
--- | Parse characters while a predicate is true.
-take1WhileP :: Monad m
-           => (Char -> Bool)
-           -> ParserT Text st m Text
-take1WhileP f = do
-  -- needed to persuade parsec that this won't match an empty string:
-  c <- satisfy f
-  inp <- getInput
-  pos <- getPosition
-  let (t, rest) = T.span f inp
-  setInput rest
-  setPosition $
-    if f '\t' || f '\n'
-       then updatePosString pos $ T.unpack t
-       else incSourceColumn pos (T.length t)
-  return $ T.singleton c <> t
-
--- Parse n characters of input (or the rest of the input if
--- there aren't n characters).
-takeP :: Monad m => Int -> ParserT Text st m Text
-takeP n = do
-  guard (n > 0)
-  -- faster than 'count n anyChar'
-  inp <- getInput
-  pos <- getPosition
-  let (xs, rest) = T.splitAt n inp
-  -- needed to persuade parsec that this won't match an empty string:
-  anyChar
-  setInput rest
-  setPosition $ updatePosString pos $ T.unpack xs
-  return xs
-
--- | Parse any line of text
-anyLine :: Monad m => ParserT Text st m Text
+
+-- | Parse any line of text, returning the contents without the
+-- final newline.
+anyLine :: Monad m => ParserT Sources st m Text
 anyLine = do
   -- This is much faster than:
   -- manyTill anyChar newline
   inp <- getInput
-  pos <- getPosition
-  case T.break (=='\n') inp of
-       (this, T.uncons -> Just ('\n', rest)) -> do
-         -- needed to persuade parsec that this won't match an empty string:
-         anyChar
-         setInput rest
-         setPosition $ incSourceLine (setSourceColumn pos 1) 1
-         return this
-       _ -> mzero
+  case inp of
+    Sources [] -> mzero
+    Sources ((fp,t):inps) ->
+      -- look for the end of the line within the current input chunk
+      case T.break (=='\n') t of
+           (this, rest)
+             | T.null rest
+             , not (null inps) ->
+                -- line may span different input files, so do it
+                 -- character by character
+                 T.pack <$> manyTill anyChar newline
+             | otherwise -> do --  either end of inputs or newline in rest
+                 setInput $ Sources ((fp, rest):inps)
+                 char '\n' -- needed so parsec knows we won't match empty string
+                           -- and so source pos is updated
+                 return this
 
 -- | Parse any line, include the final newline in the output
-anyLineNewline :: Monad m => ParserT Text st m Text
+anyLineNewline :: Monad m => ParserT Sources st m Text
 anyLineNewline = (<> "\n") <$> anyLine
 
 -- | Parse indent by specified number of spaces (or equiv. tabs)
-indentWith :: Stream s m Char
+indentWith :: (Stream s m Char, UpdateSourcePos s Char)
            => HasReaderOptions st
            => Int -> ParserT s st m Text
 indentWith num = do
@@ -399,11 +460,13 @@ notFollowedBy' p  = try $ join $  do  a <- try p
                                   return (return ())
 -- (This version due to Andrew Pimlott on the Haskell mailing list.)
 
-oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
+oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char)
+              => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
 oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack
 
 -- TODO: This should be re-implemented in a Text-aware way
-oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
+oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char)
+               => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
 oneOfStrings'' _ []   = Prelude.fail "no strings"
 oneOfStrings'' matches strs = try $ do
   c <- anyChar
@@ -418,14 +481,16 @@ oneOfStrings'' matches strs = try $ do
 -- | Parses one of a list of strings.  If the list contains
 -- two strings one of which is a prefix of the other, the longer
 -- string will be matched if possible.
-oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char)
+             => [Text] -> ParserT s st m Text
 oneOfStrings = oneOfStrings' (==)
 
 -- | Parses one of a list of strings (tried in order), case insensitive.
 
 -- TODO: This will not be accurate with general Unicode (neither
 -- Text.toLower nor Text.toCaseFold can be implemented with a map)
-oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char)
+               => [Text] -> ParserT s st m Text
 oneOfStringsCI = oneOfStrings' ciMatch
   where ciMatch x y = toLower' x == toLower' y
         -- this optimizes toLower by checking common ASCII case
@@ -436,11 +501,13 @@ oneOfStringsCI = oneOfStrings' ciMatch
                    | otherwise = toLower c
 
 -- | Parses a space or tab.
-spaceChar :: Stream s m Char => ParserT s st m Char
+spaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
 
 -- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Stream s m Char => ParserT s st m Char
+nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+             => ParserT s st m Char
 nonspaceChar = satisfy (not . isSpaceChar)
 
 isSpaceChar :: Char -> Bool
@@ -451,21 +518,24 @@ isSpaceChar '\r' = True
 isSpaceChar _    = False
 
 -- | Skips zero or more spaces or tabs.
-skipSpaces :: Stream s m Char => ParserT s st m ()
+skipSpaces :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m ()
 skipSpaces = skipMany spaceChar
 
 -- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Stream s m Char => ParserT s st m Char
+blankline :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 blankline = try $ skipSpaces >> newline
 
 -- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Stream s m Char => ParserT s st m Text
+blanklines :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m Text
 blanklines = T.pack <$> many1 blankline
 
 -- | Gobble n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleSpaces :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m ()
+             => Int -> ParserT Sources st m ()
 gobbleSpaces 0 = return ()
 gobbleSpaces n
   | n < 0     = error "gobbleSpaces called with negative number"
@@ -473,18 +543,26 @@ gobbleSpaces n
       char ' ' <|> eatOneSpaceOfTab
       gobbleSpaces (n - 1)
 
-eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char
+eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char
 eatOneSpaceOfTab = do
-  char '\t'
+  lookAhead (char '\t')
+  pos <- getPosition
   tabstop <- getOption readerTabStop
+  -- replace the tab on the input stream with spaces
+  let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop)
   inp <- getInput
-  setInput $ T.replicate (tabstop - 1) " " <> inp
-  return ' '
+  setInput $
+    case inp of
+      Sources [] -> error "eatOneSpaceOfTab - empty Sources list"
+      Sources ((fp,t):rest) ->
+        -- drop the tab and add spaces
+        Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest)
+  char ' '
 
 -- | Gobble up to n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
-                   => Int -> ParserT Text st m Int
+                   => Int -> ParserT Sources st m Int
 gobbleAtMostSpaces 0 = return 0
 gobbleAtMostSpaces n
   | n < 0     = error "gobbleAtMostSpaces called with negative number"
@@ -493,7 +571,8 @@ gobbleAtMostSpaces n
       (+ 1) <$> gobbleAtMostSpaces (n - 1)
 
 -- | Parses material enclosed between start and end parsers.
-enclosed :: (Show end, Stream s  m Char) => ParserT s st m t   -- ^ start parser
+enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char)
+         => ParserT s st m t   -- ^ start parser
          -> ParserT s st m end  -- ^ end parser
          -> ParserT s st m a    -- ^ content parser (to be used repeatedly)
          -> ParserT s st m [a]
@@ -501,39 +580,41 @@ enclosed start end parser = try $
   start >> notFollowedBy space >> many1Till parser end
 
 -- | Parse string, case insensitive.
-stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text
+stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char)
+              => Text -> ParserT s st m Text
 stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack
 
-stringAnyCase' :: Stream s m Char => String -> ParserT s st m String
+stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char)
+               => String -> ParserT s st m String
 stringAnyCase' [] = string ""
 stringAnyCase' (x:xs) = do
   firstChar <- char (toUpper x) <|> char (toLower x)
   rest <- stringAnyCase' xs
   return (firstChar:rest)
 
+-- TODO rewrite by just adding to Sources stream?
 -- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: (Stream s m Char, IsString s)
-                => ParserT s st m r
+parseFromString :: Monad m
+                => ParserT Sources st m r
                 -> Text
-                -> ParserT s st m r
+                -> ParserT Sources st m r
 parseFromString parser str = do
   oldPos <- getPosition
-  setPosition $ initialPos " chunk"
+  setPosition $ initialPos "chunk"
   oldInput <- getInput
-  setInput $ fromString $ T.unpack str
+  setInput $ toSources str
   result <- parser
   spaces
-  eof
   setInput oldInput
   setPosition oldPos
   return result
 
 -- | Like 'parseFromString' but specialized for 'ParserState'.
 -- This resets 'stateLastStrPos', which is almost always what we want.
-parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u)
-                 => ParserT s u m a
+parseFromString' :: (Monad m, HasLastStrPosition u)
+                 => ParserT Sources u m a
                  -> Text
-                 -> ParserT s u m a
+                 -> ParserT Sources u m a
 parseFromString' parser str = do
   oldLastStrPos <- getLastStrPos <$> getState
   updateState $ setLastStrPos Nothing
@@ -542,7 +623,7 @@ parseFromString' parser str = do
   return res
 
 -- | Parse raw line block up to and including blank lines.
-lineClump :: Monad m => ParserT Text st m Text
+lineClump :: Monad m => ParserT Sources st m Text
 lineClump = blanklines
           <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine))
 
@@ -551,7 +632,7 @@ lineClump = blanklines
 -- pairs of open and close, which must be different. For example,
 -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
 -- and return "hello (there)".
-charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
+charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char
                 -> ParserT s st m Text
 charsInBalanced open close parser = try $ do
   char open
@@ -570,7 +651,7 @@ charsInBalanced open close parser = try $ do
 -- Auxiliary functions for romanNumeral:
 
 -- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Stream s m Char => Bool                  -- ^ Uppercase if true
+romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool                  -- ^ Uppercase if true
              -> ParserT s st m Int
 romanNumeral upperCase = do
     let rchar uc = char $ if upperCase then uc else toLower uc
@@ -606,7 +687,7 @@ romanNumeral upperCase = do
 
 -- | Parses an email address; returns original and corresponding
 -- escaped mailto: URI.
-emailAddress :: Stream s m Char => ParserT s st m (Text, Text)
+emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
  where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
                            in  (full, escapeURI $ "mailto:" <> full)
@@ -630,11 +711,11 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
        isEmailPunct c    = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
 
 
-uriScheme :: Stream s m Char => ParserT s st m Text
+uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text
 uriScheme = oneOfStringsCI (Set.toList schemes)
 
 -- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Stream s m Char => ParserT s st m (Text, Text)
+uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 uri = try $ do
   scheme <- uriScheme
   char ':'
@@ -677,7 +758,7 @@ uri = try $ do
     uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
                                    return (T.pack $ [l] ++ chunk ++ [r])
 
-mathInlineWith :: Stream s m Char  => Text -> Text -> ParserT s st m Text
+mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char)  => Text -> Text -> ParserT s st m Text
 mathInlineWith op cl = try $ do
   textStr op
   when (op == "$") $ notFollowedBy space
@@ -698,10 +779,10 @@ mathInlineWith op cl = try $ do
   notFollowedBy digit  -- to prevent capture of $5
   return $ trimMath $ T.concat words'
  where
-  inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text
+  inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text
   inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
 
-  inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String
+  inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String
   inBalancedBraces' 0 "" = do
     c <- anyChar
     if c == '{'
@@ -718,13 +799,13 @@ mathInlineWith op cl = try $ do
          '{' -> inBalancedBraces' (numOpen + 1) (c:xs)
          _   -> inBalancedBraces' numOpen (c:xs)
 
-mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
+mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
 mathDisplayWith op cl = try $ fmap T.pack $ do
   textStr op
   many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
             (try $ textStr cl)
 
-mathDisplay :: (HasReaderOptions st, Stream s m Char)
+mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m Text
 mathDisplay =
       (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
@@ -733,7 +814,7 @@ mathDisplay =
   <|> (guardEnabled Ext_tex_math_double_backslash >>
        mathDisplayWith "\\\\[" "\\\\]")
 
-mathInline :: (HasReaderOptions st , Stream s m Char)
+mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m Text
 mathInline =
       (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
@@ -746,7 +827,7 @@ mathInline =
 -- displacement (the difference between the source column at the end
 -- and the source column at the beginning). Vertical displacement
 -- (source row) is ignored.
-withHorizDisplacement :: Stream s m Char
+withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char)
                       => ParserT s st m a  -- ^ Parser to apply
                       -> ParserT s st m (a, Int) -- ^ (result, displacement)
 withHorizDisplacement parser = do
@@ -758,30 +839,37 @@ withHorizDisplacement parser = do
 -- | Applies a parser and returns the raw string that was parsed,
 -- along with the value produced by the parser.
 withRaw :: Monad m
-        => ParsecT Text st m a
-        -> ParsecT Text st m (a, Text)
+        => ParsecT Sources st m a
+        -> ParsecT Sources st m (a, Text)
 withRaw parser = do
-  pos1 <- getPosition
-  inp <- getInput
+  inps1 <- getInput
   result <- parser
-  pos2 <- getPosition
-  let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
-  let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
-  let inplines = take ((l2 - l1) + 1) $ T.lines inp
-  let raw = case inplines of
-                []  -> ""
-                [l] -> T.take (c2 - c1) l
-                ls  -> T.unlines (init ls) <> T.take (c2 - 1) (last ls)
-  return (result, raw)
+  inps2 <- getInput
+  -- 'raw' is the difference between inps1 and inps2
+  return (result, sourcesDifference inps1 inps2)
+
+sourcesDifference :: Sources -> Sources -> Text
+sourcesDifference (Sources is1) (Sources is2) = go is1 is2
+ where
+   go inps1 inps2 =
+    case (inps1, inps2) of
+      ([], _) -> mempty
+      (_, []) -> mconcat $ map snd inps1
+      ((p1,t1):rest1, (p2, t2):rest2)
+        | p1 == p2
+        , t1 == t2  -> go rest1 rest2
+        | p1 == p2
+        , t1 /= t2  -> fromMaybe mempty $ T.stripSuffix t2 t1
+        | otherwise -> t1 <> go rest1 inps2
 
 -- | Parses backslash, then applies character parser.
-escaped :: Stream s m Char
+escaped :: (Stream s m Char, UpdateSourcePos s Char)
         => ParserT s st m Char  -- ^ Parser for character to escape
         -> ParserT s st m Char
 escaped parser = try $ char '\\' >> parser
 
 -- | Parse character entity.
-characterReference :: Stream s m Char => ParserT s st m Char
+characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 characterReference = try $ do
   char '&'
   ent <- many1Till nonspaceChar (char ';')
@@ -794,19 +882,19 @@ characterReference = try $ do
        _            -> Prelude.fail "entity not found"
 
 -- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperRoman = do
   num <- romanNumeral True
   return (UpperRoman, num)
 
 -- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerRoman = do
   num <- romanNumeral False
   return (LowerRoman, num)
 
 -- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 decimal = do
   num <- many1 digit
   return (Decimal, fromMaybe 1 $ safeRead $ T.pack num)
@@ -815,7 +903,7 @@ decimal = do
 -- returns (DefaultStyle, [next example number]).  The next
 -- example number is incremented in parser state, and the label
 -- (if present) is added to the label table.
-exampleNum :: Stream s m Char
+exampleNum :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s ParserState m (ListNumberStyle, Int)
 exampleNum = do
   char '@'
@@ -834,37 +922,37 @@ exampleNum = do
   return (Example, num)
 
 -- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 defaultNum = do
   char '#'
   return (DefaultStyle, 1)
 
 -- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerAlpha = do
   ch <- satisfy isAsciiLower
   return (LowerAlpha, ord ch - ord 'a' + 1)
 
 -- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperAlpha = do
   ch <- satisfy isAsciiUpper
   return (UpperAlpha, ord ch - ord 'A' + 1)
 
 -- | Parses a roman numeral i or I
-romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
            (char 'I' >> return (UpperRoman, 1))
 
 -- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
+anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes
 anyOrderedListMarker = choice
   [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
                            numParser <- [decimal, exampleNum, defaultNum, romanOne,
                            lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
 
 -- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Stream s m Char
+inPeriod :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m (ListNumberStyle, Int)
          -> ParserT s st m ListAttributes
 inPeriod num = try $ do
@@ -876,7 +964,7 @@ inPeriod num = try $ do
   return (start, style, delim)
 
 -- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Stream s m Char
+inOneParen :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m (ListNumberStyle, Int)
            -> ParserT s st m ListAttributes
 inOneParen num = try $ do
@@ -885,7 +973,7 @@ inOneParen num = try $ do
   return (start, style, OneParen)
 
 -- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Stream s m Char
+inTwoParens :: (Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m (ListNumberStyle, Int)
             -> ParserT s st m ListAttributes
 inTwoParens num = try $ do
@@ -896,7 +984,7 @@ inTwoParens num = try $ do
 
 -- | Parses an ordered list marker with a given style and delimiter,
 -- returns number.
-orderedListMarker :: Stream s m Char
+orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char)
                   => ListNumberStyle
                   -> ListNumberDelim
                   -> ParserT s ParserState m Int
@@ -919,10 +1007,10 @@ orderedListMarker style delim = do
   return start
 
 -- | Parses a character reference and returns a Str element.
-charRef :: Stream s m Char => ParserT s st m Inline
+charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline
 charRef = Str . T.singleton <$> characterReference
 
-lineBlockLine :: Monad m => ParserT Text st m Text
+lineBlockLine :: Monad m => ParserT Sources st m Text
 lineBlockLine = try $ do
   char '|'
   char ' '
@@ -932,11 +1020,11 @@ lineBlockLine = try $ do
   continuations <- many (try $ char ' ' >> anyLine)
   return $ white <> T.unwords (line : continuations)
 
-blankLineBlockLine :: Stream s m Char => ParserT s st m Char
+blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 blankLineBlockLine = try (char '|' >> blankline)
 
 -- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Monad m => ParserT Text st m [Text]
+lineBlockLines :: Monad m => ParserT Sources st m [Text]
 lineBlockLines = try $ do
   lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine))
   skipMany blankline
@@ -944,7 +1032,8 @@ lineBlockLines = try $ do
 
 -- | Parse a table using 'headerParser', 'rowParser',
 -- 'lineParser', and 'footerParser'.
-tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith :: (Stream s m Char, UpdateSourcePos s Char,
+              HasReaderOptions st, Monad mf)
           => ParserT s st m (mf [Blocks], [Alignment], [Int])
           -> ([Int] -> ParserT s st m (mf [Blocks]))
           -> ParserT s st m sep
@@ -964,7 +1053,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do
 
 type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
 
-tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
+               HasReaderOptions st, Monad mf)
            => ParserT s st m (mf [Blocks], [Alignment], [Int])
            -> ([Int] -> ParserT s st m (mf [Blocks]))
            -> ParserT s st m sep
@@ -1013,20 +1103,19 @@ widthsFromIndices numColumns' indices =
 -- (which may be grid), then the rows,
 -- which may be grid, separated by blank lines, and
 -- ending with a footer (dashed line followed by blank line).
-gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                  Monad mf, IsString s)
-              => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf)
+              => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
               -> Bool                        -- ^ Headerless table
-              -> ParserT s st m (mf Blocks)
+              -> ParserT Sources st m (mf Blocks)
 gridTableWith blocks headless =
   tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
             (gridTableSep '-') gridTableFooter
 
-gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                   Monad mf, IsString s)
-               => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st,
+                   Monad mf)
+               => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
                -> Bool                        -- ^ Headerless table
-               -> ParserT s st m (TableComponents mf)
+               -> ParserT Sources st m (TableComponents mf)
 gridTableWith' blocks headless =
   tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
              (gridTableSep '-') gridTableFooter
@@ -1035,7 +1124,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
 gridTableSplitLine indices line = map removeFinalBar $ tail $
   splitTextByIndices (init indices) $ trimr line
 
-gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
+gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
 gridPart ch = do
   leftColon <- option False (True <$ char ':')
   dashes <- many1 (char ch)
@@ -1050,7 +1139,7 @@ gridPart ch = do
                        (False, False) -> AlignDefault
   return ((lengthDashes, lengthDashes + 1), alignment)
 
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
+gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)]
 gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
 
 removeFinalBar :: Text -> Text
@@ -1059,14 +1148,14 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|')
     go c = T.any (== c) " \t"
 
 -- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
+gridTableSep :: Monad m => Char -> ParserT Sources st m Char
 gridTableSep ch = try $ gridDashedLines ch >> return '\n'
 
 -- | Parse header for a grid table.
-gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
+gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st)
                 => Bool -- ^ Headerless table
-                -> ParserT s st m (mf Blocks)
-                -> ParserT s st m (mf [Blocks], [Alignment], [Int])
+                -> ParserT Sources st m (mf Blocks)
+                -> ParserT Sources st m (mf [Blocks], [Alignment], [Int])
 gridTableHeader True _ = do
   optional blanklines
   dashes <- gridDashedLines '-'
@@ -1089,17 +1178,17 @@ gridTableHeader False blocks = try $ do
   heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
   return (heads, aligns, indices)
 
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text]
+gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text]
 gridTableRawLine indices = do
   char '|'
   line <- many1Till anyChar newline
   return (gridTableSplitLine indices $ T.pack line)
 
 -- | Parse row of grid table.
-gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
-             => ParserT s st m (mf Blocks)
+gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st)
+             => ParserT Sources st m (mf Blocks)
              -> [Int]
-             -> ParserT s st m (mf [Blocks])
+             -> ParserT Sources st m (mf [Blocks])
 gridTableRow blocks indices = do
   colLines <- many1 (gridTableRawLine indices)
   let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $
@@ -1120,34 +1209,38 @@ removeOneLeadingSpace xs =
            Just (c, _) -> c == ' '
 
 -- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s st m ()
+gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 gridTableFooter = optional blanklines
 
 ---
 
 -- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: Monad m
-          => ParserT Text st m a -- ^ parser
-          -> st                  -- ^ initial state
-          -> Text                -- ^ input
+readWithM :: (Monad m, ToSources t)
+          => ParserT Sources st m a  -- ^ parser
+          -> st                      -- ^ initial state
+          -> t                       -- ^ input
           -> m (Either PandocError a)
 readWithM parser state input =
-    mapLeft (PandocParsecError input) <$> runParserT parser state "source" input
+    mapLeft (PandocParsecError sources)
+      <$> runParserT parser state (initialSourceName sources) sources
+ where
+   sources = toSources input
 
 -- | Parse a string with a given parser and state
-readWith :: Parser Text st a
+readWith :: ToSources t
+         => Parser Sources st a
          -> st
-         -> Text
+         -> t
          -> Either PandocError a
 readWith p t inp = runIdentity $ readWithM p t inp
 
 -- | Parse a string with @parser@ (for testing).
 testStringWith :: Show a
-               => ParserT Text ParserState Identity a
+               => ParserT Sources ParserState Identity a
                -> Text
                -> IO ()
 testStringWith parser str = UTF8.putStrLn $ tshow $
-                            readWith parser defaultParserState str
+                            readWith parser defaultParserState (toSources str)
 
 -- | Parsing options.
 data ParserState = ParserState
@@ -1394,19 +1487,23 @@ registerHeader (ident,classes,kvs) header' = do
           updateState $ updateIdentifierList $ Set.insert ident
         return (ident,classes,kvs)
 
-smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m Inlines
                  -> ParserT s st m Inlines
 smartPunctuation inlineParser = do
   guardEnabled Ext_smart
   choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
 
-quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+quoted :: (HasLastStrPosition st, HasQuoteContext st m,
+           Stream s m Char, UpdateSourcePos s Char)
        => ParserT s st m Inlines
        -> ParserT s st m Inlines
 quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 
-singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
 singleQuoted inlineParser = do
@@ -1416,7 +1513,8 @@ singleQuoted inlineParser = do
      (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
    <|> pure "\8217"
 
-doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char)
+doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
 doubleQuoted inlineParser = do
@@ -1433,13 +1531,14 @@ failIfInQuoteContext context = do
   context' <- getQuoteContext
   when (context' == context) $ Prelude.fail "already inside quotes"
 
-charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char
+charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char
 charOrRef cs =
   oneOf cs <|> try (do c <- characterReference
                        guard (c `elem` cs)
                        return c)
 
-singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
@@ -1449,7 +1548,7 @@ singleQuoteStart = do
     charOrRef "'\8216\145"
     void $ lookAhead (satisfy (not . isSpaceChar))
 
-singleQuoteEnd :: Stream s m Char
+singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 singleQuoteEnd = try $ do
   charOrRef "'\8217\146"
@@ -1457,7 +1556,7 @@ singleQuoteEnd = try $ do
 
 doubleQuoteStart :: (HasLastStrPosition st,
                      HasQuoteContext st m,
-                     Stream s m Char)
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
@@ -1465,21 +1564,21 @@ doubleQuoteStart = do
   try $ do charOrRef "\"\8220\147"
            void $ lookAhead (satisfy (not . isSpaceChar))
 
-doubleQuoteEnd :: Stream s m Char
+doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 doubleQuoteEnd = void (charOrRef "\"\8221\148")
 
-apostrophe :: Stream s m Char => ParserT s st m Inlines
+apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
 apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
 
-doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines
+doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
 doubleCloseQuote = B.str "\8221" <$ char '"'
 
-ellipses :: Stream s m Char
+ellipses :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m Inlines
 ellipses = try (string "..." >> return (B.str "\8230"))
 
-dash :: (HasReaderOptions st, Stream s m Char)
+dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
      => ParserT s st m Inlines
 dash = try $ do
   oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
@@ -1506,7 +1605,7 @@ nested p = do
   updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
   return res
 
-citeKey :: (Stream s m Char, HasLastStrPosition st)
+citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
         => ParserT s st m (Bool, Text)
 citeKey = try $ do
   guard =<< notAfterString
@@ -1575,10 +1674,11 @@ insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
 insertIncludedFile blocks totoks dirs f =
   runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
 
+-- TODO: replace this with something using addToSources.
 -- | Parse content of include file as future blocks. Circular includes result in
 -- an @PandocParseError@.
 insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT Text st m (Future st Blocks)
+                    => ParserT Sources st m (Future st Blocks)
                     -> [FilePath] -> FilePath
-                    -> ParserT Text st m (Future st Blocks)
-insertIncludedFileF p = insertIncludedFile' p id
+                    -> ParserT Sources st m (Future st Blocks)
+insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)])
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 7ae9db34f..5106f8058 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE MonoLocalBinds      #-}
+{-# LANGUAGE RankNTypes          #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
@@ -72,6 +73,7 @@ import Text.Pandoc.Error
 import Text.Pandoc.Extensions
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.CommonMark
+import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.Creole
 import Text.Pandoc.Readers.DocBook
 import Text.Pandoc.Readers.Docx
@@ -84,7 +86,6 @@ import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.JATS (readJATS)
 import Text.Pandoc.Readers.Jira (readJira)
 import Text.Pandoc.Readers.LaTeX
-import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.MediaWiki
 import Text.Pandoc.Readers.Muse
 import Text.Pandoc.Readers.Native
@@ -102,50 +103,52 @@ import Text.Pandoc.Readers.CSV
 import Text.Pandoc.Readers.CslJson
 import Text.Pandoc.Readers.BibTeX
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
+data Reader m = TextReader (forall a . ToSources a =>
+                                ReaderOptions -> a -> m Pandoc)
               | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
 
 -- | Association list of formats and readers.
 readers :: PandocMonad m => [(Text, Reader m)]
-readers = [ ("native"       , TextReader readNative)
-           ,("json"         , TextReader readJSON)
-           ,("markdown"     , TextReader readMarkdown)
-           ,("markdown_strict" , TextReader readMarkdown)
-           ,("markdown_phpextra" , TextReader readMarkdown)
-           ,("markdown_github" , TextReader readMarkdown)
-           ,("markdown_mmd",  TextReader readMarkdown)
-           ,("commonmark"   , TextReader readCommonMark)
-           ,("commonmark_x" , TextReader readCommonMark)
-           ,("creole"       , TextReader readCreole)
-           ,("dokuwiki"     , TextReader readDokuWiki)
-           ,("gfm"          , TextReader readCommonMark)
-           ,("rst"          , TextReader readRST)
-           ,("mediawiki"    , TextReader readMediaWiki)
-           ,("vimwiki"      , TextReader readVimwiki)
-           ,("docbook"      , TextReader readDocBook)
-           ,("opml"         , TextReader readOPML)
-           ,("org"          , TextReader readOrg)
-           ,("textile"      , TextReader readTextile) -- TODO : textile+lhs
-           ,("html"         , TextReader readHtml)
-           ,("jats"         , TextReader readJATS)
-           ,("jira"         , TextReader readJira)
-           ,("latex"        , TextReader readLaTeX)
-           ,("haddock"      , TextReader readHaddock)
-           ,("twiki"        , TextReader readTWiki)
-           ,("tikiwiki"     , TextReader readTikiWiki)
-           ,("docx"         , ByteStringReader readDocx)
-           ,("odt"          , ByteStringReader readOdt)
-           ,("t2t"          , TextReader readTxt2Tags)
-           ,("epub"         , ByteStringReader readEPUB)
-           ,("muse"         , TextReader readMuse)
-           ,("man"          , TextReader readMan)
-           ,("fb2"          , TextReader readFB2)
-           ,("ipynb"        , TextReader readIpynb)
-           ,("csv"          , TextReader readCSV)
-           ,("csljson"      , TextReader readCslJson)
-           ,("bibtex"       , TextReader readBibTeX)
-           ,("biblatex"     , TextReader readBibLaTeX)
+readers = [("native"       , TextReader readNative)
+          ,("json"         , TextReader readJSON)
+          ,("markdown"     , TextReader readMarkdown)
+          ,("markdown_strict" , TextReader readMarkdown)
+          ,("markdown_phpextra" , TextReader readMarkdown)
+          ,("markdown_github" , TextReader readMarkdown)
+          ,("markdown_mmd",  TextReader readMarkdown)
+          ,("commonmark"   , TextReader readCommonMark)
+          ,("commonmark_x" , TextReader readCommonMark)
+          ,("creole"       , TextReader readCreole)
+          ,("dokuwiki"     , TextReader readDokuWiki)
+          ,("gfm"          , TextReader readCommonMark)
+          ,("rst"          , TextReader readRST)
+          ,("mediawiki"    , TextReader readMediaWiki)
+          ,("vimwiki"      , TextReader readVimwiki)
+          ,("docbook"      , TextReader readDocBook)
+          ,("opml"         , TextReader readOPML)
+          ,("org"          , TextReader readOrg)
+          ,("textile"      , TextReader readTextile) -- TODO : textile+lhs
+          ,("html"         , TextReader readHtml)
+          ,("jats"         , TextReader readJATS)
+          ,("jira"         , TextReader readJira)
+          ,("latex"        , TextReader readLaTeX)
+          ,("haddock"      , TextReader readHaddock)
+          ,("twiki"        , TextReader readTWiki)
+          ,("tikiwiki"     , TextReader readTikiWiki)
+          ,("docx"         , ByteStringReader readDocx)
+          ,("odt"          , ByteStringReader readOdt)
+          ,("t2t"          , TextReader readTxt2Tags)
+          ,("epub"         , ByteStringReader readEPUB)
+          ,("muse"         , TextReader readMuse)
+          ,("man"          , TextReader readMan)
+          ,("fb2"          , TextReader readFB2)
+          ,("ipynb"        , TextReader readIpynb)
+          ,("csv"          , TextReader readCSV)
+          ,("csljson"      , TextReader readCslJson)
+          ,("bibtex"       , TextReader readBibTeX)
+          ,("biblatex"     , TextReader readBibLaTeX)
            ]
 
 -- | Retrieve reader, extensions based on formatSpec (format+extensions).
@@ -173,9 +176,13 @@ getReader s =
                      return (r, exts)
 
 -- | Read pandoc document from JSON format.
-readJSON :: PandocMonad m
-         => ReaderOptions -> Text -> m Pandoc
-readJSON _ t =
-  case eitherDecode' . BL.fromStrict . UTF8.fromText $ t of
+readJSON :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
+readJSON _ s =
+  case eitherDecode' . BL.fromStrict . UTF8.fromText
+                     . sourcesToText . toSources $ s of
        Right doc -> return doc
-       Left e    -> throwError $ PandocParseError ("JSON parse error: " <> T.pack e)
+       Left e    -> throwError $ PandocParseError ("JSON parse error: "
+                                                   <> T.pack e)
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index b82a81350..318afda85 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -23,30 +23,33 @@ where
 import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
-import Data.Text (Text)
 import Citeproc (Lang(..), parseLang)
 import Citeproc.Locale (getLocale)
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad, lookupEnv)
 import Text.Pandoc.Citeproc.BibTeX as BibTeX
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Text.Pandoc.Sources (ToSources(..))
 import Control.Monad.Except (throwError)
 
 -- | Read BibTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibTeX :: (PandocMonad m, ToSources a)
+           => ReaderOptions -> a -> m Pandoc
 readBibTeX = readBibTeX' BibTeX.Bibtex
 
 -- | Read BibLaTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibLaTeX :: (PandocMonad m, ToSources a)
+             => ReaderOptions -> a -> m Pandoc
 readBibLaTeX = readBibTeX' BibTeX.Biblatex
 
-readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
+readBibTeX' :: (PandocMonad m, ToSources a)
+            => Variant -> ReaderOptions -> a -> m Pandoc
 readBibTeX' variant _opts t = do
   mblangEnv <- lookupEnv "LANG"
   let defaultLang = Lang "en" Nothing (Just "US") [] [] []
@@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do
                    Left _  -> throwError $ PandocCiteprocError e
                Right l -> return l
   case BibTeX.readBibtexString variant locale (const True) t of
-    Left e -> throwError $ PandocParsecError t e
+    Left e -> throwError $ PandocParsecError (toSources t) e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
                          . setMeta "nocite"
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 2958d6180..eca8f9425 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -13,23 +13,23 @@
 Conversion from CSV to a 'Pandoc' table.
 -}
 module Text.Pandoc.Readers.CSV ( readCSV ) where
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Shared (crFilter)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.Options (ReaderOptions)
 import Control.Monad.Except (throwError)
 
-readCSV :: PandocMonad m
+readCSV :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
-readCSV _opts s =
-  case parseCSV defaultCSVOptions (crFilter s) of
+readCSV _opts s = do
+  let txt = sourcesToText $ toSources s
+  case parseCSV defaultCSVOptions txt of
     Right (r:rs) -> return $ B.doc $ B.table capt
                                              (zip aligns widths)
                                              (TableHead nullAttr hdrs)
@@ -45,4 +45,4 @@ readCSV _opts s =
              aligns = replicate numcols AlignDefault
              widths = replicate numcols ColWidthDefault
     Right []     -> return $ B.doc mempty
-    Left e       -> throwError $ PandocParsecError s e
+    Left e       -> throwError $ PandocParsecError (toSources [("",txt)]) e
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 244f77940..b099a9b50 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
 import Data.Typeable
-import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine,
-                            runF, defaultParserState, take1WhileP, option)
+import Text.Pandoc.Parsing (runParserT, getPosition,
+                            runF, defaultParserState, option, many1, anyChar,
+                            Sources(..), ToSources(..), ParserT, Future,
+                            sourceName)
 import qualified Data.Text as T
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCommonMark :: (PandocMonad m, ToSources a)
+               => ReaderOptions -> a -> m Pandoc
 readCommonMark opts s
-  | isEnabled Ext_yaml_metadata_block opts
-  , "---" `T.isPrefixOf` s = do
-       let metaValueParser = do
-             inp <- option "" $ take1WhileP (const True)
-             case runIdentity
-                    (commonmarkWith (specFor opts) "metadata value" inp) of
-                    Left _ -> mzero
-                    Right (Cm bls :: Cm () Blocks)
-                             -> return $ return $ B.toMetaValue bls
-       res <- runParserT (do meta <- yamlMetaBlock metaValueParser
-                             pos <- getPosition
-                             return (meta, pos))
-                         defaultParserState "YAML metadata" s
-       case res of
-         Left _ -> readCommonMarkBody opts s
-         Right (meta, pos) -> do
-           let dropLines 0 = id
-               dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n')
-           let metaLines = sourceLine pos - 1
-           let body = T.replicate metaLines "\n" <> dropLines metaLines s
-           Pandoc _ bs <- readCommonMarkBody opts body
-           return $ Pandoc (runF meta defaultParserState) bs
-  | otherwise = readCommonMarkBody opts s
+  | isEnabled Ext_yaml_metadata_block opts = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
+                          pos <- getPosition
+                          return (meta, pos))
+                      defaultParserState "YAML metadata" (toSources s)
+    case res of
+      Left _ -> readCommonMarkBody opts sources toks
+      Right (meta, pos) -> do
+        -- strip off metadata section and parse body
+        let body = dropWhile (\t -> tokPos t < pos) toks
+        Pandoc _ bs <- readCommonMarkBody opts sources body
+        return $ Pandoc (runF meta defaultParserState) bs
+  | otherwise = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    readCommonMarkBody opts sources toks
 
-readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMarkBody opts s
+sourceToToks :: (SourcePos, Text) -> [Tok]
+sourceToToks (pos, s) = tokenize (sourceName pos) s
+
+metaValueParser :: Monad m
+                => ReaderOptions -> ParserT Sources st m (Future st MetaValue)
+metaValueParser opts = do
+  inp <- option "" $ T.pack <$> many1 anyChar
+  let toks = concatMap sourceToToks (unSources (toSources inp))
+  case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+     Left _ -> mzero
+     Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
+readCommonMarkBody opts s toks
   | isEnabled Ext_sourcepos opts =
-    case runIdentity (commonmarkWith (specFor opts) "" s) of
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
       Left err -> throwError $ PandocParsecError s err
       Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
   | otherwise =
-    case runIdentity (commonmarkWith (specFor opts) "" s) of
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
       Left err -> throwError $ PandocParsecError s err
       Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
 
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 2658dfea2..ad848ada7 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed)
-import Text.Pandoc.Shared (crFilter)
-
 
 -- | Read creole from an input string and return a Pandoc document.
-readCreole :: PandocMonad m
+readCreole :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readCreole opts s = do
-  res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseCreole def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type CRLParser = ParserT Text ParserState
+type CRLParser = ParserT Sources ParserState
 
 --
 -- Utility functions
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 30bb19483..a0af5c325 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -24,21 +24,22 @@ import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
 import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
 import Control.Monad.Except (throwError)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read CSL JSON from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCslJson _opts t =
-  case cslJsonToReferences (UTF8.fromText t) of
+readCslJson :: (PandocMonad m, ToSources a)
+            => ReaderOptions -> a -> m Pandoc
+readCslJson _opts x =
+  case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
     Left e -> throwError $ PandocParseError $ T.pack e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ac3caa2c0..3db459cfd 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -30,7 +30,8 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
 import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.Pandoc.XML.Light
 
@@ -539,11 +540,15 @@ instance Default DBState where
                , dbContent = [] }
 
 
-readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readDocBook _ inp = do
+  let sources = toSources inp
   tree <- either (throwError . PandocXMLError "") return $
             parseXMLContents
-              (TL.fromStrict . handleInstructions $ crFilter inp)
+              (TL.fromStrict . handleInstructions . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index dedc1f03f..db98ac8de 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -29,26 +29,27 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
+import Text.Pandoc.Shared (trim, stringify, tshow)
 
 -- | Read DokuWiki from an input string and return a Pandoc document.
-readDokuWiki :: PandocMonad m
+readDokuWiki :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readDokuWiki opts s = do
-  let input = crFilter s
-  res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+  let sources = toSources s
+  res <- runParserT parseDokuWiki def {stateOptions = opts }
+           (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
-type DWParser = ParserT Text ParserState
+type DWParser = ParserT Sources ParserState
 
 -- * Utility functions
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 nested :: PandocMonad m => DWParser m a -> DWParser m a
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index 66e390bd7..84e5278db 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter)
 import Text.Pandoc.XML.Light
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type FB2 m = StateT FB2State m
 
@@ -63,9 +63,12 @@ instance HasMeta FB2State where
   setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
   deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
 
-readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readFB2 :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
 readFB2 _ inp =
-  case parseXMLElement $ TL.fromStrict $ crFilter inp of
+  case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
     Left msg -> throwError $ PandocXMLError "" msg
     Right el ->  do
       (bs, st) <- runStateT (parseRootElement el) def
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c3e68afd8..f5c8a2277 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -62,21 +62,21 @@ import Text.Pandoc.Options (
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Shared (
-    addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces,
+    addMetaField, blocksToInlines', escapeURI, extractSpaces,
     htmlSpanLikeElements, renderTags', safeRead, tshow)
 import Text.Pandoc.Walk
 import Text.Parsec.Error
 import Text.TeXMath (readMathML, writeTeX)
 
 -- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
+readHtml :: (PandocMonad m, ToSources a)
          => ReaderOptions -- ^ Reader options
-         -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+         -> a             -- ^ Input to parse
          -> m Pandoc
 readHtml opts inp = do
   let tags = stripPrefixes $ canonicalizeTags $
              parseTagsOptions parseOptions{ optTagPosition = True }
-             (crFilter inp)
+             (sourcesToText $ toSources inp)
       parseDoc = do
         blocks <- fixPlains False . mconcat <$> manyTill block eof
         meta <- stateMeta . parserState <$> getState
@@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
 
 pTagText :: PandocMonad m => TagParser m Inlines
 pTagText = try $ do
+  pos <- getPosition
   (TagText str) <- pSatisfy isTagText
   st <- getState
   qu <- ask
   parsed <- lift $ lift $
-            flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+            flip runReaderT qu $ runParserT (many pTagContents) st "text"
+               (Sources [(pos, str)])
   case parsed of
        Left _        -> throwError $ PandocParseError $
                         "Could not parse `" <> str <> "'"
        Right result  -> return $ mconcat result
 
-type InlinesParser m = HTMLParser m Text
+type InlinesParser m = HTMLParser m Sources
 
 pTagContents :: PandocMonad m => InlinesParser m Inlines
 pTagContents =
@@ -970,13 +972,14 @@ isCommentTag = tagComment (const True)
 -- | Matches a stretch of HTML in balanced tags.
 htmlInBalanced :: Monad m
                => (Tag Text -> Bool)
-               -> ParserT Text st m Text
+               -> ParserT Sources st m Text
 htmlInBalanced f = try $ do
   lookAhead (char '<')
-  inp <- getInput
-  let ts = canonicalizeTags $
-        parseTagsOptions parseOptions{ optTagWarning = True,
-                                       optTagPosition = True } inp
+  sources <- getInput
+  let ts = canonicalizeTags
+        $ parseTagsOptions parseOptions{ optTagWarning = True,
+                                         optTagPosition = True }
+        $ sourcesToText sources
   case ts of
     (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
        guard $ f t
@@ -1018,15 +1021,17 @@ hasTagWarning _                = False
 -- | Matches a tag meeting a certain condition.
 htmlTag :: (HasReaderOptions st, Monad m)
         => (Tag Text -> Bool)
-        -> ParserT Text st m (Tag Text, Text)
+        -> ParserT Sources st m (Tag Text, Text)
 htmlTag f = try $ do
   lookAhead (char '<')
   startpos <- getPosition
-  inp <- getInput
+  sources <- getInput
+  let inp = sourcesToText sources
   let ts = canonicalizeTags $ parseTagsOptions
                                parseOptions{ optTagWarning = False
                                            , optTagPosition = True }
-                               (inp <> " ") -- add space to ensure that
+                               (inp <> " ")
+                               -- add space to ensure that
                                -- we get a TagPosition after the tag
   (next, ln, col) <- case ts of
                       (TagPosition{} : next : TagPosition ln col : _)
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 48454e353..35eaac0a9 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -19,7 +19,7 @@ import Control.Monad.Except (throwError)
 import Data.List (intersperse)
 import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
-import Data.Text (Text, unpack)
+import Data.Text (unpack)
 import qualified Data.Text as T
 import Documentation.Haddock.Parser
 import Documentation.Haddock.Types as H
@@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.Shared (splitTextBy, trim)
 
 
 -- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: PandocMonad m
+readHaddock :: (PandocMonad m, ToSources a)
             => ReaderOptions
-            -> Text
+            -> a
             -> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
+readHaddock opts s = case readHaddockEither opts
+                           (unpack . sourcesToText . toSources $ s) of
   Right result -> return result
   Left e       -> throwError e
 
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index 70296bb6b..cd1093109 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -39,10 +39,12 @@ import Data.Aeson as Aeson
 import Control.Monad.Except (throwError)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readIpynb opts t = do
-  let src = BL.fromStrict (TE.encodeUtf8 t)
+readIpynb :: (PandocMonad m, ToSources a)
+          => ReaderOptions -> a -> m Pandoc
+readIpynb opts x = do
+  let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
   case eitherDecode src of
     Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
     Left _ ->
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c068f3774..9cdbf1611 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.Pandoc.XML.Light
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type JATS m = StateT JATSState m
 
@@ -52,10 +53,14 @@ instance Default JATSState where
                  , jatsContent = [] }
 
 
-readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readJATS :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readJATS _ inp = do
+  let sources = toSources inp
   tree <- either (throwError . PandocXMLError "") return $
-            parseXMLContents (TL.fromStrict $ crFilter inp)
+            parseXMLContents (TL.fromStrict . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 89aecbf56..a3b415f09 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
 import Text.Pandoc.Error (PandocError (PandocParseError))
 import Text.Pandoc.Options (ReaderOptions)
 import Text.Pandoc.Shared (stringify)
-
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import qualified Text.Jira.Markup as Jira
 
 -- | Read Jira wiki markup.
-readJira :: PandocMonad m
+readJira :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
-readJira _opts s = case parse s of
-  Right d -> return $ jiraToPandoc d
-  Left e  -> throwError . PandocParseError $
-             "Jira parse error" `append` pack (show e)
+readJira _opts inp = do
+  let sources = toSources inp
+  case parse (sourcesToText sources) of
+    Right d -> return $ jiraToPandoc d
+    Left e  -> throwError . PandocParseError $
+               "Jira parse error" `append` pack (show e)
 
 jiraToPandoc :: Jira.Doc -> Pandoc
 jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9ad168293..f90d562ae 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty)
 -- import Debug.Trace (traceShowId)
 
 -- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: PandocMonad m
+readLaTeX :: (PandocMonad m, ToSources a)
           => ReaderOptions -- ^ Reader options
-          -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+          -> a             -- ^ Input to parse
           -> m Pandoc
 readLaTeX opts ltx = do
+  let sources = toSources ltx
   parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
-               (tokenize "source" (crFilter ltx))
+               (tokenizeSources sources)
   case parsed of
     Right result -> return result
-    Left e       -> throwError $ PandocParsecError ltx e
+    Left e       -> throwError $ PandocParsecError sources e
 
 parseLaTeX :: PandocMonad m => LP m Pandoc
 parseLaTeX = do
@@ -132,11 +133,11 @@ resolveRefs _ x = x
 
 
 rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-              => ParserT Text s m Text
+              => ParserT Sources s m Text
 rawLaTeXBlock = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
       <|> rawLaTeXParser toks True
              (do choice (map controlSeq
@@ -163,11 +164,11 @@ beginOrEndCommand = try $ do
                     (txt <> untokenize rawargs)
 
 rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-               => ParserT Text s m Text
+               => ParserT Sources s m Text
 rawLaTeXInline = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   raw <- snd <$>
           (   rawLaTeXParser toks True
               (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
@@ -178,11 +179,11 @@ rawLaTeXInline = do
   finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
   return $ raw <> T.pack finalbraces
 
-inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
 inlineCommand = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
           inlines
 
@@ -641,7 +642,7 @@ opt = do
   parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
   case parsed of
     Right result -> return result
-    Left e       -> throwError $ PandocParsecError (untokenize toks) e
+    Left e       -> throwError $ PandocParsecError (toSources toks) e
 
 -- block elements:
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
index 655823dab..af97125c6 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Citation.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do
       runParserT (mconcat <$> many inline) st "bracketed option" toks
     case parsed of
       Right result -> return result
-      Left e       -> throwError $ PandocParsecError (untokenize toks) e
+      Left e       -> throwError $ PandocParsecError (toSources toks) e
 
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index db58b333d..35ce3509d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawLaTeXParser
   , applyMacros
   , tokenize
+  , tokenizeSources
   , untokenize
   , untoken
   , totoks
@@ -248,7 +249,7 @@ withVerbatimMode parser = do
 
 rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
                => [Tok] -> Bool -> LP m a -> LP m a
-               -> ParserT Text s m (a, Text)
+               -> ParserT Sources s m (a, Text)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
@@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do
               Left _    -> mzero
               Right ((val, raw), st) -> do
                 updateState (updateMacros (sMacros st <>))
-                _ <- takeP (T.length (untokenize toks'))
+                void $ count (T.length (untokenize toks')) anyChar
                 let result = untokenize raw
                 -- ensure we end with space if input did, see #4442
                 let result' =
@@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do
                 return (val, result')
 
 applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-            => Text -> ParserT Text s m Text
+            => Text -> ParserT Sources s m Text
 applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
    do let retokenize = untokenize <$> many (satisfyTok (const True))
       pstate <- getState
@@ -301,6 +302,11 @@ QuickCheck property:
 >   let t = T.pack s in untokenize (tokenize "random" t) == t
 -}
 
+tokenizeSources :: Sources -> [Tok]
+tokenizeSources = concatMap tokenizeSource . unSources
+ where
+   tokenizeSource (pos, t) = totoks pos t
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index f8c214318..c20b72bc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleInstances #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Types
    Copyright   : Copyright (C) 2017-2021 John MacFarlane
@@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
                                        )
 where
 import Data.Text (Text)
-import Text.Parsec.Pos (SourcePos)
+import Text.Parsec.Pos (SourcePos, sourceName)
+import Text.Pandoc.Sources
+import Data.List (groupBy)
 
 data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
                Esc1    | Esc2   | Arg Int
@@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
 data Tok = Tok SourcePos TokType Text
      deriving (Eq, Ord, Show)
 
+instance ToSources [Tok] where
+  toSources = Sources
+    . map (\ts -> case ts of
+                    Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
+                    _ -> error "toSources [Tok] encountered empty group")
+    . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
+
+tokToText :: Tok -> Text
+tokToText (Tok _ _ t) = t
+
 data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
      deriving (Eq, Ord, Show)
 
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 21b8feaab..1141af66f 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
 import Control.Monad.Trans (lift)
 import Control.Monad.Except (throwError)
 import Data.Maybe (catMaybes, isJust)
-import Data.List (intersperse, intercalate)
+import Data.List (intersperse)
 import qualified Data.Text as T
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
@@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Walk (query)
-import Text.Pandoc.Shared (crFilter, mapLeft)
+import Text.Pandoc.Shared (mapLeft)
 import Text.Pandoc.Readers.Roff  -- TODO explicit imports
-import Text.Parsec hiding (tokenPrim)
 import qualified Text.Parsec as Parsec
 import Text.Parsec.Pos (updatePosString)
 import qualified Data.Foldable as Foldable
@@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
 
 
 -- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
-readMan opts txt = do
-  tokenz <- lexRoff (initialPos "input") (crFilter txt)
+readMan :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
+readMan opts s = do
+  let Sources inps = toSources s
+  tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
   let state = def {readerOptions = opts} :: ManState
+  let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
+      fixError e = e
   eitherdoc <- readWithMTokens parseMan state
      (Foldable.toList . unRoffTokens $ tokenz)
-  either throwError return eitherdoc
+  either (throwError . fixError) return eitherdoc
+
 
 readWithMTokens :: PandocMonad m
         => ParserT [RoffToken] ManState m a  -- ^ parser
@@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
         -> [RoffToken]                       -- ^ input
         -> m (Either PandocError a)
 readWithMTokens parser state input =
-  let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
+  let leftF = PandocParsecError mempty
   in mapLeft leftF `liftM` runParserT parser state "source" input
 
+
 parseMan :: PandocMonad m => ManParser m Pandoc
 parseMan = do
   bs <- many parseBlock <* eof
@@ -89,7 +96,7 @@ parseBlock = choice [ parseList
 
 parseTable :: PandocMonad m => ManParser m Blocks
 parseTable = do
-  modifyState $ \st -> st { tableCellsPlain = True }
+  updateState $ \st -> st { tableCellsPlain = True }
   let isTbl Tbl{} = True
       isTbl _     = False
   Tbl _opts rows pos <- msatisfy isTbl
@@ -135,7 +142,7 @@ parseTable = do
         case res' of
           Left _  -> Prelude.fail "Could not parse table cell"
           Right x -> do
-            modifyState $ \s -> s{ tableCellsPlain = False }
+            updateState $ \s -> s{ tableCellsPlain = False }
             return x
       Right x -> return x
 
@@ -222,7 +229,7 @@ parseTitle = do
                       setMeta "section" (linePartsToInlines y)
          [x]       -> setMeta "title" (linePartsToInlines x)
          []        -> id
-  modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
+  updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
   return mempty
 
 linePartsToInlines :: [LinePart] -> Inlines
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index ba8ed147e..69dd51bc4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared
 import Text.Pandoc.XML (fromEntities)
 import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
+-- import Debug.Trace (traceShowId)
 
-type MarkdownParser m = ParserT Text ParserState m
+type MarkdownParser m = ParserT Sources ParserState m
 
 type F = Future ParserState
 
 -- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: PandocMonad m
+readMarkdown :: (PandocMonad m, ToSources a)
              => ReaderOptions -- ^ Reader options
-             -> Text      -- ^ String to parse (assuming @'\n'@ line endings)
+             -> a             -- ^ Input
              -> m Pandoc
 readMarkdown opts s = do
   parsed <- readWithM parseMarkdown def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+                (ensureFinalNewlines 3 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do
         meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
         setPosition oldPos
         return $ runF meta defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do
         refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
         setPosition oldPos
         return $ runF refs defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -146,14 +147,14 @@ inList = do
   ctx <- stateParserContext <$> getState
   guard (ctx == ListItemState)
 
-spnl :: PandocMonad m => ParserT Text st m ()
+spnl :: PandocMonad m => ParserT Sources st m ()
 spnl = try $ do
   skipSpaces
   optional newline
   skipSpaces
   notFollowedBy (char '\n')
 
-spnl' :: PandocMonad m => ParserT Text st m Text
+spnl' :: PandocMonad m => ParserT Sources st m Text
 spnl' = try $ do
   xs <- many spaceChar
   ys <- option "" $ try $ (:) <$> newline
@@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _)
 -- hrule block
 --
 
-hrule :: PandocMonad m => ParserT Text st m (F Blocks)
+hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
 hrule = try $ do
   skipSpaces
   start <- satisfy isHruleChar
@@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline
 blockDelimiter :: PandocMonad m
                => (Char -> Bool)
                -> Maybe Int
-               -> ParserT Text ParserState m Int
+               -> ParserT Sources ParserState m Int
 blockDelimiter f len = try $ do
   skipNonindentSpaces
   c <- lookAhead (satisfy f)
@@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do
   blanklines
   return $ T.intercalate "\n" lns'
 
-birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
+birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
 birdTrackLine c = try $ do
   char c
   -- allow html tags on left margin:
@@ -1025,7 +1026,7 @@ para = try $ do
   option (B.plain <$> result)
     $ try $ do
             newline
-            (blanklines >> return mempty)
+            (mempty <$ blanklines)
               <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
               <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
               <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
@@ -1170,7 +1171,7 @@ lineBlock = do
 -- and the length including trailing space.
 dashedLine :: PandocMonad m
            => Char
-           -> ParserT Text st m (Int, Int)
+           -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many spaceChar
@@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m
              -> MarkdownParser m [Text]
 rawTableLine indices = do
   notFollowedBy' (blanklines' <|> tableFooter)
-  line <- take1WhileP (/='\n') <* newline
+  line <- anyLine
   return $ map trim $ tail $
            splitTextByIndices (init indices) line
 
@@ -1390,7 +1391,7 @@ pipeTableCell =
       return $ B.plain <$> result)
     <|> return mempty
 
-pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
 pipeTableHeaderPart = try $ do
   skipMany spaceChar
   left <- optionMaybe (char ':')
@@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do
       (Just _,Just _)   -> AlignCenter, len)
 
 -- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT Text st m ()
+scanForPipe :: PandocMonad m => ParserT Sources st m ()
 scanForPipe = do
-  inp <- getInput
-  case T.break (\c -> c == '\n' || c == '|') inp of
+  Sources inps <- getInput
+  let ln = case inps of
+             [] -> ""
+             ((_,t):(_,t'):_) | T.null t -> t'
+             ((_,t):_) -> t
+  case T.break (\c -> c == '\n' || c == '|') ln of
        (_, T.uncons -> Just ('|', _)) -> return ()
        _                              -> mzero
 
@@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
   where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
         regsp = skipMany spaceChar >> return B.space
 
-nonEndline :: PandocMonad m => ParserT Text st m Char
+nonEndline :: PandocMonad m => ParserT Sources st m Char
 nonEndline = satisfy (/='\n')
 
 str :: PandocMonad m => MarkdownParser m (F Inlines)
 str = do
   result <- mconcat <$> many1
-             ( take1WhileP isAlphaNum
+             ( T.pack <$> (many1 alphaNum)
               <|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
   updateLastStrPos
   (do guardEnabled Ext_smart
@@ -1962,7 +1967,7 @@ rawLaTeXInline' = do
     s <- rawLaTeXInline
     return $ return $ B.rawInline "tex" s -- "tex" because it might be context
 
-rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
+rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
 rawConTeXtEnvironment = try $ do
   string "\\start"
   completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do
                        (try $ string "\\stop" >> textStr completion)
   return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
 
-inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
+inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
 inBrackets parser = do
   char '['
   contents <- manyChar parser
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 9f4d5e170..825e4a2eb 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -36,17 +36,18 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
                            trim, splitTextBy, tshow)
 import Text.Pandoc.Walk (walk)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: PandocMonad m
-              => ReaderOptions -- ^ Reader options
-              -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+readMediaWiki :: (PandocMonad m, ToSources a)
+              => ReaderOptions
+              -> a
               -> m Pandoc
 readMediaWiki opts s = do
+  let sources = toSources s
   parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
                                             , mwMaxNestingLevel = 4
                                             , mwNextLinkNumber  = 1
@@ -55,7 +56,7 @@ readMediaWiki opts s = do
                                             , mwLogMessages = []
                                             , mwInTT = False
                                             }
-            (crFilter s <> "\n")
+            sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -69,7 +70,7 @@ data MWState = MWState { mwOptions         :: ReaderOptions
                        , mwInTT            :: Bool
                        }
 
-type MWParser m = ParserT Text MWState m
+type MWParser m = ParserT Sources MWState m
 
 instance HasReaderOptions MWState where
   extractReaderOptions = mwOptions
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index cb141cba5..bbcfe62ea 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL
 import qualified Text.Pandoc.UTF8 as UTF8
 
 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
-             => ParserT Text st m (Future st MetaValue)
+             => ParserT Sources st m (Future st MetaValue)
              -> BL.ByteString
-             -> ParserT Text st m (Future st Meta)
+             -> ParserT Sources st m (Future st Meta)
 yamlBsToMeta pMetaValue bstr = do
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing
 
 -- Returns filtered list of references.
 yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
-             => ParserT Text st m (Future st MetaValue)
+             => ParserT Sources st m (Future st MetaValue)
              -> (Text -> Bool) -- ^ Filter for id
              -> BL.ByteString
-             -> ParserT Text st m (Future st [MetaValue])
+             -> ParserT Sources st m (Future st [MetaValue])
 yamlBsToRefs pMetaValue idpred bstr =
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
 nodeToKey _                                   = Nothing
 
 normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
-                   => ParserT Text st m (Future st MetaValue)
+                   => ParserT Sources st m (Future st MetaValue)
                    -> Text
-                   -> ParserT Text st m (Future st MetaValue)
+                   -> ParserT Sources st m (Future st MetaValue)
 normalizeMetaValue pMetaValue x =
    -- Note: a standard quoted or unquoted YAML value will
    -- not end in a newline, but a "block" set off with
@@ -133,9 +133,9 @@ checkBoolean t
   | otherwise = Nothing
 
 yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
-                => ParserT Text st m (Future st MetaValue)
+                => ParserT Sources st m (Future st MetaValue)
                 -> YAML.Node YE.Pos
-                -> ParserT Text st m (Future st MetaValue)
+                -> ParserT Sources st m (Future st MetaValue)
 yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
   case x of
        YAML.SStr t       -> normalizeMetaValue pMetaValue t
@@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
 yamlToMetaValue _ _ = return $ return $ MetaString ""
 
 yamlMap :: (PandocMonad m, HasLastStrPosition st)
-        => ParserT Text st m (Future st MetaValue)
+        => ParserT Sources st m (Future st MetaValue)
         -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
-        -> ParserT Text st m (Future st (M.Map Text MetaValue))
+        -> ParserT Sources st m (Future st (M.Map Text MetaValue))
 yamlMap pMetaValue o = do
     kvs <- forM (M.toList o) $ \(key, v) -> do
              k <- maybe (throwError $ PandocParseError
@@ -177,8 +177,8 @@ yamlMap pMetaValue o = do
 
 -- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
 yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
-              => ParserT Text st m (Future st MetaValue)
-              -> ParserT Text st m (Future st Meta)
+              => ParserT Sources st m (Future st MetaValue)
+              -> ParserT Sources st m (Future st Meta)
 yamlMetaBlock parser = try $ do
   string "---"
   blankline
@@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do
   optional blanklines
   yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
 
-stopLine :: Monad m => ParserT Text st m ()
+stopLine :: Monad m => ParserT Sources st m ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index 698bfd3d7..a0d4534f1 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
-import Text.Pandoc.Shared (crFilter, trimr, tshow)
+import Text.Pandoc.Shared (trimr, tshow)
 
 -- | Read Muse from an input string and return a Pandoc document.
-readMuse :: PandocMonad m
+readMuse :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
 readMuse opts s = do
-  let input = crFilter s
-  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
+  let sources = toSources s
+  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
+              (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
 type F = Future MuseState
@@ -82,7 +83,7 @@ instance Default MuseEnv where
                 , museInPara = False
                 }
 
-type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
+type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
 
 instance HasReaderOptions MuseState where
   extractReaderOptions = museOptions
@@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
 -- * Parsers
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 getIndent :: PandocMonad m
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 9c8bc0374..58f235e81 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
 import Data.Text (Text)
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read native formatted text and return a Pandoc document.
 -- The input may be a full pandoc document, a block list, a block,
@@ -32,14 +33,15 @@ import Text.Pandoc.Error
 --
 -- > Pandoc nullMeta [Plain [Str "hi"]]
 --
-readNative :: PandocMonad m
+readNative :: (PandocMonad m, ToSources a)
            => ReaderOptions
-           -> Text       -- ^ String to parse (assuming @'\n'@ line endings)
+           -> a
            -> m Pandoc
 readNative _ s =
-  case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
-    Right doc -> return doc
-    Left _    -> throwError $ PandocParseError "couldn't read native"
+  let t = sourcesToText . toSources $ s
+  in  case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
+        Right doc -> return doc
+        Left _    -> throwError $ PandocParseError "couldn't read native"
 
 readBlocks :: Text -> Either PandocError [Block]
 readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 5f2ddb876..668c9ca11 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -24,7 +24,8 @@ import Text.Pandoc.Options
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
-import Text.Pandoc.Shared (crFilter, blocksToInlines')
+import Text.Pandoc.Shared (blocksToInlines')
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.XML.Light
 import Control.Monad.Except (throwError)
 
@@ -46,10 +47,14 @@ instance Default OPMLState where
                  , opmlOptions = def
                  }
 
-readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readOPML :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readOPML opts inp  = do
-  (bs, st') <- runStateT
-                 (case parseXMLContents (TL.fromStrict (crFilter inp)) of
+  let sources = toSources inp
+  (bs, st') <-
+    runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
                      Left msg -> throwError $ PandocXMLError "" msg
                      Right ns -> mapM parseBlock ns)
                  def{ opmlOptions = opts }
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index afeb27a87..8823befdd 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing (reportLogMessages)
-import Text.Pandoc.Shared (crFilter)
-
+import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
 import Control.Monad.Except (throwError)
 import Control.Monad.Reader (runReaderT)
 
-import Data.Text (Text)
-
 -- | Parse org-mode string and return a Pandoc document.
-readOrg :: PandocMonad m
+readOrg :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readOrg opts s = do
   parsed <- flip runReaderT def $
             readWithM parseOrg (optionsToParserState opts)
-            (crFilter s <> "\n\n")
+            (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left  e      -> throwError e
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 519a6ce04..054f2611a 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -29,6 +29,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
+import Text.Pandoc.Sources (ToSources(..))
 import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 
 import Control.Monad (guard, mplus, mzero, unless, void, when)
@@ -802,7 +803,7 @@ inlineLaTeX = try $ do
    parseAsInlineLaTeX :: PandocMonad m
                       => Text -> TeXExport -> OrgParser m (Maybe Inlines)
    parseAsInlineLaTeX cs = \case
-     TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
+     TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
      TeXIgnore -> return (Just mempty)
      TeXVerbatim -> return (Just $ B.str cs)
 
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 6ed24a602..c7ea02815 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -114,7 +114,7 @@ import Control.Monad (guard)
 import Control.Monad.Reader (ReaderT)
 
 -- | The parser used to read org files.
-type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
 
 --
 -- Adaptions and specializations of parsing utilities
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index ac4c0b6cb..a3fcf028c 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -38,25 +38,24 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Printf (printf)
 import Data.Time.Format
 
 -- TODO:
 -- [ ] .. parsed-literal
 
 -- | Parse reStructuredText string and return Pandoc document.
-readRST :: PandocMonad m
+readRST :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readRST opts s = do
   parsed <- readWithM parseRST def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+               (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
 
-type RSTParser m = ParserT Text ParserState m
+type RSTParser m = ParserT Sources ParserState m
 
 --
 -- Constants and data structure definitions
@@ -151,11 +150,19 @@ parseRST = do
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- T.concat <$>
-                  manyTill (referenceKey <|> anchorDef <|>
-                            noteBlock <|> citationBlock <|>
-                            (snd <$> withRaw comment) <|>
-                            headerBlock <|> lineClump) eof
+  let chunk = referenceKey
+              <|> anchorDef
+              <|> noteBlock
+              <|> citationBlock
+              <|> (snd <$> withRaw comment)
+              <|> headerBlock
+              <|> lineClump
+  docMinusKeys <- Sources <$>
+                  manyTill (do pos <- getPosition
+                               t <- chunk
+                               return (pos, t)) eof
+  -- UGLY: we collapse source position information.
+  -- TODO: fix the parser to use the F monad instead of two passes
   setInput docMinusKeys
   setPosition startPos
   st' <- getState
@@ -348,7 +355,7 @@ singleHeader' = try $ do
 -- hrule block
 --
 
-hrule :: Monad m => ParserT Text st m Blocks
+hrule :: Monad m => ParserT Sources st m Blocks
 hrule = try $ do
   chr <- oneOf underlineChars
   count 3 (char chr)
@@ -363,7 +370,7 @@ hrule = try $ do
 
 -- read a line indented by a given string
 indentedLine :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m Text
+             => Int -> ParserT Sources st m Text
 indentedLine indents = try $ do
   lookAhead spaceChar
   gobbleAtMostSpaces indents
@@ -372,7 +379,7 @@ indentedLine indents = try $ do
 -- one or more indented lines, possibly separated by blank lines.
 -- any amount of indentation will work.
 indentedBlock :: (HasReaderOptions st, Monad m)
-              => ParserT Text st m Text
+              => ParserT Sources st m Text
 indentedBlock = try $ do
   indents <- length <$> lookAhead (many1 spaceChar)
   lns <- many1 $ try $ do b <- option "" blanklines
@@ -381,20 +388,20 @@ indentedBlock = try $ do
   optional blanklines
   return $ T.unlines lns
 
-quotedBlock :: Monad m => ParserT Text st m Text
+quotedBlock :: Monad m => ParserT Sources st m Text
 quotedBlock = try $ do
     quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
     lns <- many1 $ lookAhead (char quote) >> anyLine
     optional blanklines
     return $ T.unlines lns
 
-codeBlockStart :: Monad m => ParserT Text st m Char
+codeBlockStart :: Monad m => ParserT Sources st m Char
 codeBlockStart = string "::" >> blankline >> blankline
 
-codeBlock :: Monad m => ParserT Text ParserState m Blocks
+codeBlock :: Monad m => ParserT Sources ParserState m Blocks
 codeBlock = try $ codeBlockStart >> codeBlockBody
 
-codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
+codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
 codeBlockBody = do
   lang <- stateRstHighlight <$> getState
   try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
   return $ B.codeBlockWith ("", ["haskell","literate"], [])
          $ T.intercalate "\n" lns
 
-latexCodeBlock :: Monad m => ParserT Text st m [Text]
+latexCodeBlock :: Monad m => ParserT Sources st m [Text]
 latexCodeBlock = try $ do
   try (latexBlockLine "\\begin{code}")
   many1Till anyLine (try $ latexBlockLine "\\end{code}")
  where
   latexBlockLine s = skipMany spaceChar >> string s >> blankline
 
-birdCodeBlock :: Monad m => ParserT Text st m [Text]
+birdCodeBlock :: Monad m => ParserT Sources st m [Text]
 birdCodeBlock = filterSpace <$> many1 birdTrackLine
   where filterSpace lns =
             -- if (as is normal) there is always a space after >, drop it
@@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
                then map (T.drop 1) lns
                else lns
 
-birdTrackLine :: Monad m => ParserT Text st m Text
+birdTrackLine :: Monad m => ParserT Sources st m Text
 birdTrackLine = char '>' >> anyLine
 
 --
@@ -456,7 +463,6 @@ includeDirective top fields body = do
   let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
   let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
   oldPos <- getPosition
-  oldInput <- getInput
   containers <- stateContainers <$> getState
   when (f `elem` containers) $
     throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
@@ -494,15 +500,11 @@ includeDirective top fields body = do
        Nothing   -> case lookup "literal" fields of
                          Just _  -> return $ B.rawBlock "rst" contents'
                          Nothing -> do
-                           setPosition $ newPos (T.unpack f) 1 1
-                           setInput $ contents' <> "\n"
-                           bs <- optional blanklines >>
-                                  (mconcat <$> many block)
-                           setInput oldInput
-                           setPosition oldPos
+                           addToSources (initialPos (T.unpack f))
+                              (contents' <> "\n")
                            updateState $ \s -> s{ stateContainers =
                                          tail $ stateContainers s }
-                           return bs
+                           return mempty
 
 
 --
@@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
 definitionList = B.definitionList <$> many1 definitionListItem
 
 -- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT Text st m Int
+bulletListStart :: Monad m => ParserT Sources st m Int
 bulletListStart = try $ do
   notFollowedBy' hrule  -- because hrules start out just like lists
   marker <- oneOf bulletListMarkers
@@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do
 -- plus isolated (no two adjacent) internal hyphens, underscores,
 -- periods, colons and plus signs; no whitespace or other characters
 -- are allowed.
-simpleReferenceName :: Monad m => ParserT Text st m Text
+simpleReferenceName :: Monad m => ParserT Sources st m Text
 simpleReferenceName = do
   x <- alphaNum
   xs <- many $  alphaNum
@@ -1122,7 +1124,7 @@ referenceKey = do
   -- return enough blanks to replace key
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
-targetURI :: Monad m => ParserT Text st m Text
+targetURI :: Monad m => ParserT Sources st m Text
 targetURI = do
   skipSpaces
   optional $ try $ newline >> notFollowedBy blankline
@@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m ()
 anonymousKey = try $ do
   oneOfStrings [".. __:", "__"]
   src <- targetURI
-  pos <- getPosition
-  let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
+  -- Keys must sort in order of occurrence. Key text compares
+  -- lexicographically, so zero-pad the counter ("_10" must follow "_2").
+  numKeys <- M.size . stateKeys <$> getState
+  let key = toKey $ "_" <> T.justifyRight 9 '0' (T.pack (show numKeys))
   updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
                           stateKeys s }
 
@@ -1250,13 +1254,13 @@ headerBlock = do
 -- Grid tables TODO:
 --  - column spans
 
-dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many (char ' ')
   return (length dashes, length $ dashes ++ sp)
 
-simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
 simpleDashedLines ch = try $ many1 (dashedLine ch)
 
 -- Parse a table row separator
@@ -1382,7 +1386,7 @@ hyphens = do
   -- don't want to treat endline after hyphen or dash as a space
   return $ B.str result
 
-escapedChar :: Monad m => ParserT Text st m Inlines
+escapedChar :: Monad m => ParserT Sources st m Inlines
 escapedChar = do c <- escaped anyChar
                  return $ if c == ' ' || c == '\n' || c == '\r'
                              -- '\ ' is null in RST
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 509ce1377..47f16ef4b 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared (safeRead)
-import Text.Parsec hiding (tokenPrim)
 import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
 import qualified Data.Sequence as Seq
 import qualified Data.Foldable as Foldable
@@ -122,16 +121,16 @@ instance Default RoffState where
                   , afterConditional = False
                   }
 
-type RoffLexer m = ParserT T.Text RoffState m
+type RoffLexer m = ParserT Sources RoffState m
 
 --
 -- Lexer: T.Text -> RoffToken
 --
 
-eofline :: Stream s m Char => ParsecT s u m ()
+eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
 eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
 
-spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
 spacetab = char ' ' <|> char '\t'
 
 characterCodeMap :: M.Map T.Text Char
@@ -303,8 +302,7 @@ expandString = try $ do
   char '*'
   cs <- escapeArg <|> countChar 1 anyChar
   s <- linePartsToText <$> resolveText cs pos
-  getInput >>= setInput . (s <>)
-  return ()
+  addToInput s
 
 -- Parses: '..'
 quoteArg :: PandocMonad m => RoffLexer m T.Text
@@ -316,7 +314,7 @@ escFont = do
   font' <- if T.null font || font == "P"
               then prevFont <$> getState
               else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
-  modifyState $ \st -> st{ prevFont = currentFont st
+  updateState $ \st -> st{ prevFont = currentFont st
                          , currentFont = font' }
   return [Font font']
   where
@@ -372,8 +370,8 @@ lexTable pos = do
   spaces
   opts <- try tableOptions <|> [] <$ optional (char ';')
   case lookup "tab" opts of
-    Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
-    _                              -> modifyState $ \st -> st{ tableTabChar = '\t' }
+    Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
+    _                              -> updateState $ \st -> st{ tableTabChar = '\t' }
   spaces
   skipMany lexComment
   spaces
@@ -489,18 +487,18 @@ lexConditional mname = do
   ifPart <- do
       optional $ try $ char '\\' >> newline
       lexGroup
-       <|> do modifyState $ \s -> s{ afterConditional = True }
+       <|> do updateState $ \s -> s{ afterConditional = True }
               t <- manToken
-              modifyState $ \s -> s{ afterConditional = False }
+              updateState $ \s -> s{ afterConditional = False }
               return t
   case mbtest of
     Nothing    -> do
-      putState st  -- reset state, so we don't record macros in skipped section
+      setState st  -- reset state, so we don't record macros in skipped section
       report $ SkippedContent (T.cons '.' mname) pos
       return mempty
     Just True  -> return ifPart
     Just False -> do
-      putState st
+      setState st
       return mempty
 
 expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@@ -515,7 +513,7 @@ expression = do
       _    -> Nothing
   where
     returnValue v = do
-      modifyState $ \st -> st{ lastExpression = v }
+      updateState $ \st -> st{ lastExpression = v }
       return v
 
 lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@@ -536,7 +534,7 @@ lexIncludeFile args = do
       result <- readFileFromDirs dirs $ T.unpack fp
       case result of
         Nothing  -> report $ CouldNotLoadIncludeFile fp pos
-        Just s   -> getInput >>= setInput . (s <>)
+        Just s   -> addToInput s
       return mempty
     []    -> return mempty
 
@@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
      (x:ys) -> do
        let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
        let stringName = linePartsToText x
-       modifyState $ \st ->
+       updateState $ \st ->
          st{ customMacros = M.insert stringName ts (customMacros st) }
    return mempty
 
 lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
 lexMacroDef args = do -- macro definition
-   modifyState $ \st -> st{ roffMode = CopyMode }
+   updateState $ \st -> st{ roffMode = CopyMode }
    (macroName, stopMacro) <-
      case args of
        (x : y : _) -> return (linePartsToText x, linePartsToText y)
@@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
          _ <- lexArgs
          return ()
    ts <- mconcat <$> manyTill manToken stop
-   modifyState $ \st ->
+   updateState $ \st ->
      st{ customMacros = M.insert macroName ts (customMacros st)
        , roffMode = NormalMode }
    return mempty
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index c4d7bcc93..276d28aaa 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -28,22 +28,22 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, tshow)
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
+readTWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTWiki opts s = do
-  res <- readWithM parseTWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TWParser = ParserT Text ParserState
+type TWParser = ParserT Sources ParserState
 
 --
 -- utility functions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 8d7900de4..981878206 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -53,30 +53,34 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Text.Pandoc.Shared (trim, tshow)
 
 -- | Parse a Textile text and return a Pandoc document.
-readTextile :: PandocMonad m
+readTextile :: (PandocMonad m, ToSources a)
             => ReaderOptions -- ^ Reader options
-            -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+            -> a
             -> m Pandoc
 readTextile opts s = do
-  parsed <- readWithM parseTextile def{ stateOptions = opts }
-                (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  parsed <- readWithM parseTextile def{ stateOptions = opts } sources
   case parsed of
      Right result -> return result
      Left e       -> throwError e
 
+type TextileParser = ParserT Sources ParserState
 
 -- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
+parseTextile :: PandocMonad m => TextileParser m Pandoc
 parseTextile = do
   many blankline
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys/notes were...
-  let firstPassParser = noteBlock <|> lineClump
-  manyTill firstPassParser eof >>= setInput . T.concat
+  let firstPassParser = do
+        pos <- getPosition
+        t <- noteBlock <|> lineClump
+        return (pos, t)
+  manyTill firstPassParser eof >>= setInput . Sources
   setPosition startPos
   st' <- getState
   let reversedNotes = stateNotes st'
@@ -84,10 +88,10 @@ parseTextile = do
   -- now parse it for real...
   Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME
 
-noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker :: PandocMonad m => TextileParser m Text
 noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
 
-noteBlock :: PandocMonad m => ParserT Text ParserState m Text
+noteBlock :: PandocMonad m => TextileParser m Text
 noteBlock = try $ do
   startPos <- getPosition
   ref <- noteMarker
@@ -102,11 +106,11 @@ noteBlock = try $ do
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
 -- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
+parseBlocks :: PandocMonad m => TextileParser m Blocks
 parseBlocks = mconcat <$> manyTill block eof
 
 -- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
+blockParsers :: PandocMonad m => [TextileParser m Blocks]
 blockParsers = [ codeBlock
                , header
                , blockQuote
@@ -121,22 +125,22 @@ blockParsers = [ codeBlock
                ]
 
 -- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT Text ParserState m Blocks
+block :: PandocMonad m => TextileParser m Blocks
 block = do
   res <- choice blockParsers <?> "block"
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
-commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+commentBlock :: PandocMonad m => TextileParser m Blocks
 commentBlock = try $ do
   string "###."
   manyTill anyLine blanklines
   return mempty
 
-codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlock :: PandocMonad m => TextileParser m Blocks
 codeBlock = codeBlockTextile <|> codeBlockHtml
 
-codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockTextile :: PandocMonad m => TextileParser m Blocks
 codeBlockTextile = try $ do
   string "bc." <|> string "pre."
   extended <- option False (True <$ char '.')
@@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
 trimTrailingNewlines = T.dropWhileEnd (=='\n')
 
 -- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockHtml :: PandocMonad m => TextileParser m Blocks
 codeBlockHtml = try $ do
   (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
   result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@@ -174,7 +178,7 @@ codeBlockHtml = try $ do
   return $ B.codeBlockWith (ident,classes,kvs) result'''
 
 -- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT Text ParserState m Blocks
+header :: PandocMonad m => TextileParser m Blocks
 header = try $ do
   char 'h'
   level <- digitToInt <$> oneOf "123456"
@@ -186,14 +190,14 @@ header = try $ do
   return $ B.headerWith attr' level name
 
 -- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
+blockQuote :: PandocMonad m => TextileParser m Blocks
 blockQuote = try $ do
   string "bq" >> attributes >> char '.' >> whitespace
   B.blockQuote <$> para
 
 -- Horizontal rule
 
-hrule :: PandocMonad m => ParserT Text st m Blocks
+hrule :: PandocMonad m => TextileParser m Blocks
 hrule = try $ do
   skipSpaces
   start <- oneOf "-*"
@@ -208,39 +212,39 @@ hrule = try $ do
 -- | Can be a bullet list or an ordered list. This implementation is
 -- strict in the nesting, sublist must start at exactly "parent depth
 -- plus one"
-anyList :: PandocMonad m => ParserT Text ParserState m Blocks
+anyList :: PandocMonad m => TextileParser m Blocks
 anyList = try $ anyListAtDepth 1 <* blanklines
 
 -- | This allow one type of list to be nested into an other type,
 -- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 anyListAtDepth depth = choice [ bulletListAtDepth depth,
                                 orderedListAtDepth depth,
                                 definitionList ]
 
 -- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListAtDepth depth = try $ B.bulletList  <$> many1 (bulletListItemAtDepth depth)
 
 -- | Bullet List Item of given depth, depth being the number of
 -- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListItemAtDepth = genericListItemAtDepth '*'
 
 -- | Ordered List of given depth, depth being the number of
 -- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListAtDepth depth = try $ do
   items <- many1 (orderedListItemAtDepth depth)
   return $ B.orderedList items
 
 -- | Ordered List Item of given depth, depth being the number of
 -- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListItemAtDepth = genericListItemAtDepth '#'
 
 -- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
 genericListItemAtDepth c depth = try $ do
   count depth (char c) >> attributes >> whitespace
   contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
   return $ contents <> sublist
 
 -- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
+definitionList :: PandocMonad m => TextileParser m Blocks
 definitionList = try $ B.definitionList <$> many1 definitionListItem
 
 -- | List start character.
-listStart :: PandocMonad m => ParserT Text ParserState m ()
+listStart :: PandocMonad m => TextileParser m ()
 listStart = genericListStart '*'
         <|> () <$ genericListStart '#'
         <|> () <$ definitionListStart
 
-genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
+genericListStart :: PandocMonad m => Char -> TextileParser m ()
 genericListStart c = () <$ try (many1 (char c) >> whitespace)
 
-basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
+basicDLStart :: PandocMonad m => TextileParser m ()
 basicDLStart = do
   char '-'
   whitespace
   notFollowedBy newline
 
-definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
+definitionListStart :: PandocMonad m => TextileParser m Inlines
 definitionListStart = try $ do
   basicDLStart
   trimInlines . mconcat <$>
@@ -281,15 +285,15 @@ definitionListStart = try $ do
 -- the term defined, then spaces and ":=". The definition follows, on
 -- the same single line, or spaned on multiple line, after a line
 -- break.
-definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
 definitionListItem = try $ do
   term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
   def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
   return (term, def')
-  where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+  where inlineDef :: PandocMonad m => TextileParser m [Blocks]
         inlineDef = liftM (\d -> [B.plain d])
                     $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
-        multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+        multilineDef :: PandocMonad m => TextileParser m [Blocks]
         multilineDef = try $ do
           optional whitespace >> newline
           s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@@ -300,7 +304,7 @@ definitionListItem = try $ do
 -- raw content
 
 -- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
 rawHtmlBlock = try $ do
   skipMany spaceChar
   (_,b) <- htmlTag isBlockTag
@@ -308,14 +312,14 @@ rawHtmlBlock = try $ do
   return $ B.rawBlock "html" b
 
 -- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
 rawLaTeXBlock' = do
   guardEnabled Ext_raw_tex
   B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
 
 
 -- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT Text ParserState m Blocks
+para :: PandocMonad m => TextileParser m Blocks
 para = B.para . trimInlines . mconcat <$> many1 inline
 
 -- Tables
@@ -326,7 +330,7 @@ toAlignment '>' = AlignRight
 toAlignment '=' = AlignCenter
 toAlignment _   = AlignDefault
 
-cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
 cellAttributes = try $ do
   isHeader <- option False (True <$ char '_')
   -- we just ignore colspan and rowspan markers:
@@ -339,7 +343,7 @@ cellAttributes = try $ do
   return (isHeader, alignment)
 
 -- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
 tableCell = try $ do
   char '|'
   (isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@@ -350,7 +354,7 @@ tableCell = try $ do
   return ((isHeader, alignment), B.plain content)
 
 -- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
 tableRow = try $ do
   -- skip optional row attributes
   optional $ try $ do
@@ -360,7 +364,7 @@ tableRow = try $ do
   many1 tableCell <* char '|' <* blankline
 
 -- | A table with an optional header.
-table :: PandocMonad m => ParserT Text ParserState m Blocks
+table :: PandocMonad m => TextileParser m Blocks
 table = try $ do
   -- ignore table attributes
   caption <- option mempty $ try $ do
@@ -388,7 +392,7 @@ table = try $ do
     (TableFoot nullAttr [])
 
 -- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
+ignorableRow :: PandocMonad m => TextileParser m ()
 ignorableRow = try $ do
   char '|'
   oneOf ":^-~"
@@ -397,7 +401,7 @@ ignorableRow = try $ do
   _ <- anyLine
   return ()
 
-explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
 explicitBlockStart name = try $ do
   string (T.unpack name)
   attributes
@@ -409,8 +413,8 @@ explicitBlockStart name = try $ do
 -- However, they can be used to set HTML/CSS attributes when needed.
 maybeExplicitBlock :: PandocMonad m
                    => Text  -- ^ block tag name
-                   -> ParserT Text ParserState m Blocks -- ^ implicit block
-                   -> ParserT Text ParserState m Blocks
+                   -> TextileParser m Blocks -- ^ implicit block
+                   -> TextileParser m Blocks
 maybeExplicitBlock name blk = try $ do
   optional $ explicitBlockStart name
   blk
@@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do
 
 
 -- | Any inline element
-inline :: PandocMonad m => ParserT Text ParserState m Inlines
+inline :: PandocMonad m => TextileParser m Inlines
 inline = choice inlineParsers <?> "inline"
 
 -- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
+inlineParsers :: PandocMonad m => [TextileParser m Inlines]
 inlineParsers = [ str
                 , whitespace
                 , endline
@@ -447,7 +451,7 @@ inlineParsers = [ str
                 ]
 
 -- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineMarkup :: PandocMonad m => TextileParser m Inlines
 inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       , simpleInline (string "**") B.strong
                       , simpleInline (string "__") B.emph
@@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       ]
 
 -- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT Text st m Inlines
+mark :: PandocMonad m => TextileParser m Inlines
 mark = try $ char '(' >> (try tm <|> try reg <|> copy)
 
-reg :: PandocMonad m => ParserT Text st m Inlines
+reg :: PandocMonad m => TextileParser m Inlines
 reg = do
   oneOf "Rr"
   char ')'
   return $ B.str "\174"
 
-tm :: PandocMonad m => ParserT Text st m Inlines
+tm :: PandocMonad m => TextileParser m Inlines
 tm = do
   oneOf "Tt"
   oneOf "Mm"
   char ')'
   return $ B.str "\8482"
 
-copy :: PandocMonad m => ParserT Text st m Inlines
+copy :: PandocMonad m => TextileParser m Inlines
 copy = do
   oneOf "Cc"
   char ')'
   return $ B.str "\169"
 
-note :: PandocMonad m => ParserT Text ParserState m Inlines
+note :: PandocMonad m => TextileParser m Inlines
 note = try $ do
   ref <- char '[' *> many1 digit <* char ']'
   notes <- stateNotes <$> getState
@@ -507,13 +511,13 @@ wordBoundaries :: [Char]
 wordBoundaries = markupChars <> stringBreakers
 
 -- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
+hyphenedWords :: PandocMonad m => TextileParser m Text
 hyphenedWords = do
   x <- wordChunk
   xs <-  many (try $ char '-' >> wordChunk)
   return $ T.intercalate "-" (x:xs)
 
-wordChunk :: PandocMonad m => ParserT Text ParserState m Text
+wordChunk :: PandocMonad m => TextileParser m Text
 wordChunk = try $ do
   hd <- noneOf wordBoundaries
   tl <- many ( noneOf wordBoundaries <|>
@@ -522,7 +526,7 @@ wordChunk = try $ do
   return $ T.pack $ hd:tl
 
 -- | Any string
-str :: PandocMonad m => ParserT Text ParserState m Inlines
+str :: PandocMonad m => TextileParser m Inlines
 str = do
   baseStr <- hyphenedWords
   -- RedCloth compliance : if parsed word is uppercase and immediately
@@ -535,11 +539,11 @@ str = do
   return $ B.str fullStr
 
 -- | Some number of space chars
-whitespace :: PandocMonad m => ParserT Text st m Inlines
+whitespace :: PandocMonad m => TextileParser m Inlines
 whitespace = many1 spaceChar >> return B.space <?> "whitespace"
 
 -- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT Text ParserState m Inlines
+endline :: PandocMonad m => TextileParser m Inlines
 endline = try $ do
   newline
   notFollowedBy blankline
@@ -547,18 +551,18 @@ endline = try $ do
   notFollowedBy rawHtmlBlock
   return B.linebreak
 
-rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
+rawHtmlInline :: PandocMonad m => TextileParser m Inlines
 rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
 
 -- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
 rawLaTeXInline' = try $ do
   guardEnabled Ext_raw_tex
   B.rawInline "latex" <$> rawLaTeXInline
 
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
-link :: PandocMonad m => ParserT Text ParserState m Inlines
+link :: PandocMonad m => TextileParser m Inlines
 link = try $ do
   bracketed <- (True <$ char '[') <|> return False
   char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -578,7 +582,7 @@ link = try $ do
               else B.spanWith attr $ B.link url "" name'
 
 -- | image embedding
-image :: PandocMonad m => ParserT Text ParserState m Inlines
+image :: PandocMonad m => TextileParser m Inlines
 image = try $ do
   char '!' >> notFollowedBy space
   (ident, cls, kvs) <- attributes
@@ -590,51 +594,51 @@ image = try $ do
   char '!'
   return $ B.imageWith attr src alt (B.str alt)
 
-escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedInline :: PandocMonad m => TextileParser m Inlines
 escapedInline = escapedEqs <|> escapedTag
 
-escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs :: PandocMonad m => TextileParser m Inlines
 escapedEqs = B.str . T.pack <$>
   try (string "==" *> manyTill anyChar' (try $ string "=="))
 
 -- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag :: PandocMonad m => TextileParser m Inlines
 escapedTag = B.str . T.pack <$>
   try (string "<notextile>" *>
          manyTill anyChar' (try $ string "</notextile>"))
 
 -- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol :: PandocMonad m => TextileParser m Inlines
 symbol = B.str . T.singleton <$> (notFollowedBy newline *>
                                   notFollowedBy rawHtmlBlock *>
                                   oneOf wordBoundaries)
 
 -- | Inline code
-code :: PandocMonad m => ParserT Text ParserState m Inlines
+code :: PandocMonad m => TextileParser m Inlines
 code = code1 <|> code2
 
 -- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT Text ParserState m Char
+anyChar' :: PandocMonad m => TextileParser m Char
 anyChar' =
   satisfy (/='\n') <|>
   try (char '\n' <* notFollowedBy blankline)
 
-code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 :: PandocMonad m => TextileParser m Inlines
 code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
 
-code2 :: PandocMonad m => ParserT Text ParserState m Inlines
+code2 :: PandocMonad m => TextileParser m Inlines
 code2 = do
   htmlTag (tagOpen (=="tt") null)
   B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
 
 -- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT Text ParserState m Attr
+attributes :: PandocMonad m => TextileParser m Attr
 attributes = foldl' (flip ($)) ("",[],[]) <$>
   try (do special <- option id specialAttribute
           attrs <- many attribute
           return (special : attrs))
 
-specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 specialAttribute = do
   alignStr <- ("center" <$ char '=') <|>
     ("justify" <$ try (string "<>")) <|>
@@ -643,11 +647,11 @@ specialAttribute = do
   notFollowedBy spaceChar
   return $ addStyle $ T.pack $ "text-align:" ++ alignStr
 
-attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 attribute = try $
   (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
 
-classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 classIdAttr = try $ do -- (class class #id)
   char '('
   ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
        classes'
          -> return $ \(_,_,keyvals) -> ("",classes',keyvals)
 
-styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 styleAttr = do
   style <- try $ enclosed (char '{') (char '}') anyChar'
   return $ addStyle $ T.pack style
@@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) =
   where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
         style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
 
-langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 langAttr = do
   lang <- try $ enclosed (char '[') (char ']') alphaNum
   return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
 
 -- | Parses material surrounded by a parser.
 surrounded :: (PandocMonad m, Show t)
-           => ParserT Text st m t   -- ^ surrounding parser
-           -> ParserT Text st m a   -- ^ content parser (to be used repeatedly)
-           -> ParserT Text st m [a]
+           => ParserT Sources st m t   -- ^ surrounding parser
+           -> ParserT Sources st m a   -- ^ content parser (to be used repeatedly)
+           -> ParserT Sources st m [a]
 surrounded border =
   enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
 
 simpleInline :: PandocMonad m
-             => ParserT Text ParserState m t        -- ^ surrounding parser
+             => TextileParser m t        -- ^ surrounding parser
              -> (Inlines -> Inlines)                  -- ^ Inline constructor
-             -> ParserT Text ParserState m Inlines  -- ^ content parser (to be used repeatedly)
+             -> TextileParser m Inlines  -- ^ content parser (to be used repeatedly)
 simpleInline border construct = try $ do
   notAfterString
   border *> notFollowedBy (oneOf " \t\n\r")
@@ -700,7 +704,7 @@ simpleInline border construct = try $ do
            then body
            else B.spanWith attr body
 
-groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
 groupedInlineMarkup = try $ do
     char '['
     sp1 <- option mempty $ B.space <$ whitespace
@@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do
     char ']'
     return $ sp1 <> result <> sp2
 
-eof' :: Monad m => ParserT Text s m Char
+eof' :: Monad m => ParserT Sources s m Char
 eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index fb4b662c5..5c414fdec 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -30,23 +30,23 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Logging (Verbosity (..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, safeRead)
+import Text.Pandoc.Shared (safeRead)
 import Text.Pandoc.XML (fromEntities)
 import Text.Printf (printf)
 
 -- | Read TikiWiki from an input string and return a Pandoc document.
-readTikiWiki :: PandocMonad m
+readTikiWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTikiWiki opts s = do
-  res <- readWithM parseTikiWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TikiWikiParser = ParserT Text ParserState
+type TikiWikiParser = ParserT Sources ParserState
 
 --
 -- utility functions
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index d355a4b55..6f92f0063 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
 
-type T2T = ParserT Text ParserState (Reader T2TMeta)
+type T2T = ParserT Sources ParserState (Reader T2TMeta)
 
 -- | An object for the T2T macros meta information
 -- the contents of each field is simply substituted verbatim into the file
@@ -68,15 +68,15 @@ getT2TMeta = do
                      (intercalate ", " inps) outp
 
 -- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: PandocMonad m
+readTxt2Tags :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readTxt2Tags opts s = do
+  let sources = ensureFinalNewlines 2 (toSources s)
   meta <- getT2TMeta
   let parsed = flip runReader meta $
-        readWithM parseT2T (def {stateOptions = opts}) $
-        crFilter s <> "\n\n"
+        readWithM parseT2T (def {stateOptions = opts}) sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 74dac5ea7..460f304c4 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
                             many1Till, orderedListMarker, readWithM,
                             registerHeader, spaceChar, stateMeta,
                             stateOptions, uri, manyTillChar, manyChar, textStr,
-                            many1Char, countChar, many1TillChar)
-import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+                            many1Char, countChar, many1TillChar,
+                            alphaNum, anyChar, char, newline, noneOf, oneOf,
+                            space, spaces, string)
+import Text.Pandoc.Sources (ToSources(..), Sources)
+import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
                            isURI, tshow)
-import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
-                         spaces, string)
 import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
                                manyTill, notFollowedBy, option, skipMany1)
 import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
 
-readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readVimwiki opts s = do
-  res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
+  let sources = toSources s
+  res <- readWithM parseVimwiki def{ stateOptions = opts } sources
   case res of
        Left e       -> throwError e
        Right result -> return result
 
-type VwParser = ParserT Text ParserState
+type VwParser = ParserT Sources ParserState
 
 
 -- constants
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index e389c1727..920edca7b 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -298,6 +298,7 @@ tabFilter tabStop = T.unlines . map go . T.lines
                        (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
                        <> go (T.drop 1 s2)
 
+{-# DEPRECATED crFilter "readers filter crs automatically" #-}
 -- | Strip out DOS line endings.
 crFilter :: T.Text -> T.Text
 crFilter = T.filter (/= '\r')
diff --git a/src/Text/Pandoc/Sources.hs b/src/Text/Pandoc/Sources.hs
new file mode 100644
index 000000000..5511ccfb8
--- /dev/null
+++ b/src/Text/Pandoc/Sources.hs
@@ -0,0 +1,195 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Sources
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Defines Sources object to be used as input to pandoc parsers and redefines Char
+parsers so they get source position information from it.
+-}
+
+module Text.Pandoc.Sources
+  ( Sources(..)
+  , ToSources(..)
+  , UpdateSourcePos(..)
+  , sourcesToText
+  , initialSourceName
+  , addToSources
+  , ensureFinalNewlines
+  , addToInput
+  , satisfy
+  , oneOf
+  , noneOf
+  , anyChar
+  , char
+  , string
+  , newline
+  , space
+  , spaces
+  , letter
+  , digit
+  , hexDigit
+  , alphaNum
+  )
+where
+import qualified Text.Parsec as P
+import Text.Parsec (Stream(..), ParsecT)
+import Text.Parsec.Pos as P
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
+import Data.String (IsString(..))
+import qualified Data.List.NonEmpty as NonEmpty
+
+-- | A list of inputs labeled with source positions.  It is assumed
+-- that the 'Text's have @\n@ line endings.
+newtype Sources = Sources { unSources :: [(SourcePos, Text)] }
+  deriving (Show, Semigroup, Monoid)
+
+instance Monad m => Stream Sources m Char where
+  uncons (Sources []) = return Nothing
+  uncons (Sources ((pos,t):rest)) =
+    case T.uncons t of
+      Nothing -> uncons (Sources rest)
+      Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest))
+
+instance IsString Sources where
+  fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))]
+
+class ToSources a where
+  toSources :: a -> Sources
+
+instance ToSources Text where
+  toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)]
+
+instance ToSources [(FilePath, Text)] where
+  toSources = Sources
+            . map (\(fp,t) ->
+                    (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n'))
+
+instance ToSources Sources where
+  toSources = id
+
+sourcesToText :: Sources -> Text
+sourcesToText (Sources xs) = mconcat $ map snd xs
+
+addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
+addToSources pos t = do
+  curpos <- P.getPosition
+  Sources xs <- P.getInput
+  let xs' = case xs of
+               [] -> []
+               ((_,t'):rest) -> (curpos,t'):rest
+  P.setInput $ Sources ((pos, T.filter (/='\r') t):xs')
+
+ensureFinalNewlines :: Int -- ^ number of trailing newlines
+                    -> Sources
+                    -> Sources
+ensureFinalNewlines n (Sources xs) =
+  case NonEmpty.nonEmpty xs of
+    Nothing -> Sources [(initialPos "", T.replicate n "\n")]
+    Just lst ->
+      case NonEmpty.last lst of
+        (spos, t) ->
+          case T.length (T.takeWhileEnd (=='\n') t) of
+            len | len >= n -> Sources xs
+                | otherwise -> Sources (NonEmpty.init lst ++
+                                        [(spos,
+                                          t <> T.replicate (n - len) "\n")])
+
+class UpdateSourcePos s c where
+  updateSourcePos :: SourcePos -> c -> s -> SourcePos
+
+instance UpdateSourcePos Text Char where
+   updateSourcePos pos c _ = updatePosChar pos c
+
+instance UpdateSourcePos Sources Char where
+   updateSourcePos pos c sources =
+     case sources of
+       Sources [] -> updatePosChar pos c
+       Sources ((_,t):(pos',_):_)
+         | T.null t  -> pos'
+       Sources _ ->
+           case c of
+             '\n' -> incSourceLine (setSourceColumn pos 1) 1
+             '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4))
+             _    -> incSourceColumn pos 1
+
+-- | Get name of first source in 'Sources'.
+initialSourceName :: Sources -> FilePath
+initialSourceName (Sources []) = ""
+initialSourceName (Sources ((pos,_):_)) = sourceName pos
+
+-- | Add some text to the beginning of the input sources.
+-- This simplifies code that expands macros.
+addToInput :: Monad m => Text -> ParsecT Sources u m ()
+addToInput t = do
+  Sources xs <- P.getInput
+  case xs of
+    [] -> P.setInput $ Sources [(initialPos "",t)]
+    (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest)
+
+-- We need to redefine the parsers in Text.Parsec.Char so that they
+-- update source positions properly from the Sources stream.
+
+satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => (Char -> Bool) -> ParsecT s u m Char
+satisfy f = P.tokenPrim show updateSourcePos matcher
+ where
+  matcher c = if f c then Just c else Nothing
+
+oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => [Char] -> ParsecT s u m Char
+oneOf cs = satisfy (`elem` cs)
+
+noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => [Char] -> ParsecT s u m Char
+noneOf cs = satisfy (`notElem` cs)
+
+anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+        => ParsecT s u m Char
+anyChar = satisfy (const True)
+
+char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+     => Char -> ParsecT s u m Char
+char c = satisfy (== c)
+
+string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => [Char] -> ParsecT s u m [Char]
+string = mapM char
+
+newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+        => ParsecT s u m Char
+newline = satisfy (== '\n')
+
+space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => ParsecT s u m Char
+space = satisfy isSpace
+
+spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => ParsecT s u m ()
+spaces = P.skipMany space P.<?> "white space"
+
+letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => ParsecT s u m Char
+letter = satisfy isLetter
+
+alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => ParsecT s u m Char
+alphaNum = satisfy isAlphaNum
+
+digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => ParsecT s u m Char
+digit = satisfy isDigit
+
+hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => ParsecT s u m Char
+hexDigit = satisfy isHexDigit
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 6e38da21a..f055ab197 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -374,8 +374,8 @@ tests = [ testGroup "inline code"
         , testGroup "lhs"
           [ test (purely $ readMarkdown def{ readerExtensions = enableExtension
                        Ext_literate_haskell pandocExtensions })
-              "inverse bird tracks and html" $
-              "> a\n\n< b\n\n<div>\n"
+              "inverse bird tracks and html"
+              $ ("> a\n\n< b\n\n<div>\n" :: Text)
               =?> codeBlockWith ("",["haskell","literate"],[]) "a"
                   <>
                   codeBlockWith ("",["haskell"],[]) "b"
-- 
cgit v1.2.3


From 5736b331d8ecaa12cc3e2712211ada37c665a93a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 19 May 2021 16:14:49 -0700
Subject: LaTeX reader: better support for `\xspace`.

Previously we only supported it in inline contexts; now
we support it in all contexts, including math.

Partially addresses #7299.
---
 src/Text/Pandoc/Readers/LaTeX.hs         | 12 ------------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 21 +++++++++++++++++++--
 test/command/4442.md                     |  2 +-
 test/command/7299.md                     | 23 +++++++++++++++++++++++
 4 files changed, 43 insertions(+), 15 deletions(-)
 create mode 100644 test/command/7299.md

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f90d562ae..2ace18d1b 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -232,16 +232,6 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
-
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
   Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
@@ -417,8 +407,6 @@ inlineCommands = M.unions
     -- LaTeX colors
     , ("textcolor", coloredInline "color")
     , ("colorbox", coloredInline "background-color")
-    -- xspace
-    , ("xspace", doxspace)
     -- etoolbox
     , ("ifstrequal", ifstrequal)
     , ("newtoggle", braced >>= newToggle)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 35ce3509d..b6804a825 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -464,7 +464,7 @@ satisfyTok f = do
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
   st <- getState
-  unless (sVerbatimMode st || M.null (sMacros st)) $ do
+  unless (sVerbatimMode st) $
     getInput >>= doMacros' 1 >>= setInput
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
@@ -526,7 +526,7 @@ doMacros' n inp =
         $ throwError $ PandocMacroLoop name
       macros <- sMacros <$> getState
       case M.lookup name macros of
-           Nothing -> mzero
+           Nothing -> trySpecialMacro name ts
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
              let getargs' = do
                    args <-
@@ -554,6 +554,23 @@ doMacros' n inp =
                    ExpandWhenUsed    -> doMacros' (n' + 1) result
                    ExpandWhenDefined -> return result
 
+-- | Certain macros do low-level tex manipulations that can't
+-- be represented in our Macro type, so we handle them here.
+trySpecialMacro :: PandocMonad m => Text -> [Tok] -> LP m [Tok]
+trySpecialMacro "xspace" ts = do
+  ts' <- doMacros' 1 ts
+  case ts' of
+    Tok pos Word t : _
+      | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts'
+    _ -> return ts'
+trySpecialMacro _ _ = mzero
+
+startsWithAlphaNum :: Text -> Bool
+startsWithAlphaNum t =
+  case T.uncons t of
+       Just (c, _) | isAlphaNum c -> True
+       _           -> False
+
 setpos :: SourcePos -> Tok -> Tok
 setpos spos (Tok _ tt txt) = Tok spos tt txt
 
diff --git a/test/command/4442.md b/test/command/4442.md
index 8574fe759..447073406 100644
--- a/test/command/4442.md
+++ b/test/command/4442.md
@@ -5,5 +5,5 @@
 ^D
 \newcommand{\myFruit}{Mango\xspace}
 
-Mango\xspace is the king of fruits.
+Mango is the king of fruits.
 ```
diff --git a/test/command/7299.md b/test/command/7299.md
new file mode 100644
index 000000000..0847c40ce
--- /dev/null
+++ b/test/command/7299.md
@@ -0,0 +1,23 @@
+```
+% pandoc -f latex -t plain
+$1-{\ensuremath{r}\xspace}$
+^D
+1 − r
+```
+
+```
+% pandoc -f latex -t plain
+\newcommand{\foo}{Foo\xspace}
+
+$\text{\foo bar}$
+^D
+Foo bar
+```
+
+```
+% pandoc -f latex -t plain
+a\xspace b
+^D
+a b
+```
+
-- 
cgit v1.2.3


From 8437a4a002210a33ee721c58f5f95605898a8e1b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 08:15:48 -0700
Subject: LaTeX reader: support `\pm` in `\SI{..}`.

Closes #6620.
---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 4 +++-
 test/command/6620.md                     | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 1952f4e1a..c4fb06700 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -45,7 +45,9 @@ doSI tok = do
                       unit]
 
 doSInum :: PandocMonad m => LP m Inlines
-doSInum = skipopts *> (tonum . untokenize <$> braced)
+doSInum = skipopts *> (tonum . untokenize . map convertPM <$> braced)
+  where convertPM (Tok pos (CtrlSeq "pm") _) = Tok pos Word "\xb1\xa0"
+        convertPM t = t
 
 tonum :: Text -> Inlines
 tonum value =
diff --git a/test/command/6620.md b/test/command/6620.md
index 314200d30..00643e57c 100644
--- a/test/command/6620.md
+++ b/test/command/6620.md
@@ -7,9 +7,12 @@
 \SI{0.135(21)}{\m}
 
 \SI{12.3(60)}{\m}
+
+\SI{10.0 \pm 3.3}{\ms}
 ^D
 <p>23 ± 2 m</p>
 <p>125 ± 12 m</p>
 <p>0.135 ± 0.021 m</p>
 <p>12.3 ± 6 m</p>
+<p>10.0 ± 3.3 ms</p>
 ```
-- 
cgit v1.2.3


From a366bd6abccd18a49f0033de850ddd53830cc5ed Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 09:03:29 -0700
Subject: LaTeX reader: Fix parsing of `+-` in siunitx numbers.

See #6658.
---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 11 +++++++----
 test/command/6620.md                     |  5 ++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index c4fb06700..5e140ef7a 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -45,9 +45,7 @@ doSI tok = do
                       unit]
 
 doSInum :: PandocMonad m => LP m Inlines
-doSInum = skipopts *> (tonum . untokenize . map convertPM <$> braced)
-  where convertPM (Tok pos (CtrlSeq "pm") _) = Tok pos Word "\xb1\xa0"
-        convertPM t = t
+doSInum = skipopts *> (tonum . untokenize <$> braced)
 
 tonum :: Text -> Inlines
 tonum value =
@@ -74,12 +72,16 @@ parseNumPart =
   parseDecimalNum <|>
   parseComma <|>
   parsePlusMinus <|>
+  parsePM <|>
   parseI <|>
   parseExp <|>
   parseX <|>
   parseSpace
  where
-  parseDecimalNum = do
+  parseDecimalNum, parsePlusMinus, parsePM,
+    parseComma, parseI, parseX,
+    parseExp, parseSpace :: Parser Text () Inlines
+  parseDecimalNum = try $ do
     pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
     basenum <- (pref <>) . T.pack
                 <$> many1 (satisfy (\c -> isDigit c || c == '.'))
@@ -100,6 +102,7 @@ parseNumPart =
                                                | otherwise -> "." <> t
   parseComma = str "." <$ char ','
   parsePlusMinus = str "\xa0\xb1\xa0" <$ try (string "+-")
+  parsePM = str "\xa0\xb1\xa0" <$ try (string "\\pm")
   parseParens =
     char '(' *> many1 (satisfy (\c -> isDigit c || c == '.')) <* char ')'
   parseI = str "i" <$ char 'i'
diff --git a/test/command/6620.md b/test/command/6620.md
index 00643e57c..b19cd1972 100644
--- a/test/command/6620.md
+++ b/test/command/6620.md
@@ -9,10 +9,13 @@
 \SI{12.3(60)}{\m}
 
 \SI{10.0 \pm 3.3}{\ms}
+
+\SI{10.0 +- 3.3}{\ms}
 ^D
 <p>23 ± 2 m</p>
 <p>125 ± 12 m</p>
 <p>0.135 ± 0.021 m</p>
 <p>12.3 ± 6 m</p>
-<p>10.0 ± 3.3 ms</p>
+<p>10.0 ± 3.3 ms</p>
+<p>10.0 ± 3.3 ms</p>
 ```
-- 
cgit v1.2.3


From 5dc917da3ed997c6e48e22bde242f0f8e1ae5333 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 09:11:26 -0700
Subject: LaTeX reader siunitx: add leading 0 to numbers starting with .

---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 7 +++++--
 test/command/6620.md                     | 3 ---
 test/command/6658.md                     | 9 +++++++++
 3 files changed, 14 insertions(+), 5 deletions(-)
 create mode 100644 test/command/6658.md

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 5e140ef7a..1474329d4 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -83,8 +83,11 @@ parseNumPart =
     parseExp, parseSpace :: Parser Text () Inlines
   parseDecimalNum = try $ do
     pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
-    basenum <- (pref <>) . T.pack
-                <$> many1 (satisfy (\c -> isDigit c || c == '.'))
+    basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
+    let basenum = pref <> T.pack
+                    (case basenum' of
+                      '.':_ -> '0':basenum'
+                      _ -> basenum')
     uncertainty <- option mempty $ T.pack <$> parseParens
     if T.null uncertainty
        then return $ str basenum
diff --git a/test/command/6620.md b/test/command/6620.md
index b19cd1972..e448ca6b5 100644
--- a/test/command/6620.md
+++ b/test/command/6620.md
@@ -9,13 +9,10 @@
 \SI{12.3(60)}{\m}
 
 \SI{10.0 \pm 3.3}{\ms}
-
-\SI{10.0 +- 3.3}{\ms}
 ^D
 <p>23 ± 2 m</p>
 <p>125 ± 12 m</p>
 <p>0.135 ± 0.021 m</p>
 <p>12.3 ± 6 m</p>
 <p>10.0 ± 3.3 ms</p>
-<p>10.0 ± 3.3 ms</p>
 ```
diff --git a/test/command/6658.md b/test/command/6658.md
new file mode 100644
index 000000000..bcd174465
--- /dev/null
+++ b/test/command/6658.md
@@ -0,0 +1,9 @@
+```
+pandoc -f latex
+\SI{10.0 +- 3.3}{\ms}
+
+\num{.3e45}
+^D
+<p>10.0 ± 3.3 ms</p>
+<p>0.3 × 10<sup>45</sup></p>
+```
-- 
cgit v1.2.3


From bc5058234feab7646f58dc01379b4eadf95bf411 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 09:18:23 -0700
Subject: LaTeX reader sinuitx: fix + sign on ang.

---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 9 ++++++---
 test/command/6658.md                     | 3 +++
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 1474329d4..72f81dcde 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -118,11 +118,14 @@ doSIang :: PandocMonad m => LP m Inlines
 doSIang = do
   skipopts
   ps <- T.splitOn ";" . untokenize <$> braced
+  let dropPlus t = case T.uncons t of
+                     Just ('+',t') -> t'
+                     _ -> t
   case ps ++ repeat "" of
     (d:m:s:_) -> return $
-      (if T.null d then mempty else str d <> str "\xb0") <>
-      (if T.null m then mempty else str m <> str "\x2032") <>
-      (if T.null s then mempty else str s <> str "\x2033")
+      (if T.null d then mempty else str (dropPlus d) <> str "\xb0") <>
+      (if T.null m then mempty else str (dropPlus m) <> str "\x2032") <>
+      (if T.null s then mempty else str (dropPlus s) <> str "\x2033")
     _ -> return mempty
 
 -- converts e.g. \SIrange{100}{200}{\ms} to "100 ms--200 ms"
diff --git a/test/command/6658.md b/test/command/6658.md
index bcd174465..0a8512f85 100644
--- a/test/command/6658.md
+++ b/test/command/6658.md
@@ -3,7 +3,10 @@ pandoc -f latex
 \SI{10.0 +- 3.3}{\ms}
 
 \num{.3e45}
+
+\ang{+10;+3;}
 ^D
 <p>10.0 ± 3.3 ms</p>
 <p>0.3 × 10<sup>45</sup></p>
+<p>10°3′</p>
 ```
-- 
cgit v1.2.3


From 4e990a8cf9207f2315d6a55a45c93c2857663316 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 10:12:44 -0700
Subject: LaTeX/siunitx: fix parsing of `\cubic` etc. See #6658.

---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 85 +++++++++++++++++++-------------
 test/command/6658.md                     |  3 ++
 2 files changed, 53 insertions(+), 35 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 72f81dcde..63ab7267d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Readers.LaTeX.SIunitx
   ( siunitxCommands )
@@ -154,40 +155,55 @@ doSIrange includeUnits tok = do
 emptyOr160 :: Inlines -> Inlines
 emptyOr160 x = if x == mempty then x else str "\160"
 
-siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = try (do
-  Tok _ (CtrlSeq name) _ <- anyControlSeq
-  case name of
-    "square" -> do
-       unit <- siUnit tok
-       return $ unit <> superscript "2"
-    "cubic" -> do
-       unit <- siUnit tok
-       return $ unit <> superscript "3"
-    "raisetothe" -> do
-       n <- tok
-       unit <- siUnit tok
-       return $ unit <> superscript n
-    _ ->
-       case M.lookup name siUnitMap of
-            Just il ->
-              option il $
-                choice
-                 [ (il <> superscript "2") <$ controlSeq "squared"
-                 , (il <> superscript "3") <$ controlSeq "cubed"
-                 , (\n -> il <> superscript n) <$> (controlSeq "tothe" *> tok)
-                 ]
-            Nothing -> fail "not an siunit unit command")
- <|> (lookAhead anyControlSeq >> tok)
- <|> (do Tok _ Word t <- satisfyTok isWordTok
-         return $ str t)
- <|> (symbol '^' *> (superscript <$> tok))
- <|> (symbol '_' *> (subscript <$> tok))
- <|> ("\xa0" <$ symbol '.')
- <|> ("\xa0" <$ symbol '~')
- <|> tok
- <|> (do Tok _ _ t <- anyTok
-         return (str t))
+siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines
+siUnit tok = mconcat <$> many1 siUnitPart
+ where
+  siUnitPart :: LP m Inlines
+  siUnitPart =
+        (siPrefix <*> siUnitPart)
+    <|> (do u <- siBase <|> tok
+            option u $ siSuffix <*> pure u)
+  siPrefix :: LP m (Inlines -> Inlines)
+  siPrefix =
+       (do _ <- controlSeq "per"
+           skipopts -- TODO handle option
+           return (str "/" <>))
+   <|> (do _ <- controlSeq "square"
+           skipopts
+           return (<> superscript "2"))
+   <|> (do _ <- controlSeq "cubic"
+           skipopts
+           return (<> superscript "3"))
+   <|> (do _ <- controlSeq "raisetothe"
+           skipopts
+           n <- tok
+           return (<> superscript n))
+  siSuffix :: LP m (Inlines -> Inlines)
+  siSuffix =
+       (do _ <- controlSeq "squared"
+           skipopts
+           return (<> superscript "2"))
+   <|> (do _ <- controlSeq "cubed"
+           skipopts
+           return (<> superscript "3"))
+   <|> (do _ <- controlSeq "tothe"
+           skipopts
+           n <- tok
+           return (<> superscript n))
+  siBase :: LP m Inlines
+  siBase = mconcat <$> many1
+    ((try
+       (do Tok _ (CtrlSeq name) _ <- anyControlSeq
+           case M.lookup name siUnitMap of
+              Just il -> pure il
+              Nothing -> fail "not a unit command"))
+    <|> (do Tok _ Word t <- satisfyTok isWordTok
+            return $ str t)
+    <|> (symbol '^' *> (superscript <$> tok))
+    <|> (symbol '_' *> (subscript <$> tok))
+    <|> (str "\xa0" <$ symbol '.')
+    <|> (str "\xa0" <$ symbol '~')
+     )
 
 siUnitMap :: M.Map Text Inlines
 siUnitMap = M.fromList
@@ -347,7 +363,6 @@ siUnitMap = M.fromList
   , ("Pa", str "Pa")
   , ("pascal", str "Pa")
   , ("percent", str "%")
-  , ("per", str "/")
   , ("peta", str "P")
   , ("pico", str "p")
   , ("planckbar", emph (str "\x210f"))
diff --git a/test/command/6658.md b/test/command/6658.md
index 0a8512f85..549610992 100644
--- a/test/command/6658.md
+++ b/test/command/6658.md
@@ -5,8 +5,11 @@ pandoc -f latex
 \num{.3e45}
 
 \ang{+10;+3;}
+
+\si{\gram\per\cubic\centi\metre}
 ^D
 <p>10.0 ± 3.3 ms</p>
 <p>0.3 × 10<sup>45</sup></p>
 <p>10°3′</p>
+<p>g/cm<sup>3</sup></p>
 ```
-- 
cgit v1.2.3


From bb11f5fb86993559f9999d4795355b02ae78cc3d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 12:06:15 -0700
Subject: LaTeX reader: More siunitx improvements.  Closes #6658.

There's still one slight divergence from the siunitx behavior:
we get 'kg m/A/s' instead of 'kg m/(A s)'. At the moment I'm
not going to worry about that.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |   3 +-
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 138 +++++++++++++++++++++----------
 test/command/6658.md                     |  72 ++++++++++++++--
 3 files changed, 161 insertions(+), 52 deletions(-)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index b6804a825..1c77eb299 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -806,7 +806,8 @@ withRaw parser = do
 
 keyval :: PandocMonad m => LP m (Text, Text)
 keyval = try $ do
-  Tok _ Word key <- satisfyTok isWordTok
+  key <- untokenize <$> many1 (notFollowedBy (symbol '=') >>
+                         (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok))
   sp
   val <- option mempty $ do
            symbol '='
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 63ab7267d..b8bf0ce7f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -10,27 +10,32 @@ import Text.Pandoc.Class
 import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
 import Control.Applicative ((<|>))
+import Control.Monad (void)
 import qualified Data.Map as M
 import Data.Char (isDigit)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.List (intersperse)
-
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Walk (walk)
 
 siunitxCommands :: PandocMonad m
                  => LP m Inlines -> M.Map Text (LP m Inlines)
 siunitxCommands tok = M.fromList
-  [ ("si", skipopts *> dosi tok)
+  [ ("si", dosi tok)
   , ("SI", doSI tok)
   , ("SIrange", doSIrange True tok)
   , ("numrange", doSIrange False tok)
   , ("numlist", doSInumlist)
+  , ("SIlist", doSIlist tok)
   , ("num", doSInum)
   , ("ang", doSIang)
   ]
 
 dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
-dosi tok = grouped (siUnit tok) <|> siUnit tok
+dosi tok = do
+  options <- option [] keyvals
+  grouped (siUnit options tok) <|> siUnit options tok
 
 -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
 doSI :: PandocMonad m => LP m Inlines -> LP m Inlines
@@ -65,9 +70,29 @@ doSInumlist = do
              mconcat (intersperse (str "," <> space) (init xs)) <>
              text ", & " <> last xs
 
+doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines
+doSIlist tok = do
+  options <- option [] keyvals
+  nums <- map tonum . T.splitOn ";" . untokenize <$> braced
+  unit <- grouped (siUnit options tok) <|> siUnit options tok
+  let xs = map (<> (str "\xa0" <> unit)) nums
+  case xs of
+    []  -> return mempty
+    [x] -> return x
+    _   -> return $
+             mconcat (intersperse (str "," <> space) (init xs)) <>
+             text ", & " <> last xs
+
 parseNum :: Parser Text () Inlines
 parseNum = (mconcat <$> many parseNumPart) <* eof
 
+minus :: Text
+minus = "\x2212"
+
+hyphenToMinus :: Inline -> Inline
+hyphenToMinus (Str t) = Str (T.replace "-" minus t)
+hyphenToMinus x = x
+
 parseNumPart :: Parser Text () Inlines
 parseNumPart =
   parseDecimalNum <|>
@@ -83,7 +108,7 @@ parseNumPart =
     parseComma, parseI, parseX,
     parseExp, parseSpace :: Parser Text () Inlines
   parseDecimalNum = try $ do
-    pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
+    pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-')
     basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
     let basenum = pref <> T.pack
                     (case basenum' of
@@ -155,20 +180,30 @@ doSIrange includeUnits tok = do
 emptyOr160 :: Inlines -> Inlines
 emptyOr160 x = if x == mempty then x else str "\160"
 
-siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = mconcat <$> many1 siUnitPart
+siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines
+siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart
  where
   siUnitPart :: LP m Inlines
-  siUnitPart =
-        (siPrefix <*> siUnitPart)
-    <|> (do u <- siBase <|> tok
-            option u $ siSuffix <*> pure u)
+  siUnitPart = try $ do
+    skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1)
+    x <- ((siPrefix <*> siBase)
+            <|> (do u <- siBase <|> tok
+                    option u $ siSuffix <*> pure u))
+    option x (siInfix x)
+  siInfix :: Inlines -> LP m Inlines
+  siInfix u1 = try $
+       (do _ <- controlSeq "per"
+           u2 <- siUnitPart
+           let useSlash = lookup "per-mode" options == Just "symbol"
+           if useSlash
+              then return (u1 <> str "/" <> u2)
+              else return (u1 <> str "\xa0" <> negateExponent u2))
+   <|> (do _ <- symbol '/'
+           u2 <- siUnitPart
+           return (u1 <> str "/" <> u2))
   siPrefix :: LP m (Inlines -> Inlines)
   siPrefix =
-       (do _ <- controlSeq "per"
-           skipopts -- TODO handle option
-           return (str "/" <>))
-   <|> (do _ <- controlSeq "square"
+       (do _ <- controlSeq "square"
            skipopts
            return (<> superscript "2"))
    <|> (do _ <- controlSeq "cubic"
@@ -176,7 +211,7 @@ siUnit tok = mconcat <$> many1 siUnitPart
            return (<> superscript "3"))
    <|> (do _ <- controlSeq "raisetothe"
            skipopts
-           n <- tok
+           n <- walk hyphenToMinus <$> tok
            return (<> superscript n))
   siSuffix :: LP m (Inlines -> Inlines)
   siSuffix =
@@ -188,23 +223,57 @@ siUnit tok = mconcat <$> many1 siUnitPart
            return (<> superscript "3"))
    <|> (do _ <- controlSeq "tothe"
            skipopts
-           n <- tok
+           n <- walk hyphenToMinus <$> tok
            return (<> superscript n))
+   <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> superscript n)))
+   <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> subscript n)))
+  negateExponent :: Inlines -> Inlines
+  negateExponent ils =
+    case Seq.viewr (unMany ils) of
+      xs Seq.:> Superscript ss -> (Many xs) <>
+                                     superscript (str minus <> fromList ss)
+      _ -> ils <> superscript (str (minus <> "1"))
   siBase :: LP m Inlines
-  siBase = mconcat <$> many1
+  siBase =
     ((try
        (do Tok _ (CtrlSeq name) _ <- anyControlSeq
-           case M.lookup name siUnitMap of
-              Just il -> pure il
-              Nothing -> fail "not a unit command"))
+           case M.lookup name siUnitModifierMap of
+              Just il -> (il <>) <$> siBase
+              Nothing ->
+                case M.lookup name siUnitMap of
+                   Just il -> pure il
+                   Nothing -> fail "not a unit command"))
     <|> (do Tok _ Word t <- satisfyTok isWordTok
             return $ str t)
-    <|> (symbol '^' *> (superscript <$> tok))
-    <|> (symbol '_' *> (subscript <$> tok))
-    <|> (str "\xa0" <$ symbol '.')
-    <|> (str "\xa0" <$ symbol '~')
      )
 
+siUnitModifierMap :: M.Map Text Inlines
+siUnitModifierMap = M.fromList
+  [ ("atto", str "a")
+  , ("centi", str "c")
+  , ("deca", str "d")
+  , ("deci", str "d")
+  , ("deka", str "d")
+  , ("exa", str "E")
+  , ("femto", str "f")
+  , ("giga", str "G")
+  , ("hecto", str "h")
+  , ("kilo", str "k")
+  , ("mega", str "M")
+  , ("micro", str "μ")
+  , ("milli", str "m")
+  , ("nano", str "n")
+  , ("peta", str "P")
+  , ("pico", str "p")
+  , ("tera", str "T")
+  , ("yocto", str "y")
+  , ("yotta", str "Y")
+  , ("zepto", str "z")
+  , ("zetta", str "Z")
+  ]
+
 siUnitMap :: M.Map Text Inlines
 siUnitMap = M.fromList
   [ ("fg", str "fg")
@@ -303,7 +372,6 @@ siUnitMap = M.fromList
   , ("arcsecond", str "″")
   , ("astronomicalunit", str "ua")
   , ("atomicmassunit", str "u")
-  , ("atto", str "a")
   , ("bar", str "bar")
   , ("barn", str "b")
   , ("becquerel", str "Bq")
@@ -311,51 +379,38 @@ siUnitMap = M.fromList
   , ("bohr", emph (str "a") <> subscript (str "0"))
   , ("candela", str "cd")
   , ("celsius", str "°C")
-  , ("centi", str "c")
   , ("clight", emph (str "c") <> subscript (str "0"))
   , ("coulomb", str "C")
   , ("dalton", str "Da")
   , ("day", str "d")
-  , ("deca", str "d")
-  , ("deci", str "d")
   , ("decibel", str "db")
   , ("degreeCelsius",str "°C")
   , ("degree", str "°")
-  , ("deka", str "d")
   , ("electronmass", emph (str "m") <> subscript (str "e"))
   , ("electronvolt", str "eV")
   , ("elementarycharge", emph (str "e"))
-  , ("exa", str "E")
   , ("farad", str "F")
-  , ("femto", str "f")
-  , ("giga", str "G")
   , ("gram", str "g")
   , ("gray", str "Gy")
   , ("hartree", emph (str "E") <> subscript (str "h"))
   , ("hectare", str "ha")
-  , ("hecto", str "h")
   , ("henry", str "H")
   , ("hertz", str "Hz")
   , ("hour", str "h")
   , ("joule", str "J")
   , ("katal", str "kat")
   , ("kelvin", str "K")
-  , ("kilo", str "k")
   , ("kilogram", str "kg")
   , ("knot", str "kn")
   , ("liter", str "L")
   , ("litre", str "l")
   , ("lumen", str "lm")
   , ("lux", str "lx")
-  , ("mega", str "M")
   , ("meter", str "m")
   , ("metre", str "m")
-  , ("micro", str "μ")
-  , ("milli", str "m")
   , ("minute", str "min")
   , ("mmHg", str "mmHg")
   , ("mole", str "mol")
-  , ("nano", str "n")
   , ("nauticalmile", str "M")
   , ("neper", str "Np")
   , ("newton", str "N")
@@ -363,24 +418,17 @@ siUnitMap = M.fromList
   , ("Pa", str "Pa")
   , ("pascal", str "Pa")
   , ("percent", str "%")
-  , ("peta", str "P")
-  , ("pico", str "p")
   , ("planckbar", emph (str "\x210f"))
   , ("radian", str "rad")
   , ("second", str "s")
   , ("siemens", str "S")
   , ("sievert", str "Sv")
   , ("steradian", str "sr")
-  , ("tera", str "T")
   , ("tesla", str "T")
   , ("tonne", str "t")
   , ("volt", str "V")
   , ("watt", str "W")
   , ("weber", str "Wb")
-  , ("yocto", str "y")
-  , ("yotta", str "Y")
-  , ("zepto", str "z")
-  , ("zetta", str "Z")
   ]
 
 
diff --git a/test/command/6658.md b/test/command/6658.md
index 549610992..96700c8fe 100644
--- a/test/command/6658.md
+++ b/test/command/6658.md
@@ -1,15 +1,75 @@
 ```
-pandoc -f latex
-\SI{10.0 +- 3.3}{\ms}
+pandoc -f latex -t html
+\num{12345,67890}
+
+\num{1+-2i}
 
 \num{.3e45}
 
-\ang{+10;+3;}
+\num{1.654 x 2.34 x 3.430}
+
+\si{kg.m.s^{-1}}
+
+\si{\kilogram\metre\per\second}
+
+\si[per-mode=symbol]{\kilogram\metre\per\second}
+
+\si[per-mode=symbol]{\kilogram\metre\per\ampere\per\second}
+
+\numlist{10;20;30}
+
+\SIlist{0.13;0.67;0.80}{\milli\metre}
+
+\numrange{10}{20}
+
+\SIrange{0.13}{0.67}{\milli\metre}
+
+\ang{10}
+
+\ang{1;2;3}
+
+\ang{;;1}
+
+\ang{+10;;}
+
+\ang{-0;1;}
+
+\si{kg.m/s^2}
+
+\si{g_{polymer}~mol_{cat}.s^{-1}}
+
+\si{\kilo\gram\metre\per\square\second}
 
 \si{\gram\per\cubic\centi\metre}
+
+\si{\square\volt\cubic\lumen\per\farad}
+
+\si{\metre\squared\per\gray\cubic\lux}
+
+\si{\henry\second}
 ^D
-<p>10.0 ± 3.3 ms</p>
+<p>12345.67890</p>
+<p>1 ± 2i</p>
 <p>0.3 × 10<sup>45</sup></p>
-<p>10°3′</p>
-<p>g/cm<sup>3</sup></p>
+<p>1.654 × 2.34 × 3.430</p>
+<p>kg m s<sup>−1</sup></p>
+<p>kg m s<sup>−1</sup></p>
+<p>kg m/s</p>
+<p>kg m/A/s</p>
+<p>10, 20, &amp; 30</p>
+<p>0.13 mm, 0.67 mm, &amp; 0.80 mm</p>
+<p>10–20</p>
+<p>0.13 mm–0.67 mm</p>
+<p>10°</p>
+<p>1°2′3″</p>
+<p>1″</p>
+<p>10°</p>
+<p>-0°1′</p>
+<p>kg m/s<sup>2</sup></p>
+<p>g<sub>polymer</sub> mol<sub>cat</sub> s<sup>−1</sup></p>
+<p>kg m s<sup>−2</sup></p>
+<p>g cm<sup>−3</sup></p>
+<p>V<sup>2</sup> lm<sup>3</sup> F<sup>−1</sup></p>
+<p>m<sup>2</sup> Gy<sup>−1</sup> lx<sup>3</sup></p>
+<p>H s</p>
 ```
-- 
cgit v1.2.3


From 81eadfd99ad3e905b806cc6c80ab0fea0185286f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 26 May 2021 22:50:35 -0700
Subject: LaTeX reader: improve `\def` and implement `\newif`.

- Improve parsing of `\def` macros.  We previously set "verbatim mode"
  even for parsing the initial `\def`; this caused problems for things
  like
  ```
  \def\foo{\def\bar{BAR}}
  \foo
  \bar
  ```
- Implement `\newif`.
- Add tests.
---
 src/Text/Pandoc/Readers/LaTeX/Macro.hs   | 59 ++++++++++++++++++++++++--------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 19 +++++++++-
 test/command/newif.md                    | 55 +++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+), 15 deletions(-)
 create mode 100644 test/command/newif.md

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
index 607f5438c..5495a8e74 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -14,6 +14,7 @@ import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
 import Control.Applicative ((<|>), optional)
 import qualified Data.Map as M
 import Data.Text (Text)
+import qualified Data.Text as T
 
 macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
 macroDef constructor = do
@@ -22,9 +23,11 @@ macroDef constructor = do
       guardDisabled Ext_latex_macros)
      <|> return mempty
   where commandDef = do
-          (name, macro') <- newcommand <|> letmacro <|> defmacro
+          nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif
           guardDisabled Ext_latex_macros <|>
-           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
+           mapM_ (\(name, macro') ->
+                   updateState (\s -> s{ sMacros = M.insert name macro'
+                                          (sMacros s) })) nameMacroPairs
         environmentDef = do
           mbenv <- newenvironment
           case mbenv of
@@ -40,7 +43,7 @@ macroDef constructor = do
         -- @\newcommand{\envname}[n-args][default]{begin}@
         -- @\newcommand{\endenvname}@
 
-letmacro :: PandocMonad m => LP m (Text, Macro)
+letmacro :: PandocMonad m => LP m [(Text, Macro)]
 letmacro = do
   controlSeq "let"
   (name, contents) <- withVerbatimMode $ do
@@ -53,18 +56,47 @@ letmacro = do
     contents <- bracedOrToken
     return (name, contents)
   contents' <- doMacros' 0 contents
-  return (name, Macro ExpandWhenDefined [] Nothing contents')
+  return [(name, Macro ExpandWhenDefined [] Nothing contents')]
 
-defmacro :: PandocMonad m => LP m (Text, Macro)
-defmacro = try $
+defmacro :: PandocMonad m => LP m [(Text, Macro)]
+defmacro = do
   -- we use withVerbatimMode, because macros are to be expanded
   -- at point of use, not point of definition
+  controlSeq "def"
   withVerbatimMode $ do
-    controlSeq "def"
     Tok _ (CtrlSeq name) _ <- anyControlSeq
     argspecs <- many (argspecArg <|> argspecPattern)
     contents <- bracedOrToken
-    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
+    return [(name, Macro ExpandWhenUsed argspecs Nothing contents)]
+
+-- @\newif\iffoo@ defines:
+-- \iffoo to be \iffalse
+-- \footrue to be a command that defines \iffoo to be \iftrue
+-- \foofalse to be a command that defines \iffoo to be \iffalse
+newif :: PandocMonad m => LP m [(Text, Macro)]
+newif = do
+  controlSeq "newif"
+  withVerbatimMode $ do
+    Tok pos (CtrlSeq name) _ <- anyControlSeq
+    -- \def\iffoo\iffalse
+    -- \def\footrue{\def\iffoo\iftrue}
+    -- \def\foofalse{\def\iffoo\iffalse}
+    let base = T.drop 2 name
+    return [ (name, Macro ExpandWhenUsed [] Nothing
+                    [Tok pos (CtrlSeq "iffalse") "\\iffalse"])
+           , (base <> "true",
+                   Macro ExpandWhenUsed [] Nothing
+                   [ Tok pos (CtrlSeq "def") "\\def"
+                   , Tok pos (CtrlSeq name) ("\\" <> name)
+                   , Tok pos (CtrlSeq "iftrue") "\\iftrue"
+                   ])
+           , (base <> "false",
+                   Macro ExpandWhenUsed [] Nothing
+                   [ Tok pos (CtrlSeq "def") "\\def"
+                   , Tok pos (CtrlSeq name) ("\\" <> name)
+                   , Tok pos (CtrlSeq "iffalse") "\\iffalse"
+                   ])
+           ]
 
 argspecArg :: PandocMonad m => LP m ArgSpec
 argspecArg = do
@@ -77,10 +109,9 @@ argspecPattern =
                               (toktype' == Symbol || toktype' == Word) &&
                               (txt /= "{" && txt /= "\\" && txt /= "}")))
 
-newcommand :: PandocMonad m => LP m (Text, Macro)
+newcommand :: PandocMonad m => LP m [(Text, Macro)]
 newcommand = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+  Tok pos (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
                              controlSeq "renewcommand" <|>
                              controlSeq "providecommand" <|>
                              controlSeq "DeclareMathOperator" <|>
@@ -112,9 +143,9 @@ newcommand = do
         Just macro
           | mtype == "newcommand" -> do
               report $ MacroAlreadyDefined txt pos
-              return (name, macro)
-          | mtype == "providecommand" -> return (name, macro)
-        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
+              return [(name, macro)]
+          | mtype == "providecommand" -> return [(name, macro)]
+        _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)]
 
 newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
 newenvironment = do
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 1c77eb299..a17b1f324 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -113,7 +113,6 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Shared
 import Text.Parsec.Pos
--- import Debug.Trace
 
 newtype DottedNum = DottedNum [Int]
   deriving (Show, Eq)
@@ -563,8 +562,26 @@ trySpecialMacro "xspace" ts = do
     Tok pos Word t : _
       | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts'
     _ -> return ts'
+trySpecialMacro "iftrue" ts = handleIf True ts
+trySpecialMacro "iffalse" ts = handleIf False ts
 trySpecialMacro _ _ = mzero
 
+handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok]
+handleIf b ts = do
+  res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts
+  case res' of
+    Left _ -> Prelude.fail "Could not parse conditional"
+    Right ts' -> return ts'
+
+ifParser :: PandocMonad m => Bool -> LP m [Tok]
+ifParser b = do
+  ifToks <- many (notFollowedBy (controlSeq "else" <|> controlSeq "fi")
+                    *> anyTok)
+  elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi"))
+                 <|> ([] <$ controlSeq "fi")
+  rest <- getInput
+  return $ (if b then ifToks else elseToks) ++ rest
+
 startsWithAlphaNum :: Text -> Bool
 startsWithAlphaNum t =
   case T.uncons t of
diff --git a/test/command/newif.md b/test/command/newif.md
new file mode 100644
index 000000000..f444f14c9
--- /dev/null
+++ b/test/command/newif.md
@@ -0,0 +1,55 @@
+```
+% pandoc -f latex -t plain
+\iftrue
+should print
+\iftrue
+should print
+\else
+should not print
+\fi
+\else
+should not print
+\fi
+
+\iffalse
+should not print
+\else
+\iftrue
+should print
+\else
+should not print
+\fi
+\fi
+
+\newif\ifepub
+
+\ifepub
+should not print
+\fi
+
+\epubtrue
+
+\ifepub
+should print
+\else
+should not print
+\fi
+
+\epubfalse
+
+\ifepub
+should not print
+\else
+should print
+\fi
+^D
+should print
+
+should print
+
+should print
+
+should print
+
+should print
+```
-- 
cgit v1.2.3


From ac0a9da6d85e9b7a73973a20019caa324b2c1aff Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 11 Jul 2021 13:01:45 -0700
Subject: Improved parsing of raw LaTeX from Text streams (rawLaTeXParser).

We now use source positions from the token stream to tell us
how much of the text stream to consume.  Getting this to
work required a few other changes to make token source positions
accurate.

Closes #7434.
---
 src/Text/Pandoc/Readers/LaTeX.hs         |  9 +++-----
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 39 ++++++++++++++++++++++++++++----
 test/command/7434.md                     | 15 ++++++++++++
 3 files changed, 52 insertions(+), 11 deletions(-)
 create mode 100644 test/command/7434.md

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9e14c159a..31c8d9095 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -136,8 +136,7 @@ rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
               => ParserT Sources s m Text
 rawLaTeXBlock = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenizeSources inp
+  toks <- getInputTokens
   snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
       <|> rawLaTeXParser toks True
              (do choice (map controlSeq
@@ -167,8 +166,7 @@ rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
                => ParserT Sources s m Text
 rawLaTeXInline = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenizeSources inp
+  toks <- getInputTokens
   raw <- snd <$>
           (   rawLaTeXParser toks True
               (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
@@ -182,8 +180,7 @@ rawLaTeXInline = do
 inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
 inlineCommand = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenizeSources inp
+  toks <- getInputTokens
   fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
           inlines
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index a17b1f324..9dac4d6ef 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE FlexibleInstances     #-}
+{-# LANGUAGE FlexibleContexts      #-}
 {-# LANGUAGE LambdaCase            #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings     #-}
@@ -28,6 +29,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , applyMacros
   , tokenize
   , tokenizeSources
+  , getInputTokens
   , untokenize
   , untoken
   , totoks
@@ -246,18 +248,23 @@ withVerbatimMode parser = do
        updateState $ \st -> st{ sVerbatimMode = False }
        return result
 
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
                => [Tok] -> Bool -> LP m a -> LP m a
                -> ParserT Sources s m (a, Text)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
   let lstate' = lstate { sMacros = extractMacros pstate }
+  let setStartPos = case toks of
+                      Tok pos _ _ : _ -> setPosition pos
+                      _ -> return ()
+  let preparser = setStartPos >> parser
   let rawparser = (,) <$> withRaw valParser <*> getState
-  res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+  res' <- lift $ runParserT (withRaw (preparser >> getPosition))
+                            lstate "chunk" toks
   case res' of
        Left _    -> mzero
-       Right toks' -> do
+       Right (endpos, toks') -> do
          res <- lift $ runParserT (do when retokenize $ do
                                         -- retokenize, applying macros
                                         ts <- many (satisfyTok (const True))
@@ -268,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do
               Left _    -> mzero
               Right ((val, raw), st) -> do
                 updateState (updateMacros (sMacros st <>))
-                void $ count (T.length (untokenize toks')) anyChar
+                let skipTilPos stopPos = do
+                      anyChar
+                      pos <- getPosition
+                      if pos >= stopPos
+                         then return ()
+                         else skipTilPos stopPos
+                skipTilPos endpos
                 let result = untokenize raw
                 -- ensure we end with space if input did, see #4442
                 let result' =
@@ -306,6 +319,17 @@ tokenizeSources = concatMap tokenizeSource . unSources
  where
    tokenizeSource (pos, t) = totoks pos t
 
+-- Return tokens from input sources. Ensure that starting position is
+-- correct.
+getInputTokens :: PandocMonad m => ParserT Sources s m [Tok]
+getInputTokens = do
+  pos <- getPosition
+  ss <- getInput
+  return $
+    case ss of
+      Sources [] -> []
+      Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest)
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
@@ -433,8 +457,13 @@ parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
 parseFromToks parser toks = do
   oldInput <- getInput
   setInput toks
+  oldpos <- getPosition
+  case toks of
+     Tok pos _ _ : _ -> setPosition pos
+     _ -> return ()
   result <- disablingWithRaw parser
   setInput oldInput
+  setPosition oldpos
   return result
 
 disablingWithRaw :: PandocMonad m => LP m a -> LP m a
@@ -458,7 +487,7 @@ satisfyTok f = do
                   | otherwise = Nothing
         updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
         updatePos _spos _ (Tok pos _ _ : _) = pos
-        updatePos spos _ []                 = incSourceColumn spos 1
+        updatePos spos (Tok _ _ t)  []      = incSourceColumn spos (T.length t)
 
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
diff --git a/test/command/7434.md b/test/command/7434.md
new file mode 100644
index 000000000..be8732bc2
--- /dev/null
+++ b/test/command/7434.md
@@ -0,0 +1,15 @@
+```
+% pandoc -f markdown -t native
+\begin{proof}
+\newcommand{\x}{\left.\right.}
+\x
+\end{proof}
+
+1234567890abcdefghi
+
+[\*\a](x)
+^D
+[RawBlock (Format "tex") "\\begin{proof}\n\\newcommand{\\x}{\\left.\\right.}\n\\left.\\right.\n\\end{proof}"
+,Para [Str "1234567890abcdefghi"]
+,Para [Link ("",[],[]) [Str "*",RawInline (Format "tex") "\\a"] ("x","")]]
+```
-- 
cgit v1.2.3


From 493522c5626464fb7a27216ccee381af4a199eef Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 16 Jul 2021 12:04:43 -0700
Subject: LaTeX reader: Support `\cline` in LaTeX tables.

Closes #7442.
---
 src/Text/Pandoc/Readers/LaTeX/Table.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text/Pandoc/Readers/LaTeX')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
index 7833da081..f56728fe1 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -41,6 +41,7 @@ hline :: PandocMonad m => LP m ()
 hline = try $ do
   spaces
   controlSeq "hline" <|>
+    (controlSeq "cline" <* braced) <|>
     -- booktabs rules:
     controlSeq "toprule" <|>
     controlSeq "bottomrule" <|>
-- 
cgit v1.2.3