about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2015-12-12 17:28:52 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2015-12-12 17:28:52 -0800
commit: 44120ea7165546152af88fd442c52ab0f201052e (patch)
tree: 34324b8b450b33f74437621dcf7087665dc0b78c /src
parent: 60d383e27e84022142ab666d4648a666cc5a026a (diff)
download: pandoc-44120ea7165546152af88fd442c52ab0f201052e.tar.gz
Implemented `east_asian_line_breaks` extension.
Text.Pandoc.Options: Added `Ext_east_asian_line_breaks` constructor to `Extension` (API change). This extension is like `ignore_line_breaks`, but smarter -- it only ignores line breaks between two East Asian wide characters. This makes it better suited for writing with a mix of East Asian and non-East Asian scripts. Closes #2586.
Diffstat (limited to 'src')
-rw-r--r-- src/Text/Pandoc/Options.hs 2
-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs 16
2 files changed, 17 insertions, 1 deletions
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 9f27f46f9..7dd47cd59 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -106,6 +106,8 @@ data Extension =
| Ext_subscript -- ^ Subscript using ~this~ syntax
| Ext_hard_line_breaks -- ^ All newlines become hard line breaks
| Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored
+ | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between
+ -- East Asian wide characters
| Ext_literate_haskell -- ^ Enable literate Haskell conventions
| Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions
| Ext_emoji -- ^ Support emoji like :smile:
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 0b7faadb7..999ab11de 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -40,6 +40,7 @@ import Data.Char ( isSpace, isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Generic (bottomUp)
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Yaml as Yaml
@@ -51,6 +52,7 @@ import qualified Data.Vector as V
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import Text.Pandoc.Options
import Text.Pandoc.Shared
+import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
@@ -356,7 +358,19 @@ parseMarkdown = do
st <- getState
let meta = runF (stateMeta' st) st
let Pandoc _ bs = B.doc $ runF blocks st
- return $ Pandoc meta bs
+ eastAsianLineBreaks <- option False $
+ True <$ guardEnabled Ext_east_asian_line_breaks
+ return $ (if eastAsianLineBreaks
+ then bottomUp softBreakFilter
+ else id) $ Pandoc meta bs
+
+softBreakFilter :: [Inline] -> [Inline]
+softBreakFilter (x:SoftBreak:y:zs) =
+ case (stringify x, stringify y) of
+ (xs@(_:_), (c:_))
+ | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
+ _ -> x:SoftBreak:y:zs
+softBreakFilter xs = xs
referenceKey :: MarkdownParser (F Blocks)
referenceKey = try $ do