diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-12-12 17:28:52 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-12-12 17:28:52 -0800 |
commit | 44120ea7165546152af88fd442c52ab0f201052e (patch) | |
tree | 34324b8b450b33f74437621dcf7087665dc0b78c /src/Text/Pandoc/Readers | |
parent | 60d383e27e84022142ab666d4648a666cc5a026a (diff) | |
download | pandoc-44120ea7165546152af88fd442c52ab0f201052e.tar.gz |
Implemented `east_asian_line_breaks` extension.
Text.Pandoc.Options: Added `Ext_east_asian_line_breaks` constructor to
`Extension` (API change).
This extension is like `ignore_line_breaks`, but smarter -- it
only ignores line breaks between two East Asian wide characters.
This makes it better suited for writing with a mix of East Asian
and non-East Asian scripts.
Closes #2586.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 0b7faadb7..999ab11de 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -40,6 +40,7 @@ import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Generic (bottomUp) import qualified Data.Text as T import Data.Text (Text) import qualified Data.Yaml as Yaml @@ -51,6 +52,7 @@ import qualified Data.Vector as V import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import Text.Pandoc.Options import Text.Pandoc.Shared +import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) @@ -356,7 +358,19 @@ parseMarkdown = do st <- getState let meta = runF (stateMeta' st) st let Pandoc _ bs = B.doc $ runF blocks st - return $ Pandoc meta bs + eastAsianLineBreaks <- option False $ + True <$ guardEnabled Ext_east_asian_line_breaks + return $ (if eastAsianLineBreaks + then bottomUp softBreakFilter + else id) $ Pandoc meta bs + +softBreakFilter :: [Inline] -> [Inline] +softBreakFilter (x:SoftBreak:y:zs) = + case (stringify x, stringify y) of + (xs@(_:_), (c:_)) + | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs + _ -> x:SoftBreak:y:zs +softBreakFilter xs = xs referenceKey :: MarkdownParser (F Blocks) referenceKey = try $ do |