aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/XMLParser.hs
blob: 8ad22a66ab42e59e36c59897261c3435718ecb92 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.XMLParser
   Copyright   : Copyright (C) 2021 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Bridge to allow using xml-conduit's parser with xml-light's types.
-}
module Text.Pandoc.XMLParser
  ( parseXMLElement
  , parseXMLContents
  , module Text.XML.Light.Types
  ) where

import qualified Control.Exception as E
import qualified Text.XML as Conduit
import Text.XML.Unresolved (InvalidEventStream(..))
import qualified Text.XML.Light as Light
import Text.XML.Light.Types
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Map as M
import Data.Maybe (mapMaybe)

-- Drop in replacement for parseXMLDoc in xml-light.
parseXMLElement :: TL.Text -> Either T.Text Light.Element
parseXMLElement t =
  elementToElement .  Conduit.documentRoot <$>
    either (Left . T.pack . E.displayException) Right
    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)

parseXMLContents :: TL.Text -> Either T.Text [Light.Content]
parseXMLContents t =
  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
    Left e ->
      case E.fromException e of
        Just (ContentAfterRoot _) ->
          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
        _ -> Left . T.pack . E.displayException $ e
    Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x]

elementToElement :: Conduit.Element -> Light.Element
elementToElement (Conduit.Element name attribMap nodes) =
  Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
 where
  attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $
              M.toList attribMap
  nameToQname (Conduit.Name localName mbns mbpref) =
    case mbpref of
      Nothing | "xmlns:" `T.isPrefixOf` localName ->
           Light.QName (T.unpack $ T.drop 6 localName)  (T.unpack <$> mbns)
                       (Just "xmlns")
      _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns)
                       (T.unpack <$> mbpref)

nodeToContent :: Conduit.Node -> Maybe Light.Content
nodeToContent (Conduit.NodeElement el) =
  Just (Light.Elem (elementToElement el))
nodeToContent (Conduit.NodeContent t) =
  Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing))
nodeToContent _ = Nothing