从XmlReader为节点构建XPath

本文关键字:构建 XPath 节点 XmlReader | 更新日期: 2023-09-27 18:06:14

我正在编写一个应用程序,它解析来自各种来源的动态XML,遍历其结构并返回所有唯一的元素。

考虑到Xml文件有时非常大,由于内存限制,我使用XmlReader解析Xml结构。

/// <summary>
/// Streams through the XML document at <paramref name="filePath"/> and records every
/// distinct element name together with the depth at which it was first seen.
/// </summary>
/// <param name="filePath">Location of the XML document, passed to <c>XmlReader.Create</c>.</param>
/// <returns>A map from element local name to the depth of its first occurrence.</returns>
public IDictionary<string, int> Discover(string filePath)
    {
        // Values are depths (XmlReader.Depth is an int), matching the declared return type.
        Dictionary<string, int> nodeTable = new Dictionary<string, int>();
        using (XmlReader reader = XmlReader.Create(filePath))
        {
            // Read() advances one node at a time and returns false at the end of the
            // document, so the whole file is never held in memory.
            while (reader.Read())
            {
                // Only the first occurrence of each local name is kept.
                if (reader.NodeType == XmlNodeType.Element && !nodeTable.ContainsKey(reader.LocalName))
                {
                    nodeTable.Add(reader.LocalName, reader.Depth);
                }
            }
        }
        Debug.WriteLine("The node table has {0} items.", nodeTable.Count);

        return nodeTable;
    }

这样做效果很好,性能也很好,但最后一块拼图还没有解决:我想为每个元素生成XPath。

乍一看,用类似下面这样的代码似乎很简单。

// Names of the elements that are currently open, innermost element on top.
var elements = new Stack<string>();
while (reader.Read())
{
    switch (reader.NodeType)
    {
        case XmlNodeType.Element:
            elements.Push(reader.LocalName);
            // A self-closing element (<line1/>) never produces an EndElement node,
            // so it has to be popped immediately to keep the stack balanced.
            if (reader.IsEmptyElement)
            {
                elements.Pop();
            }
            break;
        case XmlNodeType.EndElement:
            elements.Pop();
            break;
        case XmlNodeType.Text:
            // A Stack enumerates from the top down; Reverse() yields root-to-leaf order.
            path = string.Join("/", elements.Reverse());
            break;
    }
}

但这只给了我一部分解决方案。假设我希望返回树中包含数据的每个节点的XPath,并检测给定节点树是否包含嵌套的数据集合。

<customers>
  <customer id="2">
    <name>ted smith</name>
    <addresses>
      <address1>
           <line1></line1>
      </address1>
      <address2>
           <line1></line1>
           <line2></line2>
      </address2>
    </addresses>
  </customer>
  <customer id="322">
    <name>smith mcsmith</name>
    <addresses>
      <address1>
           <line1></line1>
           <line2></line2>
      </address1>
      <address2>
           <line1></line1>
           <line2></line2>
      </address2>
    </addresses>
  </customer>
</customers>

请记住,数据是完全动态的,模式是未知的。

所以输出应该包括

/customer/name
/customer/address1/line1
/customer/address1/line2
/customer/address2/line1
/customer/address2/line2

从XmlReader为节点构建XPath

我更喜欢使用递归方法,而不是push/pop。参见下面的代码:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.IO;
namespace ConsoleApplication1
{
    /// <summary>
    /// Demo program: parses a sample XML document with <see cref="XmlReader"/> and builds
    /// an in-memory <see cref="Node"/> tree using a recursive reader.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the sample document, then reads it into a <see cref="Node"/> tree.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string input =
                "<customers>" +
                  "<customer id=\"2\">" +
                    "<name>ted smith</name>" +
                    "<addresses>" +
                      "<address1>" +
                           "<line1></line1>" +
                      "</address1>" +
                      "<address2>" +
                           "<line1></line1>" +
                           "<line2></line2>" +
                      "</address2>" +
                    "</addresses>" +
                  "</customer>" +
                  "<customer id=\"322\">" +
                    "<name>smith mcsmith</name>" +
                    "<addresses>" +
                      "<address1>" +
                           "<line1></line1>" +
                           "<line2></line2>" +
                      "</address1>" +
                      "<address2>" +
                           "<line1></line1>" +
                           "<line2></line2>" +
                      "</address2>" +
                    "</addresses>" +
                  "</customer>" +
                "</customers>";

            // Both readers are IDisposable; release them deterministically.
            using (StringReader sReader = new StringReader(input))
            using (XmlReader reader = XmlReader.Create(sReader))
            {
                Node root = new Node();
                ReadNode(reader, root);
            }
        }

        /// <summary>
        /// Reads nodes from <paramref name="reader"/> into <paramref name="node"/> until the
        /// end tag that closes the current element is reached or the input runs out.
        /// </summary>
        /// <param name="reader">Reader positioned before the content to consume.</param>
        /// <param name="node">
        /// Node to populate. If its name is still empty, the first element read names this
        /// node (the document root); every other element becomes a child.
        /// </param>
        /// <returns>
        /// <c>true</c> if reading stopped at an end tag; <c>false</c> if the reader reached
        /// the end of the input.
        /// </returns>
        static bool ReadNode(XmlReader reader, Node node)
        {
            // Attribute nodes are never surfaced by Read(); they are collected explicitly
            // through GetAttributes, so no Attribute case is needed here.
            while (reader.Read())
            {
                switch (reader.NodeType)
                {
                    case XmlNodeType.Element:
                        if (node.name.Length == 0)
                        {
                            // First element seen by the root call: it names the root node itself.
                            node.name = reader.Name;
                            GetAttributes(reader, node);
                        }
                        else
                        {
                            // Capture this before touching the attributes: a self-closing
                            // element (<x/>) produces no EndElement, so recursing into it
                            // would swallow the parent's end tag.
                            bool isEmpty = reader.IsEmptyElement;

                            Node newNode = new Node();
                            newNode.name = reader.Name;
                            if (node.children == null)
                            {
                                node.children = new List<Node>();
                            }
                            node.children.Add(newNode);
                            GetAttributes(reader, newNode);

                            if (!isEmpty)
                            {
                                ReadNode(reader, newNode);
                            }
                        }
                        break;
                    case XmlNodeType.EndElement:
                        // The element owned by this call is closed; hand control back to the parent.
                        return true;
                    case XmlNodeType.Text:
                        node.text = reader.Value;
                        break;
                }
            }
            return false;
        }

        /// <summary>
        /// Copies the attributes of the element the reader is positioned on into
        /// <paramref name="node"/>. The dictionary is only created when the element has
        /// at least one attribute; afterwards the reader is moved back to the element.
        /// </summary>
        /// <param name="reader">Reader positioned on an element node.</param>
        /// <param name="node">Node that receives the attribute name/value pairs.</param>
        static void GetAttributes(XmlReader reader, Node node)
        {
            if (!reader.HasAttributes)
            {
                return;
            }

            node.attributes = new Dictionary<string, string>();
            while (reader.MoveToNextAttribute())
            {
                node.attributes.Add(reader.Name, reader.Value);
            }

            // Return to the owning element so element-level properties are valid again.
            reader.MoveToElement();
        }
    }
    /// <summary>
    /// Plain data holder for one XML element: its name, text content, attributes and
    /// child elements.
    /// </summary>
    public class Node
    {
        /// <summary>Child nodes; <c>null</c> until a list is assigned.</summary>
        public List<Node> children;

        /// <summary>Attribute name/value pairs; <c>null</c> until a dictionary is assigned.</summary>
        public Dictionary<string, string> attributes;

        /// <summary>Text content; defaults to the empty string.</summary>
        public string text = "";

        /// <summary>Element name; defaults to the empty string.</summary>
        public string name = "";
    }
}
​