XMLReader 跳过特定行
本文关键字:XMLReader | 更新日期: 2023-09-27 18:35:07
这是我的方法
/// <summary>
/// Streams through the XML file named by <c>FileName</c> and dispatches every
/// <c>book</c>, <c>article</c>, <c>incollection</c> and <c>inproceedings</c>
/// start element to its dedicated parse routine.
/// </summary>
/// <remarks>
/// Each recognised element gets a fresh record object, the next sequential
/// publication id (starting at 1) and the value of its <c>key</c> attribute
/// before the matching <c>Parse*</c> helper is called with the shared reader.
/// Start elements with any other name are ignored.
/// </remarks>
private void ParseXML()
{
    int pubid = 1;

    // Enable DTD processing on the shared reader settings before the reader is created.
    settings.DtdProcessing = DtdProcessing.Parse;

    using (reader = XmlReader.Create(FileName, settings))
    {
        while (reader.Read())
        {
            if (!reader.IsStartElement())
            {
                continue;
            }

            // Element names are compared against fixed ASCII labels, so the
            // normalisation must not depend on the current culture (under
            // tr-TR, for instance, "I" lowercases to a dotless "ı" and an
            // upper-case "INCOLLECTION" would never match).
            switch (reader.Name.Trim().ToLowerInvariant())
            {
                case "book":
                    book = new Book();
                    book.Pubid = pubid;
                    book.Pubtype = "book";
                    book.Pubkey = reader.GetAttribute("key");
                    ParseBook(reader, book);
                    pubid++;
                    break;

                case "article":
                    article = new Article();
                    article.Pubid = pubid;
                    article.Pubkey = reader.GetAttribute("key");
                    article.Pubtype = "article";
                    ParseArticle(reader, article);
                    pubid++;
                    break;

                case "incollection":
                    incollection = new Incollection();
                    incollection.Pubid = pubid;
                    incollection.Pubkey = reader.GetAttribute("key");
                    // NOTE(review): this is the only branch that does not set
                    // Pubtype - confirm whether that omission is intentional.
                    ParseIncollection(reader, incollection);
                    pubid++;
                    break;

                case "inproceedings":
                    inproceeding = new Inproceedings();
                    inproceeding.Pubid = pubid;
                    inproceeding.Pubtype = "inproceeding";
                    inproceeding.Pubkey = reader.GetAttribute("key");
                    ParseInproceedings(reader, inproceeding);
                    pubid++;
                    break;
            }

            // NOTE(review): if a Parse* helper leaves the reader positioned on
            // the next record's start element, the Read() at the top of the
            // loop will step past it and that record is skipped - verify
            // where each helper stops.
        }
    }
}
我正在解析这个文件。http://dblp.uni-trier.de/xml/
但是,我已经使用其他解析器检查了xml,似乎incollection元素在xml中。
但是,当我运行这段代码时,case "incollection" 分支始终没有被触发,其他分支都工作正常。
这是 1.2GB 的 xml 文件。
调试时甚至不会命中 incollection = new Incollection(); 这一行,因此也没有任何报错。
Firefox 报告此错误:
XML Parsing Error: undefined entity
Location: http://dblp.uni-trier.de/xml/dblp.xml
Line Number 26, Column 37:
<journal>technical Report 248, ETH Z&uuml;rich, Dept. of Computer Science</journal>
------------------------------------^
出错的位置是未定义的实体引用:
&uuml;
也许您应该考虑使用允许与号的CDATA...
<![CDATA[
This is some text with ampersands & other funny characters. >>
]]>
编辑:请参阅这篇问答:reading-xml-with-an-into-c-sharp-xmldocument-object(Reading XML with an "&" into C# XmlDocument Object)