如何使用linq格式化xml

本文关键字:xml 格式化 linq 如何使用 | 更新日期: 2023-09-27 18:29:24

这里我使用linq创建一个xml,但没有获得所需的格式。这是我的代码

// Pages whose <img> sources should be written to the image sitemap.
var listvalue = new List<string>
{
    "http://example.com/sample.html",
    "http://example.com/new.html"
};

// Every page is handled on its own: its image sources are collected and
// passed to GenerateXml together with the page URL.
foreach (string pageUrl in listvalue)
{
    var page = new HtmlWeb().Load(pageUrl);

    // Keep only <img> elements that carry a non-empty src attribute.
    List<string> imageSources =
        (from img in page.DocumentNode.Descendants("img")
         let src = img.GetAttributeValue("src", null)
         where !String.IsNullOrEmpty(src)
         select src).ToList();

    GenerateXml(pageUrl, imageSources);
}

/// <summary>
/// Writes an image sitemap containing a single url element for
/// <paramref name="url"/>, with one image element per entry of
/// <paramref name="listitems"/>, to /Static/sitemaps/Sitemap-image.xml.
/// </summary>
/// <param name="url">Page URL stored in the url element's loc child.</param>
/// <param name="listitems">Image URLs, each stored in its own image/loc element.</param>
protected void GenerateXml(string url, List<string> listitems)
{
    XNamespace sitemapNs = "http://www.sitemaps.org/schemas/sitemap/0.9";
    XNamespace imageNs = "http://www.google.com/schemas/sitemap-image/1.1";

    // The single <url> entry: the page location followed by its images.
    var pageEntry = new XElement(sitemapNs + "url",
        new XElement(sitemapNs + "loc", url),
        listitems.Select(imageUrl =>
            new XElement(imageNs + "image",
                new XElement(imageNs + "loc", imageUrl))));

    // Root element declaring the default namespace and the "image" prefix.
    var root = new XElement(sitemapNs + "urlset",
        new XAttribute("xmlns", sitemapNs),
        new XAttribute(XNamespace.Xmlns + "image", imageNs),
        pageEntry);

    var document = new XDocument(new XDeclaration("1.0", "UTF-8", ""), root);

    string outputPath = System.Web.HttpContext.Current.Server.MapPath("/Static/sitemaps/Sitemap-image.xml");
    document.Save(outputPath);
}

我需要以下格式的输出:

<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">
  <url>
    <loc>http://example.com/sample.html</loc>
    <image:image>
      <image:loc>http://example.com/image.jpg</image:loc>
    </image:image>
    <image:image>
      <image:loc>http://example.com/photo.jpg</image:loc>
    </image:image>
  </url>
  <url>
    <loc>http://example.com/new.html</loc>
    <image:image>
      <image:loc>http://example.com/newimage.jpg</image:loc>
    </image:image>
    <image:image>
      <image:loc>http://example.com/newphoto.jpg</image:loc>
    </image:image>
  </url>
</urlset>

但是在这里我得到了一个单独的url标签。如何做到这一点?有什么建议吗?

如何使用linq格式化xml

听起来您实际上只需要在调用GenerateXml之前,先获取所有URL(来自所有源文档),并记住每个URL来自哪个源文档。这很简单:

// Source pages to scan for images.
var sources = new List<string>
{
    "http://example.com/sample.html",
    "http://example.com/new.html"
};

// Map each source page to the non-empty src values of its <img> elements.
var imagesBySource = sources
    .ToDictionary(source => source,
                  // Load the page identified by the current source URL.
                  source => new HtmlWeb().Load(source)
                               .DocumentNode.Descendants("img")
                               .Select(e => e.GetAttributeValue("src", null))
                               .Where(s => !String.IsNullOrEmpty(s))
                               .ToList());

// A single call, so all pages end up in the same sitemap document.
GenerateXml(imagesBySource);

然后,您需要将GenerateXml更改为接受一个Dictionary<string, List<string>>参数。类似这样(未经测试):

/// <summary>
/// Writes an image sitemap containing one url element per dictionary entry
/// to /Static/sitemaps/Sitemap-image.xml.
/// </summary>
/// <param name="imagesByUrl">
/// Maps each page URL (written to the url element's loc child) to the image
/// URLs that are written as image/loc children of that url element.
/// </param>
protected void GenerateXml(Dictionary<string, List<string>> imagesByUrl)
{
    XNamespace nsSitemap = "http://www.sitemaps.org/schemas/sitemap/0.9";
    XNamespace nsImage = "http://www.google.com/schemas/sitemap-image/1.1";
    var sitemap = new XDocument(new XDeclaration("1.0", "UTF-8", ""));
    var urlSet = new XElement(nsSitemap + "urlset",
        new XAttribute("xmlns", nsSitemap),
        new XAttribute(XNamespace.Xmlns + "image", nsImage),
        // One <url> element per page, each holding that page's own images.
        imagesByUrl.Select(entry =>
            new XElement(nsSitemap + "url",
                new XElement(nsSitemap + "loc", entry.Key),
                from urlNode in entry.Value
                select new XElement(nsImage + "image",
                    new XElement(nsImage + "loc", urlNode)
                )
            )
        )
    );
    sitemap.Add(urlSet);
    var path = HttpContext.Current.Server.MapPath("/Static/sitemaps/Sitemap-image.xml");
    sitemap.Save(path);
}

请注意,这并不能保证源的顺序得到保留。如果需要保留顺序,您可能应该创建一个具有Url和Images属性的类,并将该类实例的列表传递给GenerateXml。