Html Agility Pack:节点和子节点
本文关键字:子节点 节点 HtmlAgilityPack html | 更新日期: 2023-09-27 18:18:37
我正在尝试使用 Html Agility Pack 解析一段复杂的 HTML:
<tr>
<td width=0%>Artist:</td><td width=23% class='colour'><a href='/p/beatmaplist?q=Akiakane'>Akiakane</a></td>
<td width=0%>Circle Size:</td><td width=23% class='colour'><div class='starfield' style='width:140px'><div class='active' style='width:56px'></div></div></td>
<td width=0%>Approach Rate:</td><td class="colour"><div class='starfield' style='width:140px'><div class='active' style='width:126px'></div></div></td>
</tr>
<tr>
<td width=0%>Title:</td><td class="colour"><a href='/p/beatmaplist?q=FlashBack'>FlashBack</a></td>
<td width=0%>HP Drain:</td><td class="colour"><div class='starfield' style='width:140px'><div class='active' style='width:84px'></div></div></td>
<td width=0%><strong>Star Difficulty</strong>:</td><td width=23% class='colour'><div class='starfield' style='width:140px'><div class='active' style='width:72.9650211334px'></div></div> (5.21)</td>
</tr>
<tr>
<td width=0%>Creator:</td><td class="colour"><a href='/u/231111'>Kiiwa<a/></td>
<td width=0%>Accuracy:</td><td class="colour"><div class='starfield' style='width:140px'><div class='active' style='width:98px'></div></div></td>
<td width=0%>Length:</td><td class="colour">3:13 (2:49 drain)</td>
</tr>
<tr>
<td width=0%>Source:</td><td class="colour"><a href='/p/beatmaplist?q='></a></td>
<td width=0%>Genre:</td><td class="colour"><a href='/p/beatmaplist?g=4'>Rock</a> (<a href='/p/beatmaplist?la=3'>Japanese</a>)</td>
<td width=0%>BPM:</td><td class="colour">185</td>
</tr>
<tr>
<td width=0%>Tags:</td><td class="colour"><a href="/p/beatmaplist?q=j-pop">j-pop</a> <a href="/p/beatmaplist?q=beren">beren</a> <a href="/p/beatmaplist?q=collaboration">collaboration</a> <a href="/p/beatmaplist?q=collab">collab</a> <a href="/p/beatmaplist?q=boroboro">boroboro</a> <a href="/p/beatmaplist?q=na">na</a> <a href="/p/beatmaplist?q=ikizama">ikizama</a> <a href="/p/beatmaplist?q=niki">niki</a> <a href="/p/beatmaplist?q=niconicodouga">niconicodouga</a> <a href="/p/beatmaplist?q=toysfactory">toysfactory</a> </td>
<td width=0%>User Rating:</td><td class="colour">
<table width="100%" height="20px" style="color:#fff;">
<tr>
<td style="background-color:#BC2036;text-align:right;border:solid 1px #82000B;" width="3.37522441652">93</td>
<td style="background-color:#78AB23;text-align:left;border:solid 1px #718F0A;" width="96.6965888689">2,692</td>
</tr>
每个 <tr> 应该对应一个对象,其中的 <td> 作为该对象的属性,即:
/// <summary>
/// Holds the values extracted for one song; every value is stored as display text.
/// </summary>
public class SongInfo
{
    /// <summary>Circle size text, e.g. "Circle Size: 2.5".</summary>
    public string CS { get; set; }

    /// <summary>Approach rate text.</summary>
    public string AR { get; set; }

    /// <summary>HP drain text.</summary>
    public string HP { get; set; }

    /// <summary>Star difficulty text.</summary>
    public string STAR { get; set; }

    /// <summary>Song length text.</summary>
    public string LENGTH { get; set; }

    /// <summary>Beats-per-minute text.</summary>
    public string BPM { get; set; }
}
所以,在这个上下文中,它应该是这样的:
CS should be "Circle Size: (starfield style % divided by active style %)"
AR should be "Approach Rate: (starfield style % divided by active style %)"
HP should be "HP Drain: (starfield style % divided by active style %)"
STAR should be "Star Difficulty: (starfield style % divided by active style %)"
LENGTH should be "Length: 3:13"
BPM should be "BPM: 185"
当我输入(starfield style %除以active style %)时,我指的是这段代码:
<div class='starfield' style='width:140px'><div class='active'style='width:56px'></div>
那么在这种情况下,它应该是2.5,因为140/56 = 2.5
我的第一个想法是这样的:
// First attempt: visit every <tr> in the document, then look for label cells in each row.
// The inner loop body is still empty, so nothing is extracted yet.
foreach (HtmlAgilityPack.HtmlNode node in doc.DocumentNode.SelectNodes("//tr"))
{
// NOTE(review): "//td[@width]=0%" is not a valid XPath expression; the comparison has to sit
// inside the predicate with a quoted value, i.e. "//td[@width='0%']".
// NOTE(review): a path starting with "//" is evaluated from the document root even when called
// on a row node; ".//td[...]" would limit the search to the current row.
foreach (HtmlAgilityPack.HtmlNode node2 in node.SelectNodes("//td[@width]=0%"))
{
}
}
但是老实说,我不知道如何使用 Html Agility Pack,因为我根本没有真正使用过它。
有可能做到我所要求的吗?
我认为您还不够努力,因为您实际上只是从这个线程复制粘贴了一些代码,甚至没有尝试查看xpath。
很多 HTML 代码是相似的。我为你写了完整的解决方案,请通读一遍,同时也请阅读 Html Agility Pack 的文档和 XPath 的相关资料。你最初的那个 XPath 是错误的,它应该是:"//td[@width='0%']"。你也可以使用 "//td"(但下面的例子使用的是 "//td[@width='0%']"),不过那样你就必须用其他方法找出相关的单元格。在下面的解决方案中,我使用了每个 <td> 的 InnerText。
public class SongInfo
{
public string CS { get; set; }
public string AR { get; set; }
public string HP { get; set; }
public string STAR { get; set; }
public string LENGTH { get; set; }
public string BPM { get; set; }
}
/// <summary>
/// Console program that loads "da.html", reads the labelled cells of the song
/// info table and prints the values collected into a <see cref="SongInfo"/>.
/// </summary>
class MainClass
{
    /// <summary>
    /// Loads "da.html", fills a <see cref="SongInfo"/> from the label cells
    /// (td elements whose width attribute is "0%") and prints the result.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static void Main(string[] args)
    {
        SongInfo song = new SongInfo();
        HtmlDocument doc = new HtmlDocument();
        doc.Load("da.html");

        HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//td[@width='0%']");

        // SelectNodes yields null (not an empty collection) when nothing matches,
        // so guard before enumerating.
        if (nodes != null)
        {
            foreach (HtmlNode n in nodes)
            {
                string label = n.InnerText;

                if (HasLabel(label, "circle size:"))
                {
                    song.CS = DescribeBar(n);
                }
                if (HasLabel(label, "approach rate:"))
                {
                    song.AR = DescribeBar(n);
                }
                if (HasLabel(label, "hp drain:"))
                {
                    song.HP = DescribeBar(n);
                }
                if (HasLabel(label, "star difficulty:"))
                {
                    song.STAR = DescribeBar(n);
                }
                if (HasLabel(label, "length:"))
                {
                    song.LENGTH = NextSiblingText(n);
                }
                if (HasLabel(label, "bpm:"))
                {
                    song.BPM = NextSiblingText(n);
                }
            }
        }

        PrintSong(song);
    }

    /// <summary>
    /// Case-insensitive, culture-independent test for <paramref name="label"/>
    /// occurring anywhere in <paramref name="text"/>.
    /// </summary>
    private static bool HasLabel(string text, string label)
    {
        return text != null
            && text.IndexOf(label, StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>
    /// Returns the first element that follows <paramref name="labelCell"/> among its
    /// siblings, skipping text and comment nodes (e.g. whitespace between cells),
    /// or null when there is none.
    /// </summary>
    private static HtmlNode ValueCell(HtmlNode labelCell)
    {
        HtmlNode sibling = labelCell.NextSibling;
        while (sibling != null && sibling.NodeType != HtmlNodeType.Element)
        {
            sibling = sibling.NextSibling;
        }
        return sibling;
    }

    /// <summary>
    /// Builds "&lt;label text&gt; &lt;ratio&gt;" for a bar row. The ratio is written with
    /// at most two decimals and a '.' separator regardless of the machine's culture;
    /// "NaN" is written when the bar widths cannot be read.
    /// </summary>
    private static string DescribeBar(HtmlNode labelCell)
    {
        double ratio = AlmostAnything(ValueCell(labelCell));
        return labelCell.InnerText + " "
            + ratio.ToString("0.##", System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Returns the text of the value cell that follows the given label cell,
    /// or null when the label cell has no following element.
    /// </summary>
    private static string NextSiblingText(HtmlNode n)
    {
        HtmlNode valueCell = ValueCell(n);
        return valueCell == null ? null : valueCell.InnerText;
    }

    /// <summary>
    /// Computes the width of the "starfield" div divided by the width of the
    /// "active" div found inside <paramref name="n"/> (e.g. 140px / 56px = 2.5).
    /// </summary>
    /// <param name="n">The value cell that contains the two divs; may be null.</param>
    /// <returns>
    /// The ratio as a double (it is no longer rounded to an integer), or
    /// <see cref="double.NaN"/> when the cell is missing, either width cannot be
    /// read, or the active width is zero.
    /// </returns>
    private static double AlmostAnything(HtmlNode n)
    {
        if (n == null)
        {
            return double.NaN;
        }

        // ".//div" searches below this cell only, so the cell's markup does not
        // have to be re-parsed into a separate document.
        HtmlNodeCollection divs = n.SelectNodes(".//div");
        if (divs == null)
        {
            return double.NaN;
        }

        string starfield = "";
        string activefield = "";
        foreach (HtmlNode div in divs)
        {
            string cssClass = div.GetAttributeValue("class", "");
            if (cssClass == "starfield")
            {
                starfield = div.GetAttributeValue("style", "");
            }
            else if (cssClass == "active")
            {
                activefield = div.GetAttributeValue("style", "");
            }
        }

        double total = ConvertStringToNum(starfield);
        double active = ConvertStringToNum(activefield);

        // ConvertStringToNum reports "no number" as -1; never divide with that
        // sentinel or by zero.
        if (total < 0 || active <= 0)
        {
            return double.NaN;
        }
        return total / active;
    }

    /// <summary>
    /// Extracts the first unsigned decimal number found in <paramref name="s"/>,
    /// e.g. "width:72.9650211334px" gives 72.9650211334 and "width:140" gives 140.
    /// </summary>
    /// <param name="s">Text to scan, typically a style attribute value.</param>
    /// <returns>The parsed number, or -1 when <paramref name="s"/> contains no digit.</returns>
    private static double ConvertStringToNum(string s)
    {
        if (string.IsNullOrEmpty(s))
        {
            return -1;
        }

        // Locate the first ASCII digit.
        int start = 0;
        while (start < s.Length && !IsDigit(s[start]))
        {
            start++;
        }
        if (start == s.Length)
        {
            return -1;
        }

        // Consume digits plus at most one '.' that is directly followed by a digit.
        int end = start;
        bool seenPoint = false;
        while (end < s.Length)
        {
            char c = s[end];
            if (IsDigit(c))
            {
                end++;
            }
            else if (c == '.' && !seenPoint && end + 1 < s.Length && IsDigit(s[end + 1]))
            {
                seenPoint = true;
                end++;
            }
            else
            {
                break;
            }
        }

        // The text always uses '.', so parse with the invariant culture instead
        // of whatever culture the machine is configured with.
        double value;
        bool parsed = double.TryParse(
            s.Substring(start, end - start),
            System.Globalization.NumberStyles.AllowDecimalPoint,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);
        return parsed ? value : -1;
    }

    /// <summary>True for the ASCII digits '0' through '9' only.</summary>
    private static bool IsDigit(char c)
    {
        return c >= '0' && c <= '9';
    }

    /// <summary>Writes each collected value on its own console line.</summary>
    private static void PrintSong(SongInfo s)
    {
        Console.WriteLine(s.CS);
        Console.WriteLine(s.AR);
        Console.WriteLine(s.HP);
        Console.WriteLine(s.STAR);
        Console.WriteLine(s.LENGTH);
        Console.WriteLine(s.BPM);
    }
}