使用C#-HtmlAgilityPack从HTML表中提取特定内容
本文关键字:提取 C#-HtmlAgilityPack HTML 使用 | 更新日期: 2023-09-27 18:28:10
C#-HtmlAgilityPack
- 我想使用 HtmlAgilityPack 从 HTML 表格中提取特定内容(见图片),并把结果插入 dataGridView1(见下面的 C# 代码)
+
- 同时交换第 2 列和第 3 列的顺序(见图片)
=++=++=+
显示图片这就是我想要的
=>点击显示图片
============++===++======++
我的测试代码(效果不理想):这段代码会把表格的全部内容都提取出来
=+=+=++
// Clear any previous binding so the grid does not keep showing stale rows.
dataGridView1.DataSource = null;
HtmlAgilityPack.HtmlDocument html = new HtmlAgilityPack.HtmlDocument();
// Load the page from disk. The path has to be a quoted (verbatim) string literal.
html.Load(@"test.html");
// Header cells: the <th> elements of the first row of the first table inside
// a <p> that is the 4th <p> child of its parent.
var headers = html.DocumentNode.SelectNodes("//p[4]/table[1]/tr[1]/th");
// SelectNodes returns null (not an empty collection) when nothing matches,
// so fail with a clear message instead of a NullReferenceException.
if (headers == null)
{
    throw new System.InvalidOperationException(
        "No header cells matched //p[4]/table[1]/tr[1]/th.");
}
DataTable table = new DataTable();
// Create one column per <th>, named after its text.
foreach (HtmlNode header in headers)
{
    table.Columns.Add(header.InnerText);
}
// Select only rows that contain <td> cells, which skips the header row.
var dataRows = html.DocumentNode.SelectNodes("//p[4]/table[1]/tr[td]");
// A table with headers but no data rows is valid: leave the DataTable empty.
if (dataRows != null)
{
    foreach (var row in dataRows)
    {
        // Every <td> of the row is copied, in document order.
        table.Rows.Add(row.SelectNodes("td").Select(td => td.InnerText).ToArray());
    }
}
// Show the result.
dataGridView1.DataSource = table;
============++===++======++
HTML 页面代码
=+=+=++
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
</head>
<body>
<center><font size="+2">Title_Test Title_Test</font></center>
<p><font size="+1"><b><span class="yyyyyy">2_Title_test </span><font color="#000000">NAME
PROJET</font></b></font></p>
<p>
<!-- Summary table: three rows of label, ":" separator and value; it has no th header row and no border attribute. -->
<table>
<tr>
<td><b>Testtable</b></td>
<td>:</td>
<td>oo</td>
</tr>
<tr>
<td><b>Testtable2TesttableTesttable</b></td>
<td>:</td>
<td>uu</td>
</tr>
<tr>
<td><b>Testtable3</b></td>
<td>:</td>
<td>iii</td>
</tr>
</table>
</p>
<p><font size="+1"><b><a name="GGGGGGGGG"></a>InfoTest_InfoTest_InfoTest</b></font></p>
<p><b>testtesttesttesttesttesttesttesttesttest </b>.<br />
<!-- Data table: the only table in this page with border="1". One th header row (Column0 to Column7) followed by seven td data rows of eight cells each. -->
<table border="1" bordercolor="#808080" cellpadding="2">
<!-- Header row -->
<tr valign="center">
<th align="middle">Column0</th>
<th align="middle">Column1</th>
<th align="middle">Column2</th>
<th align="middle">Column3</th>
<th align="middle">Column4</th>
<th align="middle">Column5</th>
<th align="middle">Column6</th>
<th align="middle">Column7</th>
</tr>
<!-- Data rows; the last cell is empty from the third data row onwards. -->
<tr valign="center">
<td align="left">pola</td>
<td align="right">111</td>
<td align="right">po111</td>
<td align="right">1111</td>
<td align="right">po1111</td>
<td align="right">NN</td>
<td align="right">VV</td>
<td align="right">NV</td>
</tr>
<tr valign="center">
<td align="left">yato</td>
<td align="right">222</td>
<td align="right">ya222</td>
<td align="right">2222</td>
<td align="right">ya2222</td>
<td align="right">NN</td>
<td align="right">VV</td>
<td align="right">NV</td>
</tr>
<tr valign="center">
<td align="left">romaz</td>
<td align="right">333</td>
<td align="right">ro333</td>
<td align="right">3333</td>
<td align="right">ro3333</td>
<td align="right">NN</td>
<td align="right">VV</td>
<td align="right"></td>
</tr>
<tr valign="center">
<td align="left">anik</td>
<td align="right">444</td>
<td align="right">an444</td>
<td align="right">4444</td>
<td align="right">an4444</td>
<td align="right">NN</td>
<td align="right">VV</td>
<td align="right"></td>
</tr>
<tr valign="center">
<td align="left">kilwa</td>
<td align="right">555</td>
<td align="right">ki555</td>
<td align="right">5555</td>
<td align="right">ki5555</td>
<td align="right">NN</td>
<td align="right">VV</td>
<td align="right"></td>
</tr>
<tr valign="center">
<td align="left">sekil</td>
<td align="right">666</td>
<td align="right">se666</td>
<td align="right">5555</td>
<td align="right">se6666</td>
<td align="right">NN</td>
<td align="right">VV</td>
<td align="right"></td>
</tr>
<tr valign="center">
<td align="left">janit</td>
<td align="right">777</td>
<td align="right">ja777</td>
<td align="right">7777</td>
<td align="right">ja7777</td>
<td align="right">NN</td>
<td align="right">VV</td>
<td align="right"></td>
</tr>
</table>
</p>
</body>
</html>
============++===++======++
谢谢,期待您的回复。
也许先提取一个表?
HtmlAgilityPack.HtmlDocument html = new HtmlAgilityPack.HtmlDocument();
// Load a file. A verbatim string keeps the backslash path separators unescaped.
html.Load(@"c:\OneDrive\Work\MS Projects\text.html");
// Select the table by its border="1" attribute instead of by its position in the page.
HtmlNode table = html.DocumentNode.SelectSingleNode("//table[@border='1']");
// SelectSingleNode returns null when nothing matches; report that explicitly.
if (table == null)
{
    throw new System.InvalidOperationException(
        "No table with border='1' was found in the document.");
}
DataTable dt = new DataTable();
var rows = table.SelectNodes("tr");
// SelectNodes returns null (not an empty collection) for a table without rows.
if (rows == null)
{
    throw new System.InvalidOperationException(
        "The selected table contains no <tr> rows.");
}
// Source cell index for each output column. Output columns 2 and 3 take
// source cells 3 and 2, i.e. the two columns are swapped; source cells 5..7
// are intentionally dropped. Header and data rows share this one mapping.
int[] sourceIndexes = { 0, 1, 3, 2, 4 };
// Highest source index used above is 4, so a row needs at least 5 cells.
const int requiredCells = 5;
for (int i = 0; i < rows.Count; ++i)
{
    // Row 0 holds the <th> header cells; every later row holds <td> data cells.
    var cells = rows[i].SelectNodes(i == 0 ? "th" : "td");
    bool usable = cells != null && cells.Count >= requiredCells;
    if (i == 0)
    {
        // Without a complete header row no columns can be created.
        if (!usable)
        {
            throw new System.InvalidOperationException(
                "The first table row does not contain the expected <th> header cells.");
        }
        foreach (int source in sourceIndexes)
        {
            dt.Columns.Add(new DataColumn(cells[source].InnerText));
        }
    }
    else if (usable)
    {
        // Rows with too few <td> cells are skipped rather than crashing the import.
        DataRow dr = dt.NewRow();
        for (int target = 0; target < sourceIndexes.Length; ++target)
        {
            dr[target] = cells[sourceIndexes[target]].InnerText;
        }
        dt.Rows.Add(dr);
    }
}