CSV 到 XML 的转换
本文关键字:转换 XML CSV | 更新日期: 2023-09-27 18:34:25
程序应以 CSV 文件作为输入,并输出 XML。代码如下:
/// <summary>
/// Reads <c>data.csv</c> from the working directory and writes <c>Output.xml</c>.
/// The first CSV line supplies the element names; every following line is written
/// as one element named after the input file (<c>data</c>) containing one child
/// element per field.
/// </summary>
/// <remarks>
/// One wrapper element is written per data row, exactly as before, so a
/// multi-row input produces a sequence of sibling elements rather than a
/// single-rooted document.
/// </remarks>
private static void ConvertCSVToXML()
{
    const string inputPath = "data.csv";

    string[] source = File.ReadAllLines(inputPath);

    // Derive the wrapper element name from the file name without its extension.
    string rootName = Path.GetFileNameWithoutExtension(inputPath);
    string rootNameStartTag = "<" + rootName + ">";
    string rootNameEndTag = "</" + rootName + ">";

    // The using block guarantees the writer is flushed and closed, even on error.
    using (StreamWriter writeFile = new StreamWriter("Output.xml"))
    {
        if (source.Length == 0)
        {
            // No header line: nothing to convert, leave an empty output file.
            return;
        }

        string[] headers = SplitCsvLine(source[0]);

        // Start at 1 to skip only the header line; data rows that happen to
        // equal the header text are kept.
        for (int i = 1; i < source.Length; i++)
        {
            string[] fields = SplitCsvLine(source[i]);

            writeFile.WriteLine(rootNameStartTag);
            for (int j = 0; j < fields.Length; j++)
            {
                // XElement escapes the field text when rendered.
                writeFile.WriteLine(new XElement(headers[j], fields[j]));
            }
            writeFile.WriteLine(rootNameEndTag);
        }
    }
}

/// <summary>
/// Splits one CSV line on commas while keeping commas that appear inside
/// double-quoted fields. A doubled quote inside a quoted field yields one
/// literal quote. Lines without quotes split exactly like <c>Split(',')</c>.
/// </summary>
private static string[] SplitCsvLine(string line)
{
    System.Collections.Generic.List<string> fields = new System.Collections.Generic.List<string>();
    System.Text.StringBuilder current = new System.Text.StringBuilder();
    bool inQuotes = false;

    for (int i = 0; i < line.Length; i++)
    {
        char c = line[i];
        if (inQuotes)
        {
            if (c != '"')
            {
                current.Append(c);
            }
            else if (i + 1 < line.Length && line[i + 1] == '"')
            {
                // Escaped quote: emit one quote and skip its twin.
                current.Append('"');
                i++;
            }
            else
            {
                inQuotes = false;
            }
        }
        else if (c == '"')
        {
            inQuotes = true;
        }
        else if (c == ',')
        {
            fields.Add(current.ToString());
            current.Length = 0;
        }
        else
        {
            current.Append(c);
        }
    }

    // The text after the last comma is always a field, even when empty.
    fields.Add(current.ToString());
    return fields.ToArray();
}
上述代码的唯一问题是它根据逗号 (,) 拆分数据,因此如果 CSV 中有这样一行:A,"DEF,XYZ,GHI","FDNFB,dfhjd"
那么拆分结果是 field[0]=A、field[1]="DEF、field[2]=XYZ、field[3]=GHI"、field[4]="FDNFB、field[5]=dfhjd",但我需要的输出是 field[0]=A、field[1]=DEF,XYZ,GHI、field[2]=FDNFB,dfhjd。请帮助我按上述方式进行拆分。
.NET 内置的 TextFieldParser 可以处理带引号的字段。它位于 Microsoft.VisualBasic.FileIO 命名空间中,但可以在 C# 或任何其他 .NET 语言中使用。以下是测试代码:
/// <summary>
/// Small demonstration of <c>TextFieldParser</c> reading quoted CSV fields.
/// </summary>
public static class TextFieldParserTest
{
    /// <summary>
    /// Parses a single in-memory CSV line made of two quoted fields and writes
    /// the field count, followed by each field, to the debug output.
    /// </summary>
    public static void Test()
    {
        const string input = @"""DEF,XYZ,GHI"",""FDNFB,dfhjd""";

        using (var reader = new StringReader(input))
        using (var csvParser = new TextFieldParser(reader))
        {
            csvParser.SetDelimiters(",");
            // Already the default; set explicitly to make the intent visible.
            csvParser.HasFieldsEnclosedInQuotes = true;

            while (!csvParser.EndOfData)
            {
                string[] row = csvParser.ReadFields();
                Debug.WriteLine(row.Length);
                for (int index = 0; index < row.Length; index++)
                {
                    Debug.WriteLine(row[index]);
                }
            }
        }
    }
}
提供以下输出:
2
DEF,XYZ,GHI
FDNFB,dfhjd
请参阅以下解决方案 [ 当 CSV 同时包含字符和数字数据时,将 CSV 转换为 XML ]
他建议使用基于正则表达式的 SplitCSV(line) 来解析 CSV 行,而不是使用 line.Split(",")。
Cinchoo ETL - 一个开源库简化了CSV到XML文件转换的过程。
对于示例 CSV:
Id, Name, City
1, Tom, NY
2, Mark, NJ
3, Lou, FL
4, Smith, PA
5, Raj, DC
使用下面的代码,您可以生成 Xml
// Sample CSV text; the reader below is told that the first line is a header.
string csv = @"Id, Name, City
1, Tom, NY
2, Mark, NJ
3, Lou, FL
4, Smith, PA
5, Raj, DC";

// Receives the XML text produced by the writer.
var xmlBuffer = new StringBuilder();

using (var reader = ChoCSVReader.LoadText(csv).WithFirstLineHeader())
using (var writer = new ChoXmlWriter(xmlBuffer)
    .Configure(c => c.RootName = "Emps")
    .Configure(c => c.NodeName = "Emp"))
{
    writer.Write(reader);
}

// The buffer is read only after both reader and writer have been disposed.
string xml = xmlBuffer.ToString();
Console.WriteLine(xml);
输出 XML:
<Emps>
<Emp>
<Id>1</Id>
<Name>Tom</Name>
<City>NY</City>
</Emp>
<Emp>
<Id>2</Id>
<Name>Mark</Name>
<City>NJ</City>
</Emp>
<Emp>
<Id>3</Id>
<Name>Lou</Name>
<City>FL</City>
</Emp>
<Emp>
<Id>4</Id>
<Name>Smith</Name>
<City>PA</City>
</Emp>
<Emp>
<Id>5</Id>
<Name>Raj</Name>
<City>DC</City>
</Emp>
</Emps>
查看代码项目文章以获取更多帮助。
免责声明:我是这个库的作者。
这似乎是一个不错的选择,也许可以解决您的问题:http://msdn.microsoft.com/en-GB/library/bb387090.aspx
// Write the sample customer data to disk.
string csvString = @"GREAL,Great Lakes Food Market,Howard Snyder,Marketing Manager,(503) 555-7555,2732 Baker Blvd.,Eugene,OR,97403,USA
HUNGC,Hungry Coyote Import Store,Yoshi Latimer,Sales Representative,(503) 555-6874,City Center Plaza 516 Main St.,Elgin,OR,97827,USA
LAZYK,Lazy K Kountry Store,John Steel,Marketing Manager,(509) 555-7969,12 Orchestra Terrace,Walla Walla,WA,99362,USA
LETSS,Let's Stop N Shop,Jaime Yorres,Owner,(415) 555-5938,87 Polk St. Suite 5,San Francisco,CA,94117,USA";
File.WriteAllText("cust.csv", csvString);

// Load the file back, one array entry per line.
string[] source = File.ReadAllLines("cust.csv");

// Turn every line into a <Customer> element: column 0 becomes the CustomerID
// attribute, columns 1-4 become child elements, and columns 5-9 are grouped
// under <FullAddress>. Lines are split on every comma (no quote handling).
XElement cust = new XElement("Root",
    source
        .Select(line => line.Split(','))
        .Select(columns =>
            new XElement("Customer",
                new XAttribute("CustomerID", columns[0]),
                new XElement("CompanyName", columns[1]),
                new XElement("ContactName", columns[2]),
                new XElement("ContactTitle", columns[3]),
                new XElement("Phone", columns[4]),
                new XElement("FullAddress",
                    new XElement("Address", columns[5]),
                    new XElement("City", columns[6]),
                    new XElement("Region", columns[7]),
                    new XElement("PostalCode", columns[8]),
                    new XElement("Country", columns[9])))));
Console.WriteLine(cust);
此代码生成以下输出:
Xml
<Root>
<Customer CustomerID="GREAL">
<CompanyName>Great Lakes Food Market</CompanyName>
<ContactName>Howard Snyder</ContactName>
<ContactTitle>Marketing Manager</ContactTitle>
<Phone>(503) 555-7555</Phone>
<FullAddress>
<Address>2732 Baker Blvd.</Address>
<City>Eugene</City>
<Region>OR</Region>
<PostalCode>97403</PostalCode>
<Country>USA</Country>
</FullAddress>
</Customer>
<Customer CustomerID="HUNGC">
<CompanyName>Hungry Coyote Import Store</CompanyName>
<ContactName>Yoshi Latimer</ContactName>
<ContactTitle>Sales Representative</ContactTitle>
<Phone>(503) 555-6874</Phone>
<FullAddress>
<Address>City Center Plaza 516 Main St.</Address>
<City>Elgin</City>
<Region>OR</Region>
<PostalCode>97827</PostalCode>
<Country>USA</Country>
</FullAddress>
</Customer>
<Customer CustomerID="LAZYK">
<CompanyName>Lazy K Kountry Store</CompanyName>
<ContactName>John Steel</ContactName>
<ContactTitle>Marketing Manager</ContactTitle>
<Phone>(509) 555-7969</Phone>
<FullAddress>
<Address>12 Orchestra Terrace</Address>
<City>Walla Walla</City>
<Region>WA</Region>
<PostalCode>99362</PostalCode>
<Country>USA</Country>
</FullAddress>
</Customer>
<Customer CustomerID="LETSS">
<CompanyName>Let's Stop N Shop</CompanyName>
<ContactName>Jaime Yorres</ContactName>
<ContactTitle>Owner</ContactTitle>
<Phone>(415) 555-5938</Phone>
<FullAddress>
<Address>87 Polk St. Suite 5</Address>
<City>San Francisco</City>
<Region>CA</Region>
<PostalCode>94117</PostalCode>
<Country>USA</Country>
</FullAddress>
</Customer>
</Root>
编辑:我之前没有注意到第一个问题。可以先对 CSV 做一些预处理,替换列分隔符。
使用这个:
// Rewrite the CSV line by line into a temporary file, then swap it into place.
string filePath = "Your csv file path here including name";
string newFilePath = filePath + ".tmp";

using (var reader = new StreamReader(filePath))
using (var writer = new StreamWriter(newFilePath, false, Encoding.ASCII))
{
    string currentLine;
    // ReadLine returns null once the input is exhausted.
    for (int lineNumber = 0; (currentLine = reader.ReadLine()) != null; lineNumber++)
    {
        writer.WriteLine(ReplaceLine(currentLine, lineNumber));
    }
}

// Replace the original file with the rewritten copy.
File.Delete(filePath);
File.Move(newFilePath, filePath);
Dts.TaskResult = (int)ScriptResults.Success;
}
/// <summary>
/// Rewrites the column delimiters of one CSV line to a pipe character so the
/// line can afterwards be split on <c>|</c> without breaking quoted fields.
/// </summary>
/// <param name="Line">The raw CSV line.</param>
/// <param name="LineNumber">Zero-based line index; currently unused.</param>
/// <returns>
/// The line with every <c>","</c>, <c>,"</c> and <c>",</c> sequence replaced
/// by <c>|</c>. A quote at the very start or end of the line is left in place.
/// </returns>
protected string ReplaceLine(string Line, int LineNumber)
{
    // Boundary between two quoted fields: ","
    var newLine = Line.Replace("\",\"", "|");
    // Unquoted field followed by a quoted field: ,"
    newLine = newLine.Replace(",\"", "|");
    // Quoted field followed by an unquoted field: ",
    newLine = newLine.Replace("\",", "|");
    return newLine;
}
我对 Excel 生成的 CSV 文件遇到了完全相同的问题。问题是(这很好)如果字段内容包含分隔符,则内容将像您的示例中一样被引用(如果内容也包含引号字符,则加倍)。
我也没有使用现成的解析器,而是按如下方式实现它:
/// <summary>
/// Splits one delimited line into its fields, honouring an optional quote
/// character that allows the field separator to appear inside a field.
/// </summary>
/// <param name="line">The line to split.</param>
/// <param name="fieldSeparator">Character that separates fields.</param>
/// <param name="textSeparator">
/// Quote character, or <c>null</c> when the format has no quoting.
/// </param>
/// <returns>
/// The fields in order. An empty line yields an empty array. Doubled quote
/// characters are collapsed to one, and the last field is trimmed of
/// surrounding whitespace (earlier fields are not).
/// </returns>
private string[] ParseLine(string line, char fieldSeparator, char? textSeparator)
{
    List<string> items = new List<string>();
    StringBuilder itemBuilder = new StringBuilder();
    bool insideQuotes = false;

    for (int i = 0; i < line.Length; i++)
    {
        char currentChar = line[i];

        if (currentChar == fieldSeparator)
        {
            if (insideQuotes)
            {
                // A separator inside an open quotation is ordinary field text.
                itemBuilder.Append(currentChar);
            }
            else
            {
                // Outside quotes the separator ends the current field.
                items.Add(CollapseDoubledTextSeparators(itemBuilder.ToString(), textSeparator));
                itemBuilder.Length = 0;
            }
        }
        else if (currentChar == textSeparator)
        {
            if (!insideQuotes)
            {
                insideQuotes = true;
            }
            else if (i < line.Length - 1 && line[i + 1] != fieldSeparator)
            {
                // Not followed by a separator or end of line: keep the quote
                // so doubled quotes can be collapsed once the field is done.
                itemBuilder.Append(currentChar);
            }
            else
            {
                // Followed by a separator or end of line: closes the quotation.
                insideQuotes = false;
            }
        }
        else
        {
            itemBuilder.Append(currentChar);
        }
    }

    // Always emit the final field of a non-empty line, even when it is empty
    // (e.g. "a,b," or a trailing ""), so trailing empty columns are not lost.
    if (line.Length > 0)
    {
        items.Add(CollapseDoubledTextSeparators(itemBuilder.ToString(), textSeparator).Trim());
    }

    return items.ToArray();
}

/// <summary>
/// Replaces every doubled quote character in <paramref name="item"/> with a
/// single one; returns the text unchanged when no quote character is set.
/// </summary>
private static string CollapseDoubledTextSeparators(string item, char? textSeparator)
{
    if (textSeparator == null)
    {
        return item;
    }

    string single = textSeparator.Value.ToString();
    return item.Replace(single + single, single);
}
CSV 的问题在于它不是一种正则语言(regular language)。这意味着同一个字符的含义取决于字符流中它之前或之后的内容。正如您所看到的,使用 String.Split 方法进行拆分,无法正确识别位于引号转义字段内部的逗号。
虽然可以借助后行断言(lookbehind)和先行断言(lookahead)用正则表达式对 CSV 行进行粗略解析,但这类做法通常既容易出错又很慢,因为正则表达式是为正则语言设计的。更好的方法是用下面这样的简单函数逐个字符地解析:
using System;
using System.Collections.Generic;
using System.Text;
/// <summary>
/// Demonstrates a character-by-character parser for a single CSV line.
/// </summary>
class Program
{
    /// <summary>
    /// Parses a sample line containing a quoted field with an escaped quote
    /// and an embedded comma, then prints each resulting field on its own line.
    /// </summary>
    static void Main(string[] args)
    {
        IList<string> fields = ParseCSVLine("text,\"text with quote(\"\") and comma (,)\",text");
        foreach (string field in fields)
        {
            Console.WriteLine(field);
        }
    }

    /// <summary>
    /// Splits one CSV line into fields. Commas inside double-quoted sections
    /// are kept as field text, and a doubled quote inside a quoted section
    /// produces a single literal quote.
    /// </summary>
    /// <param name="csvLine">The CSV line to parse.</param>
    /// <returns>The fields in order; always at least one entry.</returns>
    public static IList<string> ParseCSVLine(string csvLine)
    {
        List<string> result = new List<string>();
        StringBuilder buffer = new StringBuilder(csvLine.Length);
        bool inQuotes = false;
        // Previous character seen; NUL means "nothing read yet".
        char lastChar = '\0';

        foreach (char c in csvLine)
        {
            switch (c)
            {
                case '"':
                    if (inQuotes)
                    {
                        // Tentatively close; a quote that follows immediately
                        // re-opens the section and is treated as an escape.
                        inQuotes = false;
                    }
                    else
                    {
                        // A quote directly after a quote is a doubled
                        // (escaped) quote: emit one literal quote.
                        if (lastChar == '"')
                        {
                            buffer.Append('"');
                        }
                        inQuotes = true;
                    }
                    break;

                case ',':
                    if (inQuotes)
                    {
                        buffer.Append(',');
                    }
                    else
                    {
                        // An unquoted comma ends the current field.
                        result.Add(buffer.ToString());
                        buffer.Clear();
                    }
                    break;

                default:
                    buffer.Append(c);
                    break;
            }

            lastChar = c;
        }

        // Whatever remains after the last comma is the final field.
        result.Add(buffer.ToString());
        return result;
    }
}
以上输出:
text
text with quote(") and comma (,)
text