CSV 到 XML 的转换

本文关键字:转换 XML CSV | 更新日期: 2023-09-27 18:34:25

程序应该以 CSV 文件作为输入,并输出 XML。代码如下:

    /// <summary>
    /// Reads <c>data.csv</c>, treats its first line as the column headers and writes every
    /// following line to <c>Output.xml</c> as one element per column, wrapped in a start/end
    /// tag named after the CSV file (without the <c>.csv</c> extension).
    /// </summary>
    /// <remarks>
    /// Lines are split on every comma, so quoted fields that themselves contain commas are
    /// not handled here. Each data row is wrapped in its own root tag, as in the original.
    /// </remarks>
    private static void ConvertCSVToXML()
    {
        string[] source = File.ReadAllLines("data.csv");
        string RootNameStartTag = "<" + Path.GetFileName("\\CSVTOXML\\CSV-XML\\bin\\Debug\\data.csv") + ">";
        RootNameStartTag = RootNameStartTag.Replace(".csv", "");
        string RootNameEndTag = RootNameStartTag.Insert(1, "/");

        // Dispose the writer deterministically; without this the buffered output may never
        // be flushed to Output.xml and the file handle stays open.
        using (StreamWriter writeFile = new StreamWriter("Output.xml"))
        {
            // An empty input has no header row; leave an empty output file instead of
            // failing on source[0].
            if (source.Length == 0)
            {
                return;
            }

            string[] headers = source[0].Split(',');

            // Skip only the first (header) line. Filtering by value would also drop any
            // data row whose text happens to equal the header line.
            foreach (string line in source.Skip(1))
            {
                writeFile.WriteLine(RootNameStartTag);
                string[] fields = line.Split(',');
                for (int j = 0; j < fields.Length; j++)
                {
                    // XElement takes care of escaping the field value.
                    writeFile.Write(new XElement(headers[j], fields[j]));
                    writeFile.WriteLine();
                }
                writeFile.WriteLine(RootNameEndTag);
            }
        }
    }

上述代码的唯一问题是它按逗号 (,) 拆分数据,因此如果 CSV 中有这样一行:A,"DEF,XYZ,GHI","FDNFB,dfhjd"

拆分结果就会是 field[0]=A、field[1]="DEF、field[2]=XYZ、field[3]=GHI"、field[4]="FDNFB、field[5]=dfhjd"。

但我需要的输出是 field[0]=A、field[1]=DEF,XYZ,GHI、field[2]=FDNFB,dfhjd。请帮助我按上述模式进行拆分。

CSV 到 XML 的转换

.NET 内置的 TextFieldParser 可以处理带引号的字段。它位于 Microsoft.VisualBasic.FileIO 命名空间中,但可以在 C# 或任何其他 .NET 语言中使用。以下是测试代码:

/// <summary>
/// Demonstrates that <c>TextFieldParser</c> keeps commas that appear inside quoted fields.
/// </summary>
public static class TextFieldParserTest
{
    /// <summary>
    /// Parses a one-line CSV sample with two quoted fields and writes the field count,
    /// followed by each field, to the debug output.
    /// </summary>
    public static void Test()
    {
        const string csv = @"""DEF,XYZ,GHI"",""FDNFB,dfhjd""";

        using (StringReader reader = new StringReader(csv))
        using (var parser = new TextFieldParser(reader))
        {
            parser.SetDelimiters(",");
            // Set explicitly for clarity; the original sample notes this is already the default.
            parser.HasFieldsEnclosedInQuotes = true;

            while (!parser.EndOfData)
            {
                string[] row = parser.ReadFields();
                Debug.WriteLine(row.Length);
                for (int index = 0; index < row.Length; index++)
                {
                    Debug.WriteLine(row[index]);
                }
            }
        }
    }
}

提供以下输出:

2
DEF,XYZ,GHI
FDNFB,dfhjd

请参阅以下解决方案 [ 当 CSV 同时包含字符和数字数据时,将 CSV 转换为 XML ]

他建议使用基于正则表达式的 SplitCSV(line) 来解析 CSV 行,而不是使用 line.Split(",")。

div class="answers">

Cinchoo ETL - 一个开源库简化了CSV到XML文件转换的过程。

对于示例 CSV:

Id, Name, City
1, Tom, NY
2, Mark, NJ
3, Lou, FL
4, Smith, PA
5, Raj, DC

使用下面的代码,您可以生成 Xml

// Sample CSV text; the first line carries the column names.
string csv = @"Id, Name, City
    1, Tom, NY
    2, Mark, NJ
    3, Lou, FL
    4, Smith, PA
    5, Raj, DC";

// The XML writer emits into this builder.
StringBuilder xmlOutput = new StringBuilder();

using (var reader = ChoCSVReader.LoadText(csv).WithFirstLineHeader())
using (var writer = new ChoXmlWriter(xmlOutput)
    .Configure(c => c.RootName = "Emps")
    .Configure(c => c.NodeName = "Emp"))
{
    // Hand the CSV reader to the XML writer.
    writer.Write(reader);
}

Console.WriteLine(xmlOutput.ToString());

输出 XML:

<Emps>
  <Emp>
    <Id>1</Id>
    <Name>Tom</Name>
    <City>NY</City>
  </Emp>
  <Emp>
    <Id>2</Id>
    <Name>Mark</Name>
    <City>NJ</City>
  </Emp>
  <Emp>
    <Id>3</Id>
    <Name>Lou</Name>
    <City>FL</City>
  </Emp>
  <Emp>
    <Id>4</Id>
    <Name>Smith</Name>
    <City>PA</City>
  </Emp>
  <Emp>
    <Id>5</Id>
    <Name>Raj</Name>
    <City>DC</City>
  </Emp>
</Emps>

查看代码项目文章以获取更多帮助。

免责声明:我是这个库的作者。

这似乎是一个不错的选择,也许可以解决您的问题:http://msdn.microsoft.com/en-GB/library/bb387090.aspx

// Write the sample data out as a CSV file.
string csvString = @"GREAL,Great Lakes Food Market,Howard Snyder,Marketing Manager,(503) 555-7555,2732 Baker Blvd.,Eugene,OR,97403,USA
HUNGC,Hungry Coyote Import Store,Yoshi Latimer,Sales Representative,(503) 555-6874,City Center Plaza 516 Main St.,Elgin,OR,97827,USA
LAZYK,Lazy K Kountry Store,John Steel,Marketing Manager,(509) 555-7969,12 Orchestra Terrace,Walla Walla,WA,99362,USA
LETSS,Let's Stop N Shop,Jaime Yorres,Owner,(415) 555-5938,87 Polk St. Suite 5,San Francisco,CA,94117,USA";
File.WriteAllText("cust.csv", csvString);

// Load it back, one array entry per line.
string[] source = File.ReadAllLines("cust.csv");

// Project every line into a <Customer> element: the first column becomes the
// CustomerID attribute, the last five columns are grouped under <FullAddress>.
XElement cust = new XElement("Root",
    source
        .Select(line => line.Split(','))
        .Select(columns => new XElement("Customer",
            new XAttribute("CustomerID", columns[0]),
            new XElement("CompanyName", columns[1]),
            new XElement("ContactName", columns[2]),
            new XElement("ContactTitle", columns[3]),
            new XElement("Phone", columns[4]),
            new XElement("FullAddress",
                new XElement("Address", columns[5]),
                new XElement("City", columns[6]),
                new XElement("Region", columns[7]),
                new XElement("PostalCode", columns[8]),
                new XElement("Country", columns[9])))));

Console.WriteLine(cust);

此代码生成以下输出:

Xml
    <Root>
      <Customer CustomerID="GREAL">
        <CompanyName>Great Lakes Food Market</CompanyName>
        <ContactName>Howard Snyder</ContactName>
        <ContactTitle>Marketing Manager</ContactTitle>
        <Phone>(503) 555-7555</Phone>
        <FullAddress>
          <Address>2732 Baker Blvd.</Address>
          <City>Eugene</City>
          <Region>OR</Region>
          <PostalCode>97403</PostalCode>
          <Country>USA</Country>
        </FullAddress>
      </Customer>
      <Customer CustomerID="HUNGC">
        <CompanyName>Hungry Coyote Import Store</CompanyName>
        <ContactName>Yoshi Latimer</ContactName>
        <ContactTitle>Sales Representative</ContactTitle>
        <Phone>(503) 555-6874</Phone>
        <FullAddress>
          <Address>City Center Plaza 516 Main St.</Address>
          <City>Elgin</City>
          <Region>OR</Region>
          <PostalCode>97827</PostalCode>
          <Country>USA</Country>
        </FullAddress>
      </Customer>
      <Customer CustomerID="LAZYK">
        <CompanyName>Lazy K Kountry Store</CompanyName>
        <ContactName>John Steel</ContactName>
        <ContactTitle>Marketing Manager</ContactTitle>
        <Phone>(509) 555-7969</Phone>
        <FullAddress>
          <Address>12 Orchestra Terrace</Address>
          <City>Walla Walla</City>
          <Region>WA</Region>
          <PostalCode>99362</PostalCode>
          <Country>USA</Country>
        </FullAddress>
      </Customer>
      <Customer CustomerID="LETSS">
        <CompanyName>Let's Stop N Shop</CompanyName>
        <ContactName>Jaime Yorres</ContactName>
        <ContactTitle>Owner</ContactTitle>
        <Phone>(415) 555-5938</Phone>
        <FullAddress>
          <Address>87 Polk St. Suite 5</Address>
          <City>San Francisco</City>
          <Region>CA</Region>
          <PostalCode>94117</PostalCode>
          <Country>USA</Country>
        </FullAddress>
      </Customer>
    </Root>

编辑:我之前没有注意到您提到的第一个问题。请先对 CSV 做一些预处理,替换列分隔符。

使用这个:

    // Rewrite the CSV into a temporary sibling file, passing every line through
    // ReplaceLine, then swap the temporary file in place of the original.
    var filePath = "Your csv file path here including name";
    var newFilePath = filePath + ".tmp";

    using (StreamReader reader = new StreamReader(filePath))
    using (StreamWriter writer = new StreamWriter(newFilePath, false, Encoding.ASCII))
    {
        // lineNumber is the zero-based index of the line being converted.
        for (int lineNumber = 0; !reader.EndOfStream; lineNumber++)
        {
            writer.WriteLine(ReplaceLine(reader.ReadLine(), lineNumber));
        }
    }

    // Both streams are closed at this point, so the files can be replaced.
    File.Delete(filePath);
    File.Move(newFilePath, filePath);

    Dts.TaskResult = (int)ScriptResults.Success;
}
/// <summary>
/// Replaces the comma column separators that touch a quoted field with a pipe
/// (<c>|</c>), removing the adjacent quote characters in the process.
/// </summary>
/// <param name="Line">One raw line of the CSV file.</param>
/// <param name="LineNumber">Zero-based index of the line; not used by this implementation.</param>
/// <returns>The line with the affected separators replaced by <c>|</c>.</returns>
/// <remarks>
/// Only separators next to a quote are rewritten: a comma between two unquoted fields is
/// left untouched, and a quote at the very start or end of the line is not removed.
/// </remarks>
protected string ReplaceLine(string Line, int LineNumber)
{
    // The escaped quotes in these literals were mangled into apostrophes; restored here.
    // Order matters: handle the separator between two quoted fields (",") first ...
    var newLine = Line.Replace("\",\"", "|");
    // ... then a separator in front of a quoted field (,") ...
    newLine = newLine.Replace(",\"", "|");
    // ... and finally a separator right after a quoted field (",).
    newLine = newLine.Replace("\",", "|");
    return newLine;
}

我在处理 Excel 生成的 CSV 文件时遇到了完全相同的问题。原因在于(这其实是合理的做法):如果字段内容包含分隔符,该内容就会像您的示例那样被引号括起来(如果内容本身还包含引号字符,则引号会被写成两个)。

我也没有使用现成的解析器,而是按如下方式实现它:

    /// <summary>
    /// Splits a single CSV line into its fields, keeping field separators that appear
    /// inside a quoted section and collapsing doubled quote characters into one.
    /// </summary>
    /// <param name="line">The line to split.</param>
    /// <param name="fieldSeparator">Character that separates two fields.</param>
    /// <param name="textSeparator">
    /// Quote character that encloses fields containing the separator, or <c>null</c> to
    /// disable quote handling.
    /// </param>
    /// <returns>
    /// The fields in order. An empty line yields an empty array; otherwise the last field is
    /// always returned, even when it is empty (for example <c>"a,b,"</c> yields three fields).
    /// </returns>
    private string[] ParseLine(string line, char fieldSeparator, char? textSeparator)
    {
        List<string> items = new List<string>();
        StringBuilder itemBuilder = new StringBuilder();
        bool textSeparatorFound = false;
        for (int i = 0; i < line.Length; i++)
        {
            char currentChar = line[i];
            if (currentChar == fieldSeparator)
            {
                if (textSeparatorFound)
                {
                    // Inside an open quote the separator is ordinary field content.
                    itemBuilder.Append(currentChar);
                }
                else
                {
                    // Outside of quotes the separator finishes the current field.
                    items.Add(UnescapeTextSeparators(itemBuilder.ToString(), textSeparator));
                    itemBuilder.Length = 0;
                }
            }
            else if (currentChar == textSeparator)
            {
                if (!textSeparatorFound)
                {
                    // No quotation is open yet, so this quote opens one.
                    textSeparatorFound = true;
                }
                else if (i < line.Length - 1 && line[i + 1] != fieldSeparator)
                {
                    // A quote that is not followed by the field separator or the end of the
                    // line is kept as content; doubled quotes are collapsed once the field
                    // is complete.
                    itemBuilder.Append(currentChar);
                }
                else
                {
                    // Followed by the separator or the end of the line: closes the quotation.
                    textSeparatorFound = false;
                }
            }
            else
            {
                itemBuilder.Append(currentChar);
            }
        }

        // Always emit the last field of a non-empty line. The previous check
        // (itemBuilder.Length > 0) silently dropped a trailing empty field, which shifted
        // the column count for lines such as "a,b," or a,"".
        if (line.Length > 0)
        {
            // As before, only the last field is trimmed.
            items.Add(UnescapeTextSeparators(itemBuilder.ToString(), textSeparator).Trim());
        }
        return items.ToArray();
    }

    /// <summary>
    /// Collapses doubled quote characters in <paramref name="item"/> into a single one;
    /// returns the item unchanged when no quote character is configured.
    /// </summary>
    private static string UnescapeTextSeparators(string item, char? textSeparator)
    {
        if (textSeparator == null)
        {
            return item;
        }
        string single = textSeparator.Value.ToString();
        return item.Replace(single + single, single);
    }

CSV 的问题在于它不是一种正则语言(regular language)。这意味着同一个字符的含义会随着它在字符流中前后的内容而不同。正如您所看到的,使用 String.Split 方法进行拆分时,无法正确识别被引号括起来的字段中的逗号。

虽然可以借助正则表达式的后顾(lookbehind)和前瞻(lookahead)技术对 CSV 行做粗略的解析,但这类做法通常既容易出错又很慢,因为正则表达式是为正则语言设计的。更好的方法是用下面这样的简单函数逐个字符地解析:

using System;
using System.Collections.Generic;
using System.Text;
    /// <summary>
    /// Console sample that splits one CSV line with a small character-by-character parser.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Parses a sample line containing a quoted field with an embedded comma and a
        /// doubled quote, then prints every field on its own line.
        /// </summary>
        static void Main(string[] args)
        {
            // The escaped quotes in this literal were mangled into apostrophes; restored here.
            IList<string> fields = ParseCSVLine("text,\"text with quote(\"\") and comma (,)\",text");
            foreach (string field in fields)
            {
                Console.WriteLine(field);
            }
        }

        /// <summary>
        /// Splits one CSV line into fields. Commas inside double quotes are kept as field
        /// content, and a doubled quote inside a quoted field produces one literal quote.
        /// </summary>
        /// <param name="csvLine">The CSV line to split.</param>
        /// <returns>The fields in order; always contains at least one entry.</returns>
        public static IList<string> ParseCSVLine(string csvLine)
        {
            List<string> result = new List<string>();
            StringBuilder buffer = new StringBuilder(csvLine.Length);
            bool inQuotes = false;
            // Previous character seen; '\0' means "none yet".
            char lastChar = '\0';
            foreach (char c in csvLine)
            {
                switch (c)
                {
                    case '"':
                        if (inQuotes)
                        {
                            // Tentatively close the quoted section; it is reopened below
                            // if the very next character turns out to be another quote.
                            inQuotes = false;
                        }
                        else
                        {
                            // A quote directly after a quote is a doubled (escaped) quote:
                            // emit one literal quote and continue inside the quoted section.
                            if (lastChar == '"')
                            {
                                buffer.Append('"');
                            }
                            inQuotes = true;
                        }
                        break;
                    case ',':
                        if (inQuotes)
                        {
                            // Commas inside quotes belong to the field.
                            buffer.Append(',');
                        }
                        else
                        {
                            // Outside quotes a comma ends the current field.
                            result.Add(buffer.ToString());
                            buffer.Clear();
                        }
                        break;
                    default:
                        buffer.Append(c);
                        break;
                }
                lastChar = c;
            }
            // Whatever remains in the buffer is the last field (possibly empty).
            result.Add(buffer.ToString());
            return result;
        }
    }

以上输出:

text
text with quote(") and comma (,)
text