正在从csv文件中读取

本文关键字:读取 文件 csv | 更新日期: 2023-09-27 18:22:48

用户输入是一个csv文件,其数据如下:

SiteID,Format,Title,Category,Quantity,StartPrice,BuyItNowPrice,Duration,Description,PicURL
US ,FixedPriceItem,PLease Do Not Bid.  TISSOT STAINLESS STEEL Women's Quartz Watch 1,14111,1,,341,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"
US ,FixedPriceItem,PLease Do Not Bid.  TISSOT STAINLESS STEEL Women's Quartz Watch 2,14111,1,,342,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"
US ,FixedPriceItem,PLease Do Not Bid.  TISSOT STAINLESS STEEL Women's Quartz Watch 3,14111,1,,343,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"
US ,FixedPriceItem,PLease Do Not Bid.  TISSOT STAINLESS STEEL Women's Quartz Watch 4,14111,1,,344,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"

以上就是该 CSV 文件中的数据。

为了读取这些数据,我尝试了以下代码:

/// <summary>
/// Reads a comma-separated file one physical line at a time and maps every
/// non-empty line after the header to a <c>TempBulkCSVItems</c> instance.
/// </summary>
/// <param name="fileName">Path of the CSV file to read.</param>
/// <returns>
/// One item per non-empty data line; an empty list when the file has no lines at all.
/// </returns>
/// <remarks>
/// Each physical line is parsed on its own, so a quoted field that spans several
/// lines (such as Description or PicURL in the sample data) is NOT reassembled
/// here. Its continuation lines are treated as separate rows.
/// </remarks>
private static List<TempBulkCSVItems> ProcessCSV(string fileName)
{
    List<TempBulkCSVItems> tbcil = new List<TempBulkCSVItems>();
    DataTable dt = new DataTable();

    // Split on a comma only when the rest of the line contains an even number
    // of double quotes, i.e. the comma is not inside a quoted section.
    Regex r = new Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");

    // The using block closes the reader even if parsing throws.
    using (StreamReader sr = new StreamReader(fileName))
    {
        // The first line is the header; it only determines how many columns exist.
        string line = sr.ReadLine();
        if (line == null)
        {
            // Empty file: nothing to parse (Regex.Split would throw on null).
            return tbcil;
        }

        string[] strArray = r.Split(line);
        // One anonymous column per header field, so data rows can be assigned by position.
        Array.ForEach(strArray, s => dt.Columns.Add(new DataColumn()));

        // Read each remaining line of the CSV file until the end of the stream.
        while ((line = sr.ReadLine()) != null)
        {
            // Drop the opening quote that follows a comma, then every remaining double quote.
            line = line.Replace(",\"", ",").Replace("\"", "");
            if (line.Length > 0)
            {
                DataRow row = dt.NewRow();
                // Assign the split values to the row by position.
                row.ItemArray = r.Split(line);
                dt.Rows.Add(row);

                TempBulkCSVItems tbci = new TempBulkCSVItems();
                tbci.SiteID = row[0].ToString();
                tbci.Format = row[1].ToString();
                tbci.Title = row[2].ToString();
                tbci.Category = row[3].ToString();
                tbci.Quantity = row[4].ToString();
                tbci.StartPrice = row[5].ToString();
                tbci.BuyItNowPrice = row[6].ToString();
                tbci.Duration = row[7].ToString();
                tbci.Description = row[8].ToString();
                tbci.PicURL = row[9].ToString();
                tbcil.Add(tbci);
            }
        }
    }

    // Return the list of parsed items.
    return tbcil;
}

我发现问题在于:我使用 ReadLine() 逐行读取文件,而 Description 和 PicURL 字段本身包含多行内容和多个逗号 (,),所以我的代码无法正确解析这些数据。这种情况下应该如何解决?

正在从csv文件中读取

您可以读取CSV文件中的值,如下所示:

// TextFieldParser lives in the Microsoft.VisualBasic.FileIO namespace
// (reference the Microsoft.VisualBasic assembly).
using (TextFieldParser parser = new TextFieldParser(@"c:\temp\test.csv"))
{
    parser.TextFieldType = FieldType.Delimited;
    parser.SetDelimiters(",");
    // Already the default; stated explicitly because the sample data relies on
    // quoted fields that contain commas and line breaks.
    parser.HasFieldsEnclosedInQuotes = true;
    while (!parser.EndOfData)
    {
        // Processing row
        string[] fields = parser.ReadFields();
        foreach (string field in fields)
        {
            // TODO: Process field
        }
    }
}

请参阅:使用C#读取CSV文件

下面这个扩展方法可以用来完成这类解析。

/// <summary>
/// Lazily groups physical lines into logical rows of fields, honouring a
/// single-character escape and a toggle-style quote character.
/// </summary>
/// <param name="lines">Physical lines, without their line terminators.</param>
/// <param name="delimiter">Character that separates fields.</param>
/// <param name="singleEscape">
/// Character that makes the next character literal. The escape character itself
/// is not copied to the output. When it is the last character of a line, the
/// row continues on the next line. Pass null to disable.
/// </param>
/// <param name="beginEndEscape">
/// Character that switches "quoted" mode on and off. Delimiters and line ends
/// inside quoted mode do not split. The quote characters themselves are kept
/// in the field text. Pass null to disable.
/// </param>
/// <returns>
/// One list of field strings per logical row. Lines joined into one row are
/// separated by the StringBuilder default line terminator. If the input ends
/// inside quoted mode or after a trailing escape, the partial row is still returned.
/// </returns>
public static IEnumerable<IList<string>> ParseDelimitedLines(
    this IEnumerable<string> lines,
    char delimiter,
    char? singleEscape,
    char? beginEndEscape)
{
    var fields = new List<string>();
    var buffer = new StringBuilder();
    bool escapePending = false;
    bool quoted = false;
    bool joinWithNextLine = false;

    foreach (var line in lines)
    {
        // An escape never carries over as "pending" into the next line's first character.
        escapePending = false;
        if (joinWithNextLine)
        {
            buffer.AppendLine();
            joinWithNextLine = false;
        }

        foreach (char ch in line)
        {
            // True when the previous character was an active escape, which makes this one literal.
            bool isLiteral = escapePending;

            if (!isLiteral && ch == beginEndEscape)
            {
                quoted = !quoted;
            }

            if (!isLiteral && !quoted && ch == delimiter)
            {
                // Field boundary: the delimiter itself is not stored.
                fields.Add(buffer.ToString());
                buffer.Clear();
                continue;
            }

            escapePending = !isLiteral && ch == singleEscape;
            if (escapePending)
            {
                // An active escape character is consumed, not emitted.
                continue;
            }

            buffer.Append(ch);
        }

        if (quoted || escapePending)
        {
            // The row is not finished yet; the next line is appended to the current field.
            joinWithNextLine = true;
            continue;
        }

        fields.Add(buffer.ToString());
        yield return fields;
        fields = new List<string>();
        buffer.Clear();
    }

    // Input ended in the middle of a row: emit what has been collected so far.
    if (quoted || escapePending)
    {
        fields.Add(buffer.ToString());
        yield return fields;
    }
}

下面这段示例代码

// Sample input. Inside a verbatim string, "" is one double quote and \ is a
// literal backslash, which is used here as the single-character escape.
string text = @"This,is,simple,stuff
Now,""it,gets"",harder
But,wait\,there,\""is,more\""
And,this\
way,to,do,newline
And,""another
way"",fin";
int r = 0;
// Split on both "\r\n" and "\n": the line breaks inside a verbatim literal follow
// the source file's line endings, which need not match Environment.NewLine.
foreach (
    var row in text.Split(new[] { "\r\n", "\n" }, StringSplitOptions.None).ParseDelimitedLines(',', '\\', '"'))
{
    Console.WriteLine("Row " + ++r);
    int c = 0;
    foreach (var item in row)
    {
        Console.WriteLine("Column " + ++c + ": <<" + item + ">>");
    }
}

将输出

Row 1
Column 1: <<This>>
Column 2: <<is>>
Column 3: <<simple>>
Column 4: <<stuff>>
Row 2
Column 1: <<Now>>
Column 2: <<"it,gets">>
Column 3: <<harder>>
Row 3
Column 1: <<But>>
Column 2: <<wait,there>>
Column 3: <<"is>>
Column 4: <<more">>
Row 4
Column 1: <<And>>
Column 2: <<this
way>>
Column 3: <<to>>
Column 4: <<do>>
Column 5: <<newline>>
Row 5
Column 1: <<And>>
Column 2: <<"another
way">>
Column 3: <<fin>>

你可以像下面这样使用它:

// ',' separates fields, '\\' escapes the next character, '"' toggles quoted mode.
var rows = File.ReadLines("yourFile.txt").ParseDelimitedLines(',', '\\', '"');
foreach(var row in rows)
{
    string column1 = row[0];
    // ... read the remaining columns the same way
}