如何通过 LINQ 直接从 CSV 文件中查询和提取数据

本文关键字:查询 提取 数据 文件 CSV LINQ 何通过 | 更新日期: 2023-09-27 17:56:42

我几乎不使用LINQ,在LINQ中也不好。

我开发了显示CSR明智和时间间隔明智数据的输出像下面一样

+----------+--------------------+----------+----------+---------------+-------------+
| CSR Name |   Time Interval    | Incoming | OutGoing | Call Transfer | Missed Call |
+----------+--------------------+----------+----------+---------------+-------------+
| ACCTS    | 14:30:01--15:00:00 |        0 |        0 |             0 |           0 |
| ACCTS    | 15:00:01--15:30:00 |        0 |        0 |             1 |           0 |
| ACCTS    | 13:30:01--14:00:00 |        5 |        2 |             0 |           2 |
| ACCTS    | 14:00:01--14:30:00 |        1 |        0 |             0 |           0 |
| ACCTS    | 16:30:01--17:00:00 |        0 |        3 |             0 |           0 |
| ACCTS    | 17:00:01--17:30:00 |        4 |        0 |             1 |           2 |
| Christy  | 14:30:01--15:00:00 |        1 |        5 |             0 |           0 |
| Christy  | 15:00:01--15:30:00 |        2 |        7 |             1 |           0 |
| Christy  | 13:30:01--14:00:00 |        0 |       11 |             0 |           2 |
| SUZY     | 14:30:01--15:00:00 |        1 |        0 |             0 |           0 |
| SUZY     | 15:00:01--15:30:00 |        0 |        0 |             2 |           2 |
| SUZY     | 13:30:01--14:00:00 |        2 |        1 |             0 |           0 |
+----------+--------------------+----------+----------+---------------+-------------+

在这里,我给出了我的 C# 程序代码,它带来了上述输出。

private void FetchData(string tableName)
{
    TimeSpan tsStart, tsEnd;
    string strSql = "";
    srcTable = new DataTable();
    srcTable.TableName = "data";
    srcTable.Columns.Add("CSR Name");
    srcTable.Columns.Add("Time");
    srcTable.Columns.Add("Incoming Calls");
    srcTable.Columns.Add("Outgoing Calls");
    srcTable.Columns.Add("Calls Transfer");
    srcTable.Columns.Add("Missed Calls");
    DataRow dr = null;
    int incall = 0, outcall = 0, transfercall = 0, totmisscall = 0;
    bool flag = true;
    string StartTime = "", EndTime = "";
    string DayOfWeek = DateTime.Parse(dtVal.Value.ToString()).ToString("ddd");
    string st_Time = string.Empty, end_Time = string.Empty;
    
    st_Time="08:00:00";
    end_Time="17:30:00";
    
    // GetAllCSR function return datatable with all distinct CSR name excluding name start with VM and Voice Mail
    DataTable dtCSRName = GetAllCSR(tableName,txtCSRName.Text);
    string strCSRName = "";
    if (dtCSRName != null)
    {
        foreach (DataRow row in dtCSRName.Rows)
        {
            if (st_Time.Trim() != "" && end_Time.Trim() != "")
            {
                tsStart = new TimeSpan(DateTime.Parse(st_Time.Trim()).Hour, DateTime.Parse(st_Time.Trim()).Minute, DateTime.Parse(st_Time.Trim()).Second);
                tsEnd = new TimeSpan(DateTime.Parse(end_Time.Trim()).Hour, DateTime.Parse(end_Time.Trim()).Minute, DateTime.Parse(end_Time.Trim()).Second);
            }
            else
            {
                tsStart = new TimeSpan(09, 00, 00);
                tsEnd = new TimeSpan(17, 30, 0);
            }
    
    // iterate in all CSR name
            if (row["party1name"] != DBNull.Value)
            {
                strCSRName = row["party1name"].ToString();
                
                // iterate in all time interval like 08:00:00 to 08:30:00, 08:30:00 to 09:00:00, 09:00:00 to 09:30:00.....17:00:00 to 17:30:00
                while (tsStart <= tsEnd)
                {
                    if (!flag)
                    {
                        tsStart = new TimeSpan(tsStart.Hours, tsStart.Minutes, int.Parse("01"));
                    }
                    flag = false;
                    StartTime = tsStart.ToString();
                    tsStart = tsStart.Add(new TimeSpan(00, 30, 00));
                    EndTime = (tsStart.Hours >= 10 ? tsStart.Hours.ToString() : ("0" + tsStart.Hours.ToString())) + ":" + (tsStart.Minutes >= 10 ? tsStart.Minutes.ToString() : ("0" + tsStart.Minutes.ToString())) + ":00";
                    strSql = "select (select count(*) as incoming from " + tableName + " where direction='I' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
                    strSql = strSql + "and Is_Internal=0 and continuation=0 and RIGHT(convert(varchar,[call duration]),8)<> '00:00:00' ";
                    strSql = strSql + "and party1name='" + strCSRName + "') as incoming, ";
                    strSql = strSql + "(select count(*) as OutGoing from " + tableName + " ";
                    strSql = strSql + "where direction='O' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
                    strSql = strSql + "and Is_Internal=0 and continuation=0  and party1name not in ('Voice Mail') ";
                    strSql = strSql + "and party1name='" + strCSRName + "') as OutGoing, ";
                    strSql = strSql + "(select count(*) as CallTransfer from " + tableName + " ";
                    strSql = strSql + "where continuation=1  and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
                    strSql = strSql + "and RIGHT(convert(varchar,[call duration]),8)<> '00:00:00' and party1name not in ('Voice Mail') ";
                    strSql = strSql + "and party1name='" + strCSRName + "') as CallTransfer; ";
                    strSql = strSql + "SELECT count(*) as UnansweredCalls_DuringBusinessHours from ";
                    strSql = strSql + tableName + " where direction='I' and " + Environment.NewLine;
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
                    strSql = strSql + "and RIGHT(convert(varchar,[call duration]),8)= '00:00:00' and [Ring duration]>0 " + Environment.NewLine;
                    strSql = strSql + "and party1name='" + strCSRName + "'" + Environment.NewLine;
                    if (Business.CurrentCountry.CountryCode == "US" || Business.CurrentCountry.CountryCode == "MX" || Business.CurrentCountry.CountryCode == "ES" || Business.CurrentCountry.CountryCode == "NL" || Business.CurrentCountry.CountryCode == "PL")
                    {
                        strSql = strSql + " and Park_Time=0";
                    }
                    strSql = strSql + Environment.NewLine;
                    DataSet oDS = Common.GetDataSet(strSql, "");
                    if (oDS.Tables.Count > 0)
                    {
                        if (oDS.Tables[0].Rows.Count > 0)
                        {
                            dr = srcTable.NewRow();
                            dr[0] = strCSRName;
                            dr[1] = StartTime + "--" + EndTime;
                            if (oDS.Tables[0].Rows[0]["incoming"] != DBNull.Value)
                            {
                                dr[2] = oDS.Tables[0].Rows[0]["incoming"].ToString();
                                incall = incall + int.Parse(oDS.Tables[0].Rows[0]["incoming"].ToString());
                            }
                            else
                            {
                                dr[2] = "0";
                            }
                            if (oDS.Tables[0].Rows[0]["OutGoing"] != DBNull.Value)
                            {
                                dr[3] = oDS.Tables[0].Rows[0]["OutGoing"].ToString();
                                outcall = outcall + int.Parse(oDS.Tables[0].Rows[0]["OutGoing"].ToString());
                            }
                            else
                            {
                                dr[3] = "0";
                            }
                            if (oDS.Tables[0].Rows[0]["CallTransfer"] != DBNull.Value)
                            {
                                dr[4] = oDS.Tables[0].Rows[0]["CallTransfer"].ToString();
                                transfercall = transfercall + int.Parse(oDS.Tables[0].Rows[0]["CallTransfer"].ToString());
                            }
                            else
                            {
                                dr[4] = "0";
                            }
                        }
                        if (oDS.Tables[1].Rows.Count > 0)
                        {
                            if (oDS.Tables[1].Rows[0]["UnansweredCalls_DuringBusinessHours"] != DBNull.Value)
                            {
                                dr[5] = oDS.Tables[1].Rows[0]["UnansweredCalls_DuringBusinessHours"].ToString();
                                totmisscall = totmisscall + int.Parse(oDS.Tables[1].Rows[0]["UnansweredCalls_DuringBusinessHours"].ToString());
                            }
                        }
                        srcTable.Rows.Add(dr);
                    }
                }
            }
        }
    }
    txtIncoming.Text = incall.ToString();
    txtOutGoing.Text = outcall.ToString();
    txtCallTransfer.Text = transfercall.ToString();
    txtMissCall.Text = totmisscall.ToString();
    //srcTable.DefaultView.Sort = "[CSR Name], [Time]";
    DataSet oDs = new DataSet();
    oDs.Tables.Add(srcTable);
    this.outlookGrid1.ExpandIcon = global::BBA.Properties.Resources.Collapse;
    this.outlookGrid1.CollapseIcon = global::BBA.Properties.Resources.Expand;
    
    if (srcTable.Rows.Count > 0)
    {
        outlookGrid1.BindData(oDs, "data");
        View = "BoundCategory";
        DataGridViewCellEventArgs evt = new DataGridViewCellEventArgs(0, -1);
        object sender = new object();
        dgResult_CellClick(sender, evt);
        outlookGrid1.ExpandAll();
    }
    else
    {
        outlookGrid1.Rows.Clear();
        MessageBox.Show("No data found");
    }
}

那我在做什么?我首先读取一个 CSV 文件,然后将 CSV 文件数据插入数据库表,然后使用条件获取这些数据并在 2 个循环中形成实际数据。

上面的代码和过程工作正常但速度很慢。我被告知只使用 LINQ 来读取 csf 文件数据并形成输出。我在 LINQ 中很弱,所以需要您的帮助来形成正确的 linq 查询以获得准确的输出。

通过这种方式,到目前为止,我尝试形成 LINQ 查询,但老实说无法正确安排它。这是我的 linq 查询,需要添加更多内容才能成为完整代码。在这里,我正在粘贴我的工作linq查询,该查询很慢,但不会抛出错误。

void Main()
{
    var csvlines = File.ReadAllLines(@"M:'smdr(backup08-06-2015).csv");
    var csvLinesData = csvlines.Skip(1).Select(l => l.Split(',').ToArray());
    
    var users = csvLinesData.Select(data => new User
    {
        CSRName = data[12],
        Incoming = csvLinesData.Count(w => w[4] == "I" 
                                        &&  w[8] == "0"
                                        &&  w[10] == "0"
                                        &&  w[1].ToString().Substring(w[1].ToString().Length-8)!="00:00:00"),
                                        
        outgoing = csvLinesData.Count(w => w[4] == "O"
                                        && w[8] == "0"
                                        && w[10] == "0"
                                        &&  w[1].ToString().Substring(w[1].ToString().Length-8)!="00:00:00"),
    })
    .Where(u => !u.CSRName.Contains("VM") && u.CSRName != "Voice Mail").OrderBy(u=> u.CSRName)
    .ToList();
    users.Dump();
}
class User
{
    public string CSRName;
    public int outgoing;
    public int Incoming;
    
}

因此,我请求所有 LINQ 专家查看我的 C# 代码并帮助我编写完整的 linq 查询,该查询无需循环迭代即可带来相同的输出。

这是我的 CSV 文件 URL http://s000.tinyupload.com/?file_id=22225722172003614028任何人都可以从中下载。 只需点击测试.csv文件链接,如果有人感兴趣。 谢谢

如何通过 LINQ 直接从 CSV 文件中查询和提取数据

我个人并不认为 Linq 会比一个格式良好的 for 循环快得多。(除非您正在利用PLinq)我认为更大的问题是您的数据模型可能需要不同的设计。我要做的是创建一个仅表示行及其重要详细信息的新类。

class CallInformation {
    public DateTime CallStart { get; private set; }
    public Boolean IsOutGoing {get; private set; }
    public String CSRName {get; private set; }
    public int InComingCount { get; set; }
    public int OutgoingCount { get; set; }
    public CallInformation(String[] parts) {
        IsOutGoing = parts[4] == "O";
        CallStart = DateTime.Parse(parts[0]);
        CSRName = parts[12];
    }
    //... Continue with the important properties
}

现在 IO 流也可以提高性能,但为了清楚起见,我将排除它。在进行排序和组合项目时通常比搜索要快得多。如果可以避免繁重的字符串比较/创建,则通常可以大大提高性能。

从你的这一部分

Dictionary<String, CallInformation> callDictionary = new Dictionary<String,CallInformation>();
var csvLinesData = csvlines.Skip(1).Select(l => l.Split(',').ToArray());
foreach(string[] parts in csvLinesData) {
     //Then place this call into a sortedlist or Dictionary. 
     //Here i am counting up the incoming and outgoing calls. 
     if(callDictionary.containsKey(parts[12])) {
         if(parts[4] == "I") {
             callDictionary[parts[12]].InComingCount++;
         } else { 
             callDictionary[parts[12]].OutGoingCount++; 
         }
     } else {
         //Construct your new object based on this row.
         CallInformation call = new CallInformation(parts);
         callDictionary.add(call.CSRName, call);
     }
}

排序列表可以根据"开始时间"或其他字段进行排序。 控制您的输出。我个人很难理解您正在分组和处理的确切方法。我个人认为,您的表现是基于这样一个事实,即您正在查询数据集以获取永远不会存在的信息。但是请让我知道您如何对信息进行分组。我认为这更多的是您将如何对信息进行分组。

如果您尝试根据调用的持续时间来设置调用,那么您可以使用 DateTime 部分来确定将其放入字典中的哪个"存储桶"中,而不是我上面所做的 CSR 名称(通过调整call.CSRName,containsKey[12]),您可以创建持续时间字符串。通过使用日期时间对象,您可以基于时差而不是字符串比较进行比较。

若要从此集合中提取数据,实际上需要使用 Linq 来执行该

像这样的查询

 callDictionary.Where((keyValuePair) => { return keyValuePair.Value.CSRName != "VM" && keyValuePair.Value.CSRName != "VoiceMail"; }).OrderBy(o.Value.StartTime);

OrderBy 子句实际上取决于您的外观 由于我们已经"反转"了事物,因此我们可以按开始时间排序,为了获得更精细的控制,您可能希望StartTime是 TimeDuration 的开始。 任何 1 个特定项目,它将大致正确。您还可以添加另一个 OrderBy 以按CSRName对其进行排序。

类似于下面的代码片段:

private static IEnumerable<MyData> Parse(String csvFile, bool hasHeaderLine = false, char separator = ',') {
    return Read(csvFile)
        .Skip(hasHeaderLine ? 1 : 0)
        .Select(line => line.Split(separator, StringSplitOptions.None))
        .Select(parts => new MyData(
            parts[0],
            DateTime.Parse(parts[1]),
            TimeSpan.Parse(parts[2]),
            uint.Parse(parts[3]),
            uint.Parse(parts[4]),
            uint.Parse(parts[5]),
            uint.Parse(parts[6])
        ));
}
private static IEnumerable<String> Read(String csvFile) {
    using (var reader = new StreamReader(csvFile)) {
        String line;
        while ((line = reader.ReadLine()) != null)
            yield return line;
    }
}
private class MyData {
    private readonly string name;
    private readonly DateTime startTime;
    private readonly TimeSpan duration;
    private readonly uint incomingCalls, outgoingCalls, callTransfers, missedCalls;
    
    public MyData(string name, DateTime startTime, TimeSpan duration, uint incomingCalls, uint outgoingCalls, uint callTransfers, uint missedCalls) {
        this.name = name;
        this.startTime = startTime;
        this.duration = duration;
        this.incomingCalls = incomingCalls;
        this.outgoingCalls = outgoingCalls;
        this.callTransfers = callTransfers;
        this.missedCalls = missedCalls;
    }
    public string Name => this.name;
    public DateTime StartTime => this.startTime;
    public TimeSpan Duration => this.duration;
    public uint IncomingCalls => this.incomingCalls;
    public uint OutgoingCalls => this.outgoingCalls;
    public uint CallTransfers => this.callTransfers;
    public uint MissedCalls => this.missedCalls;
    public DateTime EndTime => this.startTime + this.duration;
}