如何通过 LINQ 直接从 CSV 文件中查询和提取数据
本文关键字:查询 提取 数据 文件 CSV LINQ 何通过 | 更新日期: 2023-09-27 17:56:42
我几乎不使用LINQ,在LINQ中也不好。
我开发了显示CSR明智和时间间隔明智数据的输出像下面一样
+----------+--------------------+----------+----------+---------------+-------------+
| CSR Name | Time Interval | Incoming | OutGoing | Call Transfer | Missed Call |
+----------+--------------------+----------+----------+---------------+-------------+
| ACCTS | 14:30:01--15:00:00 | 0 | 0 | 0 | 0 |
| ACCTS | 15:00:01--15:30:00 | 0 | 0 | 1 | 0 |
| ACCTS | 13:30:01--14:00:00 | 5 | 2 | 0 | 2 |
| ACCTS | 14:00:01--14:30:00 | 1 | 0 | 0 | 0 |
| ACCTS | 16:30:01--17:00:00 | 0 | 3 | 0 | 0 |
| ACCTS | 17:00:01--17:30:00 | 4 | 0 | 1 | 2 |
| Christy | 14:30:01--15:00:00 | 1 | 5 | 0 | 0 |
| Christy | 15:00:01--15:30:00 | 2 | 7 | 1 | 0 |
| Christy | 13:30:01--14:00:00 | 0 | 11 | 0 | 2 |
| SUZY | 14:30:01--15:00:00 | 1 | 0 | 0 | 0 |
| SUZY | 15:00:01--15:30:00 | 0 | 0 | 2 | 2 |
| SUZY | 13:30:01--14:00:00 | 2 | 1 | 0 | 0 |
+----------+--------------------+----------+----------+---------------+-------------+
在这里,我给出了我的 C# 程序代码,它带来了上述输出。
private void FetchData(string tableName)
{
TimeSpan tsStart, tsEnd;
string strSql = "";
srcTable = new DataTable();
srcTable.TableName = "data";
srcTable.Columns.Add("CSR Name");
srcTable.Columns.Add("Time");
srcTable.Columns.Add("Incoming Calls");
srcTable.Columns.Add("Outgoing Calls");
srcTable.Columns.Add("Calls Transfer");
srcTable.Columns.Add("Missed Calls");
DataRow dr = null;
int incall = 0, outcall = 0, transfercall = 0, totmisscall = 0;
bool flag = true;
string StartTime = "", EndTime = "";
string DayOfWeek = DateTime.Parse(dtVal.Value.ToString()).ToString("ddd");
string st_Time = string.Empty, end_Time = string.Empty;
st_Time="08:00:00";
end_Time="17:30:00";
// GetAllCSR function return datatable with all distinct CSR name excluding name start with VM and Voice Mail
DataTable dtCSRName = GetAllCSR(tableName,txtCSRName.Text);
string strCSRName = "";
if (dtCSRName != null)
{
foreach (DataRow row in dtCSRName.Rows)
{
if (st_Time.Trim() != "" && end_Time.Trim() != "")
{
tsStart = new TimeSpan(DateTime.Parse(st_Time.Trim()).Hour, DateTime.Parse(st_Time.Trim()).Minute, DateTime.Parse(st_Time.Trim()).Second);
tsEnd = new TimeSpan(DateTime.Parse(end_Time.Trim()).Hour, DateTime.Parse(end_Time.Trim()).Minute, DateTime.Parse(end_Time.Trim()).Second);
}
else
{
tsStart = new TimeSpan(09, 00, 00);
tsEnd = new TimeSpan(17, 30, 0);
}
// iterate in all CSR name
if (row["party1name"] != DBNull.Value)
{
strCSRName = row["party1name"].ToString();
// iterate in all time interval like 08:00:00 to 08:30:00, 08:30:00 to 09:00:00, 09:00:00 to 09:30:00.....17:00:00 to 17:30:00
while (tsStart <= tsEnd)
{
if (!flag)
{
tsStart = new TimeSpan(tsStart.Hours, tsStart.Minutes, int.Parse("01"));
}
flag = false;
StartTime = tsStart.ToString();
tsStart = tsStart.Add(new TimeSpan(00, 30, 00));
EndTime = (tsStart.Hours >= 10 ? tsStart.Hours.ToString() : ("0" + tsStart.Hours.ToString())) + ":" + (tsStart.Minutes >= 10 ? tsStart.Minutes.ToString() : ("0" + tsStart.Minutes.ToString())) + ":00";
strSql = "select (select count(*) as incoming from " + tableName + " where direction='I' and ";
strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
strSql = strSql + "and Is_Internal=0 and continuation=0 and RIGHT(convert(varchar,[call duration]),8)<> '00:00:00' ";
strSql = strSql + "and party1name='" + strCSRName + "') as incoming, ";
strSql = strSql + "(select count(*) as OutGoing from " + tableName + " ";
strSql = strSql + "where direction='O' and ";
strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
strSql = strSql + "and Is_Internal=0 and continuation=0 and party1name not in ('Voice Mail') ";
strSql = strSql + "and party1name='" + strCSRName + "') as OutGoing, ";
strSql = strSql + "(select count(*) as CallTransfer from " + tableName + " ";
strSql = strSql + "where continuation=1 and ";
strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
strSql = strSql + "and RIGHT(convert(varchar,[call duration]),8)<> '00:00:00' and party1name not in ('Voice Mail') ";
strSql = strSql + "and party1name='" + strCSRName + "') as CallTransfer; ";
strSql = strSql + "SELECT count(*) as UnansweredCalls_DuringBusinessHours from ";
strSql = strSql + tableName + " where direction='I' and " + Environment.NewLine;
strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
strSql = strSql + "and RIGHT(convert(varchar,[call duration]),8)= '00:00:00' and [Ring duration]>0 " + Environment.NewLine;
strSql = strSql + "and party1name='" + strCSRName + "'" + Environment.NewLine;
if (Business.CurrentCountry.CountryCode == "US" || Business.CurrentCountry.CountryCode == "MX" || Business.CurrentCountry.CountryCode == "ES" || Business.CurrentCountry.CountryCode == "NL" || Business.CurrentCountry.CountryCode == "PL")
{
strSql = strSql + " and Park_Time=0";
}
strSql = strSql + Environment.NewLine;
DataSet oDS = Common.GetDataSet(strSql, "");
if (oDS.Tables.Count > 0)
{
if (oDS.Tables[0].Rows.Count > 0)
{
dr = srcTable.NewRow();
dr[0] = strCSRName;
dr[1] = StartTime + "--" + EndTime;
if (oDS.Tables[0].Rows[0]["incoming"] != DBNull.Value)
{
dr[2] = oDS.Tables[0].Rows[0]["incoming"].ToString();
incall = incall + int.Parse(oDS.Tables[0].Rows[0]["incoming"].ToString());
}
else
{
dr[2] = "0";
}
if (oDS.Tables[0].Rows[0]["OutGoing"] != DBNull.Value)
{
dr[3] = oDS.Tables[0].Rows[0]["OutGoing"].ToString();
outcall = outcall + int.Parse(oDS.Tables[0].Rows[0]["OutGoing"].ToString());
}
else
{
dr[3] = "0";
}
if (oDS.Tables[0].Rows[0]["CallTransfer"] != DBNull.Value)
{
dr[4] = oDS.Tables[0].Rows[0]["CallTransfer"].ToString();
transfercall = transfercall + int.Parse(oDS.Tables[0].Rows[0]["CallTransfer"].ToString());
}
else
{
dr[4] = "0";
}
}
if (oDS.Tables[1].Rows.Count > 0)
{
if (oDS.Tables[1].Rows[0]["UnansweredCalls_DuringBusinessHours"] != DBNull.Value)
{
dr[5] = oDS.Tables[1].Rows[0]["UnansweredCalls_DuringBusinessHours"].ToString();
totmisscall = totmisscall + int.Parse(oDS.Tables[1].Rows[0]["UnansweredCalls_DuringBusinessHours"].ToString());
}
}
srcTable.Rows.Add(dr);
}
}
}
}
}
txtIncoming.Text = incall.ToString();
txtOutGoing.Text = outcall.ToString();
txtCallTransfer.Text = transfercall.ToString();
txtMissCall.Text = totmisscall.ToString();
//srcTable.DefaultView.Sort = "[CSR Name], [Time]";
DataSet oDs = new DataSet();
oDs.Tables.Add(srcTable);
this.outlookGrid1.ExpandIcon = global::BBA.Properties.Resources.Collapse;
this.outlookGrid1.CollapseIcon = global::BBA.Properties.Resources.Expand;
if (srcTable.Rows.Count > 0)
{
outlookGrid1.BindData(oDs, "data");
View = "BoundCategory";
DataGridViewCellEventArgs evt = new DataGridViewCellEventArgs(0, -1);
object sender = new object();
dgResult_CellClick(sender, evt);
outlookGrid1.ExpandAll();
}
else
{
outlookGrid1.Rows.Clear();
MessageBox.Show("No data found");
}
}
那我在做什么?我首先读取一个 CSV 文件,然后将 CSV 文件数据插入数据库表,然后使用条件获取这些数据并在 2 个循环中形成实际数据。
上面的代码和过程工作正常但速度很慢。我被告知只使用 LINQ 来读取 csf 文件数据并形成输出。我在 LINQ 中很弱,所以需要您的帮助来形成正确的 linq 查询以获得准确的输出。
通过这种方式,到目前为止,我尝试形成 LINQ 查询,但老实说无法正确安排它。这是我的 linq 查询,需要添加更多内容才能成为完整代码。在这里,我正在粘贴我的工作linq查询,该查询很慢,但不会抛出错误。
void Main()
{
var csvlines = File.ReadAllLines(@"M:'smdr(backup08-06-2015).csv");
var csvLinesData = csvlines.Skip(1).Select(l => l.Split(',').ToArray());
var users = csvLinesData.Select(data => new User
{
CSRName = data[12],
Incoming = csvLinesData.Count(w => w[4] == "I"
&& w[8] == "0"
&& w[10] == "0"
&& w[1].ToString().Substring(w[1].ToString().Length-8)!="00:00:00"),
outgoing = csvLinesData.Count(w => w[4] == "O"
&& w[8] == "0"
&& w[10] == "0"
&& w[1].ToString().Substring(w[1].ToString().Length-8)!="00:00:00"),
})
.Where(u => !u.CSRName.Contains("VM") && u.CSRName != "Voice Mail").OrderBy(u=> u.CSRName)
.ToList();
users.Dump();
}
class User
{
public string CSRName;
public int outgoing;
public int Incoming;
}
因此,我请求所有 LINQ 专家查看我的 C# 代码并帮助我编写完整的 linq 查询,该查询无需循环迭代即可带来相同的输出。
这是我的 CSV 文件 URL http://s000.tinyupload.com/?file_id=22225722172003614028
任何人都可以从中下载。 只需点击测试.csv文件链接,如果有人感兴趣。 谢谢
我个人并不认为 Linq 会比一个格式良好的 for 循环快得多。(除非您正在利用PLinq)我认为更大的问题是您的数据模型可能需要不同的设计。我要做的是创建一个仅表示行及其重要详细信息的新类。
class CallInformation {
public DateTime CallStart { get; private set; }
public Boolean IsOutGoing {get; private set; }
public String CSRName {get; private set; }
public int InComingCount { get; set; }
public int OutgoingCount { get; set; }
public CallInformation(String[] parts) {
IsOutGoing = parts[4] == "O";
CallStart = DateTime.Parse(parts[0]);
CSRName = parts[12];
}
//... Continue with the important properties
}
现在 IO 流也可以提高性能,但为了清楚起见,我将排除它。在进行排序和组合项目时通常比搜索要快得多。如果可以避免繁重的字符串比较/创建,则通常可以大大提高性能。
从你的这一部分
Dictionary<String, CallInformation> callDictionary = new Dictionary<String,CallInformation>();
var csvLinesData = csvlines.Skip(1).Select(l => l.Split(',').ToArray());
foreach(string[] parts in csvLinesData) {
//Then place this call into a sortedlist or Dictionary.
//Here i am counting up the incoming and outgoing calls.
if(callDictionary.containsKey(parts[12])) {
if(parts[4] == "I") {
callDictionary[parts[12]].InComingCount++;
} else {
callDictionary[parts[12]].OutGoingCount++;
}
} else {
//Construct your new object based on this row.
CallInformation call = new CallInformation(parts);
callDictionary.add(call.CSRName, call);
}
}
排序列表可以根据"开始时间"或其他字段进行排序。 控制您的输出。我个人很难理解您正在分组和处理的确切方法。我个人认为,您的表现是基于这样一个事实,即您正在查询数据集以获取永远不会存在的信息。但是请让我知道您如何对信息进行分组。我认为这更多的是您将如何对信息进行分组。
如果您尝试根据调用的持续时间来设置调用,那么您可以使用 DateTime 部分来确定将其放入字典中的哪个"存储桶"中,而不是我上面所做的 CSR 名称(通过调整call.CSRName,
和containsKey[12]
),您可以创建持续时间字符串。通过使用日期时间对象,您可以基于时差而不是字符串比较进行比较。
若要从此集合中提取数据,实际上需要使用 Linq 来执行该
像这样的查询
callDictionary.Where((keyValuePair) => { return keyValuePair.Value.CSRName != "VM" && keyValuePair.Value.CSRName != "VoiceMail"; }).OrderBy(o.Value.StartTime);
OrderBy 子句实际上取决于您的外观 由于我们已经"反转"了事物,因此我们可以按开始时间排序,为了获得更精细的控制,您可能希望StartTime
是 TimeDuration 的开始。 任何 1 个特定项目,它将大致正确。您还可以添加另一个 OrderBy 以按CSRName
对其进行排序。
类似于下面的代码片段:
private static IEnumerable<MyData> Parse(String csvFile, bool hasHeaderLine = false, char separator = ',') {
return Read(csvFile)
.Skip(hasHeaderLine ? 1 : 0)
.Select(line => line.Split(separator, StringSplitOptions.None))
.Select(parts => new MyData(
parts[0],
DateTime.Parse(parts[1]),
TimeSpan.Parse(parts[2]),
uint.Parse(parts[3]),
uint.Parse(parts[4]),
uint.Parse(parts[5]),
uint.Parse(parts[6])
));
}
private static IEnumerable<String> Read(String csvFile) {
using (var reader = new StreamReader(csvFile)) {
String line;
while ((line = reader.ReadLine()) != null)
yield return line;
}
}
private class MyData {
private readonly string name;
private readonly DateTime startTime;
private readonly TimeSpan duration;
private readonly uint incomingCalls, outgoingCalls, callTransfers, missedCalls;
public MyData(string name, DateTime startTime, TimeSpan duration, uint incomingCalls, uint outgoingCalls, uint callTransfers, uint missedCalls) {
this.name = name;
this.startTime = startTime;
this.duration = duration;
this.incomingCalls = incomingCalls;
this.outgoingCalls = outgoingCalls;
this.callTransfers = callTransfers;
this.missedCalls = missedCalls;
}
public string Name => this.name;
public DateTime StartTime => this.startTime;
public TimeSpan Duration => this.duration;
public uint IncomingCalls => this.incomingCalls;
public uint OutgoingCalls => this.outgoingCalls;
public uint CallTransfers => this.callTransfers;
public uint MissedCalls => this.missedCalls;
public DateTime EndTime => this.startTime + this.duration;
}