将两个生成字典的LINQ查询合并为一个方法
本文关键字:一个 方法 字典 LINQ | 更新日期: 2023-09-27 17:49:37
我使用两个几乎相同的调用(唯一的区别是其中一个调用了.Distinct()
,而另一个没有)。是否有可能将它们合并成一个方法,这样我只需在一个地方调用和修改?
// Reads every file in `file`, splits each line into words, lower-cases them and
// drops words found in StopWords or PopulatNetworkWords as well as all-digit words.
// Words are de-duplicated per file before grouping, so the value stored for a word
// in tempDict is the number of files that contain it.
private void Splitter1(string[] file)
{
    // Characters treated as word boundaries.
    var delimiters = new[] { ' ', ',', '.', '?', '!', '{', '[', '(', '}', ']', ')',
        '<', '>', '-', '=', '/', '"', ';', ':', '+', '_', '*' };

    // One de-duplicated word sequence per file, flattened into a single sequence.
    var perFileWords = file.SelectMany(path =>
        File.ReadAllLines(path)
            .SelectMany(text => text.Split(delimiters, StringSplitOptions.RemoveEmptyEntries))
            .AsParallel()
            .Select(token => token.ToLower())
            .Where(token => !StopWords.Contains(token)
                            && !PopulatNetworkWords.Contains(token)
                            && !token.All(char.IsDigit))
            .Distinct());

    tempDict = perFileWords
        .GroupBy(token => token)
        .ToDictionary(bucket => bucket.Key, bucket => bucket.Count());
}
// Reads every file in `file`, splits each line into words, lower-cases them and
// drops words found in StopWords or PopulatNetworkWords as well as all-digit words.
// No de-duplication is performed, so the value stored for a word in tempDict is its
// total number of occurrences across all files.
private void Splitter2(string[] file)
{
    // Characters treated as word boundaries.
    var delimiters = new[] { ' ', ',', '.', '?', '!', '{', '[', '(', '}', ']', ')',
        '<', '>', '-', '=', '/', '"', ';', ':', '+', '_', '*' };

    // Flatten files -> lines -> raw tokens.
    var rawTokens =
        from path in file
        from text in File.ReadAllLines(path)
        from token in text.Split(delimiters, StringSplitOptions.RemoveEmptyEntries)
        select token;

    tempDict = rawTokens
        .AsParallel()
        .Select(token => token.ToLower())
        .Where(token => !(StopWords.Contains(token)
                          || PopulatNetworkWords.Contains(token)
                          || token.All(char.IsDigit)))
        .GroupBy(token => token)
        .ToDictionary(bucket => bucket.Key, bucket => bucket.Count());
}
由于两者之间的区别仅在于Distinct()
是否被调用,而Distinct()
接受并返回IEnumerable<T>
(或接受并返回IQueryable<T>
),因此可以先创建相应的IEnumerable<T>
,再决定是否用调用Distinct()
的结果替换它,然后继续:
// Builds a word-count dictionary from the given files and stores it in tempDict.
//   file     - paths of the files to read.
//   distinct - when true, each word is counted at most once per file (the value is the
//              number of files containing the word); when false, every occurrence counts.
// Words found in StopWords or PopulatNetworkWords, and all-digit words, are skipped.
private void Splitter(string[] file, bool distinct)
{
    // Characters treated as word boundaries; built once instead of once per line.
    var separators = new[] { ' ', ',', '.', '?', '!', '{', '[', '(', '}', ']', ')',
        '<', '>', '-', '=', '/', '"', ';', ':', '+', '_', '*' };

    tempDict = file
        .SelectMany(i =>
        {
            // First create the word sequence for this file...
            IEnumerable<string> query = File.ReadAllLines(i)
                .SelectMany(line => line.Split(separators, StringSplitOptions.RemoveEmptyEntries))
                .AsParallel()
                .Select(word => word.ToLower())
                .Where(word => !StopWords.Contains(word))
                .Where(word => !PopulatNetworkWords.Contains(word))
                .Where(word => !word.All(char.IsDigit));

            // ...then decide whether to replace it with its de-duplicated form.
            // This has to happen per file: de-duplicating the merged sequence of all
            // files would leave every group with exactly one element.
            if (distinct)
            {
                query = query.Distinct();
            }

            return query;
        })
        .GroupBy(word => word)
        .ToDictionary(g => g.Key, g => g.Count());
}
(顺便说一下,您可能会发现较新的ReadLines
比ReadAllLines
工作得更好,特别是对于大文件。ReadAllLines
立即将所有行读入内存,而不是在使用它们时读取它们,因此它浪费了大量内存并延迟了处理)。
为什么不这样呢:
// Builds a word-count dictionary from the given files and stores it in tempDict.
//   file        - paths of the files to read.
//   useDistinct - when true, each word is counted at most once per file; when false
//                 (the default), every occurrence counts.
// Words found in StopWords or PopulatNetworkWords, and all-digit words, are skipped.
private void Splitter1(string[] file, bool useDistinct = false)
{
    // Characters treated as word boundaries; built once instead of once per line.
    var separators = new[] { ' ', ',', '.', '?', '!', '{', '[', '(', '}', ']', ')',
        '<', '>', '-', '=', '/', '"', ';', ':', '+', '_', '*' };

    tempDict = file
        // Project each file to its own filtered word sequence (not flattened yet).
        .Select(i => File.ReadAllLines(i)
            .SelectMany(line => line.Split(separators, StringSplitOptions.RemoveEmptyEntries))
            .AsParallel()
            .Select(word => word.ToLower())
            .Where(word => !StopWords.Contains(word))
            .Where(word => !PopulatNetworkWords.Contains(word))
            .Where(word => !word.All(char.IsDigit)))
        // The conditional Distinct() must act on a whole per-file sequence, not on a
        // single word (Distinct() on a string would enumerate its characters).
        .SelectMany(words => useDistinct ? words.Distinct() : words)
        .GroupBy(word => word)
        .ToDictionary(g => g.Key, g => g.Count());
}
由于Linq延迟执行,您可以在单独的语句中构建子句。
// Builds a word-count dictionary from the given files and stores it in tempDict.
//   file     - paths of the files to read.
//   distinct - when true, each word is counted at most once per file; when false,
//              every occurrence counts.
// Words found in StopWords or PopulatNetworkWords, and all-digit words, are skipped.
// The query is composed in separate statements; nothing is read or evaluated until
// ToDictionary enumerates it.
private void Splitter1(string[] file, bool distinct)
{
    // Characters treated as word boundaries; built once instead of once per line.
    var separators = new[] { ' ', ',', '.', '?', '!', '{', '[', '(', '}', ']', ')',
        '<', '>', '-', '=', '/', '"', ';', ':', '+', '_', '*' };

    // One filtered word sequence per file.
    IEnumerable<IEnumerable<string>> query = file.Select(i => File.ReadAllLines(i)
        .SelectMany(line => line.Split(separators, StringSplitOptions.RemoveEmptyEntries))
        .AsParallel()
        .Select(word => word.ToLower())
        .Where(word => !StopWords.Contains(word))
        .Where(word => !PopulatNetworkWords.Contains(word))
        .Where(word => !word.All(char.IsDigit)));

    if (distinct)
    {
        // De-duplicate within each file; doing it after merging all files would
        // leave every group with a count of 1.
        query = query.Select(words => words.Distinct());
    }

    // Assign the result: without this the computed dictionary would be thrown away.
    tempDict = query
        .SelectMany(words => words)
        .GroupBy(word => word)
        .ToDictionary(g => g.Key, g => g.Count());
}
我没有测试这段代码,所以你需要调整它。但是,其基本思想是延迟执行允许您根据逻辑更改查询。