LINQ 联接 HashSet 中的实体,Join vs Dictionary vs HashSet 性能
本文关键字:vs HashSet Join 性能 Dictionary 实体 联接 LINQ | 更新日期: 2023-09-27 17:57:02
我有HashSet,每个存储T,我编写了一个测试应用程序来比较我能想到的不同关系算法,但是我对我得到的结果并不满意。
是否存在更有效的实现对象关系的方法,然后我正在测试的一次?
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Diagnostics;
namespace LINQTests
{
class Program
{
static void Main(string[] args)
{
HashSet<User> UserTable = new HashSet<User>();
HashSet<UserProperty> UserPropertyTable = new HashSet<UserProperty>();
#region Lets create some dummy data.
Console.WriteLine("Please wait while creating dummy data, this can take a while...");
Console.WriteLine("");
int rows = 1000000;
for(int x = 0; x < rows; x++)
{
Random rnd = new Random();
// Add new user.
User user = new User(string.Format("Joachim-{0}", x));
if(!UserTable.Add(user))
{
// throw new Exception.
}
else
{
UserProperty age = new UserProperty(user, "Age", rnd.Next(25, 30).ToString());
if(!UserPropertyTable.Add(age))
{
// throw new Exception.
}
UserProperty sex = new UserProperty(user, "Sex", "Male");
if (!UserPropertyTable.Add(sex))
{
// throw new Exception.
}
UserProperty location = new UserProperty(user, "Location", "Norway");
if (!UserPropertyTable.Add(location))
{
// throw new Exception.
}
}
}
#endregion
#region Lets do some query tests.
IEnumerable<User> Users;
Stopwatch stopwatch = new Stopwatch();
int matches = 0;
// Lets find all users who are of age 29.
Console.WriteLine("Finding all users who are of age 29");
Console.WriteLine("");
Console.WriteLine("---------------------------------------------------");
Console.WriteLine("{0,-20} | {1,6} | {2,9}", "Search Strategy", "Found", "Time");
Console.WriteLine("---------------------------------------------------");
// Join test.
stopwatch.Start();
Users = (from user in UserTable
join property in UserPropertyTable on user.Id equals property.UserId
where property.Key == "Age" && property.Value == "29"
select user);
matches = Users.Count();
stopwatch.Stop();
Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "Joining Tables", matches, stopwatch.ElapsedMilliseconds);
// Dictionary test.
stopwatch.Restart();
var DictionarySearch = (from t in UserPropertyTable where t.Key == "Age" && t.Value == "29" select t).ToDictionary(x => x.UserId);
Users = (from t in UserTable where DictionarySearch.ContainsKey(t.Id) select t);
matches = Users.Count();
stopwatch.Stop();
Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "Dictionary Contain", matches, stopwatch.ElapsedMilliseconds);
// HashSet test.
stopwatch.Restart();
var HashsetSearch = new HashSet<Guid>(from t in UserPropertyTable where t.Key == "Age" && t.Value == "29" select t.UserId);
Users = (from t in UserTable where HashsetSearch.Contains(t.Id) select t);
matches = Users.Count();
stopwatch.Stop();
Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "HashSet Contain", matches, stopwatch.ElapsedMilliseconds);
// Following takes so long that we wont run them!
//// Array test.
//stopwatch.Restart();
//var ArrayMatch = (from t in UserPropertyTable where t.Key == "Age" && t.Value == "29" select t.UserId).ToArray();
//Users = (from t in UserTable where ArrayMatch.Contains(t.Id) select t);
//matches = Users.Count();
//stopwatch.Stop();
//Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "Array Contain", matches, stopwatch.ElapsedMilliseconds);
//// List test.
//stopwatch.Restart();
//var ListMatch = (from t in UserPropertyTable where t.Key == "Age" && t.Value == "29" select t.UserId).ToList();
//Users = (from t in UserTable where ListMatch.Contains(t.Id) select t);
//matches = Users.Count();
//stopwatch.Stop();
//Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "List Contain", matches, stopwatch.ElapsedMilliseconds);
Console.WriteLine("---------------------------------------------------");
#endregion
Console.WriteLine("");
Console.WriteLine("Hit return to exit...");
Console.Read();
}
}
public class User
{
public User(string UserName)
{
this.Id = Guid.NewGuid();
this.UserName = UserName;
}
public Guid Id { get; set; }
public string UserName { get; set; }
public override bool Equals(object obj)
{
User other = obj as User;
if (other == null)
return false;
return this.Id == other.Id;
}
public override int GetHashCode()
{
return Id.GetHashCode();
}
}
public class UserProperty
{
public UserProperty(User user, string key, string value)
{
this.Id = Guid.NewGuid();
this.UserId = user.Id;
this.Key = key;
this.Value = value;
}
public Guid Id { get; private set; }
public Guid UserId {get; private set;}
public string Key { get; set; }
public string Value { get; set; }
public override bool Equals(object obj)
{
UserProperty other = obj as UserProperty;
if (other == null)
return false;
return this.UserId == other.UserId && this.Key == other.Key;
}
public override int GetHashCode()
{
return string.Format("{0}-{1}", this.UserId, this.Key).GetHashCode();
}
}
}
这将使 linq/join 与其他方法相当:
var properties = UserPropertyTable
.Where(p=>p.Key == "Age" && p.Value == "29")
.ToArray();
Users = (from user in UserTable
join property in properties
on user.Id equals property.UserId
select user);
这是我能达到的最快(~2X):
var filteredUserIds = new HashSet<Guid>(
UserPropertyTable
.Where(p=>p.Key == "Age" && p.Value == "29")
.Select(p=>p.UserId));
Users = (from user in UserTable
where filteredUserIds.Contains(user.Id)
select user);
带输出
---------------------------------------------------搜索策略 | 找到 | 时间---------------------------------------------------我的方法 |210366 | 157 毫秒字典包含 |210366 | 325 毫秒哈希集包含 |210366 | 325 毫秒---------------------------------------------------
以下是您可以执行的一些操作:
- 对于添加的每个元素和正在探测的每个元素,都会调用 GetHashCode(哈希表如何比较两个哈希代码?每个探测器至少调用一次等于。优化这些。不要使用字符串。格式(!它需要解析格式字符串并分配内存。
UserId.GHC() * 37 ^ Name.GHC()
应该做的。 - 不要使用 LINQ。编写手动循环。LINQ 对正在处理的每个项目有多个虚拟调用或委托调用。这会增加指令,使CPU停止并防止内联。
- 你能预先计算数据结构吗?预先计算哈希表或排序列表。合并两个排序列表可能比使用哈希表更快。这不是框架内置的。要为合并联接编写大量困难的自定义代码。
- 为什么要在测量中包含
property.Key == "Age" && property.Value == "29"
谓词?是否需要为每次执行运行?您可以尝试通过使用整数值而不是字符串来优化它。也许您可以预先计算UserPropertyTable
的索引,以便您可以在恒定时间内获得匹配的元素。
这应该会给你 1-10 倍的加速,具体取决于你实现的程度。如果你需要比这更快,我需要先问一些问题。通常,您可以找到仅适用于特定情况的专用技巧。