从WCF生成并插入大量数据到MS SQL中
本文关键字:数据 MS SQL WCF 插入 | 更新日期: 2023-09-27 18:11:12
我有一个任务,生成并插入大约1'000'000行到2个表。困难在于我必须插入现有的外键,所以我首先从其他表中获取数据……然后我尝试生成新的数据表与1000000行粘贴它与SqlBulk…但是这几代花费的时间太长了(大约每分钟4000行!)这是代码,也许有人可以帮助优化它…
public void GenerateData(DateTime fromDate, DateTime toDate, int Size)
{
DataTable theDatetime = new DataTable();
theDatetime.Columns.Add("Datetime", new DateTime().GetType());
theDatetime.Columns.Add("Date", new DateTime().GetType());
theDatetime.Columns.Add("Hour", new Int32().GetType());
DataTable theOverspeed = new DataTable();
theOverspeed.Columns.Add("CarNumber", new Int32().ToString().GetType());
theOverspeed.Columns.Add("Datetime", new DateTime().GetType());
theOverspeed.Columns.Add("DistrictCode", new Int32().GetType());
theOverspeed.Columns.Add("MarkCode", new Int32().GetType());
theOverspeed.Columns.Add("OwnerCode", new Int32().GetType());
theOverspeed.Columns.Add("RecognitorNumber", new Int32().GetType());
theOverspeed.Columns.Add("Region", new Int32().GetType());
theOverspeed.Columns.Add("RouteCode", new Int32().GetType());
theOverspeed.Columns.Add("Overspeed", new Int32().GetType());
theOverspeed.Columns.Add("Speed", new Int32().GetType());
Dictionary<DateTime, DateTime> Dates = new Dictionary<DateTime, DateTime>();
Random TheGenerator = new Random();
DataTable theCars = getCars();
DataTable theRecognitors = getRecognitors();
int[] sizes = new int[5];
for(int step = 0; step < 5; ++step)
{
DateTime Current1 = GetFifthDate(fromDate, toDate)[step];
DateTime Current2 = GetFifthDate(fromDate, toDate)[step+1];
int CurrentCount = GetFifthCount(Size)[step];
for (int i = 0; i < CurrentCount; ++i)
{
var aDate = GetRandomDate(Current1, Current2);
if (!Dates.ContainsKey(aDate))
{
InsertFact(theCars, aDate, theRecognitors,
theDatetime, theOverspeed);
Dates.Add(aDate, aDate);
}
else
{
i--;
}
}
}
和InsertFact方法
private void InsertFact(DataTable theCars,
DateTime theDate,
DataTable theRecognitor, DataTable theDatetime, DataTable theOverspeed)
{
Random theGenerator = new Random(DateTime.Now.Millisecond);
DataRow rowDate = theDatetime.NewRow();
rowDate["DateTime"] = theDate;
rowDate["Date"] = theDate.Date;
rowDate["Hour"] = theDate.Hour;
theDatetime.Rows.Add(rowDate);
int car = theGenerator.Next(theCars.Rows.Count);
int recognitor = theGenerator.Next(theRecognitor.Rows.Count);
DataRow rowOverspeed = theOverspeed.NewRow();
rowOverspeed["CarNumber"] = theCars.Rows[car][4];
rowOverspeed["Datetime"] = theDate;
rowOverspeed["DistrictCode"] = theRecognitor.Rows[recognitor][3];
rowOverspeed["MarkCode"] = theCars.Rows[car][3];
rowOverspeed["OwnerCode"] = theCars.Rows[car][2];
rowOverspeed["RecognitorNumber"] = theRecognitor.Rows[recognitor][2];
rowOverspeed["Region"] = theCars.Rows[car][1];
rowOverspeed["RouteCode"] = theRecognitor.Rows[recognitor][1];
rowOverspeed["Overspeed"] = theGenerator.Next(10, 40);
rowOverspeed["Speed"] = theGenerator.Next(100) > 40 ? 70 : 90;
theOverspeed.Rows.Add(rowOverspeed);}
GetRecognitors
private DataTable getRecognitors()
{
DataTable Result = new DataTable("Recognitors");
using (SqlConnection theConnection = new SqlConnection(str))
{
theConnection.Open();
SqlDataAdapter theAdapter = new SqlDataAdapter();
SqlCommand theCommand = new SqlCommand("Select * from Recognitor", theConnection);
theCommand.CommandType = System.Data.CommandType.Text;
theCommand.CommandTimeout = 0;
theAdapter.SelectCommand = theCommand;
theAdapter.Fill(Result);
}
return Result;
}
GetCars
private DataTable getCars()
{
DataTable Result = new DataTable("Cars");
using (SqlConnection theConnection = new SqlConnection(str))
{
theConnection.Open();
SqlDataAdapter theAdapter = new SqlDataAdapter();
SqlCommand theCommand = new SqlCommand("Select * from Car", theConnection);
theCommand.CommandType = System.Data.CommandType.Text;
theCommand.CommandTimeout = 0;
theAdapter.SelectCommand = theCommand;
theAdapter.Fill(Result);
}
return Result;
}
在我看来,您的代码有太多的空闲传递,并且它们的数量在数据生成期间增加。看看这两行:
for (int i = 0; i < CurrentCount; ++i)
{
var aDate = GetRandomDate(Current1, Current2);
if (!Dates.ContainsKey(aDate))
{
// insert
}
else
{
i--;
}
}
生成的数据越多,条件Dates.ContainsKey
求值为真的次数越多。由于生成的数字呈正态分布,生成唯一数字所需的时间呈非线性增长。
你绝对应该修改用于生成日期的方法。
注:Dictionary<DateTime, DateTime> Dates
=> HashSet<DateTime>
.