在插入sql数据库时处理大量数据
本文关键字:数据 处理 插入 sql 数据库 | 更新日期: 2023-09-27 18:16:39
在我的代码中,用户可以上传一个excel文档,希望包含它的电话联系人列表。我作为一个开发人员应该阅读excel文件,把它变成一个数据表,并将其插入数据库。问题是,一些客户端有大量的联系人,比如说5000或更多的联系人,当我试图将这些数据量插入数据库时,它会崩溃并给我一个超时异常。避免这种异常的最好方法是什么?是否有代码可以减少插入语句的时间,这样用户就不会等待太长时间?
代码
public SqlConnection connection = new SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["ConnectionString"].ConnectionString);
public void Insert(string InsertQuery)
{
SqlDataAdapter adp = new SqlDataAdapter();
adp.InsertCommand = new SqlCommand(InsertQuery, connection);
if (connection.State == System.Data.ConnectionState.Closed)
{
connection.Open();
}
adp.InsertCommand.ExecuteNonQuery();
connection.Close();
}
protected void submit_Click(object sender, EventArgs e)
{
string UploadFolder = "Savedfiles/";
if (Upload.HasFile) {
string fileName = Upload.PostedFile.FileName;
string path=Server.MapPath(UploadFolder+fileName);
Upload.SaveAs(path);
Msg.Text = "successfully uploaded";
DataTable ValuesDt = new DataTable();
ValuesDt = ConvertExcelFileToDataTable(path);
Session["valuesdt"] = ValuesDt;
Excel_grd.DataSource = ValuesDt;
Excel_grd.DataBind();
}
}
protected void SendToServer_Click(object sender, EventArgs e)
{
DataTable Values = Session["valuesdt"] as DataTable ;
if(Values.Rows.Count>0)
{
DataTable dv = Values.DefaultView.ToTable(true, "Mobile1", "Mobile2", "Tel", "Category");
double Mobile1,Mobile2,Tel;string Category="";
for (int i = 0; i < Values.Rows.Count; i++)
{
Mobile1 =Values.Rows[i]["Mobile1"].ToString()==""?0: double.Parse(Values.Rows[i]["Mobile1"].ToString());
Mobile2 = Values.Rows[i]["Mobile2"].ToString() == "" ? 0 : double.Parse(Values.Rows[i]["Mobile2"].ToString());
Tel = Values.Rows[i]["Tel"].ToString() == "" ? 0 : double.Parse(Values.Rows[i]["Tel"].ToString());
Category = Values.Rows[i]["Category"].ToString();
Insert("INSERT INTO client(Mobile1,Mobile2,Tel,Category) VALUES(" + Mobile1 + "," + Mobile2 + "," + Tel + ",'" + Category + "')");
Msg.Text = "Submitied successfully to the server ";
}
}
}
您可以尝试SqlBulkCopy
插入Datatable到数据库表
像这样,
using (SqlBulkCopy bulkCopy = new SqlBulkCopy(sqlConnection, SqlBulkCopyOptions.KeepIdentity))
{
bulkCopy.DestinationTableName = DestTableName;
string[] DtColumnName = YourDataTableColumns;
foreach (string dbcol in DbColumnName)//To map Column of Datatable to that of DataBase tabele
{
foreach (string dtcol in DtColumnName)
{
if (dbcol.ToLower() == dtcol.ToLower())
{
SqlBulkCopyColumnMapping mapID = new SqlBulkCopyColumnMapping(dtcol, dbcol);
bulkCopy.ColumnMappings.Add(mapID);
break;
}
}
}
bulkCopy.WriteToServer(YourDataTableName.CreateDataReader());
bulkCopy.Close();
}
更多信息请阅读http://msdn.microsoft.com/en-us/library/system.data.sqlclient.sqlbulkcopy.aspx
每次插入1行,对于这样的数据量来说,这是非常昂贵的
在这些情况下,你应该使用大容量插入,所以到DB的往返只需要一次,如果你需要回滚——所有的都是相同的事务
您可以使用SqlBulkCopy,这是更多的工作,或者您可以使用SqlAdpater的批量更新功能。与创建自己的插入语句,然后构建一个sqldatadpater,然后手动执行它不同,创建一个数据集,填充它,创建一个sqldataadpater,设置批处理中插入的数量,然后执行适配器一次。
我可以重复这段代码,但本文将展示如何做到这一点:http://msdn.microsoft.com/en-us/library/kbbwt18a%28v=vs.80%29.aspx
protected void SendToServer_Click(object sender, EventArgs e)
{
DataTable Values = Session["valuesdt"] as DataTable ;
if(Values.Rows.Count>0)
{
DataTable dv = Values.DefaultView.ToTable(true, "Mobile1", "Mobile2", "Tel", "Category");
//Fix up default values
for (int i = 0; i < Values.Rows.Count; i++)
{
Values.Rows[i]["Mobile1"] =Values.Rows[i]["Mobile1"].ToString()==""?0: double.Parse(Values.Rows[i]["Mobile1"].ToString());
Values.Rows[i]["Mobile2"] = Values.Rows[i]["Mobile2"].ToString() == "" ? 0 : double.Parse(Values.Rows[i]["Mobile2"].ToString());
Values.Rows[i]["Tel"] = Values.Rows[i]["Tel"].ToString() == "" ? 0 : double.Parse(Values.Rows[i]["Tel"].ToString());
Values.Rows[i]["Category"] = Values.Rows[i]["Category"].ToString();
}
BatchUpdate(dv,1000);
}
}
public static void BatchUpdate(DataTable dataTable,Int32 batchSize)
{
// Assumes GetConnectionString() returns a valid connection string.
string connectionString = GetConnectionString();
// Connect to the database.
using (SqlConnection connection = new SqlConnection(connectionString))
{
// Create a SqlDataAdapter.
SqlDataAdapter adapter = new SqlDataAdapter();
// Set the INSERT command and parameter.
adapter.InsertCommand = new SqlCommand(
"INSERT INTO client(Mobile1,Mobile2,Tel,Category) VALUES(@Mobile1,@Mobile2,@Tel,@Category);", connection);
adapter.InsertCommand.Parameters.Add("@Mobile1",
SqlDbType.Float);
adapter.InsertCommand.Parameters.Add("@Mobile2",
SqlDbType.Float);
adapter.InsertCommand.Parameters.Add("@Tel",
SqlDbType.Float);
adapter.InsertCommand.Parameters.Add("@Category",
SqlDbType.NVarchar, 50);
adapter.InsertCommand.UpdatedRowSource = UpdateRowSource.None;
// Set the batch size.
adapter.UpdateBatchSize = batchSize;
// Execute the update.
adapter.Update(dataTable);
}
}
我知道这是一个超级老的帖子,但你不应该需要使用5000个插入的现有答案中解释的批量操作。您的性能会受到很大影响,因为您在每次插入行时关闭并重新打开连接。以下是我过去使用的一些代码,它保持一个连接打开并执行尽可能多的命令,以将所有数据推送到DB:
public static class DataWorker
{
public static Func<IEnumerable<T>, Task> GetStoredProcedureWorker<T>(Func<SqlConnection> connectionSource, string storedProcedureName, Func<T, IEnumerable<(string paramName, object paramValue)>> parameterizer)
{
if (connectionSource is null) throw new ArgumentNullException(nameof(connectionSource));
SqlConnection openConnection()
{
var conn = connectionSource() ?? throw new ArgumentNullException(nameof(connectionSource), $"Connection from {nameof(connectionSource)} cannot be null");
var connState = conn.State;
if (connState != ConnectionState.Open)
{
conn.Open();
}
return conn;
}
async Task DoStoredProcedureWork(IEnumerable<T> workData)
{
using (var connection = openConnection())
using (var command = connection.CreateCommand())
{
command.CommandType = CommandType.StoredProcedure;
command.CommandText = storedProcedureName;
command.Prepare();
foreach (var thing in workData)
{
command.Parameters.Clear();
foreach (var (paramName, paramValue) in parameterizer(thing))
{
command.Parameters.AddWithValue(paramName, paramValue ?? DBNull.Value);
}
await command.ExecuteNonQueryAsync().ConfigureAwait(false);
}
}
}
return DoStoredProcedureWork;
}
}
这实际上是来自一个项目,我正在收集限制列表的电子邮件,所以parameterizer
参数可能是什么样子的相关示例以及如何使用上面的代码:
IEnumerable<(string,object)> RestrictionToParameter(EmailRestriction emailRestriction)
{
yield return ("@emailAddress", emailRestriction.Email);
yield return ("@reason", emailRestriction.Reason);
yield return ("@restrictionType", emailRestriction.RestrictionType);
yield return ("@dateTime", emailRestriction.Date);
}
var worker = DataWorker.GetStoredProcedureWorker<EmailRestriction>(ConnectionFactory, @"[emaildata].[AddRestrictedEmail]", RestrictionToParameter);
await worker(emailRestrictions).ConfigureAwait(false);