在插入sql数据库时处理大量数据

本文关键字:数据 处理 插入 sql 数据库 | 更新日期: 2023-09-27 18:16:39

在我的代码中,用户可以上传一个 Excel 文档,其中应包含用户的电话联系人列表。作为开发人员,我需要读取该 Excel 文件,把它转换成一个 DataTable,并将其插入数据库。问题是,有些客户的联系人数量很大,比如 5000 个或更多;当我尝试把这么多数据插入数据库时,程序会崩溃并抛出超时异常。避免这种异常的最好方法是什么?有没有代码可以缩短插入语句的执行时间,让用户不必等待太久?

代码

// Shared connection used by Insert; its connection string is read from the
// "ConnectionString" entry of the application configuration.
public SqlConnection connection = new SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["ConnectionString"].ConnectionString);

/// <summary>
/// Executes one SQL statement on the shared <see cref="connection"/> and closes the connection afterwards.
/// </summary>
/// <param name="InsertQuery">
/// Complete SQL text to execute. The text is sent to the server unchanged, so callers
/// must not build it from untrusted values.
/// </param>
public void Insert(string InsertQuery)
{
    // A plain command is enough to run one statement; no data adapter is required.
    using (SqlCommand command = new SqlCommand(InsertQuery, connection))
    {
        try
        {
            if (connection.State == System.Data.ConnectionState.Closed)
            {
                connection.Open();
            }
            command.ExecuteNonQuery();
        }
        finally
        {
            // Close even when the statement fails, so a failed insert does not
            // leave the shared connection open.
            connection.Close();
        }
    }
}
/// <summary>
/// Click handler for the upload button. Saves the posted file under "Savedfiles/",
/// converts it with ConvertExcelFileToDataTable, stores the resulting table in
/// Session["valuesdt"] and binds it to Excel_grd.
/// </summary>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void submit_Click(object sender, EventArgs e)
{
    string UploadFolder = "Savedfiles/";
    if (Upload.HasFile)
    {
        // The posted file name is supplied by the client and may contain directory
        // parts (a full client-side path or ".." segments). Keep only the file name
        // so the file cannot be written outside the upload folder.
        string fileName = System.IO.Path.GetFileName(Upload.PostedFile.FileName);
        string path = Server.MapPath(UploadFolder + fileName);
        Upload.SaveAs(path);
        Msg.Text = "successfully uploaded";

        DataTable ValuesDt = ConvertExcelFileToDataTable(path);
        // Kept in session so SendToServer_Click can read it on a later postback.
        Session["valuesdt"] = ValuesDt;
        Excel_grd.DataSource = ValuesDt;
        Excel_grd.DataBind();
    }
}
/// <summary>
/// Click handler that inserts every row of the table stored in Session["valuesdt"]
/// into the client table.
/// </summary>
/// <remarks>
/// All rows are sent through one parameterized command on one open connection.
/// Values are never concatenated into the SQL text, and the connection is not
/// reopened for each row.
/// </remarks>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void SendToServer_Click(object sender, EventArgs e)
{
    DataTable Values = Session["valuesdt"] as DataTable;
    // The session entry is missing when nothing was uploaded or the session expired.
    if (Values == null || Values.Rows.Count == 0)
    {
        return;
    }

    using (SqlCommand command = new SqlCommand(
        "INSERT INTO client(Mobile1,Mobile2,Tel,Category) VALUES(@Mobile1,@Mobile2,@Tel,@Category)", connection))
    {
        // NOTE(review): Float assumes the three number columns are numeric in the database - confirm.
        SqlParameter mobile1 = command.Parameters.Add("@Mobile1", System.Data.SqlDbType.Float);
        SqlParameter mobile2 = command.Parameters.Add("@Mobile2", System.Data.SqlDbType.Float);
        SqlParameter tel = command.Parameters.Add("@Tel", System.Data.SqlDbType.Float);
        SqlParameter category = command.Parameters.Add("@Category", System.Data.SqlDbType.NVarChar);

        try
        {
            if (connection.State == System.Data.ConnectionState.Closed)
            {
                connection.Open();
            }

            foreach (DataRow row in Values.Rows)
            {
                mobile1.Value = ParseNumberOrZero(row["Mobile1"]);
                mobile2.Value = ParseNumberOrZero(row["Mobile2"]);
                tel.Value = ParseNumberOrZero(row["Tel"]);
                category.Value = row["Category"].ToString();
                command.ExecuteNonQuery();
            }
        }
        finally
        {
            // Close even when a row fails so the shared connection is not left open.
            connection.Close();
        }
    }

    Msg.Text = "Submitied successfully to the server ";
}

// Converts a cell to a number; a cell whose text is empty counts as 0.
private static double ParseNumberOrZero(object cell)
{
    string text = cell.ToString();
    return text == "" ? 0d : double.Parse(text);
}

在插入sql数据库时处理大量数据

您可以尝试SqlBulkCopy插入Datatable到数据库表

像这样,

// Copies the rows of a DataTable into DestTableName in a single bulk operation.
using (SqlBulkCopy bulkCopy = new SqlBulkCopy(sqlConnection, SqlBulkCopyOptions.KeepIdentity))
{
    bulkCopy.DestinationTableName = DestTableName;
    string[] DtColumnName = YourDataTableColumns;
    // Map every DataTable column to the database column with the same name.
    // Names are matched ignoring case with an ordinal comparison, so the result
    // does not depend on the current culture.
    foreach (string dbcol in DbColumnName)
    {
        foreach (string dtcol in DtColumnName)
        {
            if (string.Equals(dbcol, dtcol, StringComparison.OrdinalIgnoreCase))
            {
                bulkCopy.ColumnMappings.Add(dtcol, dbcol);
                break;
            }
        }
    }
    // Dispose the reader once the copy is done. The bulk copy object itself is
    // closed by the enclosing using block.
    using (var reader = YourDataTableName.CreateDataReader())
    {
        bulkCopy.WriteToServer(reader);
    }
}

更多信息请阅读http://msdn.microsoft.com/en-us/library/system.data.sqlclient.sqlbulkcopy.aspx

每次插入1行,对于这样的数据量来说,这是非常昂贵的

在这些情况下,你应该使用大容量插入,所以到DB的往返只需要一次,如果你需要回滚——所有的都是相同的事务

您可以使用 SqlBulkCopy(工作量更大),也可以使用 SqlDataAdapter 的批量更新功能。做法不是自己拼接 INSERT 语句、构建一个 SqlDataAdapter 再逐条手动执行,而是创建一个 DataSet 并填充它,创建一个 SqlDataAdapter,设置每个批次中插入的行数,然后只执行一次适配器。

我可以重复这段代码,但本文将展示如何做到这一点:http://msdn.microsoft.com/en-us/library/kbbwt18a%28v=vs.80%29.aspx

/// <summary>
/// Click handler that sends the distinct rows of the table stored in
/// Session["valuesdt"] to the database through BatchUpdate.
/// </summary>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void SendToServer_Click(object sender, EventArgs e)
{
    DataTable Values = Session["valuesdt"] as DataTable;
    // The session entry is missing when nothing was uploaded or the session expired.
    if (Values == null || Values.Rows.Count == 0)
    {
        return;
    }

    // Distinct copy restricted to the four columns that are inserted.
    DataTable dv = Values.DefaultView.ToTable(true, "Mobile1", "Mobile2", "Tel", "Category");

    // Fix up default values on the copy that is actually sent. Changing the
    // session table here would have no effect on dv.
    foreach (DataRow row in dv.Rows)
    {
        string mobile1 = row["Mobile1"].ToString();
        string mobile2 = row["Mobile2"].ToString();
        string tel = row["Tel"].ToString();

        row["Mobile1"] = mobile1 == "" ? 0d : double.Parse(mobile1);
        row["Mobile2"] = mobile2 == "" ? 0d : double.Parse(mobile2);
        row["Tel"] = tel == "" ? 0d : double.Parse(tel);
        row["Category"] = row["Category"].ToString();
    }

    BatchUpdate(dv, 1000);
}
/// <summary>
/// Inserts the rows of <paramref name="dataTable"/> into the client table using
/// the batching support of <see cref="SqlDataAdapter"/>.
/// </summary>
/// <param name="dataTable">Table with the columns Mobile1, Mobile2, Tel and Category.</param>
/// <param name="batchSize">Value assigned to <see cref="SqlDataAdapter.UpdateBatchSize"/>.</param>
public static void BatchUpdate(DataTable dataTable, Int32 batchSize)
{
    // Assumes GetConnectionString() returns a valid connection string.
    string connectionString = GetConnectionString();

    using (SqlConnection connection = new SqlConnection(connectionString))
    using (SqlDataAdapter adapter = new SqlDataAdapter())
    using (SqlCommand insert = new SqlCommand(
        "INSERT INTO client(Mobile1,Mobile2,Tel,Category) VALUES(@Mobile1,@Mobile2,@Tel,@Category);", connection))
    {
        // Each parameter names its source column; the adapter reads the value for
        // every row from that column when it executes the command.
        insert.Parameters.Add("@Mobile1", SqlDbType.Float, 8, "Mobile1");
        insert.Parameters.Add("@Mobile2", SqlDbType.Float, 8, "Mobile2");
        insert.Parameters.Add("@Tel", SqlDbType.Float, 8, "Tel");
        insert.Parameters.Add("@Category", SqlDbType.NVarChar, 50, "Category");
        // Nothing returned by the INSERT is mapped back onto the rows.
        insert.UpdatedRowSource = UpdateRowSource.None;

        adapter.InsertCommand = insert;
        // Set the batch size.
        adapter.UpdateBatchSize = batchSize;
        // Execute the update.
        adapter.Update(dataTable);
    }
}

我知道这是一个非常老的帖子,但对于 5000 条插入,你其实不需要使用现有答案中介绍的批量操作。性能之所以受到很大影响,是因为每插入一行都要关闭并重新打开连接。以下是我过去使用过的一些代码,它保持一个连接处于打开状态,并在该连接上执行所需的全部命令,把所有数据推送到数据库:

/// <summary>
/// Builds delegates that execute a stored procedure once per work item over a
/// single connection.
/// </summary>
public static class DataWorker
{
    /// <summary>
    /// Creates a worker that runs <paramref name="storedProcedureName"/> once for
    /// every item it is given, reusing one connection and one command.
    /// </summary>
    /// <typeparam name="T">Type of the work items.</typeparam>
    /// <param name="connectionSource">
    /// Factory for the connection. It is called once per worker invocation; the
    /// returned connection is opened if necessary and disposed when the invocation ends.
    /// </param>
    /// <param name="storedProcedureName">Name of the stored procedure to execute.</param>
    /// <param name="parameterizer">Maps a work item to the (name, value) parameters of one call; a null value is sent as <see cref="DBNull"/>.</param>
    /// <returns>A delegate that processes a sequence of work items asynchronously.</returns>
    /// <exception cref="ArgumentNullException">
    /// An argument is null. The returned task also faults with this exception when
    /// <paramref name="connectionSource"/> returns null or the work data is null.
    /// </exception>
    public static Func<IEnumerable<T>, Task> GetStoredProcedureWorker<T>(Func<SqlConnection> connectionSource, string storedProcedureName, Func<T, IEnumerable<(string paramName, object paramValue)>> parameterizer)
    {
        // Validate up front so a bad argument fails here, not on the first work item.
        if (connectionSource is null) throw new ArgumentNullException(nameof(connectionSource));
        if (storedProcedureName is null) throw new ArgumentNullException(nameof(storedProcedureName));
        if (parameterizer is null) throw new ArgumentNullException(nameof(parameterizer));

        async Task<SqlConnection> OpenConnectionAsync()
        {
            var conn = connectionSource() ?? throw new ArgumentNullException(nameof(connectionSource), $"Connection from {nameof(connectionSource)} cannot be null");
            try
            {
                if (conn.State != ConnectionState.Open)
                {
                    await conn.OpenAsync().ConfigureAwait(false);
                }
            }
            catch
            {
                // The caller never receives this connection, so release it here.
                conn.Dispose();
                throw;
            }
            return conn;
        }

        async Task DoStoredProcedureWork(IEnumerable<T> workData)
        {
            if (workData is null) throw new ArgumentNullException(nameof(workData));

            using (var connection = await OpenConnectionAsync().ConfigureAwait(false))
            using (var command = connection.CreateCommand())
            {
                command.CommandType = CommandType.StoredProcedure;
                command.CommandText = storedProcedureName;
                // No Prepare() call: it has no effect on stored-procedure commands.
                foreach (var thing in workData)
                {
                    // The command is reused, so drop the previous item's parameters first.
                    command.Parameters.Clear();
                    foreach (var (paramName, paramValue) in parameterizer(thing))
                    {
                        command.Parameters.AddWithValue(paramName, paramValue ?? DBNull.Value);
                    }
                    await command.ExecuteNonQueryAsync().ConfigureAwait(false);
                }
            }
        }

        return DoStoredProcedureWork;
    }
}

这段代码实际上来自我的一个项目,该项目用于收集受限制的电子邮件列表。下面是一个相关示例,展示了 parameterizer 参数大致是什么样子,以及如何使用上面的代码:

        // Maps one EmailRestriction to the (name, value) parameter pairs passed to the stored procedure.
        IEnumerable<(string, object)> RestrictionToParameter(EmailRestriction restriction) => new (string, object)[]
        {
            ("@emailAddress", restriction.Email),
            ("@reason", restriction.Reason),
            ("@restrictionType", restriction.RestrictionType),
            ("@dateTime", restriction.Date),
        };

        // Build the worker once, then hand it the whole sequence of restrictions.
        var worker = DataWorker.GetStoredProcedureWorker<EmailRestriction>(
            ConnectionFactory,
            @"[emaildata].[AddRestrictedEmail]",
            RestrictionToParameter);

        await worker(emailRestrictions).ConfigureAwait(false);