为100000条记录创建XLSX文件耗时很长,并且占用大量RAM内存
本文关键字:时间 内存 RAM 文件 XLSX 100000条 记录 创建 | 更新日期: 2023-09-27 18:06:06
我正在用自定义代码创建XLSX文件,没有使用Open XML SDK。对于50,000条记录、200列的数据,它可以正常工作,最多占用13 GB RAM。
但是当我尝试100000行、200列时(最多16 GB RAM),XLSX文件始终没有创建出来,RAM占用不断地升高又降低,CPU使用率也同样上下波动。
我把100000行、200列的数据写入一个流,并同时将该流复制到包部件(package part)的流中,没有拆分XML文件。这个XML文件的大小是3 GB。
你能不能给出一个不使用Open XML SDK的解决方案?
当我尝试使用Open XML时,它可以为单个用户处理100000条记录和200列。但是,当为两个用户创建100000条200列的记录时,服务器挂起了。
我的自定义代码占用更多内存,但不挂起。
在下面的代码中,"CreateOpenXMLComWorkSheet_XMLWriter"方法占用的RAM最多。
我使用下面的代码供您参考。如果有什么需要修改的地方,请告诉我。
//Package method
// Creates the package at `path`, adds one worksheet part and fills it with the
// worksheet XML produced by CreateOpenXMLComWorkSheet_XMLWriter.
Package package = null;
using (package = ZipPackage.Open(path, FileMode.Create))
{
    packgPart = package.CreatePart(new Uri(relativePaths[relIndex], UriKind.Relative), contentTypes[6], CompressionOption.Maximum);
    XmlWriter xmlWriter = null;
    Stream stream = null;
    try
    {
        // The helper returns the buffered worksheet XML rewound to position 0,
        // plus the writer that produced it (handed back through the out parameter).
        stream = CreateOpenXMLComWorkSheet_XMLWriter(data, "", out xmlWriter);

        // Dispose the part stream as soon as the copy is done instead of
        // leaving it open until the package is closed.
        using (Stream partStream = packgPart.GetStream())
        {
            CopyStream(stream, partStream);
        }
    }
    finally
    {
        // Release the writer and the buffer even when building or copying fails.
        if (xmlWriter != null)
        {
            xmlWriter.Close();
        }
        if (stream != null)
        {
            stream.Dispose();
        }
    }
    packgPart = null;
    relIndex++;
    // No explicit GC.Collect() or package.Close(): leaving the using block
    // disposes the package, which saves and closes it.
}
// CreateOpenXMLComWorkSheet method
// Define other methods and classes here
/// <summary>
/// Serializes the given rows as a SpreadsheetML worksheet (worksheet / sheetData / row / c / v)
/// into an in-memory stream.
/// </summary>
/// <param name="rows">
/// One StringBuilder per row; the cells of a row are separated by univDelimiter.
/// Each entry is set to null once it has been written, so the list is consumed by this call.
/// </param>
/// <param name="sheet">Not used by this method.</param>
/// <param name="xmlWriter">
/// Receives the writer that produced the XML. It has been flushed but not closed;
/// the caller is expected to close it.
/// </param>
/// <returns>The stream holding the worksheet XML, positioned at offset 0.</returns>
/// <remarks>
/// NOTE(review): the whole worksheet is buffered in a MemoryStream, so memory use grows with
/// the size of the generated XML. For very large sheets prefer the overload in this file
/// that writes straight to the destination stream.
/// </remarks>
private static Stream CreateOpenXMLComWorkSheet_XMLWriter(List<StringBuilder> rows, string sheet,out XmlWriter xmlWriter)
{
    string nameSpace = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
    // Built once instead of once per row.
    string[] delimiters = new string[] { univDelimiter };

    XmlWriterSettings xmlWriterSettings = new XmlWriterSettings();
    xmlWriterSettings.NewLineHandling = NewLineHandling.None;
    xmlWriterSettings.Indent = false;

    MemoryStream stream = new MemoryStream();
    xmlWriter = XmlWriter.Create(stream, xmlWriterSettings);
    xmlWriter.WriteStartElement("x", "worksheet", nameSpace);
    xmlWriter.WriteStartElement("x", "sheetData", nameSpace);

    // All loop state is local so the method does not depend on shared scratch variables.
    for (int m = 0; m < rows.Count; m++)
    {
        xmlWriter.WriteStartElement("x", "row", nameSpace);
        string[] cols = rows[m].ToString().Split(delimiters, StringSplitOptions.None);
        foreach (string cellValue in cols)
        {
            // The text is written verbatim into <v>, so only accept what a culture-independent
            // parse accepts: no thousands separators, no locale-specific decimal comma,
            // and no NaN/Infinity (not valid numeric cell content).
            double dVal;
            bool isNumber = double.TryParse(
                    cellValue,
                    System.Globalization.NumberStyles.Float,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out dVal)
                && !double.IsNaN(dVal)
                && !double.IsInfinity(dVal);
            string dataType = isNumber ? "n" : "str";

            xmlWriter.WriteStartElement("x", "c", nameSpace);
            xmlWriter.WriteAttributeString("s", "13");
            xmlWriter.WriteAttributeString("t", dataType);
            xmlWriter.WriteStartElement("x", "v", nameSpace);
            xmlWriter.WriteValue(cellValue);
            xmlWriter.WriteEndElement(); // v
            xmlWriter.WriteEndElement(); // c
        }
        xmlWriter.WriteEndElement(); // row
        // Drop the reference to the source row as soon as it has been written.
        rows[m] = null;
    }

    xmlWriter.WriteEndElement(); // sheetData
    xmlWriter.WriteEndElement(); // worksheet
    xmlWriter.Flush();
    stream.Position = 0;
    return stream;
}
//CopyStream method
/// <summary>
/// Copies everything that can still be read from <paramref name="source"/> into
/// <paramref name="target"/>, 4 KiB at a time. Neither stream is closed or rewound.
/// </summary>
/// <param name="source">Stream to read from, starting at its current position.</param>
/// <param name="target">Stream that receives the bytes.</param>
private static void CopyStream(Stream source, Stream target)
{
    const int chunkSize = 0x1000;
    byte[] chunk = new byte[chunkSize];
    while (true)
    {
        int count = source.Read(chunk, 0, chunkSize);
        if (count <= 0)
        {
            // Read reports 0 once the source is exhausted.
            break;
        }
        target.Write(chunk, 0, count);
    }
}
看来你写文件的方法不对。Open XML SDK 完全足以用来创建包含大量数据的 Excel 文件。我认为你需要采用类似 SAX 的方式,组合使用 XmlReader 和 XmlWriter,这样就不会耗尽内存。
看看这个符合你具体要求的精彩博客。
https://blogs.msdn.microsoft.com/brian_jones/2010/06/22/writing-large-excel-files-with-the-open-xml-sdk/
为了减少内存压力,考虑不要在XmlWriter中使用MemoryStream。如果您使用基于磁盘的流,那么这将大大减少内存压力。
请把您在这里获得的流 packgPart.GetStream()
用作 XmlWriter 的底层存储(backing store)。
我也感觉到你不需要在内存中加载整个CSV。
这是一个只使用流的版本
/// <summary>
/// Sample driver: reads a delimited text file and streams it as worksheet XML
/// into a single part ("/test.xml") of a newly created package.
/// </summary>
void Main()
{
    // Verbatim strings keep the Windows path separators intact.
    string inputFile = @"D:\_bigfile.csv";
    string path = @"D:\pack.zip";
    using (Package package = ZipPackage.Open(path, FileMode.Create))
    {
        var packgPart = package.CreatePart(new Uri("/test.xml", UriKind.Relative), System.Net.Mime.MediaTypeNames.Text.Xml, CompressionOption.Maximum);
        using (var inputStream = File.OpenRead(inputFile))
        // Dispose the part stream explicitly instead of leaving it open
        // until the package itself is closed.
        using (var partStream = packgPart.GetStream())
        {
            CreateOpenXMLComWorkSheet_XMLWriter(inputStream, "", partStream);
        }
    }
}
// Separator between the cells of one input line.
private const string univDelimiter = "|";

/// <summary>
/// Streams delimited text into a SpreadsheetML worksheet (worksheet / sheetData / row / c / v),
/// producing one row element per input line and writing directly to the destination stream,
/// so the document is not buffered in a separate in-memory stream first.
/// </summary>
/// <param name="inputStream">
/// Text input whose lines hold cells separated by univDelimiter. It is wrapped in a
/// StreamReader that is disposed when reading finishes.
/// </param>
/// <param name="sheet">Not used by this method.</param>
/// <param name="packagePartStream">
/// Destination for the XML. CloseOutput is not set on the writer settings, so with the
/// default the writer should leave this stream open and the caller stays responsible for it.
/// </param>
private static void CreateOpenXMLComWorkSheet_XMLWriter(Stream inputStream, string sheet, Stream packagePartStream)
{
    string nameSpace = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
    // Built once instead of once per line.
    string[] delimiters = new string[] { univDelimiter };

    XmlWriterSettings xmlWriterSettings = new XmlWriterSettings();
    xmlWriterSettings.NewLineHandling = NewLineHandling.None;
    xmlWriterSettings.Indent = false;

    using (var xmlWriter = XmlWriter.Create(packagePartStream, xmlWriterSettings))
    {
        xmlWriter.WriteStartElement("x", "worksheet", nameSpace);
        xmlWriter.WriteStartElement("x", "sheetData", nameSpace);
        using (var sr = new StreamReader(inputStream))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                xmlWriter.WriteStartElement("x", "row", nameSpace);
                string[] cols = line.Split(delimiters, StringSplitOptions.None);
                foreach (string cellValue in cols)
                {
                    // The text is written verbatim into <v>, so only accept what a
                    // culture-independent parse accepts: no thousands separators, no
                    // locale-specific decimal comma, and no NaN/Infinity (not valid
                    // numeric cell content).
                    double dVal;
                    bool isNumber = double.TryParse(
                            cellValue,
                            System.Globalization.NumberStyles.Float,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out dVal)
                        && !double.IsNaN(dVal)
                        && !double.IsInfinity(dVal);
                    string dataType = isNumber ? "n" : "str";

                    xmlWriter.WriteStartElement("x", "c", nameSpace);
                    xmlWriter.WriteAttributeString("s", "13");
                    xmlWriter.WriteAttributeString("t", dataType);
                    xmlWriter.WriteStartElement("x", "v", nameSpace);
                    xmlWriter.WriteValue(cellValue);
                    xmlWriter.WriteEndElement(); // v
                    xmlWriter.WriteEndElement(); // c
                }
                xmlWriter.WriteEndElement(); // row
            }
        }
        xmlWriter.WriteEndElement(); // sheetData
        xmlWriter.WriteEndElement(); // worksheet
    }
}