最快/最安全的文件查找/解析
本文关键字:查找 解析 文件 安全 最快 | 更新日期: 2023-09-27 18:28:09
在c:盘上,我有数以万计的*.foobar文件。它们分布在各种各样的位置(即各级子目录中)。这些文件的大小大约为1-64kb,并且是纯文本。
我有一个class Foobar(string fileContents),它是这些.foobar文件的强类型表示。
我的挑战是获得c:
上所有*.foobar
文件的列表,表示为Foobar
对象的数组。做这件事最快的方法是什么?
我很想知道是否有比我的第一种方法更好的方法(毫无疑问),以及我的这种方法是否有任何潜在的问题(例如,I/O并发问题引发异常?):
// Lazily walk rootPath and every directory beneath it, yielding the path of each *.foobar file.
var files = Directory.EnumerateFiles(rootPath, "*.foobar", SearchOption.AllDirectories);

// Read and parse the files through PLINQ. No ordering is requested (no AsOrdered call),
// so the position of a given file in the resulting array is not guaranteed.
Foobar[] foobars = files
    .AsParallel()
    .Select(path => new Foobar(File.ReadAllText(path)))
    .ToArray();
因为权限错误(或其他错误)显然会使枚举停止,所以您可能需要实现自己的枚举器,如下所示:
/// <summary>
/// Enumerates the paths of all files matching a search pattern in a root directory and,
/// recursively, in every subdirectory. When opening a subdirectory throws, that subdirectory
/// is skipped and the exception is recorded instead of aborting the walk.
/// </summary>
/// <remarks>
/// Exceptions raised while opening the root directory itself are not caught here; they
/// propagate out of <see cref="GetEnumerator"/>.
/// </remarks>
class SafeFileEnumerator : IEnumerable<string>
{
    private string root;
    private string pattern;
    private IList<Exception> errors;

    /// <summary>
    /// Creates an enumerator over <paramref name="root"/> that collects errors in its own list.
    /// </summary>
    /// <param name="root">Directory to start from.</param>
    /// <param name="pattern">Search pattern handed to <c>Directory.EnumerateFiles</c>.</param>
    public SafeFileEnumerator(string root, string pattern)
        : this(root, pattern, new List<Exception>())
    {
    }

    /// <summary>
    /// Creates an enumerator over <paramref name="root"/> that records errors in a caller-supplied list.
    /// </summary>
    /// <param name="root">Directory to start from.</param>
    /// <param name="pattern">Search pattern handed to <c>Directory.EnumerateFiles</c>.</param>
    /// <param name="errors">
    /// List that receives every exception caught during the walk. When null, a fresh list is used.
    /// </param>
    public SafeFileEnumerator(string root, string pattern, IList<Exception> errors)
    {
        this.root = root;
        this.pattern = pattern;
        // A null list previously surfaced much later, as an exception from Errors() or from
        // the first attempt to record a failure; fall back to a private list instead.
        this.errors = errors ?? new List<Exception>();
    }

    /// <summary>
    /// Returns a snapshot of the exceptions recorded so far.
    /// </summary>
    public Exception[] Errors()
    {
        return errors.ToArray();
    }

    /// <summary>
    /// Walks one directory: first its own matching files, then each subdirectory in turn
    /// through a nested <see cref="SafeFileEnumerator"/> sharing the same error list.
    /// </summary>
    class Enumerator : IEnumerator<string>
    {
        // Source of the value exposed by Current: the root's files first, later a nested walker.
        IEnumerator<string> fileEnumerator;
        // Immediate subdirectories of root that still have to be visited.
        IEnumerator<string> directoryEnumerator;
        string root;
        string pattern;
        IList<Exception> errors;

        public Enumerator(string root, string pattern, IList<Exception> errors)
        {
            this.root = root;
            this.pattern = pattern;
            this.errors = errors;
            Open();
        }

        // Opens both enumerators for root; shared by the constructor and Reset.
        private void Open()
        {
            fileEnumerator = System.IO.Directory.EnumerateFiles(root, pattern).GetEnumerator();
            try
            {
                directoryEnumerator = System.IO.Directory.EnumerateDirectories(root).GetEnumerator();
            }
            catch
            {
                // Do not leak the already opened file enumerator when the second open fails.
                fileEnumerator.Dispose();
                fileEnumerator = null;
                throw;
            }
        }

        /// <summary>
        /// Path of the file the enumerator is positioned on.
        /// </summary>
        /// <exception cref="ObjectDisposedException">
        /// The enumerator has been disposed or has run past the last file.
        /// </exception>
        public string Current
        {
            get
            {
                if (fileEnumerator == null)
                {
                    throw new ObjectDisposedException("FileEnumerator");
                }
                return fileEnumerator.Current;
            }
        }

        object System.Collections.IEnumerator.Current
        {
            get { return Current; }
        }

        /// <summary>
        /// Releases both underlying enumerators; safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            DisposeFiles();
            if (directoryEnumerator != null)
            {
                directoryEnumerator.Dispose();
                directoryEnumerator = null;
            }
        }

        // Releases only the file enumerator, so a disposed instance is never left referenced.
        private void DisposeFiles()
        {
            if (fileEnumerator != null)
            {
                fileEnumerator.Dispose();
                fileEnumerator = null;
            }
        }

        /// <summary>
        /// Advances to the next matching file, descending into subdirectories as needed.
        /// </summary>
        /// <returns>True when <see cref="Current"/> is positioned on a file; false at the end.</returns>
        public bool MoveNext()
        {
            // Keep draining whatever is currently open (root files or a nested walker).
            if ((fileEnumerator != null) && fileEnumerator.MoveNext())
            {
                return true;
            }

            while ((directoryEnumerator != null) && directoryEnumerator.MoveNext())
            {
                DisposeFiles();
                try
                {
                    fileEnumerator = new SafeFileEnumerator(directoryEnumerator.Current, pattern, errors).GetEnumerator();
                }
                catch (Exception ex)
                {
                    // Best effort by design: remember why this subdirectory was skipped and move on.
                    errors.Add(ex);
                    continue;
                }

                if (fileEnumerator.MoveNext())
                {
                    return true;
                }
            }

            // Nothing left anywhere below root.
            Dispose();
            return false;
        }

        /// <summary>
        /// Restarts the walk from the beginning of the root directory.
        /// </summary>
        public void Reset()
        {
            Dispose();
            Open();
        }
    }

    public IEnumerator<string> GetEnumerator()
    {
        return new Enumerator(root, pattern, errors);
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
做得很好!这里有一个扩展版本的代码,它返回FileSystemInfo而不是字符串路径。另外还有一些小改动,例如添加了SearchOption参数(与.NET原生API一样),以及在根文件夹拒绝访问的情况下,捕获首次获取目录内容时抛出的错误。再次感谢您的原创帖子!
/// <summary>
/// Enumerates the file system entries under a root directory that match a search pattern,
/// optionally descending into subdirectories. A directory that cannot be read is skipped
/// and the exception is recorded in <see cref="Errors"/> instead of aborting the walk.
/// </summary>
/// <remarks>
/// Entries are obtained through <c>DirectoryInfo.GetFileSystemInfos</c>, so a directory whose
/// name matches the pattern is yielded alongside matching files.
/// </remarks>
public class SafeFileEnumerator : IEnumerable<FileSystemInfo>
{
    /// <summary>
    /// Starting directory to search from
    /// </summary>
    private DirectoryInfo root;
    /// <summary>
    /// Filter pattern applied to the yielded entries
    /// </summary>
    private string pattern;
    /// <summary>
    /// Indicator if search is recursive or not
    /// </summary>
    private SearchOption searchOption;
    /// <summary>
    /// Any errors captured
    /// </summary>
    private IList<Exception> errors;

    /// <summary>
    /// Create an Enumerator that will scan the file system, skipping directories where access is denied
    /// </summary>
    /// <param name="root">Starting Directory</param>
    /// <param name="pattern">Filter pattern; null or empty means "*"</param>
    /// <param name="option">Recursive or not</param>
    /// <exception cref="ArgumentException">The root directory does not exist.</exception>
    public SafeFileEnumerator(string root, string pattern, SearchOption option)
        : this(new DirectoryInfo(root), pattern, option)
    {
    }

    /// <summary>
    /// Create an Enumerator that will scan the file system, skipping directories where access is denied
    /// </summary>
    /// <param name="root">Starting Directory</param>
    /// <param name="pattern">Filter pattern; null or empty means "*"</param>
    /// <param name="option">Recursive or not</param>
    /// <exception cref="ArgumentException">The root directory is null or does not exist.</exception>
    public SafeFileEnumerator(DirectoryInfo root, string pattern, SearchOption option)
        : this(root, pattern, option, new List<Exception>())
    {
    }

    // Internal constructor for the recursive iterator: nested walkers share the caller's error list.
    private SafeFileEnumerator(DirectoryInfo root, string pattern, SearchOption option, IList<Exception> errors)
    {
        if (root == null || !root.Exists)
        {
            throw new ArgumentException("Root directory is not set or does not exist.", "root");
        }
        this.root = root;
        this.searchOption = option;
        this.pattern = String.IsNullOrEmpty(pattern)
            ? "*"
            : pattern;
        this.errors = errors;
    }

    /// <summary>
    /// Snapshot of the errors captured so far while walking the file system.
    /// </summary>
    public Exception[] Errors
    {
        get
        {
            return errors.ToArray();
        }
    }

    /// <summary>
    /// Helper class to enumerate the file system.
    /// </summary>
    private class Enumerator : IEnumerator<FileSystemInfo>
    {
        // Core enumerator that we are walking through: root entries first, later a nested walker
        private IEnumerator<FileSystemInfo> fileEnumerator;
        // Subdirectories of root that still have to be visited
        private IEnumerator<DirectoryInfo> directoryEnumerator;
        private DirectoryInfo root;
        private string pattern;
        private SearchOption searchOption;
        private IList<Exception> errors;

        public Enumerator(DirectoryInfo root, string pattern, SearchOption option, IList<Exception> errors)
        {
            this.root = root;
            this.pattern = pattern;
            this.errors = errors;
            this.searchOption = option;
            Reset();
        }

        /// <summary>
        /// Current item the primary iterator is pointing to, or null once the enumerator
        /// has been exhausted or disposed.
        /// </summary>
        public FileSystemInfo Current
        {
            get
            {
                // fileEnumerator is cleared at the end of the walk; avoid a NullReferenceException.
                return fileEnumerator == null ? null : fileEnumerator.Current;
            }
        }

        object System.Collections.IEnumerator.Current
        {
            get { return Current; }
        }

        /// <summary>
        /// Releases both underlying enumerators; safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            Dispose(true, true);
        }

        // Selectively releases the entry enumerator and/or the directory enumerator.
        private void Dispose(bool file, bool dir)
        {
            if (file)
            {
                if (fileEnumerator != null)
                {
                    fileEnumerator.Dispose();
                }
                fileEnumerator = null;
            }
            if (dir)
            {
                if (directoryEnumerator != null)
                {
                    directoryEnumerator.Dispose();
                }
                directoryEnumerator = null;
            }
        }

        /// <summary>
        /// Advances to the next matching entry, descending into subdirectories when the
        /// search option asks for it.
        /// </summary>
        /// <returns>True when <see cref="Current"/> is positioned on an entry; false at the end.</returns>
        public bool MoveNext()
        {
            // Enumerate the entries of the current folder (or of the nested walker in progress)
            if ((fileEnumerator != null) && fileEnumerator.MoveNext())
            {
                return true;
            }

            // Don't go recursive...
            if (searchOption == SearchOption.TopDirectoryOnly)
            {
                return false;
            }

            while ((directoryEnumerator != null) && directoryEnumerator.MoveNext())
            {
                Dispose(true, false);
                try
                {
                    fileEnumerator = new SafeFileEnumerator(
                        directoryEnumerator.Current,
                        pattern,
                        SearchOption.AllDirectories,
                        errors
                        ).GetEnumerator();
                }
                catch (Exception ex)
                {
                    // Best effort by design: remember why this folder was skipped and move on.
                    errors.Add(ex);
                    continue;
                }

                // Open up the current folder's enumerator
                if (fileEnumerator.MoveNext())
                {
                    return true;
                }
            }

            Dispose(true, true);
            return false;
        }

        /// <summary>
        /// Restarts the walk at the root directory, re-reading its entries and subdirectories.
        /// </summary>
        public void Reset()
        {
            Dispose(true, true);
            // Safely get the enumerators, including in the case where the root is not accessible
            if (root != null)
            {
                try
                {
                    IEnumerable<FileSystemInfo> entries = root.GetFileSystemInfos(pattern, SearchOption.TopDirectoryOnly);
                    fileEnumerator = entries.GetEnumerator();
                }
                catch (Exception ex)
                {
                    errors.Add(ex);
                    fileEnumerator = null;
                }
                try
                {
                    // Every subdirectory must be visited regardless of its name: the pattern filters
                    // the yielded entries, not the folders that are descended into.
                    IEnumerable<DirectoryInfo> folders = root.GetDirectories();
                    directoryEnumerator = folders.GetEnumerator();
                }
                catch (Exception ex)
                {
                    errors.Add(ex);
                    directoryEnumerator = null;
                }
            }
        }
    }

    public IEnumerator<FileSystemInfo> GetEnumerator()
    {
        return new Enumerator(root, pattern, searchOption, errors);
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}