最快/最安全的文件查找/解析

本文关键字:查找 解析 文件 安全 最快 | 更新日期: 2023-09-27 18:28:09

在c:盘上,我有数以万计的*.foobar文件。它们分布在各种各样的位置(即不同的子目录中)。这些文件的大小大约为1-64kb,并且是纯文本。

我有一个class Foobar(string fileContents),用来把这些.foobar文件的内容表示为强类型对象。

我的挑战是获得c:上所有*.foobar文件的列表,表示为Foobar对象的数组。做这件事最快的方法是什么?

我很想知道是否有比我的第一种方法更好的方法(想必是有的),以及我的这种方法是否存在潜在的问题(例如,I/O并发问题会不会引发异常?):

// Enumerate the path of every *.foobar file in rootPath and all of its subdirectories.
var files = Directory.EnumerateFiles(
    rootPath,
    "*.foobar",
    SearchOption.AllDirectories);

// Read each file's text through a PLINQ query, wrap the text in a Foobar and
// collect the results into an array. AsOrdered() is not requested, so the order
// of the array elements is not guaranteed to follow the enumeration order.
Foobar[] foobars = files
    .AsParallel()
    .Select(filePath => File.ReadAllText(filePath))
    .Select(contents => new Foobar(contents))
    .ToArray();

最快/最安全的文件查找/解析

因为权限错误(或其他错误)显然会使枚举停止,所以您可能需要实现自己的枚举器,如下所示:

/// <summary>
/// Enumerates the paths of all files whose names match a search pattern, starting at
/// a root directory and descending into every subdirectory. A directory that cannot
/// be opened (including the root itself) is skipped and the exception is recorded
/// instead of aborting the enumeration; see <see cref="Errors"/>.
/// </summary>
class SafeFileEnumerator : IEnumerable<string>
{
    private string root;
    private string pattern;
    private IList<Exception> errors;

    /// <summary>
    /// Creates an enumerable that records failures in a list of its own.
    /// </summary>
    /// <param name="root">Directory to start searching from.</param>
    /// <param name="pattern">Search pattern passed to Directory.EnumerateFiles.</param>
    public SafeFileEnumerator(string root, string pattern)
        : this(root, pattern, new List<Exception>())
    {
    }

    /// <summary>
    /// Creates an enumerable that records failures in a caller-supplied list.
    /// </summary>
    /// <param name="root">Directory to start searching from.</param>
    /// <param name="pattern">Search pattern passed to Directory.EnumerateFiles.</param>
    /// <param name="errors">
    /// List that receives every exception caught while enumerating. A null argument
    /// is replaced by a fresh list so that recording an error can never fail with a
    /// NullReferenceException.
    /// </param>
    public SafeFileEnumerator(string root, string pattern, IList<Exception> errors)
    {
        this.root = root;
        this.pattern = pattern;
        this.errors = errors ?? new List<Exception>();
    }

    /// <summary>
    /// Returns a snapshot of the exceptions recorded so far.
    /// </summary>
    /// <returns>A new array holding the recorded exceptions.</returns>
    public Exception[] Errors()
    {
        return errors.ToArray();
    }

    /// <summary>
    /// Walks one directory: first its own matching files, then, one subdirectory at a
    /// time, a nested Enumerator over that subdirectory.
    /// </summary>
    class Enumerator : IEnumerator<string>
    {
        // Source of the item currently exposed: either the files of this directory
        // or a nested Enumerator over one of its subdirectories.
        IEnumerator<string> fileEnumerator;
        // Immediate subdirectories that still have to be visited.
        IEnumerator<string> directoryEnumerator;
        string root;
        string pattern;
        IList<Exception> errors;

        public Enumerator(string root, string pattern, IList<Exception> errors)
        {
            this.root = root;
            this.pattern = pattern;
            this.errors = errors;
            Open();
        }

        /// <summary>
        /// Opens the file and subdirectory enumerators for the root directory. If
        /// either cannot be opened, the exception is recorded, anything already
        /// opened is released, and the directory is treated as empty.
        /// </summary>
        private void Open()
        {
            try
            {
                fileEnumerator = System.IO.Directory.EnumerateFiles(root, pattern).GetEnumerator();
                directoryEnumerator = System.IO.Directory.EnumerateDirectories(root).GetEnumerator();
            }
            catch (Exception ex)
            {
                errors.Add(ex);
                // Releases the file enumerator if only the directory listing failed.
                Dispose();
            }
        }

        /// <summary>
        /// Path of the file the enumerator is positioned on.
        /// </summary>
        /// <exception cref="ObjectDisposedException">
        /// There is no active file enumerator (disposed, exhausted, or the directory
        /// could not be opened).
        /// </exception>
        public string Current
        {
            get
            {
                if (fileEnumerator == null) throw new ObjectDisposedException("FileEnumerator");
                return fileEnumerator.Current;
            }
        }

        /// <summary>
        /// Releases both underlying enumerators; safe to call repeatedly.
        /// </summary>
        public void Dispose()
        {
            if (fileEnumerator != null)
                fileEnumerator.Dispose();
            fileEnumerator = null;
            if (directoryEnumerator != null)
                directoryEnumerator.Dispose();
            directoryEnumerator = null;
        }

        object System.Collections.IEnumerator.Current
        {
            get { return Current; }
        }

        /// <summary>
        /// Advances to the next matching file, moving on to the next subdirectory
        /// whenever the current source is exhausted.
        /// </summary>
        /// <returns>true if positioned on a file; false once the walk is finished.</returns>
        public bool MoveNext()
        {
            if ((fileEnumerator != null) && (fileEnumerator.MoveNext()))
                return true;
            while ((directoryEnumerator != null) && (directoryEnumerator.MoveNext()))
            {
                if (fileEnumerator != null)
                    fileEnumerator.Dispose();
                // The nested enumerator records its own open failure and then simply
                // yields nothing, so an unreadable subdirectory is skipped here.
                fileEnumerator = new Enumerator(directoryEnumerator.Current, pattern, errors);
                if (fileEnumerator.MoveNext())
                    return true;
            }
            Dispose();
            return false;
        }

        /// <summary>
        /// Releases the current state and starts over from the root directory.
        /// </summary>
        public void Reset()
        {
            Dispose();
            Open();
        }
    }

    /// <summary>
    /// Starts a new walk from the root directory.
    /// </summary>
    public IEnumerator<string> GetEnumerator()
    {
        return new Enumerator(root, pattern, errors);
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}

做得很好!下面是对这段代码的一个扩展,它返回FileSystemInfo而不是字符串路径。另外还做了一些小改动:增加了SearchOption参数(与.NET原生API一样),并在首次读取根目录时捕获错误,以应对根文件夹被拒绝访问的情况。再次感谢您的原帖!

/// <summary>
/// Enumerates the file system entries (files and directories) whose names match a
/// pattern, starting at a root directory and optionally descending into its
/// subdirectories. Directories that cannot be read are skipped and the exception is
/// recorded in <see cref="Errors"/> instead of aborting the enumeration.
/// </summary>
public class SafeFileEnumerator : IEnumerable<FileSystemInfo>
{
    /// <summary>
    /// Starting directory to search from
    /// </summary>
    private DirectoryInfo root;
    /// <summary>
    /// Filter pattern applied to the entries that are returned
    /// </summary>
    private string pattern;
    /// <summary>
    /// Indicator if search is recursive or not
    /// </summary>
    private SearchOption searchOption;
    /// <summary>
    /// Any errors captured
    /// </summary>
    private IList<Exception> errors;

    /// <summary>
    /// Create an Enumerator that will scan the file system, skipping directories where access is denied
    /// </summary>
    /// <param name="root">Starting Directory</param>
    /// <param name="pattern">Filter pattern; null or empty means "*"</param>
    /// <param name="option">Recursive or not</param>
    /// <exception cref="ArgumentException">The root directory does not exist.</exception>
    public SafeFileEnumerator(string root, string pattern, SearchOption option)
        : this(new DirectoryInfo(root), pattern, option)
    {}

    /// <summary>
    /// Create an Enumerator that will scan the file system, skipping directories where access is denied
    /// </summary>
    /// <param name="root">Starting Directory</param>
    /// <param name="pattern">Filter pattern; null or empty means "*"</param>
    /// <param name="option">Recursive or not</param>
    /// <exception cref="ArgumentException">The root directory is null or does not exist.</exception>
    public SafeFileEnumerator(DirectoryInfo root, string pattern, SearchOption option)
        : this(root, pattern, option, new List<Exception>())
    {}

    // Internal constructor for the recursive iterator: nested instances share the
    // error list of the instance that created them.
    private SafeFileEnumerator(DirectoryInfo root, string pattern, SearchOption option, IList<Exception> errors)
    {
        if (root == null || !root.Exists)
        {
            throw new ArgumentException("Root directory is not set or does not exist.", "root");
        }
        this.root = root;
        this.searchOption = option;
        this.pattern = String.IsNullOrEmpty(pattern)
            ? "*"
            : pattern;
        this.errors = errors;
    }

    /// <summary>
    /// Errors captured while parsing the file system (a new array on every call).
    /// </summary>
    public Exception[] Errors
    {
        get
        {
            return errors.ToArray();
        }
    }

    /// <summary>
    /// Helper class to enumerate the file system: first the matching entries of one
    /// directory, then, for recursive searches, each of its subdirectories in turn.
    /// </summary>
    private class Enumerator : IEnumerator<FileSystemInfo>
    {
        // Core enumerator that we will be walking through
        private IEnumerator<FileSystemInfo> fileEnumerator;
        // Subdirectories still to be visited (null for non-recursive searches)
        private IEnumerator<DirectoryInfo> directoryEnumerator;
        private DirectoryInfo root;
        private string pattern;
        private SearchOption searchOption;
        private IList<Exception> errors;

        public Enumerator(DirectoryInfo root, string pattern, SearchOption option, IList<Exception> errors)
        {
            this.root = root;
            this.pattern = pattern;
            this.errors = errors;
            this.searchOption = option;
            Reset();
        }

        /// <summary>
        /// Current item the primary iterator is pointing to, or null when there is no
        /// active iterator (the walk has finished or the directory could not be read).
        /// </summary>
        public FileSystemInfo Current
        {
            get
            {
                // Guard against the enumerator having been released; dereferencing
                // it unconditionally would throw a NullReferenceException.
                return fileEnumerator == null ? null : fileEnumerator.Current;
            }
        }

        object System.Collections.IEnumerator.Current
        {
            get { return Current; }
        }

        public void Dispose()
        {
            Dispose(true, true);
        }

        // Releases the file enumerator and/or the directory enumerator.
        private void Dispose(bool file, bool dir)
        {
            if (file)
            {
                if (fileEnumerator != null)
                    fileEnumerator.Dispose();
                fileEnumerator = null;
            }
            if (dir)
            {
                if (directoryEnumerator != null)
                    directoryEnumerator.Dispose();
                directoryEnumerator = null;
            }
        }

        /// <summary>
        /// Advances to the next matching entry.
        /// </summary>
        /// <returns>true if positioned on an entry; false when nothing is left.</returns>
        public bool MoveNext()
        {
            // Enumerate the entries in the current folder
            if ((fileEnumerator != null) && (fileEnumerator.MoveNext()))
                return true;
            // Don't go recursive...
            if (searchOption == SearchOption.TopDirectoryOnly) { return false; }
            while ((directoryEnumerator != null) && (directoryEnumerator.MoveNext()))
            {
                Dispose(true, false);
                try
                {
                    fileEnumerator = new SafeFileEnumerator(
                        directoryEnumerator.Current,
                        pattern,
                        SearchOption.AllDirectories,
                        errors
                        ).GetEnumerator();
                }
                catch (Exception ex)
                {
                    // E.g. the subdirectory vanished since it was listed: record and skip it.
                    errors.Add(ex);
                    continue;
                }
                // Open up the current folder file enumerator
                if (fileEnumerator.MoveNext())
                    return true;
            }
            Dispose(true, true);
            return false;
        }

        /// <summary>
        /// Releases the current state and re-reads the root directory.
        /// </summary>
        public void Reset()
        {
            Dispose(true, true);
            if (root == null)
            {
                return;
            }
            // Safely get the enumerators, including in the case where the root is not accessible
            try
            {
                fileEnumerator = root.GetFileSystemInfos(pattern, SearchOption.TopDirectoryOnly).AsEnumerable<FileSystemInfo>().GetEnumerator();
            }
            catch (Exception ex)
            {
                errors.Add(ex);
                fileEnumerator = null;
            }
            // Subdirectories are only needed when recursing.
            if (searchOption == SearchOption.TopDirectoryOnly)
            {
                return;
            }
            try
            {
                // List ALL subdirectories: the pattern filters the entries that are
                // returned, not the folders that are descended into. Filtering here
                // would stop "*.foobar" from recursing into ordinary folders.
                directoryEnumerator = root.GetDirectories().AsEnumerable<DirectoryInfo>().GetEnumerator();
            }
            catch (Exception ex)
            {
                errors.Add(ex);
                directoryEnumerator = null;
            }
        }
    }

    /// <summary>
    /// Starts a new walk from the root directory.
    /// </summary>
    public IEnumerator<FileSystemInfo> GetEnumerator()
    {
        return new Enumerator(root, pattern, searchOption, errors);
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}