如何使用 C# 从文本文件中的行中提取特定数据并输出到新的文本文件

本文关键字:文件 文本 数据 输出 如何使用 提取 | 更新日期: 2023-09-27 17:56:45

我有几个包含数据行的大型.csv文件。我需要从每一行中仅提取数据的特定部分,从而忽略我不感兴趣的部分并将结果输出到新的文本文件中。

例如,下面是数据的一部分:

Fr 23:59:59 M40 N04161K RX LAG 2 JNYT  17 STORE OCC 1 PRUD 1 RAW  -9 LAG   0
Fr 23:59:59 M08  N09461M  %SAT   3  %CONG   0  MQ 0  EB 0  OSQ     0 NSQ     4
Fr 23:59:59 M20 N09461M SAT   3%  SQ     0  FLOW     4  GN  13  STOC  9

我想写一个新文件,看起来像这样:

5,23,59,59,2,17,1,1,-9,0
5,23,59,59,3,0,0,0,0,4
5,23,59,59,3,0,4,13,9

(您会注意到数据的开头是"5",我也想使用它来代替代表"星期五"的"Fr")

每行数据所属的数据集通过"M"引用(M40、M08 等)来标识。如果能按数据集输出其中的所有数据会很有用(例如,将所有带有 M40 的数据过滤到一个 .txt 文件中,这就是我使用"if"语句的原因)。

我希望每个数字用逗号分隔,但不是必需的

这是我到目前为止的代码:

/// <summary>
/// Copies every line of the exported CSV that mentions the M40 or M08 data set
/// into a single output text file.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point: filters the input file line by line and reports any failure
    /// on the console before waiting for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        try
        {
            // "using" guarantees both files are closed (and the writer flushed)
            // even when reading or writing throws part-way through.
            using (StreamReader sr = new StreamReader("C:\\MessExport_20110402_0000.csv"))
            using (StreamWriter sw = new StreamWriter("C:\\output.txt"))
            {
                string line;
                // ReadLine returns null once the end of the file is reached.
                while ((line = sr.ReadLine()) != null)
                {
                    // One combined test, so a line that mentions both references
                    // is written once rather than twice.
                    if (line.Contains("M40") || line.Contains("M08"))
                    {
                        sw.WriteLine(line);
                    }
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("Exception: " + e.Message);
        }
        finally
        {
            Console.WriteLine("Executing finally block.");
            Console.WriteLine("Press any key to exit.");
            Console.ReadKey();
        }
    }
}

然后,读取下一个.csv文件并再次将结果输出到新的.txt文件中会很有用

我对使用任何带有正则表达式和拆分的代码都很陌生,因此任何帮助将不胜感激。

如何使用 C# 从文本文件中的行中提取特定数据并输出到新的文本文件

只是一个简单的实现:

// Splits data.csv into one output file per "M" reference (M40.txt, M08.txt, ...).
// Each output line is: day index, hour, minute, second, then every integer field
// found after the reference, all comma separated.
string workingDirectory = @"c:\";
// The position in this array is the number written for the day, so "Fr" becomes 5.
var days = new[] { "Su", "Mo", "Tu", "We", "Th", "Fr", "Sa" };
var writers = new Dictionary<string, StreamWriter>();
try
{
    using (StreamReader sr = new StreamReader(workingDirectory + "data.csv"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            var items = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            // Blank or truncated lines have no day/time/reference triple to work with.
            if (items.Length < 3)
            {
                continue;
            }
            // Lazily open one writer per reference the first time it is seen.
            StreamWriter w;
            if (!writers.TryGetValue(items[2], out w))
            {
                w = new StreamWriter(workingDirectory + items[2] + ".txt");
                writers.Add(items[2], w);
            }
            var times = items[1].Split(':');
            // Strip a trailing percent sign so values such as "3%" are kept as "3";
            // tokens that still are not integers (labels, node ids) are dropped.
            var digits = items.Skip(3)
                        .Select(x => x.TrimEnd('%'))
                        .Where(x => { int i; return int.TryParse(x, out i); });
            var data = new[] { Array.IndexOf(days, items[0]).ToString() }.Concat(times).Concat(digits);
            w.WriteLine(String.Join(",", data));
        }
    }
}
finally
{
    // Runs even if reading fails, so every file opened so far is flushed and released.
    foreach (var entry in writers)
    {
        entry.Value.Dispose();
    }
}

这只是一次快速的尝试,但我认为它能帮你解决一部分问题。

// Demonstrates pulling the day code and every integer out of each sample line
// with a single regular expression, printing the matches comma separated.
var lines = new List<string> { 
    "Fr 23:59:59 M40 N04161K RX LAG 2 JNYT  17 STORE OCC 1 PRUD 1 RAW  -9 LAG   0",
    "Fr 23:59:59 M08  N09461M  %SAT   3  %CONG   0  MQ 0  EB 0  OSQ     0 NSQ     4",
    "Fr 23:59:59 M20 N09461M SAT   3%  SQ     0  FLOW     4  GN  13  STOC  9"
};
// IgnorePatternWhitespace lets the pattern below be spaced out for readability.
var options = RegexOptions.IgnorePatternWhitespace;
// Either the two word characters at the start of the line (the day code), or an
// optionally negative run of digits delimited by word boundaries. The boundaries
// reject digits embedded in tokens such as "M40" or "N04161K".
var regex = new Regex(@"(?: ^\w\w | -?\b\d+\b )", options);
foreach (var line in lines)
{
    var matches = regex.Matches(line);
    foreach (Match m in matches)
    {
        // Every value is followed by a comma, including the last one on the line.
        Console.Write("{0},", m.Value);
    }
    Console.WriteLine();
}

输出结果:

Fr,23,59,59,2,17,1,1,-9,0,
Fr,23,59,59,3,0,0,0,0,4,
Fr,23,59,59,3,0,4,13,9,
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace Program
{
  /// <summary>
  /// Reads a space-separated export file, keeps the lines accepted by a caller-supplied
  /// filter and writes each of them as a comma-separated line of numbers:
  /// day index, hour, minute, second, then every integer field that follows the "M" reference.
  /// </summary>
  public class TransformCsv
  {
    /// <summary>
    /// Entry point: transforms the export file into the output file, keeping M40 and M08 lines.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    [STAThread]
    public static void Main(String[] args)
    {
      (new TransformCsv()).Run(@"c:\temp\MessExport_20110402_0000.csv", @"c:\temp\output.txt", LineFilterFunction);
    }

    /// <summary>
    /// Default line filter: accepts lines that contain "M40" or "M08".
    /// </summary>
    /// <param name="line">A raw line from the input file.</param>
    /// <returns><c>true</c> when the line should be written to the output.</returns>
    public static Boolean LineFilterFunction(String line)
    {
      return line.Contains("M40") || line.Contains("M08");
    }

    ////////////////////

    // Position + 1 is the number written for the day, so "Mo" is 1 and "Fr" is 5.
    private readonly List<String> _dayOfWeek = new List<String>() { "Mo", "Tu", "We", "Th", "Fr", "Sa", "Su" };

    private static readonly Char[] _spaceCharacter = " ".ToCharArray();

    /// <summary>
    /// Streams <paramref name="inputFilePath"/> line by line and writes the transformed
    /// form of every accepted line to <paramref name="outputFilePath"/>.
    /// </summary>
    /// <param name="inputFilePath">Path of the file to read.</param>
    /// <param name="outputFilePath">Path of the file to create or overwrite.</param>
    /// <param name="lineFilterFunction">Returns <c>true</c> for lines that should be kept.</param>
    public void Run(String inputFilePath, String outputFilePath, Func<String, Boolean> lineFilterFunction)
    {
      using (var reader = new StreamReader(inputFilePath))
      using (var writer = new StreamWriter(outputFilePath))
      {
        String line;
        while ((line = reader.ReadLine()) != null)
        {
          if (String.IsNullOrWhiteSpace(line) || !lineFilterFunction(line))
          {
            continue;
          }

          // A null result marks a line too short to transform; skip it instead of aborting the run.
          var transformed = this.GetTransformedLine(line);
          if (transformed != null)
          {
            writer.WriteLine(transformed);
          }
        }
      }
    }

    /// <summary>
    /// Converts one raw line into its comma-separated numeric form.
    /// </summary>
    /// <param name="line">A raw, space-separated line.</param>
    /// <returns>
    /// The transformed line, or <c>null</c> when the line has fewer than the three
    /// leading fields (day, time, reference).
    /// </returns>
    private String GetTransformedLine(String line)
    {
      var elements = line.Split(_spaceCharacter, StringSplitOptions.RemoveEmptyEntries);
      if (elements.Length < 3)
      {
        return null;
      }

      var result = new List<String>();
      // An unknown day code yields IndexOf == -1 and is therefore written as 0.
      result.Add((_dayOfWeek.IndexOf(elements[0]) + 1).ToString());
      result.Add(elements[1].Replace(':', ','));
      // Only values present on the line are emitted: the "M" reference (elements[2]) is used
      // for filtering and is not an output column. A trailing percent sign is stripped so
      // that "3%" is kept as "3".
      result.AddRange(elements.Skip(3).Select(e => e.TrimEnd('%')).Where(e => this.IsInt32(e)));
      return String.Join(",", result);
    }

    /// <summary>
    /// Tells whether <paramref name="s"/> parses as a 32-bit integer.
    /// </summary>
    private Boolean IsInt32(String s)
    {
      Int32 unused;
      return Int32.TryParse(s, out unused);
    }
  }
}