如何使用 C# 从文本文件中的行中提取特定数据并输出到新的文本文件
本文关键字:文件 文本 数据 输出 如何使用 提取 | 更新日期: 2023-09-27 17:56:45
我有几个包含数据行的大型.csv文件。我需要从每一行中仅提取数据的特定部分,从而忽略我不感兴趣的部分并将结果输出到新的文本文件中。
例如,下面是数据的一部分:
Fr 23:59:59 M40 N04161K RX LAG 2 JNYT 17 STORE OCC 1 PRUD 1 RAW -9 LAG 0
Fr 23:59:59 M08 N09461M %SAT 3 %CONG 0 MQ 0 EB 0 OSQ 0 NSQ 4
Fr 23:59:59 M20 N09461M SAT 3% SQ 0 FLOW 4 GN 13 STOC 9
我想写一个新文件,看起来像这样:
5,23,59,59,2,17,1,1,-9,0
5,23,59,59,3,0,0,0,0,4
5,23,59,59,3,0,4,13,9
(您会注意到数据的开头是"5",我也想使用它来代替代表"星期五"的"Fr")
每行数据通过"M"引用(M40、M08 等)标识其所属的数据集,如果能按数据集分别输出数据会很有用(例如,将所有带有 M40 的数据过滤到一个 .txt 文件中,这就是我在代码中使用"if"语句的原因)
我希望每个数字用逗号分隔,但不是必需的
这是我到目前为止的代码:
/// <summary>
/// Copies every line of the export file that mentions data set M40 or M08
/// into a single output text file.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Reads the input file line by line, writes the matching
    /// lines to the output file, reports any error on the console and then
    /// waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        try
        {
            // "using" closes both files (and flushes the writer) even when
            // reading or writing throws part-way through the loop.
            using (StreamReader sr = new StreamReader(@"C:\MessExport_20110402_0000.csv"))
            using (StreamWriter sw = new StreamWriter(@"C:\output.txt"))
            {
                String line;

                // ReadLine returns null once the end of the file is reached.
                while ((line = sr.ReadLine()) != null)
                {
                    // One combined test, so a line that mentions both
                    // references is written once rather than twice.
                    if (line.Contains("M40") || line.Contains("M08"))
                    {
                        sw.WriteLine(line);
                    }
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("Exception: " + e.Message);
        }
        finally
        {
            Console.WriteLine("Executing finally block.");
            Console.WriteLine("Press any key to exit.");
            Console.ReadKey();
        }
    }
}
然后,读取下一个.csv文件并再次将结果输出到新的.txt文件中会很有用
我对使用任何带有正则表达式和拆分的代码都很陌生,因此任何帮助将不胜感激。
只是一个简单的实现:
// Splits data.csv into one output file per data-set reference (the third
// token of each line, e.g. "M40" -> M40.txt). Each output line holds the
// day-of-week index, the three time components and every integer token
// that follows the reference, separated by commas.
string workingDirectory = @"c:\";

// Index in this array is the number written for the day ("Fr" -> 5).
// A day abbreviation that is not listed is written as -1.
var days = new[] { "Su", "Mo", "Tu", "We", "Th", "Fr", "Sa" };

// One lazily created writer per data-set reference.
var writers = new Dictionary<string, StreamWriter>();
try
{
    using (StreamReader sr = new StreamReader(workingDirectory + "data.csv"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            var items = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // Blank or truncated lines carry no day/time/reference triple;
            // skip them instead of failing on items[1] / items[2].
            if (items.Length < 3)
            {
                continue;
            }

            StreamWriter writer;
            if (!writers.TryGetValue(items[2], out writer))
            {
                writer = new StreamWriter(workingDirectory + items[2] + ".txt");
                writers.Add(items[2], writer);
            }

            var times = items[1].Split(':');

            // Keep only the tokens after the reference that parse as integers.
            var digits = items.Skip(3).Where(x => { int parsed; return int.TryParse(x, out parsed); });

            var data = new[] { Array.IndexOf(days, items[0]).ToString() }.Concat(times).Concat(digits);
            writer.WriteLine(String.Join(",", data));
        }
    }
}
finally
{
    // Runs on failure too, so buffered output is flushed and every file
    // handle is released. Dispose already closes the writer.
    foreach (var entry in writers)
    {
        entry.Value.Dispose();
    }
}
这只是一个快速的尝试,但我认为它能帮你完成一部分工作。
// Demonstrates extracting the day abbreviation and every integer from each
// sample line with a single regular expression.
var lines = new List<string> {
    "Fr 23:59:59 M40 N04161K RX LAG 2 JNYT 17 STORE OCC 1 PRUD 1 RAW -9 LAG 0",
    "Fr 23:59:59 M08 N09461M %SAT 3 %CONG 0 MQ 0 EB 0 OSQ 0 NSQ 4",
    "Fr 23:59:59 M20 N09461M SAT 3% SQ 0 FLOW 4 GN 13 STOC 9"
};

// IgnorePatternWhitespace lets the pattern contain spaces for readability.
var options = RegexOptions.IgnorePatternWhitespace;

// Two alternatives:
//   ^\w\w        - the two-character day abbreviation at the start of the line
//   -?\b\d+\b    - a whole number with an optional leading minus sign; the
//                  word boundaries reject digits embedded in tokens such as
//                  "M40" or "N04161K" but accept the "3" of "3%".
// A verbatim string keeps the backslashes intact.
var regex = new Regex(@"(?: ^\w\w | -?\b\d+\b )", options);

foreach (var l in lines)
{
    var matches = regex.Matches(l);
    foreach (Match m in matches)
    {
        // Every value is followed by a comma, including the last one.
        Console.Write("{0},", m.Value);
    }
    Console.WriteLine();
}
输出:
Fr,23,59,59,2,17,1,1,-9,0,
Fr,23,59,59,3,0,0,0,0,4,
Fr,23,59,59,3,0,4,13,9,
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace Program
{
    /// <summary>
    /// Converts the lines of an export file that pass a caller-supplied
    /// filter into comma-separated numeric rows: day-of-week number, hour,
    /// minute, second, then every integer value that follows the data-set
    /// reference.
    /// </summary>
    public class TransformCsv
    {
        /// <summary>
        /// Entry point: transforms the M40 and M08 lines of the sample export file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        [STAThread]
        public static void Main(String[] args)
        {
            (new TransformCsv()).Run(@"c:\temp\MessExport_20110402_0000.csv", @"c:\temp\output.txt", LineFilterFunction);
        }

        /// <summary>
        /// Default line filter: accepts lines that mention data set M40 or M08.
        /// </summary>
        /// <param name="line">A raw input line.</param>
        /// <returns><c>true</c> when the line contains "M40" or "M08".</returns>
        public static Boolean LineFilterFunction(String line)
        {
            return line.Contains("M40") || line.Contains("M08");
        }

        // Position in this list plus one is the number written for the day
        // ("Mo" -> 1 ... "Su" -> 7). An unlisted abbreviation is written as 0.
        private readonly List<String> _dayOfWeek = new List<String>() { "Mo", "Tu", "We", "Th", "Fr", "Sa", "Su" };

        private static readonly Char[] _spaceCharacter = " ".ToCharArray();

        /// <summary>
        /// Reads <paramref name="inputFilePath"/> line by line and writes the
        /// transformed form of every accepted line to
        /// <paramref name="outputFilePath"/>.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to read.</param>
        /// <param name="outputFilePath">Path of the file to create or overwrite.</param>
        /// <param name="lineFilterFunction">Returns <c>true</c> for lines that should be transformed.</param>
        /// <exception cref="ArgumentNullException"><paramref name="lineFilterFunction"/> is null.</exception>
        public void Run(String inputFilePath, String outputFilePath, Func<String, Boolean> lineFilterFunction)
        {
            // Fail before any file is touched rather than with a
            // NullReferenceException on the first non-blank line.
            if (lineFilterFunction == null)
            {
                throw new ArgumentNullException("lineFilterFunction");
            }

            using (var reader = new StreamReader(inputFilePath))
            using (var writer = new StreamWriter(outputFilePath))
            {
                String line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (String.IsNullOrWhiteSpace(line) || !lineFilterFunction(line))
                    {
                        continue;
                    }

                    // null marks a line too short to hold day, time and reference.
                    var transformed = this.GetTransformedLine(line);
                    if (transformed != null)
                    {
                        writer.WriteLine(transformed);
                    }
                }
            }
        }

        /// <summary>
        /// Builds the comma-separated output row for one input line.
        /// </summary>
        /// <param name="line">A raw input line such as "Fr 23:59:59 M40 N04161K RX LAG 2 ...".</param>
        /// <returns>
        /// The transformed row, or <c>null</c> when the line has fewer than
        /// three space-separated tokens.
        /// </returns>
        private String GetTransformedLine(String line)
        {
            var elements = line.Split(_spaceCharacter, StringSplitOptions.RemoveEmptyEntries);
            if (elements.Length < 3)
            {
                return null;
            }

            var result = new List<String>();
            result.Add((_dayOfWeek.IndexOf(elements[0]) + 1).ToString());

            // "23:59:59" becomes "23,59,59", which joins into three columns.
            result.Add(elements[1].Replace(':', ','));

            // elements[2] is the data-set reference; it selects lines through
            // the filter but is not part of the output. The numbers come only
            // from the tokens after it, so no value is emitted twice.
            foreach (var element in elements.Skip(3))
            {
                String number;
                if (TryGetInt32Text(element, out number))
                {
                    result.Add(number);
                }
            }

            return String.Join(",", result);
        }

        /// <summary>
        /// Tests whether a token is an integer, allowing a trailing percent
        /// sign (for example "3%").
        /// </summary>
        /// <param name="token">The token to examine.</param>
        /// <param name="number">The token without any trailing '%' characters.</param>
        /// <returns><c>true</c> when <paramref name="number"/> parses as an Int32.</returns>
        private static Boolean TryGetInt32Text(String token, out String number)
        {
            number = token.TrimEnd('%');
            Int32 ignored;
            return Int32.TryParse(number, out ignored);
        }
    }
}