.NET正则表达式引擎不返回任何匹配项,但我期望为8
本文关键字:期望 引擎 正则表达式 返回 任何匹 NET | 更新日期: 2023-09-27 18:30:04
我正试图编写一个正则表达式,以从SQL脚本中获取每一个插入行。当我在RegexHero上使用.NET Regex Tester时,我得到了预期的8个匹配项。然而,当我将这个代码片段作为控制台应用程序运行时,它不会返回任何匹配项。
// Sample SQL used to reproduce the problem: two INSERT statements with four value rows each (8 rows in total).
// This is a verbatim (@"...") literal, so the line breaks below are part of the string exactly as they are saved in the source file.
const string text =
@"INSERT INTO [AdminPrefs] ( [SpayClinic] , [VaxClinic] , [ShelterClinic] , [DateModified] , [Prefix] , [UpdateCounter] , [LockedRecs] , [dbName] , [Timer] , [MedCtrClinic] , [OtherClinic] , [Da2PPPx] , [Da2PPEPx] , [FVRCPPx] , [FVRCPEPx] , [FELVTPx] , [FELVTEPx] , [FELVVPx] , [FELVVEPx] , [HWTPx] , [HWTEPx] , [RabiesPx] , [RabiesEPx] , [FIVTest] , [FIVTestE] , [OnePlusChar] , [XSHWMPx] , [XSHWMEPx] , [SHWMPx] , [SHWMEPx] , [MHWMPx] , [MHWMEPx] , [LHWMPx] , [LHWMEPx] , [DebuggerOn] , [PayThisAmount] , [free6] , [XSHWMPillPx] , [XSHWMPillEPx] , [SHWMPillPx] , [SHWMPillEPx] , [MHWMPillPx] , [MHWMPillEPx] , [LHWMPillPx] , [LHWMPillEPx] , [free7] , [free8] , [free9] , [XSPMPx] , [XSPMEPx] , [SPMPx] , [SPMEPx] , [MPMPx] , [MPMEPx] , [LPMPx] , [LPMEPx] , [ReceiptFooter] , [MonthsUntilBenefits] , [free12] , [XSPMPillPx] , [XSPMPillEPx] , [SPMPillPx] , [SPMPillEPx] , [MPMPillPx] , [MPMPillEPx] , [LPMPillPx] , [LPMPillEPx] , [free14] , [ClinicName] , [ShelterName] , [ShelterAbbr] , [Address1] , [Address2] , [City] , [State] , [ZipCode] , [MainPhone] , [MainFax] , [SplashPict] , [free17] , [free18] , [LicenseNo] , [SerialNo] , [free20] , [free21] , [free22] , [VLogCC] , [SNLogCC] , [free23] , [free24] , [free25] , [AgeAndBDay] , [free26] , [free27] , [free28] , [CurrRouteNum] )
VALUES
(12 , 7 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(15 , 53 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(20 , 216 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(16 , 8 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0);
INSERT INTO [AdminPrefs] ( [SpayClinic] , [VaxClinic] , [ShelterClinic] , [DateModified] , [Prefix] , [UpdateCounter] , [LockedRecs] , [dbName] , [Timer] , [MedCtrClinic] , [OtherClinic] , [Da2PPPx] , [Da2PPEPx] , [FVRCPPx] , [FVRCPEPx] , [FELVTPx] , [FELVTEPx] , [FELVVPx] , [FELVVEPx] , [HWTPx] , [HWTEPx] , [RabiesPx] , [RabiesEPx] , [FIVTest] , [FIVTestE] , [OnePlusChar] , [XSHWMPx] , [XSHWMEPx] , [SHWMPx] , [SHWMEPx] , [MHWMPx] , [MHWMEPx] , [LHWMPx] , [LHWMEPx] , [DebuggerOn] , [PayThisAmount] , [free6] , [XSHWMPillPx] , [XSHWMPillEPx] , [SHWMPillPx] , [SHWMPillEPx] , [MHWMPillPx] , [MHWMPillEPx] , [LHWMPillPx] , [LHWMPillEPx] , [free7] , [free8] , [free9] , [XSPMPx] , [XSPMEPx] , [SPMPx] , [SPMEPx] , [MPMPx] , [MPMEPx] , [LPMPx] , [LPMEPx] , [ReceiptFooter] , [MonthsUntilBenefits] , [free12] , [XSPMPillPx] , [XSPMPillEPx] , [SPMPillPx] , [SPMPillEPx] , [MPMPillPx] , [MPMPillEPx] , [LPMPillPx] , [LPMPillEPx] , [free14] , [ClinicName] , [ShelterName] , [ShelterAbbr] , [Address1] , [Address2] , [City] , [State] , [ZipCode] , [MainPhone] , [MainFax] , [SplashPict] , [free17] , [free18] , [LicenseNo] , [SerialNo] , [free20] , [free21] , [free22] , [VLogCC] , [SNLogCC] , [free23] , [free24] , [free25] , [AgeAndBDay] , [free26] , [free27] , [free28] , [CurrRouteNum] )
VALUES
(26 , 5 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(18 , 12 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(9 , 10 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(2 , 72 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0);
";
/// <summary>
/// Runs the row pattern over the sample SQL in <c>text</c> and prints how many rows matched.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // One match per value row: a line that starts with "(" and ends with ")," or ");".
    // NOTE: in Multiline mode "$" only matches immediately before '\n' (or at the very end of the
    // input). If the lines of text end in CR/LF, the '\r' sits between the ","/";" and the "$",
    // so this pattern finds 0 matches - which is the behaviour this question is about.
    string query = @"^\(.*?\)(,|;)$";
    var matches = Regex.Matches(text, query, RegexOptions.Singleline | RegexOptions.Multiline);
    Console.WriteLine("Expected Matches: 8");
    Console.WriteLine("Matches Found: {0}", matches.Count);
    Console.ReadLine(); // keep the console window open until Enter is pressed
}
网站上和我代码中使用的选项(Multiline和Singleline)完全相同——它们应该使用的是同一个.NET正则表达式引擎,那么是什么导致了两者之间的差异呢?
最终结果:
对于所有好奇的人来说,我最后的Regex是
// Pattern text only. It relies on RegexOptions.IgnorePatternWhitespace (inline # comments),
// Multiline (^ and $ per line) and Singleline ('.' also matches line breaks).
@"(?<=^\() # The beginning of a line followed by a (
((('(?<c>.*?)'(?!')(?=[\s\)])) | # Text string in SQL supports line breaks
(?<c>-?[\d\.]+) | # Any numbers
(X'(?<c>[0-9a-f]*)') # Something formatted like X'0123456789abcdef'
)(\s,\s)? # Spaces and commas between the records
)+ # Repeat the pattern at least one time
(?=(?<!'')\)[;,]\r?$) # The End of the line ending with ); or ), and not immediately proceeded by ''";
提醒所有打算把它拿去做"R&D"(照搬并部署)式开发的人:这段代码之所以适用于我的SQL,只是因为这些SQL的格式非常规整。如果要用于不是由第三方程序生成的SQL,就需要进行调整,以处理许多我无需处理的边缘情况。
以下是解析器的完整代码。希望它能帮助其他陷入类似困境的人。
// Imports every *.sql file found in the sub-folders of _exportFolder. The name of each
// sub-folder is used as the name of the destination table.
foreach (var tableFolder in Directory.GetDirectories(_exportFolder))
{
    string tableName = Path.GetFileName(tableFolder);

    // The scripts contain machine-formatted numbers, so parse them independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Populate the schema of the DataTable ("top 0" brings back the columns without any rows).
    DataTable table = new DataTable();
    using (SqlDataAdapter ada = new SqlDataAdapter(String.Format("Select top 0 * from [{0}]", tableName), conn))
    {
        ada.Fill(table);
    }

    // All of the files to import for this table
    string[] filePaths = Directory.GetFiles(tableFolder, "*.sql");
    foreach (string file in filePaths)
    {
        // The same DataTable is reused for every file of this folder: remove the rows that were
        // already written for the previous file, otherwise they would be bulk-copied again.
        table.Rows.Clear();

        string text;
        using (var txtRdr = new StreamReader(file))
        {
            text = txtRdr.ReadToEnd();
        }

        // One match per "( ... )" value row; every value inside the row is captured in the
        // group s (string), n (number) or h (hex blob).
        const string recordRegex =
            @"(?<=^\() #The begining of a line followed by a (
                ((('(?<s>.*?)'(?!')(?=[\s\)])) | # Something formatted like 'some text' supports line breaks
                (?<n>-?[\d\.]+) | # Any numbers
                (X'(?<h>[0-9a-f]*)') # Something formatted like X'0123456789abcdef'
                )(\s,\s)? # Spaces and commas between the records
            )+ # Repeat the pattern at least one time
            (?=(?<!'')\)[;,]\r?$) # The End of the line ending with ); or ), and not immedatly proceded by ''";

        // Creates one match per row in the database
        var records = Regex.Matches(text, recordRegex, RegexOptions.Singleline | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture);

        // Column names, taken from the INSERT header at the very start of the file
        // (no RegexOptions are passed, so ^ only anchors at the start of the text).
        const string headerRegex = @"^INSERT\sINTO\s\[[\w_\-\s]+\]\s\(\s(?:\[([\w_\-\s]+)\]\s(?:,\s)?)+\)";
        var header = Regex.Match(text, headerRegex).Groups[1].Captures.Cast<Capture>().ToArray();

        if (records.Count > 0 && header.Length == 0)
        {
            throw new InvalidDataException(String.Format("No INSERT column list found at the start of '{0}'.", file));
        }

        foreach (Match record in records)
        {
            // Due to how we captured the 3 groups we have to put them back in order in one list.
            var columns = record.Groups.Cast<Group>()
                .Skip(1) // Groups[0] contains the entire record.
                .SelectMany(g => g.Captures.Cast<Capture>()) // Flattens the captures of the three groups into one list
                .OrderBy(capture => capture.Index) // Restores the textual order, SelectMany outputs group by group
                .ToArray();

            if (columns.Length > header.Length)
            {
                throw new InvalidDataException(String.Format(
                    "'{0}': a row has {1} values but the INSERT header lists only {2} columns.",
                    file, columns.Length, header.Length));
            }

            DataRow row = table.NewRow();
            for (int i = 0; i < columns.Length; i++)
            {
                string columnName = header[i].Value;
                string rawValue = columns[i].Value;

                DataColumn column = table.Columns[columnName];
                if (column == null)
                {
                    throw new InvalidDataException(String.Format(
                        "'{0}': column [{1}] does not exist in table [{2}].", file, columnName, tableName));
                }

                Type columnType = column.DataType;
                if (columnType == typeof(String))
                {
                    row[column] = rawValue;
                }
                else if (columnType == typeof(Int32))
                {
                    row[column] = Convert.ToInt32(rawValue, invariant);
                }
                else if (columnType == typeof(Double))
                {
                    row[column] = Convert.ToDouble(rawValue, invariant);
                }
                else if (columnType == typeof(Boolean))
                {
                    // Booleans are exported as the literals 0 and 1.
                    if (rawValue == "0")
                    {
                        row[column] = false;
                    }
                    else if (rawValue == "1")
                    {
                        row[column] = true;
                    }
                    else
                    {
                        throw new InvalidDataException(String.Format(
                            "'{0}': '{1}' is not a valid boolean value for column [{2}].", file, rawValue, columnName));
                    }
                }
                else if (columnType == typeof(Int16))
                {
                    row[column] = Convert.ToInt16(rawValue, invariant);
                }
                else if (columnType == typeof(Byte[]))
                {
                    row[column] = StringToByteArray(rawValue);
                }
                else
                {
                    throw new NotImplementedException(String.Format(
                        "Column [{0}] has the unsupported type {1}.", columnName, columnType));
                }
            }
            table.Rows.Add(row);
        }

        using (var bulkCopy = new SqlBulkCopy(conn))
        {
            bulkCopy.DestinationTableName = tableName;
            bulkCopy.BulkCopyTimeout = 0; // 0 = no time limit
            bulkCopy.WriteToServer(table);
        }
    }
}
更新:
通过把所有捕获组(capture group)都改成同一个名称,.NET的正则表达式引擎会把它们的捕获合并到同一个组中,这就简化了
// Flatten the captures of every group except Groups[0] (the whole record) and restore their textual order.
var columns = record.Groups.Cast<Group>().Skip(1).SelectMany(group => group.Captures.Cast<Capture>()).OrderBy(capture => capture.Index).ToArray();
至
// Group 1 is read on its own here; its captures are used directly as the columns of the row.
Group valueGroup = record.Groups[1];
var columns = valueGroup.Captures.Cast<Capture>().ToArray();
请注意,在Regex Hero页面上切换"CrLf标记行结束"设置会导致8行停止匹配;这是造成问题的原因的线索。
在C#代码中,文本字符串中的换行符被编码为CR/LF对("\r\n")。正则表达式中的$(在多行模式中与行尾匹配)仅在\n字符之前匹配。因此,在最后一个逗号(或分号)与行尾之间多出了一个正则表达式没有考虑到的\r字符,匹配失败。
解决这个问题的一些方法包括:
- 去掉回车符:
// Normalise CR/LF line endings to LF before matching, so that "$" lines up with every line end.
// (text has to be an assignable variable for this; a const cannot be reassigned.)
text = text.Replace("\r\n", "\n");
- 或者匹配回车符:
// "\r?" consumes the carriage return of a CR/LF line ending before "$"; being optional, it still matches plain LF input.
string query = @"^\(.*?\)(,|;)\r?$";