把这本字典和另一本字典作比较
本文关键字:字典 一本 比较 | 更新日期: 2023-09-27 18:16:05
正如标题所示,我正在尝试将一个字典与另一个字典进行比较,目的是比较两个字典中的双字母组(bigram)。
我正在使用控制台应用程序来加载文件。应用程序读取该文件,并把每两个相邻的字母(例如:Hey 变成 he 和 ey)作为键,值则是该双字母组出现频率的百分比(在本例中,he 为 50%,ey 为 50%)。这一部分我已经实现了。
现在棘手的部分是将最近添加的双元组与第二个文件(文本)进行比较。我将第二个文件添加到另一个字典中。所以现在我有两个字典(一个用于第一个文件,第二个用于第二个文件)。
现在我想比较这两个字典,看看哪些双字母组只出现在其中一个字典里(例如:第一个字典有 he、ey;第二个字典有 he、ey、yg、gu、uy、ys)。所以应该输出 yg、gu、uy、ys。
下面是我的代码:
// Shared program state. Every method below reads and writes these statics directly.

// Accumulates the loaded text with punctuation characters removed (filled by OpenReadFile).
static StringBuilder newText = new StringBuilder();
// Bigram -> number of occurrences, counted by OpenReadFile.
static Dictionary<string, int> d = new Dictionary<string, int>();
// Per-language profiles: bigram -> frequency in percent, filled by calculateChanceKnown.
// Menu choices map as 1 = dNL (Dutch), 2 = dDE (German), 3 = dFR (French),
// 4 = dSP (Spanish), 5 = dEN (English), 6 = dIT (Italian).
static Dictionary<string, double> dNL = new Dictionary<string, double>();
static Dictionary<string, double> dDE = new Dictionary<string, double>();
static Dictionary<string, double> dFR = new Dictionary<string, double>();
static Dictionary<string, double> dSP = new Dictionary<string, double>();
static Dictionary<string, double> dEN = new Dictionary<string, double>();
static Dictionary<string, double> dIT = new Dictionary<string, double>();
// Lines read from the file chosen in OpenReadFile.
static List<string> fileList = new List<string>();
// Key lists (bigrams only) of d and of each language profile; assigned in Main for menu choice "7".
static List<string> dConverted = new List<string>();
static List<string> dConvertedNL = new List<string>();
static List<string> dConvertedDE = new List<string>();
static List<string> dConvertedFR = new List<string>();
static List<string> dConvertedSP = new List<string>();
static List<string> dConvertedEN = new List<string>();
static List<string> dConvertedIT = new List<string>();
// Cleaned-up text that the bigrams are extracted from.
static string text;
// Raw menu input read from the console in Main.
static string languageChosen;
// The bigram currently being counted ("gecombineerde letters" = combined letters).
static string gecombineerdeLetters;
// Receives the out value of d.TryGetValue while counting.
static int value = 0;
// Total number of bigrams counted ("totale n-gram" = n-gram total); the denominator for the percentages.
static int totaleNGram = 0;
// fileList copied to an array.
static string[] fileRead;
// Characters of the text being processed ("tekst" = text).
static char[] tekst;
/// <summary>
/// Console entry point: repeatedly shows the language menu and dispatches on the
/// user's answer until "exit" is entered.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
[STAThread]
static void Main(string[] args)
{
    while (true)
    {
        Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:");
        Console.WriteLine("press 1 - 7 / enter 'exit' to close the program");
        Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language");
        Console.WriteLine();
        languageChosen = Console.ReadLine();

        switch (languageChosen)
        {
            case "1":
            case "2":
            case "3":
            case "4":
            case "5":
            case "6":
                // A known language: build (and possibly print) its bigram profile.
                calculateChanceKnown();
                Console.WriteLine();
                continue;

            case "7":
                // Unknown language: count the bigrams of the chosen file ...
                OpenReadFile();
                // ... and snapshot the bigram keys of the sample and of every language profile.
                dConverted = d.Keys.ToList();
                dConvertedNL = dNL.Keys.ToList();
                dConvertedDE = dDE.Keys.ToList();
                dConvertedFR = dFR.Keys.ToList();
                dConvertedSP = dSP.Keys.ToList();
                dConvertedEN = dEN.Keys.ToList();
                dConvertedIT = dIT.Keys.ToList();
                // TODO: compare dConverted with each language list (e.g. via Except)
                // to find the language with the most similarities.
                Console.WriteLine();
                continue;

            case "exit":
                Environment.Exit(0);
                break;

            default:
                Console.WriteLine("Wrong input, try again!");
                Console.WriteLine();
                continue;
        }

        // Only the "exit" branch falls through to here.
        break;
    }

    Console.Read();
}
/// <summary>
/// Loads a text file through OpenReadFile and stores the relative frequency
/// (in percent) of every bigram in the profile of the language selected in
/// <c>languageChosen</c>: "1" Dutch, "2" German, "3" French, "4" Spanish,
/// "5" English, "6" Italian. Any other value leaves all profiles untouched.
/// </summary>
/// <remarks>
/// Loading a language a second time replaces its previous profile. Merging
/// two files would require keeping the raw counts, which are not stored.
/// </remarks>
static void calculateChanceKnown()
{
    OpenReadFile();

    Dictionary<string, double> profile;
    // The Dutch profile has never been echoed to the console; keep that output behaviour.
    bool printProfile = true;

    switch (languageChosen)
    {
        case "1":
            profile = dNL;
            printProfile = false;
            break;
        case "2":
            profile = dDE;
            break;
        case "3":
            profile = dFR;
            break;
        case "4":
            profile = dSP;
            break;
        case "5":
            profile = dEN;
            break;
        case "6":
            profile = dIT;
            break;
        default:
            return;
    }

    // Start from an empty profile: Dictionary.Add used to throw an
    // ArgumentException as soon as a language was loaded twice.
    profile.Clear();
    foreach (KeyValuePair<string, int> kvp in d)
    {
        profile[kvp.Key] = ((double)kvp.Value / totaleNGram) * 100;
    }

    // Reset the shared counters for every language (previously only Dutch
    // cleared d, and totaleNGram was never reset), so the next file is not
    // polluted by this one's counts or total.
    d.Clear();
    totaleNGram = 0;

    if (printProfile)
    {
        foreach (KeyValuePair<string, double> kvp in profile)
        {
            Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value);
        }
    }
}
/// <summary>
/// Asks the user for a file, reads it, normalises the text (punctuation and
/// whitespace removed, lower-cased) and counts every pair of adjacent
/// characters into <c>d</c>, with the overall number of pairs in
/// <c>totaleNGram</c>.
/// </summary>
/// <remarks>
/// Each call starts from a clean state, so the counts describe only the file
/// chosen in this call. If the dialog is cancelled, <c>d</c> ends up empty.
/// </remarks>
static void OpenReadFile()
{
    // Previously these buffers were never reset, so every call re-processed
    // the text of all files loaded before it.
    fileList.Clear();
    newText.Clear();
    d.Clear();
    totaleNGram = 0;

    using (var fileDialog = new OpenFileDialog { Multiselect = false, Title = "Open Text File", Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx" })
    {
        if (fileDialog.ShowDialog() == DialogResult.OK)
        {
            // Dispose the reader so the file handle is released right away.
            using (var sr = new System.IO.StreamReader(fileDialog.FileName))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    fileList.Add(line);
                }
            }
        }
    }

    fileRead = fileList.ToArray();
    tekst = string.Join(string.Empty, fileRead).ToCharArray();
    foreach (char c in tekst)
    {
        if (!char.IsPunctuation(c))
        {
            newText.Append(c);
        }
    }

    // Strings are immutable: the lower-cased result has to be kept
    // (the original called ToLower() and discarded its return value).
    text = newText.ToString().ToLowerInvariant();
    // Strip whitespace (plus stray apostrophes and '+'). The original class
    // ['s+] contained a literal 's' and therefore deleted every letter "s"
    // instead of whitespace.
    text = Regex.Replace(text, @"['\s+]", "");
    tekst = text.ToCharArray();

    for (int i = 0; i < tekst.Length - 1; i++)
    {
        gecombineerdeLetters = string.Format("{0}{1}", tekst[i], tekst[i + 1]);
        // TryGetValue leaves value at 0 for an unseen bigram, so one
        // statement handles both the first and the repeated occurrence.
        d.TryGetValue(gecombineerdeLetters, out value);
        d[gecombineerdeLetters] = value + 1;
        totaleNGram += 1;
    }
}
我试过了:
- 使用except<>
- SequenceEqual
每次我用同一个文件进行比较时,它仍然会给出一个并非同时存在于两个字典中的双字母组
/// <summary>
/// Finds the keys that two dictionaries do not have in common.
/// </summary>
public class DictionaryComparer
{
    /// <summary>
    /// Returns every key that is present in exactly one of the two dictionaries.
    /// The values are ignored.
    /// </summary>
    /// <param name="first">First dictionary to compare.</param>
    /// <param name="second">Second dictionary to compare.</param>
    /// <returns>
    /// Keys found only in <paramref name="first"/>, followed by keys found only
    /// in <paramref name="second"/>, each group in its dictionary's enumeration order.
    /// </returns>
    public List<string> CompareDictionaries(IDictionary<string, double> first, IDictionary<string, double> second)
    {
        IEnumerable<string> missingFromSecond = first.Keys.Where(key => !second.ContainsKey(key));
        IEnumerable<string> missingFromFirst = second.Keys.Where(key => !first.ContainsKey(key));

        return missingFromSecond.Concat(missingFromFirst).ToList();
    }
}
和测试:
/// <summary>
/// NUnit tests for DictionaryComparer.
/// </summary>
[TestFixture]
public class Test
{
    /// <summary>
    /// Keys that exist only in the second dictionary are reported, in order.
    /// </summary>
    [Test]
    public void Compare()
    {
        // Arrange
        IDictionary<string, double> dictionaryOne = new Dictionary<string, double>()
        {
            {"he", 0},{"ey", 0 }
        };
        IDictionary<string, double> dictionaryTwo = new Dictionary<string, double>()
        {
            {"he", 0},{"ey", 0 },{"yg", 0 },{"gu", 0 },{"uy", 0 },{"ys", 0 }
        };
        var comparer = new DictionaryComparer();

        // Act
        var list = comparer.CompareDictionaries(dictionaryOne, dictionaryTwo);

        // Assert
        // Assert.That takes the actual value first and the expectation inside the
        // constraint; with the two swapped, failure messages report them reversed.
        // A single collection comparison checks both the count and the order.
        Assert.That(list, Is.EqualTo(new[] { "yg", "gu", "uy", "ys" }));
    }
}
希望有意义