把这本字典和另一本字典作比较

本文关键字:字典 一本 比较 | 更新日期: 2023-09-27 18:16:05

正如标题所示,我正在尝试将一个字典与另一个字典进行比较,目的是比较两个字典中的二元组(bigram,即相邻的两个字母)。

我正在使用控制台应用程序来加载文件。应用程序读取该文件,并把每两个相邻的字母(例如:Hey 变成 he 和 ey)作为键,值是该二元组出现频率的百分比(在本例中,he 为 50%,ey 为 50%)。这一步我已经做到了。

现在棘手的部分是将最近添加的双元组与第二个文件(文本)进行比较。我将第二个文件添加到另一个字典中。所以现在我有两个字典(一个用于第一个文件,第二个用于第二个文件)。

现在我想比较这两个字典,找出只出现在其中一个字典里的二元组(例如:第一个字典有 he、ey;第二个字典有 he、ey、yg、gu、uy、ys)。那么结果应该是 yg、gu、uy、ys。

下面是我的代码:

// Scratch buffer: OpenReadFile appends every non-punctuation character of the loaded text here.
static StringBuilder newText = new StringBuilder();
// Bigram -> occurrence count, filled by OpenReadFile.
static Dictionary<string, int> d = new Dictionary<string, int>();
// Per-language profiles: bigram -> percentage of all counted bigrams.
// calculateChanceKnown fills them according to the menu choice:
// "1" = dNL (Dutch), "2" = dDE (German), "3" = dFR (French),
// "4" = dSP (Spanish), "5" = dEN (English), "6" = dIT (Italian).
static Dictionary<string, double> dNL = new Dictionary<string, double>();
static Dictionary<string, double> dDE = new Dictionary<string, double>();
static Dictionary<string, double> dFR = new Dictionary<string, double>();
static Dictionary<string, double> dSP = new Dictionary<string, double>();
static Dictionary<string, double> dEN = new Dictionary<string, double>();
static Dictionary<string, double> dIT = new Dictionary<string, double>();
// Lines read from the file picked in OpenReadFile.
static List<string> fileList = new List<string>();
// Key snapshots of d and of each language profile; Main assigns them for menu option "7".
static List<string> dConverted = new List<string>();
static List<string> dConvertedNL = new List<string>();
static List<string> dConvertedDE = new List<string>();
static List<string> dConvertedFR = new List<string>();
static List<string> dConvertedSP = new List<string>();
static List<string> dConvertedEN = new List<string>();
static List<string> dConvertedIT = new List<string>();
// Cleaned-up text that OpenReadFile splits into bigrams.
static string text;
// Raw menu input read in Main ("1".."7" or "exit").
static string languageChosen;
// The bigram currently being counted in OpenReadFile (Dutch: "combined letters").
static string gecombineerdeLetters;
// Out-target for the TryGetValue lookup in OpenReadFile.
static int value = 0;
// Running total of bigram occurrences counted by OpenReadFile (Dutch: "total n-grams").
static int totaleNGram = 0;
// fileList converted to an array by OpenReadFile.
static string[] fileRead;
// Character view of the text being processed by OpenReadFile (Dutch: "text").
static char[] tekst;
/// <summary>
/// Console entry point: repeatedly shows the menu, reads the user's choice and dispatches it.
/// "1".."6" load a text as the profile of a known language, "7" loads a text of unknown
/// language, "exit" terminates the process; anything else prints an error and asks again.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
[STAThread]
static void Main(string[] args)
{
    bool keepAsking = true;
    while (keepAsking)
    {
        Console.WriteLine("Welcome to this program. We need some text to start getting results. Please enter a Language your text is in:");
        Console.WriteLine("press 1 - 7 / enter 'exit' to close the program");
        Console.WriteLine("1. Dutch / 2. German / 3. French / 4. Spanish / 5. English / 6. Italian / 7. Enter unknown language");
        Console.WriteLine();
        languageChosen = Console.ReadLine();

        switch (languageChosen)
        {
            case "1":
            case "2":
            case "3":
            case "4":
            case "5":
            case "6":
                // Known language: build (and store) its bigram profile.
                calculateChanceKnown();
                Console.WriteLine();
                break;

            case "7":
                // Unknown language: count its bigrams, then snapshot the key sets of the
                // unknown text and of every language profile.
                OpenReadFile();
                dConverted = d.Keys.ToList();
                dConvertedNL = dNL.Keys.ToList();
                dConvertedDE = dDE.Keys.ToList();
                dConvertedFR = dFR.Keys.ToList();
                dConvertedSP = dSP.Keys.ToList();
                dConvertedEN = dEN.Keys.ToList();
                dConvertedIT = dIT.Keys.ToList();
                // TODO: the actual similarity comparison between dConverted and the
                // per-language lists is not implemented yet.
                Console.WriteLine();
                break;

            case "exit":
                Environment.Exit(0);
                keepAsking = false;
                break;

            default:
                Console.WriteLine("Wrong input, try again!");
                Console.WriteLine();
                break;
        }
    }
    Console.Read();
}
/// <summary>
/// Loads a text file through OpenReadFile and stores its bigram frequencies, expressed as
/// percentages, as the profile of the language selected in languageChosen ("1".."6").
/// </summary>
/// <remarks>
/// Differences from the previous per-case implementation:
/// - Loading the same language twice replaces its profile instead of throwing
///   ArgumentException from Dictionary.Add on the first repeated bigram.
/// - The shared counters (d, totaleNGram) are reset for every language, not only for
///   Dutch, so counts of one file no longer leak into the next profile.
/// - The percentage base is taken from the counts in d themselves, so the values of a
///   profile always add up to 100 even if totaleNGram was out of sync.
/// - The resulting profile is printed for every language (Dutch used to be silent).
/// When languageChosen is not "1".."6", or no bigram was counted, nothing is stored.
/// </remarks>
static void calculateChanceKnown()
{
    OpenReadFile();

    Dictionary<string, double> profile = GetLanguageProfile(languageChosen);
    if (profile == null || d.Count == 0)
    {
        return;
    }

    // Total number of bigram occurrences in the text that was just counted.
    double total = 0;
    foreach (int count in d.Values)
    {
        total += count;
    }

    // Replace any earlier profile of this language with the freshly computed one.
    profile.Clear();
    foreach (KeyValuePair<string, int> kvp in d)
    {
        profile[kvp.Key] = (kvp.Value / total) * 100;
    }

    foreach (KeyValuePair<string, double> kvp in profile)
    {
        Console.WriteLine("Bigram = {0}, Chance = {1}%", kvp.Key, kvp.Value);
    }

    // The counts now live in the profile; start the next file from zero.
    d.Clear();
    totaleNGram = 0;
}

/// <summary>
/// Maps a menu choice to the dictionary holding that language's bigram profile.
/// </summary>
/// <param name="choice">Menu input, "1" (Dutch) to "6" (Italian).</param>
/// <returns>The matching profile dictionary, or null for any other input.</returns>
static Dictionary<string, double> GetLanguageProfile(string choice)
{
    switch (choice)
    {
        case "1": return dNL;
        case "2": return dDE;
        case "3": return dFR;
        case "4": return dSP;
        case "5": return dEN;
        case "6": return dIT;
        default: return null;
    }
}
/// <summary>
/// Lets the user pick a text file, normalizes its content and counts every bigram
/// (pair of adjacent characters) into d, with the number of occurrences in totaleNGram.
/// </summary>
/// <remarks>
/// Normalization: lines are concatenated, punctuation is dropped, the text is lower-cased,
/// and apostrophes, whitespace and '+' are removed. Bigrams therefore span word and line
/// boundaries ("Hey guys" yields he, ey, yg, gu, uy, ys).
/// All per-file state (fileList, newText, d, totaleNGram) is reset on entry, so each call
/// reflects only the file chosen in that call. If the dialog is cancelled, the counters
/// are simply left empty.
/// </remarks>
static void OpenReadFile()
{
    // Without this reset every later file would be processed together with all earlier ones.
    fileList.Clear();
    newText.Length = 0;
    d.Clear();
    totaleNGram = 0;

    // NOTE(review): the filter also offers .doc/.docx, but the file is read as plain text
    // below - confirm whether Word files are really meant to be supported.
    var fileDialog = new OpenFileDialog { Multiselect = false, Title = "Open Text File", Filter = "txt files (*txt)|*.txt| word files (*.doc, *.docx)|*.doc; *docx" };
    using (fileDialog)
    {
        if (fileDialog.ShowDialog() == DialogResult.OK)
        {
            // Dispose the reader so the file handle is released as soon as we are done.
            using (var sr = new System.IO.StreamReader(fileDialog.FileName))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    fileList.Add(line);
                }
            }
        }
    }

    fileRead = fileList.ToArray();
    tekst = string.Join(string.Empty, fileRead).ToCharArray();
    foreach (char c in tekst)
    {
        if (!char.IsPunctuation(c))
        {
            newText.Append(c);
        }
    }

    // Strings are immutable: the lower-cased copy has to be assigned. The invariant culture
    // keeps the bigram keys independent of the machine's regional settings.
    text = newText.ToString().ToLowerInvariant();
    // The class previously read ['s+], which deleted every letter 's'; \s removes whitespace.
    text = Regex.Replace(text, @"['\s+]", "");
    tekst = text.ToCharArray();

    for (int i = 0; i < tekst.Length - 1; i++)
    {
        gecombineerdeLetters = text.Substring(i, 2);
        // TryGetValue leaves value at 0 for a bigram seen for the first time.
        d.TryGetValue(gecombineerdeLetters, out value);
        d[gecombineerdeLetters] = value + 1;
        totaleNGram += 1;
    }
}

我试过了:

  • 使用except<>
  • SequenceEqual

每次我用同一个文件进行比较时,它仍然会给出一些"不同时存在于两个字典中"的二元组。

把这本字典和另一本字典作比较

/// <summary>
/// Compares the key sets of two bigram dictionaries.
/// </summary>
public class DictionaryComparer
{
    /// <summary>
    /// Returns the keys that occur in exactly one of the two dictionaries
    /// (the symmetric difference of their key sets). The values are ignored.
    /// </summary>
    /// <param name="first">First dictionary; must not be null.</param>
    /// <param name="second">Second dictionary; must not be null.</param>
    /// <returns>
    /// Keys found only in <paramref name="first"/>, followed by keys found only in
    /// <paramref name="second"/>, each group in the enumeration order of its dictionary.
    /// Empty when both dictionaries have the same keys.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="first"/> or <paramref name="second"/> is null.
    /// </exception>
    public List<string> CompareDictionaries(IDictionary<string, double> first, IDictionary<string, double> second)
    {
        if (first == null)
        {
            throw new System.ArgumentNullException("first");
        }
        if (second == null)
        {
            throw new System.ArgumentNullException("second");
        }

        var onlyInOne = new List<string>();

        // Keys are unique within a dictionary, so one membership test against the other
        // dictionary is enough; no intermediate tally is needed.
        foreach (string key in first.Keys)
        {
            if (!second.ContainsKey(key))
            {
                onlyInOne.Add(key);
            }
        }
        foreach (string key in second.Keys)
        {
            if (!first.ContainsKey(key))
            {
                onlyInOne.Add(key);
            }
        }

        return onlyInOne;
    }
}

和测试:

/// <summary>
/// NUnit tests for DictionaryComparer.
/// </summary>
[TestFixture]
public class Test
{
    /// <summary>
    /// The second dictionary contains every key of the first plus four extra ones;
    /// exactly those four extra keys must be reported.
    /// </summary>
    [Test]
    public void Compare()
    {
        IDictionary<string, double> dictionaryOne = new Dictionary<string, double>()
        {
            {"he", 0},{"ey", 0 }
        };
        Dictionary<string, double> dictionaryTwo = new Dictionary<string, double>()
        {
            {"he", 0},{"ey", 0 },{"yg", 0 },{"gu", 0 },{"uy", 0 },{"ys", 0 }
        };
        var comparer = new DictionaryComparer();

        var list = comparer.CompareDictionaries(dictionaryOne, dictionaryTwo);

        // Assert.That takes the actual value first and the expectation in the constraint;
        // swapping them produces misleading failure messages.
        Assert.That(list.Count, Is.EqualTo(4));
        // Dictionary enumeration order is not a documented guarantee, so compare the
        // content without depending on the position of each key.
        Assert.That(list, Is.EquivalentTo(new[] { "yg", "gu", "uy", "ys" }));
    }
}

希望有意义