使用另一个字节列表/数组,统计其在字节列表/数组中的出现次数

本文关键字:字节 列表 另一个 数组 数字 阵列 | 更新日期: 2023-09-27 17:59:28

我正在尝试统计一个字节序列在另一个字节序列中出现的总次数。但是,已经计入某次匹配的字节不能被重复使用。例如,给定字符串
k.k.k.k.k.k.,假设要查找的字节序列是 k.k,那么应该只找到 3 次出现,而不是 5 次:匹配会被分解为 [k.k].[k.k].[k.k].,而不是像 [k.[k].[k].[k].[k].k] 那样相互重叠、每次只向右移动 2 个字节。

理想情况下,我想借此了解压缩字典或运行时编码大致是什么样子。所以目标是把

k.k.k.k.k.k. 压缩到只剩两部分,因为 (k.k.k.) 是你能得到的最大、最好的符号。

以下是迄今为止的来源:

using System;
using System.Collections.Generic;
using System.Collections;
using System.Linq;
using System.Text;
using System.IO;

    /// <summary>
    /// Experimental console program that tries every candidate byte pattern taken
    /// from a file, counts its non-overlapping occurrences, and removes every
    /// occurrence after the first to see how much the data shrinks.
    /// </summary>
    static class Compression
    {
        /// <summary>
        /// Reads "ok.txt", then for every pattern length (from the file size down
        /// to 1) and every start offset, counts the matches of that pattern in the
        /// working buffer and deletes each match after the first one. One result
        /// row per pattern is collected and printed at the end.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <returns>Always 0.</returns>
        static int Main(string[] args)
        {
            List<byte> bytes = File.ReadAllBytes("ok.txt").ToList();
            List<List<int>> list = new List<List<int>>();
            // Starting Numbers of bytes - This can be changed manually.
            int StartingNumBytes = bytes.Count;
            for (int length = StartingNumBytes; length > 0; length--)
            {
                Console.WriteLine("i: " + length);
                // bytes shrinks whenever duplicates are removed below, so the
                // bound is deliberately re-evaluated on every iteration.
                for (int start = 0; start < bytes.Count - length; start++)
                {
                    // Report the start offset (the original printed the length here by mistake).
                    Console.WriteLine("ii: " + start);

                    // Take a private copy of the candidate so that removing bytes
                    // from the buffer cannot change the pattern being searched for.
                    List<byte> pattern = bytes.GetRange(start, length);

                    DisplayBinary(bytes, "red");
                    DisplayBinary(pattern, "green");

                    int matches = 0;
                    for (int position = 0; position < bytes.Count; position++)
                    {
                        if (!IsMatchAt(bytes, pattern, position))
                        {
                            continue;
                        }

                        // If it made it this far, it has found a match.
                        matches++;
                        Console.WriteLine("Matches: " + matches + " Orig Count: " + bytes.Count + " POS: " + position);
                        if (matches > 1)
                        {
                            // Every match after the first is redundant: drop it from the buffer.
                            bytes.RemoveRange(position, pattern.Count);
                            DisplayBinary(bytes, "red");
                            Console.WriteLine(pattern.Count + " Bytes removed.");
                            // The buffer shifted left, so the same position must be examined again.
                            position--;
                        }
                    }

                    // Row layout: match count, pattern length, and a rough score
                    // computed from the buffer size after the removals.
                    List<int> sublist = new List<int>
                    {
                        matches,
                        pattern.Count,
                        bytes.Count - ((matches * pattern.Count) - matches)
                    };
                    list.Add(sublist);
                }
            }

            Display(list);
            Console.Read();
            return 0;
        }

        /// <summary>
        /// Tells whether <paramref name="pattern"/> occurs in <paramref name="bytes"/>
        /// starting exactly at <paramref name="position"/>.
        /// </summary>
        /// <returns>
        /// true when every pattern byte matches; false on a mismatch or when fewer
        /// than pattern.Count bytes remain from <paramref name="position"/>.
        /// </returns>
        static bool IsMatchAt(IList<byte> bytes, IList<byte> pattern, int position)
        {
            if (pattern.Count > bytes.Count - position)
            {
                return false;
            }

            for (int offset = 0; offset < pattern.Count; offset++)
            {
                if (bytes[position + offset] != pattern[offset])
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Writes the bytes to the console as 8-digit binary groups, eight groups per line.
        /// </summary>
        /// <param name="bytes">Bytes to print.</param>
        /// <param name="color">
        /// "green" or "red" selects that foreground colour; any other value leaves
        /// the current colour unchanged. The colour is reset after printing.
        /// </param>
        static void DisplayBinary(List<byte> bytes, string color = "white")
        {
            switch (color)
            {
                case "green":
                    Console.ForegroundColor = ConsoleColor.Green;
                    break;
                case "red":
                    Console.ForegroundColor = ConsoleColor.Red;
                    break;
                default:
                    break;
            }

            for (int i = 0; i < bytes.Count; i++)
            {
                // Start a new line before every group of eight bytes (including the first).
                if (i % 8 == 0)
                {
                    Console.WriteLine();
                }

                Console.Write(GetIntBinaryString(bytes[i]) + " ");
            }

            Console.WriteLine();
            Console.ResetColor();
        }

        /// <summary>
        /// Formats the low 8 bits of <paramref name="n"/> as a zero-padded binary string.
        /// </summary>
        /// <param name="n">Value whose lowest eight bits are rendered.</param>
        /// <returns>Exactly eight characters of '0'/'1', most significant bit first.</returns>
        static string GetIntBinaryString(int n)
        {
            // Mask to one byte so higher bits (and the sign) never widen the output.
            return Convert.ToString(n & 0xFF, 2).PadLeft(8, '0');
        }

        /// <summary>
        /// Prints every row of <paramref name="list"/> (values right-aligned in
        /// 4-character columns) followed by the total number of values printed.
        /// </summary>
        /// <param name="list">Rows of integers to print.</param>
        static void Display(List<List<int>> list)
        {
            Console.WriteLine("Elements:");
            foreach (List<int> sublist in list)
            {
                foreach (int value in sublist)
                {
                    Console.Write("{0,4}", value);
                }

                Console.WriteLine();
            }

            // Total number of values across all rows.
            int count = list.Sum(sublist => sublist.Count);
            Console.WriteLine("Count:");
            Console.WriteLine(count);
        }

        /// <summary>
        /// Counts the non-overlapping occurrences of <paramref name="pattern"/> in
        /// <paramref name="bytes"/>, scanning left to right.
        /// </summary>
        /// <param name="pattern">Byte sequence to look for.</param>
        /// <param name="bytes">Data to search.</param>
        /// <returns>
        /// The number of matches; 0 when the pattern is empty or longer than the data.
        /// </returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static int SearchBytePattern(byte[] pattern, byte[] bytes)
        {
            if (pattern == null)
            {
                throw new ArgumentNullException("pattern");
            }

            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }

            if (pattern.Length == 0)
            {
                return 0;
            }

            int matches = 0;
            // Last index at which a full pattern still fits. The bound is inclusive,
            // otherwise a match that ends on the final byte would be missed.
            int lastStart = bytes.Length - pattern.Length;
            int i = 0;
            while (i <= lastStart)
            {
                if (IsMatchAt(bytes, pattern, i))
                {
                    matches++;
                    // Skip the matched bytes so they cannot be reused by another match.
                    i += pattern.Length;
                }
                else
                {
                    i++;
                }
            }

            return matches;
        }
    }

文件的非二进制(文本)内容请参见帖子前文;以下是对应的二进制数据:011010110010111001101011001011100110101100101110011010110010111001101011001011100110101100101110。我希望处理后的结果比最初更小。

使用另一个字节列表/数组,统计其在字节列表/数组中的出现次数

/// <summary>
/// Counts the non-overlapping occurrences of <paramref name="pattern"/> in
/// <paramref name="target"/>, scanning left to right.
/// </summary>
/// <param name="target">Data to search.</param>
/// <param name="pattern">Byte sequence to look for.</param>
/// <returns>
/// The number of matches; 0 when the pattern is empty or longer than the target.
/// </returns>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
private static int CountOccurences(byte[] target, byte[] pattern)
{
    if (target == null)
    {
        throw new ArgumentNullException("target");
    }

    if (pattern == null)
    {
        throw new ArgumentNullException("pattern");
    }

    // An empty pattern has no meaningful occurrences; without this guard every
    // position would count as a match.
    if (pattern.Length == 0)
    {
        return 0;
    }

    int count = 0;
    int lastStart = target.Length - pattern.Length;
    int start = 0;
    while (start <= lastStart)
    {
        int matched = 0;
        while (matched < pattern.Length && target[start + matched] == pattern[matched])
        {
            matched++;
        }

        if (matched == pattern.Length)
        {
            count++;
            // Jump past the match so its bytes are not reused.
            start += pattern.Length;
        }
        else
        {
            start++;
        }
    }

    return count;
}

使用此解决方案,您可以访问匹配的各个索引(在枚举时),或者您可以对结果调用Count()来查看有多少匹配:

/// <summary>
/// Lazily yields the start index of every occurrence of <paramref name="pattern"/>
/// in <paramref name="sequence"/>, in ascending order.
/// </summary>
/// <typeparam name="T">Element type; elements are compared with the default equality comparer.</typeparam>
/// <param name="pattern">Elements to look for.</param>
/// <param name="sequence">Elements to search.</param>
/// <param name="overlap">
/// true to report overlapping matches (advance by one after a match);
/// false to skip past each match so its elements are not reused.
/// </param>
/// <returns>The matching start indices; enumeration is deferred.</returns>
public static IEnumerable<int> Find<T>(T[] pattern, T[] sequence, bool overlap)
{
    EqualityComparer<T> comparer = EqualityComparer<T>.Default;

    // Always advance by at least one element: with an empty pattern and
    // overlap == false a step of pattern.Length (0) would never terminate.
    int step = overlap ? 1 : Math.Max(pattern.Length, 1);
    int lastStart = sequence.Length - pattern.Length;

    int i = 0;
    while (i <= lastStart)
    {
        bool isMatch = true;
        for (int j = 0; j < pattern.Length; j++)
        {
            if (!comparer.Equals(sequence[i + j], pattern[j]))
            {
                isMatch = false;
                break;
            }
        }

        if (isMatch)
        {
            yield return i;
            i += step;
        }
        else
        {
            i++;
        }
    }
}

使用 overlap: false 调用即可解决你的问题;如果感兴趣,也可以用 overlap: true 来查看重叠的匹配。

我这里有几个其他方法,它们具有略微不同的API(以及更好的性能),其中包括一个直接处理字节流的方法。

一个简单粗暴、不使用正则表达式的实现。虽然我不确定它是否符合问题的本意,但它应该相对较快。我打算对正则表达式版本做一些计时测试,以比较两者的相对速度:

    /// <summary>
    /// Counts the non-overlapping occurrences of <paramref name="TestPattern"/> in
    /// <paramref name="TestString"/>, scanning left to right.
    /// </summary>
    /// <param name="TestString">Text to search.</param>
    /// <param name="TestPattern">Text to look for; must not be empty.</param>
    /// <returns>The number of matches; 0 when <paramref name="TestString"/> is empty.</returns>
    /// <exception cref="ApplicationException"><paramref name="TestPattern"/> has zero length.</exception>
    private int CountOccurrences(string TestString, string TestPattern)
    {
        if (TestPattern.Length == 0)
        {
            throw new ApplicationException("CountOccurrences: Unable to process because TestPattern has zero length.");
        }

        int patternCount = 0;
        int searchIndex = 0;

        // Ordinal comparison matches exact characters. The culture-sensitive default
        // ignores certain characters (e.g. U+00AD) and would report phantom matches.
        while ((searchIndex = TestString.IndexOf(TestPattern, searchIndex, StringComparison.Ordinal)) >= 0)
        {
            patternCount++;
            // Resume after the match so its characters are not reused; the index
            // never exceeds TestString.Length, which IndexOf accepts.
            searchIndex += TestPattern.Length;
        }

        return patternCount;
    }
    /// <summary>
    /// Click handler that prints the CountOccurrences result for three sample
    /// text/pattern pairs, one per console line.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void btnTest_Click(object sender, EventArgs e)
    {
        const string sampleText = "k.k.k.k.k.k.k.k.k.k.k.k";
        const string samplePattern = "k.k";

        string[][] cases =
        {
            new[] { sampleText, samplePattern },        // outputs 6
            new[] { sampleText + ".k", samplePattern }, // still 6
            new[] { sampleText, samplePattern + "." },  // only 5
        };

        foreach (string[] pair in cases)
        {
            Console.WriteLine(CountOccurrences(pair[0], pair[1]).ToString());
        }
    }