将字符串数组(#3、#1、#2、#4)中的数字压缩到范围(#1:#4)

本文关键字:压缩 数字 范围 数组 字符串 | 更新日期: 2023-09-27 18:21:04

1。我做了一个函数,它使用字符串数组

{"foo", "#123", "#124", "bar", "#125", "#126"}

制作一个新的数组,将数字转换为一个范围:

{"foo", "#123:126", "bar"}

并返回:

"foo,#123:126,bar"
  1. 请注意,它不会仅更改两个数字的范围(它不应该将{"#1", "#2"}更改为{"#1:#2"})。这是因为CCD_ 3和CCD_。

  2. 顺序对于所有值都很重要,但不包括在某个范围内压缩的值。例如,在{#6, #5, #1, foo, #2, #3}中,#2#3将被#1压扁,这没关系,但其余部分的顺序应该相同。

下面是我的实现,由于有多个.Contains调用,它的时间效率非常低。


using System;
using System.Linq;
using System.Collections.Generic;
class Program
{
    static void Main(string[] args)
    {
        var ids = new string[] {
            "foo", //output: unmodified
            "#60", "#59", "#61", "#62", //from integer is in between, output: #59:#62
            "#12", "#14", "#17", "#13", "#18", "#bar", "#19", "#20", //two ranges and string intertwined, output: #12:#14,#17:#20,#bar
            "#25", "#26", //output: unmodified
            "#39", "#38", //output: unmodified
            "baz", //output: unmodified
            "#12", "#13", "#14" //duplicate sequences, output: #12:#14
        };
        //this is what the function should output when fed `ids`:
        Console.WriteLine("foo,#59:#62,#12:#14,#17:#20,#bar,#25,#26,#38,#39,baz,#12:#14");
        Console.WriteLine(Compress(ids));
        Console.Read();
    }
    static string Compress(IEnumerable<string> IDs)
    {
        var result = new List<string>();
        var ignore = new HashSet<string>();
        foreach (var item in IDs)
        {
            if (ignore.Contains(item)) continue;
            var id = item;
            if (id.StartsWith("#"))
            {
                int fromInt;
                if (int.TryParse(id.Substring(1), out fromInt))
                {
                    var less1 = $"#{fromInt - 1}";
                    var plus1 = $"#{fromInt + 1}";
                    var hasPlus1 = IDs.Contains(plus1);
                    if (IDs.Contains(less1) && hasPlus1) continue;
                    var plus2 = $"#{fromInt + 2}";
                    if (hasPlus1 && IDs.Contains(plus2))
                    {
                        ignore.Add(plus1);
                        ignore.Add(plus2);
                        var toInt = fromInt + 2;
                        while (IDs.Contains($"#{toInt + 1}"))
                        {
                            toInt += 1;
                            ignore.Add($"#{toInt}");
                        }
                        id = $"#{fromInt}:#{toInt}";
                    }
                }
            }
            result.Add(id);
        }
        return string.Join(",", result);
    }
}

有人能告诉我怎样才能提高效率吗?

将字符串数组(#3、#1、#2、#4)中的数字压缩到范围(#1:#4)

这里有一种方法(下面代码中的注释):

static string Compress(IEnumerable<string> ids)
{
    var list = new LinkedList<string>(ids);
    // store the numbers and their linked-list nodes
    var dict = new Dictionary<LinkedListNode<string>, int>();
    var current = list.First;
    int num;
    while(current != null)
    {
        var value = current.Value;
        if(value.StartsWith("#") && int.TryParse(value.TrimStart('#'), out num)) 
            dict[current] = num;
        current = current.Next;
    }
    // if found more than 2 numbers, compress to a range
    if(dict.Count > 2)
    {
        var sorted = dict.OrderBy(x => x.Value).ToArray();
        var ranges = new List<Range>();
        int min, max;
        for(var i = 0; i < sorted.Length;)
        {
            var rangeNodes = new List<LinkedListNode<string>>();
            min = max = sorted[i].Value;
            // record nodes, until we can't find a continuing number
            // (Note: this loop operates on the same counter as the outer
            // for-loop, so we only iterate through the sorted list once)
            for(; i < sorted.Length && sorted[i].Value - max <= 1; i++)
            {
                rangeNodes.Add(sorted[i].Key);
                max = sorted[i].Value;
            }
            // if more than 2 numbers, replace the first string, and remove the others 
            if(rangeNodes.Count > 2)
            {
                rangeNodes[0].Value = string.Format("#{0}:#{1}", min, max);
                for(int n = 1; n < rangeNodes.Count; n++)
                    list.Remove(rangeNodes[n]);
            }   
        }
    }
    // create a joined string
    return string.Join(",",list);
}

我最终得到了一个两遍算法。主要挑战是选择支持快速密钥查找和重复密钥的正确数据结构。Dictionary<int, List<int>看起来太占用空间了,所以我决定使用一个类似链表的结构,由一个保持第一个关键位置的Dictionary<int, int>和一个带有链接和处理信息的单独数组组成。

第一遍准备处理结构,第二遍发出结果。由于每个元素只被解析一次(包括使用具有O(1)时间复杂度的字典查找的范围检查),因此算法IMO的时间复杂度应该是O(N)。

除此之外,它与您的非常相似,因此可能被视为优化的实现。

enum EntryType { Single, Range, Unknown }
struct Entry
{
    public string Value;
    public int Number;
    public int Next; // position of the next value with the same number
    public EntryType Type;
}
static string Compress(IEnumerable<string> input)
{
    var entryList = input.Select(value => new Entry { Value = value }).ToArray();
    var numberQueue = new Dictionary<int, int>(entryList.Length); // Key=number, Value=position
    for (int pos = entryList.Length - 1; pos >= 0; pos--)
    {
        var value = entryList[pos].Value;
        int number;
        if (value.Length > 1 && value[0] == '#' && int.TryParse(value.Substring(1), out number))
        {
            int nextPos;
            if (!numberQueue.TryGetValue(number, out nextPos)) nextPos = -1;
            entryList[pos].Number = number;
            entryList[pos].Type = EntryType.Unknown;
            entryList[pos].Next = nextPos;
            numberQueue[number] = pos;
        }
    }
    var output = new StringBuilder();
    for (int pos = 0; pos < entryList.Length; pos++)
    {
        var entryType = entryList[pos].Type;
        if (entryType == EntryType.Range) continue; // already processed
        var number = entryList[pos].Number;
        int startPos = pos, endPos = pos, prevCount = 0, nextCount = 0;
        if (entryType == EntryType.Unknown)
        {
            for (int prevPos; numberQueue.TryGetValue(number - prevCount - 1, out prevPos) && prevPos >= 0; startPos = prevPos, prevCount++) { }
            for (int nextPos; numberQueue.TryGetValue(number + nextCount + 1, out nextPos) && nextPos >= 0; endPos = nextPos, nextCount++) { }
            entryType = prevCount + nextCount >= 2 ? EntryType.Range : EntryType.Single;
            for (int offset = -prevCount; offset <= nextCount; offset++)
            {
                var nextNumber = number + offset;
                int nextPos = numberQueue[nextNumber];
                entryList[nextPos].Type = entryType;
                numberQueue[nextNumber] = entryList[nextPos].Next;
            }
        }
        if (output.Length > 0) output.Append(',');
        if (entryType == EntryType.Single)
            output.Append(entryList[pos].Value);
        else
            output.Append(entryList[startPos].Value).Append(':').Append(entryList[endPos].Value);
    }
    return output.ToString();
}