将字符串数组(#3、#1、#2、#4)中的数字压缩到范围(#1:#4)
本文关键字:压缩 数字 范围 数组 字符串 | 更新日期: 2023-09-27 18:21:04
1。我做了一个函数,它使用字符串数组
{"foo", "#123", "#124", "bar", "#125", "#126"}
制作一个新的数组,将数字转换为一个范围:
{"foo", "#123:126", "bar"}
并返回:
"foo,#123:126,bar"
请注意,它不会仅更改两个数字的范围(它不应该将
{"#1", "#2"}
更改为{"#1:#2"}
)。这是因为CCD_ 3和CCD_。顺序对于所有值都很重要,但不包括在某个范围内压缩的值。例如,在
{#6, #5, #1, foo, #2, #3}
中,#2
和#3
将被#1
压扁,这没关系,但其余部分的顺序应该相同。
下面是我的实现,由于有多个.Contains
调用,它的时间效率非常低。
using System;
using System.Linq;
using System.Collections.Generic;
class Program
{
static void Main(string[] args)
{
var ids = new string[] {
"foo", //output: unmodified
"#60", "#59", "#61", "#62", //from integer is in between, output: #59:#62
"#12", "#14", "#17", "#13", "#18", "#bar", "#19", "#20", //two ranges and string intertwined, output: #12:#14,#17:#20,#bar
"#25", "#26", //output: unmodified
"#39", "#38", //output: unmodified
"baz", //output: unmodified
"#12", "#13", "#14" //duplicate sequences, output: #12:#14
};
//this is what the function should output when fed `ids`:
Console.WriteLine("foo,#59:#62,#12:#14,#17:#20,#bar,#25,#26,#38,#39,baz,#12:#14");
Console.WriteLine(Compress(ids));
Console.Read();
}
static string Compress(IEnumerable<string> IDs)
{
var result = new List<string>();
var ignore = new HashSet<string>();
foreach (var item in IDs)
{
if (ignore.Contains(item)) continue;
var id = item;
if (id.StartsWith("#"))
{
int fromInt;
if (int.TryParse(id.Substring(1), out fromInt))
{
var less1 = $"#{fromInt - 1}";
var plus1 = $"#{fromInt + 1}";
var hasPlus1 = IDs.Contains(plus1);
if (IDs.Contains(less1) && hasPlus1) continue;
var plus2 = $"#{fromInt + 2}";
if (hasPlus1 && IDs.Contains(plus2))
{
ignore.Add(plus1);
ignore.Add(plus2);
var toInt = fromInt + 2;
while (IDs.Contains($"#{toInt + 1}"))
{
toInt += 1;
ignore.Add($"#{toInt}");
}
id = $"#{fromInt}:#{toInt}";
}
}
}
result.Add(id);
}
return string.Join(",", result);
}
}
有人能告诉我怎样才能提高效率吗?
这里有一种方法(下面代码中的注释):
static string Compress(IEnumerable<string> ids)
{
var list = new LinkedList<string>(ids);
// store the numbers and their linked-list nodes
var dict = new Dictionary<LinkedListNode<string>, int>();
var current = list.First;
int num;
while(current != null)
{
var value = current.Value;
if(value.StartsWith("#") && int.TryParse(value.TrimStart('#'), out num))
dict[current] = num;
current = current.Next;
}
// if found more than 2 numbers, compress to a range
if(dict.Count > 2)
{
var sorted = dict.OrderBy(x => x.Value).ToArray();
var ranges = new List<Range>();
int min, max;
for(var i = 0; i < sorted.Length;)
{
var rangeNodes = new List<LinkedListNode<string>>();
min = max = sorted[i].Value;
// record nodes, until we can't find a continuing number
// (Note: this loop operates on the same counter as the outer
// for-loop, so we only iterate through the sorted list once)
for(; i < sorted.Length && sorted[i].Value - max <= 1; i++)
{
rangeNodes.Add(sorted[i].Key);
max = sorted[i].Value;
}
// if more than 2 numbers, replace the first string, and remove the others
if(rangeNodes.Count > 2)
{
rangeNodes[0].Value = string.Format("#{0}:#{1}", min, max);
for(int n = 1; n < rangeNodes.Count; n++)
list.Remove(rangeNodes[n]);
}
}
}
// create a joined string
return string.Join(",",list);
}
我最终得到了一个两遍算法。主要挑战是选择支持快速密钥查找和重复密钥的正确数据结构。Dictionary<int, List<int>
看起来太占用空间了,所以我决定使用一个类似链表的结构,由一个保持第一个关键位置的Dictionary<int, int>
和一个带有链接和处理信息的单独数组组成。
第一遍准备处理结构,第二遍发出结果。由于每个元素只被解析一次(包括使用具有O(1)时间复杂度的字典查找的范围检查),因此算法IMO的时间复杂度应该是O(N)。
除此之外,它与您的非常相似,因此可能被视为优化的实现。
enum EntryType { Single, Range, Unknown }
struct Entry
{
public string Value;
public int Number;
public int Next; // position of the next value with the same number
public EntryType Type;
}
static string Compress(IEnumerable<string> input)
{
var entryList = input.Select(value => new Entry { Value = value }).ToArray();
var numberQueue = new Dictionary<int, int>(entryList.Length); // Key=number, Value=position
for (int pos = entryList.Length - 1; pos >= 0; pos--)
{
var value = entryList[pos].Value;
int number;
if (value.Length > 1 && value[0] == '#' && int.TryParse(value.Substring(1), out number))
{
int nextPos;
if (!numberQueue.TryGetValue(number, out nextPos)) nextPos = -1;
entryList[pos].Number = number;
entryList[pos].Type = EntryType.Unknown;
entryList[pos].Next = nextPos;
numberQueue[number] = pos;
}
}
var output = new StringBuilder();
for (int pos = 0; pos < entryList.Length; pos++)
{
var entryType = entryList[pos].Type;
if (entryType == EntryType.Range) continue; // already processed
var number = entryList[pos].Number;
int startPos = pos, endPos = pos, prevCount = 0, nextCount = 0;
if (entryType == EntryType.Unknown)
{
for (int prevPos; numberQueue.TryGetValue(number - prevCount - 1, out prevPos) && prevPos >= 0; startPos = prevPos, prevCount++) { }
for (int nextPos; numberQueue.TryGetValue(number + nextCount + 1, out nextPos) && nextPos >= 0; endPos = nextPos, nextCount++) { }
entryType = prevCount + nextCount >= 2 ? EntryType.Range : EntryType.Single;
for (int offset = -prevCount; offset <= nextCount; offset++)
{
var nextNumber = number + offset;
int nextPos = numberQueue[nextNumber];
entryList[nextPos].Type = entryType;
numberQueue[nextNumber] = entryList[nextPos].Next;
}
}
if (output.Length > 0) output.Append(',');
if (entryType == EntryType.Single)
output.Append(entryList[pos].Value);
else
output.Append(entryList[startPos].Value).Append(':').Append(entryList[endPos].Value);
}
return output.ToString();
}