将结构化文本转换为业务实体的更好方法

本文关键字:实体 更好 方法 业务 结构化 文本 转换 | 更新日期: 2023-09-27 18:15:26

我想找到一个更好的解决方案,把纯文本(其中每个字段都有预定义的长度)转换为业务实体。例如,对于输入文本 "Testuser   new york    10018",前11个字符是用户名,接下来的12个字符是城市,最后5个字符是邮政编码(不足长度的字段用空格补齐)。输入文本可能很长,比如1000个字符,对应实体中的多个属性。

感谢您的帮助。

我尝试过以下方法:

  1. 定义一个可以反序列化为业务实体的 XML 结构

  2. 使用xslt导航到每个节点,并通过在输入文本上使用子字符串函数来填充xml元素值。

  3. 一旦xml被填充,将xml反序列化为实体

但我认为上述方法可能难以扩展:要把不同格式的输入转换为各自对应的 XML,就需要维护并加载多个 XSLT。

将结构化文本转换为业务实体的更好方法

使用 System.Text.RegularExpressions 命名空间中的正则表达式可能是一种既好用又优雅的方法,例如:

// Fixed-width record layout: 11 characters of user name, 12 of city, 5 of zip code.
// The pattern is compiled once and reused for every input.
// Note: '.' does not match '\n' by default, so a record cannot span a line break,
// and any trailing text shorter than a full 28-character record is not matched.
static readonly Regex inputParser = new Regex("(.{11})(.{12})(.{5})", RegexOptions.Compiled);
foreach (Match m in inputParser.Matches(yourInput)) {
    BusinessEntity e = new BusinessEntity();
    // Groups is an indexer (square brackets), not a method; group 0 is the whole
    // match, so the captured fields start at index 1.
    e.Username = m.Groups[1].Value.TrimEnd(); // Remove spaces from the end; I take it that's what they'll be padded with
    e.City = m.Groups[2].Value.TrimEnd();
    e.ZipCode = m.Groups[3].Value; // last field is taken as-is, without trimming
    myListOfBusinessEntities.Add(e);
}

如果你只需要处理一种固定的格式,可以简单地编写一个类,用其中的一个方法接收一行文本并返回一个新的实体。

如果每一行都用空格填充到固定长度,那么使用二进制读取器配合 System.Text.Encoding 类的 GetString 方法可以得到更快的解决方案。

根据问题的补充说明,我推断您需要为不同的输入处理多种不同的格式。下面是一个 IFormatter 的实现,它可以帮你完成大部分工作。请注意,这段代码可能在多种情况下出错,不提供任何保证:

// Round-trip demo: writes a MyClass instance to an in-memory stream as one
// fixed-width line, rewinds the stream, and reads the instance back.
void Test()
{
    FixedWidthSerializer<MyClass> formatter = new FixedWidthSerializer<MyClass>();
    MemoryStream buffer = new MemoryStream();
    MyClass original = new MyClass { Age = 30, FirstName = "John", LastName = "Doe" };

    formatter.Serialize(buffer, original);

    // Rewind so that Deserialize starts reading from the first byte written.
    buffer.Seek(0, SeekOrigin.Begin);

    MyClass roundTripped = (MyClass)formatter.Deserialize(buffer);
}
// Sample entity used by the round-trip demo. It mixes auto-properties with a
// plain public field; member order is kept as declared.
[Serializable]
private class MyClass
{
    // First name, exposed as an auto-property.
    public string FirstName { get; set; }

    // Last name, exposed as a public field.
    public string LastName;

    // Age, exposed as an auto-property.
    public int Age { get; set; }
}
/// <summary>
/// An <see cref="IFormatter"/> that reads and writes an object of type
/// <typeparamref name="T"/> as a single line of text in which every member
/// occupies a fixed number of characters.
/// </summary>
/// <remarks>
/// Values are converted with <see cref="Convert.ChangeType(object, Type)"/>, padded on the
/// right with spaces when written, and right-trimmed when read. Values longer than their
/// field width are truncated when written.
/// NOTE(review): the reader and writer created per call are not disposed so that the
/// caller's stream stays open; because <see cref="StreamReader"/> buffers its input, the
/// stream position after <see cref="Deserialize"/> is not guaranteed to sit at the start
/// of the next line — confirm before reading several records from one stream.
/// NOTE(review): conversions use the overload without a format provider, so text
/// produced for culture-sensitive types may vary between machines — confirm.
/// </remarks>
/// <typeparam name="T">The type of the objects that are read and written.</typeparam>
public class FixedWidthSerializer<T> : IFormatter
{
    // Width given to every member when no explicit layout is supplied.
    private const Int32 DefaultCharLength = 100;

    private readonly FixedWidthFieldDefinition[] _fieldDefinition;

    /// <summary>
    /// Creates a serializer that lays out every serializable member of
    /// <typeparamref name="T"/>, in the order reported by
    /// <see cref="FormatterServices.GetSerializableMembers(Type)"/>, 100 characters wide each.
    /// </summary>
    public FixedWidthSerializer()
        : this(FormatterServices.GetSerializableMembers(typeof(T))
            .Select(sm => new FixedWidthFieldDefinition(sm.Name, DefaultCharLength))
            .ToArray())
    { }

    /// <summary>
    /// Creates a serializer with an explicit field layout.
    /// </summary>
    /// <param name="fieldDefinition">The fields, in the order they appear in a line.</param>
    /// <exception cref="ArgumentNullException"><paramref name="fieldDefinition"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="fieldDefinition"/> contains a null entry.</exception>
    public FixedWidthSerializer(FixedWidthFieldDefinition[] fieldDefinition)
    {
        if (fieldDefinition == null)
        {
            throw new ArgumentNullException("fieldDefinition");
        }

        // A null entry would otherwise only fail later, deep inside Serialize/Deserialize.
        if (fieldDefinition.Any(fd => fd == null))
        {
            throw new ArgumentException("Field definitions must not contain null entries.", "fieldDefinition");
        }

        _fieldDefinition = fieldDefinition;
        Context = new StreamingContext(StreamingContextStates.All);
    }

    /// <summary>
    /// Describes one fixed-width field: the name of the member it maps to and the
    /// number of characters it occupies in a line.
    /// </summary>
    public class FixedWidthFieldDefinition
    {
        /// <summary>Name of the serializable member this field maps to.</summary>
        public String FieldName { get; protected set; }

        /// <summary>Number of characters the field occupies.</summary>
        public Int32 CharLength { get; protected set; }

        /// <summary>
        /// Creates a field definition.
        /// </summary>
        /// <param name="fieldName">Name of the serializable member.</param>
        /// <param name="charLength">Width of the field in characters; must not be negative.</param>
        /// <exception cref="ArgumentNullException"><paramref name="fieldName"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="charLength"/> is negative.</exception>
        public FixedWidthFieldDefinition(String fieldName, Int32 charLength)
        {
            if (fieldName == null)
            {
                throw new ArgumentNullException("fieldName");
            }

            if (charLength < 0)
            {
                throw new ArgumentOutOfRangeException("charLength", "Field width must not be negative.");
            }

            FieldName = fieldName;
            CharLength = charLength;
        }
    }

    /// <summary>
    /// Reads one line from the stream and builds an object of type
    /// <typeparamref name="T"/> from its fixed-width fields.
    /// </summary>
    /// <param name="serializationStream">The stream to read the line from.</param>
    /// <returns>The populated object (boxed when <typeparamref name="T"/> is a value type).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="serializationStream"/> is null.</exception>
    /// <exception cref="SerializationException">
    /// The stream has no more lines, the line is shorter than the configured layout,
    /// a configured member does not exist, or a member is not a field.
    /// </exception>
    public object Deserialize(Stream serializationStream)
    {
        if (serializationStream == null)
        {
            throw new ArgumentNullException("serializationStream");
        }

        // Not disposed on purpose: disposing the reader would close the caller's stream.
        var streamReader = new StreamReader(serializationStream);
        var textLine = streamReader.ReadLine();
        if (textLine == null)
        {
            throw new SerializationException("Ran out of text!");
        }

        // The object is created without running any constructor; members are then
        // filled in one by one through reflection.
        var obj = FormatterServices.GetUninitializedObject(typeof (T));
        var memberDictionary = FormatterServices.GetSerializableMembers(obj.GetType(), Context).ToDictionary(mi => mi.Name);
        var offset = 0;
        foreach (var fieldDef in _fieldDefinition)
        {
            if (offset + fieldDef.CharLength > textLine.Length)
            {
                throw new SerializationException("Line was too short!");
            }

            // Read the current field and increase the offset
            var fieldStringValue = textLine.Substring(offset, fieldDef.CharLength);
            offset += fieldDef.CharLength;

            MemberInfo memberInfo;
            if (!memberDictionary.TryGetValue(fieldDef.FieldName, out memberInfo))
            {
                throw new SerializationException("You asked for the member '" + fieldDef.FieldName + "', but it doesn't exist on type '" + typeof (T) + "'");
            }

            var memberAsField = memberInfo as FieldInfo;
            if (memberAsField == null)
            {
                throw new SerializationException("I don't know what to make of the property '" + fieldDef.FieldName + "'");
            }

            // Padding added on write is removed before converting to the member's type.
            memberAsField.SetValue(obj, Convert.ChangeType(fieldStringValue.TrimEnd(), memberAsField.FieldType));
        }
        return obj;
    }

    /// <summary>
    /// Writes <paramref name="graph"/> to the stream as one line of fixed-width fields
    /// and flushes the writer.
    /// </summary>
    /// <param name="serializationStream">The stream to write the line to.</param>
    /// <param name="graph">The object to write.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="serializationStream"/> or <paramref name="graph"/> is null.
    /// </exception>
    /// <exception cref="SerializationException">
    /// A configured member does not exist on the type of <paramref name="graph"/>.
    /// </exception>
    public void Serialize(Stream serializationStream, object graph)
    {
        if (serializationStream == null)
        {
            throw new ArgumentNullException("serializationStream");
        }

        if (graph == null)
        {
            throw new ArgumentNullException("graph");
        }

        var serializableMembers = FormatterServices.GetSerializableMembers(graph.GetType());

        // Resolve every configured field to its member up front, so that an unknown
        // name is reported with the same kind of error that Deserialize uses.
        var membersToSerialize = new MemberInfo[_fieldDefinition.Length];
        for (var i = 0; i < _fieldDefinition.Length; i++)
        {
            var fieldName = _fieldDefinition[i].FieldName;
            var member = serializableMembers.FirstOrDefault(sm => sm.Name == fieldName);
            if (member == null)
            {
                throw new SerializationException("You asked for the member '" + fieldName + "', but it doesn't exist on type '" + graph.GetType() + "'");
            }

            membersToSerialize[i] = member;
        }

        var objectData = FormatterServices.GetObjectData(graph, membersToSerialize);
        var sb = new StringBuilder(_fieldDefinition.Sum(fd => fd.CharLength));
        for (var i = 0; i < _fieldDefinition.Length; i++)
        {
            var width = _fieldDefinition[i].CharLength;

            // A null member value is written as an all-blank field instead of
            // failing with a NullReferenceException on PadRight.
            var text = objectData[i] == null
                ? String.Empty
                : (String) Convert.ChangeType(objectData[i], typeof (String));

            // PadRight fills short values; the (start, count) overload cuts long ones.
            sb.Append((text ?? String.Empty).PadRight(width), 0, width);
        }

        // Not disposed on purpose: disposing the writer would close the caller's stream.
        var sw = new StreamWriter(serializationStream);
        sw.WriteLine(sb.ToString());
        sw.Flush();
    }

    /// <summary>Part of <see cref="IFormatter"/>; not read by this class.</summary>
    public ISurrogateSelector SurrogateSelector { get; set; }

    /// <summary>Part of <see cref="IFormatter"/>; not read by this class.</summary>
    public SerializationBinder Binder { get; set; }

    /// <summary>
    /// Streaming context passed to the member lookup in <see cref="Deserialize"/>;
    /// set to <see cref="StreamingContextStates.All"/> by the constructor.
    /// </summary>
    public StreamingContext Context { get; set; }
}