解析邮件头中的 MIME 日期(C#,但与语言无关)

本文关键字:语言 日期 MIME | 更新日期: 2023-09-27 18:32:42

我正在开发一个处理电子邮件的小型本地 C# 应用程序。我使用 S22.Imap 下载邮件并解析标头。当我尝试获取电子邮件的日期时,它返回的是 MIME 标头中原样的字符串形式。

对于其中大多数日期,DateTime.TryParse 运行良好,但对于某些日期,它会失败。以下是每一种失败情况的示例,我在 RFC 中找不到应该如何处理它们:

  • Fri, 15 Jan 2016 20:21:44 -0600 -0700
  • Mon, 3 Jan 2011 20:32:19 +0000 (GMT+00:00)
  • Tue, 12 Jun 2012 19:22:28 0200 (+ 是隐式的吗?)
  • Mon, 11 Jan 2010 17:28:39 3600 (不在 RFC 中)
  • Thu, 07 Oct 2010 17:31:20 7200
  • Fri, 24 Jul 2009 21:13:28 +0100 (Paris, Madrid) (我必须忽略 +0100 后面的内容吗?这样做会不会丢失信息?)
  • Thu, 28 May 2015 10:58:24 +0200 (Paris, Madrid (heure d'été)) (同样的问题)

谁能告诉我前五个的含义,或者指出一份能够解释这些其他格式的文档?

提前感谢您的任何帮助。

解析邮件头中的 MIME 日期(C#,但与语言无关)

我必须编写自己的类来解析这些日期(如果您有兴趣,我有一个与 S22.Imap 竞争的库,名为 MailKit,它使用我的 MimeKit 库来解析消息、日期等)。

我冒昧地将我的日期解析器从 MimeKit 中分离出来,以便将其作为独立粘贴到此处

using System;
using System.Text;
using System.Collections.Generic;
namespace DateParserUtils {
    /// <summary>
    /// Classification bits accumulated over the bytes of a date token.
    /// </summary>
    /// <remarks>
    /// The <c>Non*</c> members are negative markers: a bit is set as soon as the token
    /// contains a byte that does not belong to the corresponding character class, so a
    /// cleared bit means "every byte seen so far fits that class". <c>HasColon</c> and
    /// <c>HasSign</c> are positive markers recording that a ':' or a '+'/'-' was seen.
    /// </remarks>
    [Flags]
    enum DateTokenFlags : byte
    {
        None           = 0x00,
        NonNumeric     = 0x01,
        NonWeekday     = 0x02,
        NonMonth       = 0x04,
        NonTime        = 0x08,
        NonAlphaZone   = 0x10,
        NonNumericZone = 0x20,
        HasColon       = 0x40,
        HasSign        = 0x80,
    }
    /// <summary>
    /// A slice of the input buffer (start index and length) paired with the
    /// classification flags accumulated over its bytes.
    /// </summary>
    class DateToken
    {
        /// <summary>
        /// Creates a token covering <paramref name="length"/> bytes starting at
        /// <paramref name="startIndex"/>, carrying the given classification flags.
        /// </summary>
        public DateToken (DateTokenFlags flags, int startIndex, int length)
        {
            Flags = flags;
            StartIndex = startIndex;
            Length = length;
        }

        /// <summary>The accumulated classification flags.</summary>
        public DateTokenFlags Flags { get; private set; }

        /// <summary>Index of the first byte of the token in the input buffer.</summary>
        public int StartIndex { get; private set; }

        /// <summary>Number of bytes in the token.</summary>
        public int Length { get; private set; }

        /// <summary>True when the NonNumeric bit is clear.</summary>
        public bool IsNumeric {
            get { return IsClear (DateTokenFlags.NonNumeric); }
        }

        /// <summary>True when the NonWeekday bit is clear.</summary>
        public bool IsWeekday {
            get { return IsClear (DateTokenFlags.NonWeekday); }
        }

        /// <summary>True when the NonMonth bit is clear.</summary>
        public bool IsMonth {
            get { return IsClear (DateTokenFlags.NonMonth); }
        }

        /// <summary>True when the NonTime bit is clear and a colon was seen.</summary>
        public bool IsTimeOfDay {
            get { return IsClear (DateTokenFlags.NonTime) && IsSet (DateTokenFlags.HasColon); }
        }

        /// <summary>True when the NonNumericZone bit is clear and a sign was seen.</summary>
        public bool IsNumericZone {
            get { return IsClear (DateTokenFlags.NonNumericZone) && IsSet (DateTokenFlags.HasSign); }
        }

        /// <summary>True when the NonAlphaZone bit is clear.</summary>
        public bool IsAlphaZone {
            get { return IsClear (DateTokenFlags.NonAlphaZone); }
        }

        /// <summary>True when the token qualifies as either a numeric or an alphabetic zone.</summary>
        public bool IsTimeZone {
            get { return IsNumericZone || IsAlphaZone; }
        }

        // True when none of the bits in 'bits' are present in Flags.
        bool IsClear (DateTokenFlags bits)
        {
            return (Flags & bits) == DateTokenFlags.None;
        }

        // True when at least one of the bits in 'bits' is present in Flags.
        bool IsSet (DateTokenFlags bits)
        {
            return (Flags & bits) != DateTokenFlags.None;
        }
    }
    /// <summary>
    /// Utility methods to parse and format rfc822 date strings.
    /// </summary>
    /// <remarks>
    /// Utility methods to parse and format rfc822 date strings.
    /// </remarks>
    public static class DateUtils
    {
        // 1970-01-01 00:00:00, constructed without an explicit DateTimeKind.
        internal static readonly DateTime UnixEpoch = new DateTime (1970, 1, 1, 0, 0, 0, 0);

        // Character classes used to build the per-byte classification table below.
        const string NumericCharacters = "0123456789";
        const string TimeCharacters = "0123456789:";
        const string NumericZoneCharacters = "+-0123456789";
        const string AlphaZoneCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        const string WeekdayCharacters = "SundayMondayTuesdayWednesdayThursdayFridaySaturday";
        const string MonthCharacters = "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember";

        // Three-letter abbreviations; the array index is the DayOfWeek value.
        static readonly string[] WeekDays = {
            "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
        };

        // Three-letter abbreviations; the array index + 1 is the month number.
        static readonly string[] Months = {
            "Jan", "Feb", "Mar", "Apr", "May", "Jun",
            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
        };

        // Named zone -> offset written as a signed HHMM number.
        static readonly Dictionary<string, int> timezones;

        // Classification flags for each of the 256 possible byte values.
        static readonly DateTokenFlags[] datetok;

        /// <summary>
        /// Builds the named time zone table and the per-byte classification table.
        /// </summary>
        static DateUtils ()
        {
            timezones = new Dictionary<string, int> {
                { "UT",       0 }, { "UTC",      0 }, { "GMT",      0 },
                { "EDT",   -400 }, { "EST",   -500 },
                { "CDT",   -500 }, { "CST",   -600 },
                { "MDT",   -600 }, { "MST",   -700 },
                { "PDT",   -700 }, { "PST",   -800 },
                // Note: rfc822 got the signs backwards for the military
                // timezones so some sending clients may mistakenly use the
                // wrong values.
                { "A",      100 }, { "B",      200 }, { "C",      300 },
                { "D",      400 }, { "E",      500 }, { "F",      600 },
                { "G",      700 }, { "H",      800 }, { "I",      900 },
                { "K",     1000 }, { "L",     1100 }, { "M",     1200 },
                { "N",     -100 }, { "O",     -200 }, { "P",     -300 },
                { "Q",     -400 }, { "R",     -500 }, { "S",     -600 },
                { "T",     -700 }, { "U",     -800 }, { "V",     -900 },
                { "W",    -1000 }, { "X",    -1100 }, { "Y",    -1200 },
                { "Z",        0 },
            };

            datetok = new DateTokenFlags[256];

            for (int code = 0; code < datetok.Length; code++) {
                char ch = (char) code;
                DateTokenFlags flags = DateTokenFlags.None;

                // Exact (case-sensitive) membership tests.
                if (NumericZoneCharacters.IndexOf (ch) < 0)
                    flags |= DateTokenFlags.NonNumericZone;
                if (AlphaZoneCharacters.IndexOf (ch) < 0)
                    flags |= DateTokenFlags.NonAlphaZone;
                if (NumericCharacters.IndexOf (ch) < 0)
                    flags |= DateTokenFlags.NonNumeric;
                if (TimeCharacters.IndexOf (ch) < 0)
                    flags |= DateTokenFlags.NonTime;

                // Weekday and month letters are matched in either case; anything
                // that is not an ASCII letter can never be part of such a name.
                bool isUpper = code >= 0x41 && code <= 0x5a;
                bool isLower = code >= 0x61 && code <= 0x7a;

                if (isUpper || isLower) {
                    char[] bothCases = {
                        isUpper ? ch : (char) (code - 0x20),
                        isUpper ? (char) (code + 0x20) : ch
                    };

                    if (WeekdayCharacters.IndexOfAny (bothCases) < 0)
                        flags |= DateTokenFlags.NonWeekday;
                    if (MonthCharacters.IndexOfAny (bothCases) < 0)
                        flags |= DateTokenFlags.NonMonth;
                } else {
                    flags |= DateTokenFlags.NonWeekday | DateTokenFlags.NonMonth;
                }

                datetok[code] = flags;
            }

            // Positive markers for the punctuation that identifies times and numeric zones.
            datetok[':'] |= DateTokenFlags.HasColon;
            datetok['+'] |= DateTokenFlags.HasSign;
            datetok['-'] |= DateTokenFlags.HasSign;
        }
        /// <summary>
        /// Tries to interpret a token as a weekday name.
        /// </summary>
        /// <remarks>
        /// Only the first three characters are compared (case-insensitively) against
        /// the abbreviations in <c>WeekDays</c>, so "Fri" and "Friday" both match.
        /// </remarks>
        /// <returns><c>true</c> if the token names a weekday; otherwise <c>false</c>
        /// with <paramref name="weekday"/> set to <see cref="DayOfWeek.Sunday"/>.</returns>
        static bool TryGetWeekday (DateToken token, byte[] text, out DayOfWeek weekday)
        {
            weekday = DayOfWeek.Sunday;

            if (token.Length < 3 || !token.IsWeekday)
                return false;

            // ASCII decoding maps one byte to one char, so decoding just the first
            // three bytes yields the same prefix as decoding the whole token.
            string prefix = Encoding.ASCII.GetString (text, token.StartIndex, 3);

            int position = Array.FindIndex (WeekDays, delegate (string abbreviation) {
                return abbreviation.Equals (prefix, StringComparison.OrdinalIgnoreCase);
            });

            if (position < 0)
                return false;

            weekday = (DayOfWeek) position;
            return true;
        }
        /// <summary>
        /// Parses a run of ASCII digits as a non-negative 32-bit integer.
        /// </summary>
        /// <remarks>
        /// Parsing starts at <paramref name="index"/> and stops at the first non-digit
        /// or at <paramref name="endIndex"/>; <paramref name="index"/> is left on the
        /// first byte that was not consumed.
        /// </remarks>
        /// <returns><c>true</c> if at least one digit was consumed and the value fits in
        /// an <see cref="System.Int32"/>; <c>false</c> if there were no digits or the
        /// value would overflow.</returns>
        static bool TryParseInt32 (byte[] text, ref int index, int endIndex, out int value)
        {
            const int OverflowThreshold = int.MaxValue / 10;
            const int LargestFinalDigit = int.MaxValue % 10;
            int origin = index;

            value = 0;

            for (; index < endIndex; index++) {
                int digit = text[index] - (byte) '0';

                if (digit < 0 || digit > 9)
                    break;

                // Appending this digit would exceed int.MaxValue.
                if (value > OverflowThreshold || (value == OverflowThreshold && digit > LargestFinalDigit))
                    return false;

                value = (value * 10) + digit;
            }

            return index > origin;
        }
        /// <summary>
        /// Tries to interpret an all-digit token as a day of the month (1 through 31).
        /// </summary>
        /// <returns><c>true</c> if the token is numeric and its value lies in 1..31.</returns>
        static bool TryGetDayOfMonth (DateToken token, byte[] text, out int day)
        {
            day = 0;

            if (!token.IsNumeric)
                return false;

            int cursor = token.StartIndex;
            int stop = cursor + token.Length;

            return TryParseInt32 (text, ref cursor, stop, out day) && day >= 1 && day <= 31;
        }
        /// <summary>
        /// Tries to interpret a token as a month name.
        /// </summary>
        /// <remarks>
        /// Only the first three characters are compared (case-insensitively) against
        /// the abbreviations in <c>Months</c>, so "Sep" and "September" both match.
        /// </remarks>
        /// <returns><c>true</c> with <paramref name="month"/> in 1..12 on success;
        /// otherwise <c>false</c> with <paramref name="month"/> set to 0.</returns>
        static bool TryGetMonth (DateToken token, byte[] text, out int month)
        {
            month = 0;

            if (token.Length < 3 || !token.IsMonth)
                return false;

            // ASCII decoding maps one byte to one char, so decoding just the first
            // three bytes yields the same prefix as decoding the whole token.
            string prefix = Encoding.ASCII.GetString (text, token.StartIndex, 3);

            int position = Array.FindIndex (Months, delegate (string abbreviation) {
                return abbreviation.Equals (prefix, StringComparison.OrdinalIgnoreCase);
            });

            if (position < 0)
                return false;

            month = position + 1;
            return true;
        }
        /// <summary>
        /// Tries to interpret an all-digit token as a year.
        /// </summary>
        /// <remarks>
        /// Values below 70 are mapped into the 2000s and values from 70 through 99 into
        /// the 1900s. The resulting year must be 1969 or later to be accepted.
        /// </remarks>
        static bool TryGetYear (DateToken token, byte[] text, out int year)
        {
            year = 0;

            if (!token.IsNumeric)
                return false;

            int cursor = token.StartIndex;
            int stop = cursor + token.Length;

            if (!TryParseInt32 (text, ref cursor, stop, out year))
                return false;

            // Expand abbreviated years.
            if (year < 70)
                year += 2000;
            else if (year < 100)
                year += 1900;

            return year >= 1969;
        }
        /// <summary>
        /// Tries to interpret a token as a time of day in <c>hh:mm</c> or <c>hh:mm:ss</c> form.
        /// </summary>
        /// <returns><c>true</c> if the token is a valid time (hour 0..23, minute and
        /// second 0..59); <paramref name="second"/> is 0 when the seconds are omitted.</returns>
        static bool TryGetTimeOfDay (DateToken token, byte[] text, out int hour, out int minute, out int second)
        {
            hour = 0;
            minute = 0;
            second = 0;

            if (!token.IsTimeOfDay)
                return false;

            int cursor = token.StartIndex;
            int stop = cursor + token.Length;

            bool haveHour = TryParseInt32 (text, ref cursor, stop, out hour);
            if (!haveHour || hour > 23)
                return false;

            // The hour must be followed by ':'.
            if (cursor >= stop || text[cursor] != (byte) ':')
                return false;
            cursor++;

            bool haveMinute = TryParseInt32 (text, ref cursor, stop, out minute);
            if (!haveMinute || minute > 59)
                return false;

            // The ":ss" part is optional; accept a bare hh:mm.
            if (cursor >= stop || text[cursor] != (byte) ':')
                return true;
            cursor++;

            bool haveSecond = TryParseInt32 (text, ref cursor, stop, out second);
            if (!haveSecond || second > 59)
                return false;

            // Nothing may follow the seconds.
            return cursor == stop;
        }
        /// <summary>
        /// Tries to interpret a token as a time zone and returns it as a signed HHMM number.
        /// </summary>
        /// <remarks>
        /// Three forms are recognized: a signed numeric zone such as <c>-0600</c>, a named
        /// zone of at most three letters that is present in the <c>timezones</c> table, and
        /// a bare run of digits such as <c>0200</c>, which is taken as a positive offset.
        /// A token that matches none of these classes is not rejected: the method returns
        /// <c>true</c> with <paramref name="tzone"/> left at 0.
        /// </remarks>
        /// <returns><c>false</c> if the token belongs to one of the recognized classes but
        /// could not be parsed or looked up; otherwise <c>true</c>.</returns>
        static bool TryGetTimeZone (DateToken token, byte[] text, out int tzone)
        {
            tzone = 0;

            if (token.IsNumericZone) {
                int endIndex = token.StartIndex + token.Length;
                int index = token.StartIndex;
                int sign;

                // The sign must be the first character of the token.
                if (text[index] == (byte) '-')
                    sign = -1;
                else if (text[index] == (byte) '+')
                    sign = 1;
                else
                    return false;

                index++;

                if (!TryParseInt32 (text, ref index, endIndex, out tzone) || index != endIndex)
                    return false;

                tzone *= sign;
            } else if (token.IsAlphaZone) {
                // No name in the zone table is longer than three letters.
                if (token.Length > 3)
                    return false;

                var name = Encoding.ASCII.GetString (text, token.StartIndex, token.Length);

                if (!timezones.TryGetValue (name, out tzone))
                    return false;
            } else if (token.IsNumeric) {
                int endIndex = token.StartIndex + token.Length;
                int index = token.StartIndex;

                // Unsigned digits: use this class's own integer parser (the listing is
                // standalone and has no ParseUtils helper class).
                if (!TryParseInt32 (text, ref index, endIndex, out tzone) || index != endIndex)
                    return false;
            }

            return true;
        }
        /// <summary>
        /// Checks whether a byte is an ASCII space or a horizontal tab.
        /// </summary>
        /// <remarks>
        /// Only these two bytes count as whitespace here; CR and LF are not included.
        /// </remarks>
        static bool IsWhiteSpace (byte c)
        {
            return c == (byte) ' ' || c == (byte) '\t';
        }
        /// <summary>
        /// Checks whether a byte ends the current token: '-', '/', ',' or whitespace.
        /// </summary>
        static bool IsTokenDelimeter (byte c)
        {
            switch (c) {
            case (byte) '-':
            case (byte) '/':
            case (byte) ',':
                return true;
            default:
                return IsWhiteSpace (c);
            }
        }
        /// <summary>
        /// Advances <paramref name="index"/> past any run of whitespace.
        /// </summary>
        /// <returns><c>true</c> if at least one byte was skipped.</returns>
        static bool SkipWhiteSpace (byte[] text, ref int index, int endIndex)
        {
            int origin = index;

            for (; index < endIndex; index++) {
                if (!IsWhiteSpace (text[index]))
                    break;
            }

            return index > origin;
        }
        /// <summary>
        /// Skips a parenthesized comment, honoring nesting and backslash escapes.
        /// </summary>
        /// <remarks>
        /// <paramref name="index"/> is expected to be on the opening '(' and is advanced
        /// past the matching ')'. If the comment is never closed, <paramref name="index"/>
        /// ends up at <paramref name="endIndex"/>.
        /// </remarks>
        /// <returns><c>true</c> if the matching ')' was found; otherwise <c>false</c>.</returns>
        static bool SkipComment (byte[] text, ref int index, int endIndex)
        {
            bool escaped = false;
            int depth = 1;

            // Step over the opening '('.
            index++;

            while (index < endIndex && depth > 0) {
                if (text[index] == (byte) '\\') {
                    // A backslash escapes the next byte; a second backslash cancels it.
                    escaped = !escaped;
                } else if (!escaped) {
                    if (text[index] == (byte) '(')
                        depth++;
                    else if (text[index] == (byte) ')')
                        depth--;
                } else {
                    // This byte was escaped, so a parenthesis here does not count.
                    escaped = false;
                }

                index++;
            }

            return depth == 0;
        }
        /// <summary>
        /// Skips any mixture of whitespace and parenthesized comments.
        /// </summary>
        /// <returns><c>false</c> if a comment was opened but never closed; otherwise <c>true</c>.</returns>
        static bool SkipCommentsAndWhiteSpace (byte[] text, ref int index, int endIndex)
        {
            SkipWhiteSpace (text, ref index, endIndex);

            for (;;) {
                // Done once the next byte does not open a comment.
                if (index >= endIndex || text[index] != (byte) '(')
                    return true;

                if (!SkipComment (text, ref index, endIndex))
                    return false;

                SkipWhiteSpace (text, ref index, endIndex);
            }
        }
        /// <summary>
        /// Splits a byte range into date tokens, skipping whitespace and comments.
        /// </summary>
        /// <remarks>
        /// A token runs from its first byte up to (not including) the next delimiter
        /// byte; its flags are the union of the classification flags of its bytes.
        /// Tokenizing stops early when an unterminated comment is encountered.
        /// </remarks>
        static IEnumerable<DateToken> TokenizeDate (byte[] text, int startIndex, int length)
        {
            int limit = startIndex + length;
            int position = startIndex;

            for (;;) {
                if (position >= limit)
                    yield break;

                if (!SkipCommentsAndWhiteSpace (text, ref position, limit) || position >= limit)
                    yield break;

                // Seed the flags with the classification of the token's first byte.
                DateTokenFlags flags = datetok[text[position]];

                if (flags != DateTokenFlags.None) {
                    int tokenStart = position;

                    position++;

                    // Extend the token up to the next delimiter, merging flags as we go.
                    while (position < limit && !IsTokenDelimeter (text[position])) {
                        flags |= datetok[text[position]];
                        position++;
                    }

                    yield return new DateToken (flags, tokenStart, position - tokenStart);
                }

                // Step over the delimiter that ended the token.
                position++;
            }
        }
        /// <summary>
        /// Tries to parse tokens laid out as <c>[weekday] day month year time zone</c>.
        /// </summary>
        /// <remarks>
        /// The weekday is optional. A zone token that cannot be parsed is treated as
        /// offset zero. Zone values outside -1400..1400 are shifted by multiples of
        /// 2400 until they fall inside that range.
        /// </remarks>
        /// <returns><c>true</c> if every mandatory field parsed and the values form a
        /// valid <see cref="System.DateTimeOffset"/>; otherwise <c>false</c>.</returns>
        static bool TryParseStandardDateFormat (IList<DateToken> tokens, byte[] text, out DateTimeOffset date)
        {
            date = new DateTimeOffset ();

            // day, month, year, time and zone are all required...
            if (tokens.Count < 5)
                return false;

            int first = 0;
            DayOfWeek ignoredWeekday;

            // ...plus one more token when a leading weekday is present.
            if (TryGetWeekday (tokens[0], text, out ignoredWeekday)) {
                if (tokens.Count < 6)
                    return false;

                first = 1;
            }

            int dayOfMonth, monthNumber, fullYear;
            int hours24, minutesPastHour, secondsPastMinute;
            int zone;

            if (!TryGetDayOfMonth (tokens[first], text, out dayOfMonth)
                || !TryGetMonth (tokens[first + 1], text, out monthNumber)
                || !TryGetYear (tokens[first + 2], text, out fullYear)
                || !TryGetTimeOfDay (tokens[first + 3], text, out hours24, out minutesPastHour, out secondsPastMinute))
                return false;

            if (!TryGetTimeZone (tokens[first + 4], text, out zone))
                zone = 0;

            // Pull out-of-range zone values back into -1400..1400.
            while (zone < -1400)
                zone += 2400;
            while (zone > 1400)
                zone -= 2400;

            // The zone is a signed HHMM number: split it into hours and minutes.
            var utcOffset = new TimeSpan (zone / 100, zone % 100, 0);

            try {
                date = new DateTimeOffset (fullYear, monthNumber, dayOfMonth, hours24, minutesPastHour, secondsPastMinute, utcOffset);
                return true;
            } catch (ArgumentOutOfRangeException) {
                return false;
            }
        }
        /// <summary>
        /// Best-effort fallback parser that picks date fields out of the tokens in whatever order they appear.
        /// </summary>
        /// <remarks>
        /// Each token is offered, in order, to the weekday, month-name, time-of-day and
        /// time zone recognizers; numeric tokens that none of them claimed are then
        /// assigned to year, month or day by length and value. Parsing succeeds only if
        /// a year, a month and a day were all found. A missing time defaults to 00:00:00
        /// and a missing zone to offset zero. The zone value is used as found; it is not
        /// range-adjusted here.
        /// </remarks>
        /// <returns><c>true</c> if year, month and day were found and form a valid
        /// <see cref="System.DateTimeOffset"/>; otherwise <c>false</c> with
        /// <paramref name="date"/> set to its default value.</returns>
        static bool TryParseUnknownDateFormat (IList<DateToken> tokens, byte[] text, out DateTimeOffset date)
        {
            int? day = null, month = null, year = null, tzone = null;
            int hour = 0, minute = 0, second = 0;
            // true while 'month' holds a value guessed from a 1-2 digit number
            bool numericMonth = false;
            bool haveWeekday = false;
            bool haveTime = false;
            DayOfWeek weekday;
            TimeSpan offset;
            for (int i = 0; i < tokens.Count; i++) {
                int value;
                // The first token that parses as a weekday name is consumed; its value is not used.
                if (!haveWeekday && tokens[i].IsWeekday) {
                    if (TryGetWeekday (tokens[i], text, out weekday)) {
                        haveWeekday = true;
                        continue;
                    }
                }
                // A month name is accepted if no month is known yet, or if the known
                // month was only a numeric guess.
                if ((month == null || numericMonth) && tokens[i].IsMonth) {
                    if (TryGetMonth (tokens[i], text, out value)) {
                        if (numericMonth) {
                            // The number previously taken as the month becomes the day.
                            numericMonth = false;
                            day = month;
                        }
                        month = value;
                        continue;
                    }
                }
                // Only the first valid time-of-day token is used.
                if (!haveTime && tokens[i].IsTimeOfDay) {
                    if (TryGetTimeOfDay (tokens[i], text, out hour, out minute, out second)) {
                        haveTime = true;
                        continue;
                    }
                }
                // Signed numeric zones and alphabetic zone names.
                if (tzone == null && tokens[i].IsTimeZone) {
                    if (TryGetTimeZone (tokens[i], text, out value)) {
                        tzone = value;
                        continue;
                    }
                }
                if (tokens[i].IsNumeric) {
                    // Four digits: the first such token is tried as the year, a later
                    // one as an unsigned time zone.
                    if (tokens[i].Length == 4) {
                        if (year == null) {
                            if (TryGetYear (tokens[i], text, out value))
                                year = value;
                        } else if (tzone == null) {
                            if (TryGetTimeZone (tokens[i], text, out value))
                                tzone = value;
                        }
                        continue;
                    }
                    // Numbers of 3 digits, or of 5 or more, are ignored.
                    if (tokens[i].Length > 2)
                        continue;
                    // Note: we likely have either YYYY[-/]MM[-/]DD or MM[-/]DD[-/]YY
                    int endIndex = tokens[i].StartIndex + tokens[i].Length;
                    int index = tokens[i].StartIndex;
                    // The return value is ignored: the token is all digits, 1-2 bytes long.
                    TryParseInt32 (text, ref index, endIndex, out value);
                    // First candidate in 1..12 is taken as the month (may be reinterpreted
                    // as the day if a month name shows up later).
                    if (month == null && value > 0 && value <= 12) {
                        numericMonth = true;
                        month = value;
                        continue;
                    }
                    // Next candidate in 1..31 is taken as the day.
                    if (day == null && value > 0 && value <= 31) {
                        day = value;
                        continue;
                    }
                    // Two-digit year: only values of 69 and above are accepted, as 19xx.
                    if (year == null && value >= 69) {
                        year = 1900 + value;
                        continue;
                    }
                }
                // Unrecognized token: ignore it and move on.
            }
            // Year, month and day are mandatory.
            if (year == null || month == null || day == null) {
                date = new DateTimeOffset ();
                return false;
            }
            if (!haveTime)
                hour = minute = second = 0;
            if (tzone != null) {
                // The zone is a signed HHMM number: split it into hours and minutes.
                int minutes = tzone.Value % 100;
                int hours = tzone.Value / 100;
                offset = new TimeSpan (hours, minutes, 0);
            } else {
                offset = new TimeSpan (0);
            }
            try {
                date = new DateTimeOffset (year.Value, month.Value, day.Value, hour, minute, second, offset);
            } catch (ArgumentOutOfRangeException) {
                // The collected values do not form a valid date/offset.
                date = new DateTimeOffset ();
                return false;
            }
            return true;
        }
        /// <summary>
        /// Tries to parse the given input buffer into a new <see cref="System.DateTimeOffset"/> instance.
        /// </summary>
        /// <remarks>
        /// Parses an rfc822 date and time from the supplied buffer starting at the given index
        /// and spanning across the specified number of bytes.
        /// </remarks>
        /// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
        /// <param name="buffer">The input buffer.</param>
        /// <param name="startIndex">The starting index of the input buffer.</param>
        /// <param name="length">The number of bytes in the input buffer to parse.</param>
        /// <param name="date">The parsed date.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="buffer"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="startIndex"/> and <paramref name="length"/> do not specify
        /// a valid range in the byte array.
        /// </exception>
        public static bool TryParse (byte[] buffer, int startIndex, int length, out DateTimeOffset date)
        {
            if (buffer == null)
                throw new ArgumentNullException ("buffer");

            if (startIndex < 0 || startIndex > buffer.Length)
                throw new ArgumentOutOfRangeException ("startIndex");

            if (length < 0 || length > (buffer.Length - startIndex))
                throw new ArgumentOutOfRangeException ("length");

            // Tokenize once, then try the strict layout before the heuristic fallback.
            List<DateToken> tokens = new List<DateToken> (TokenizeDate (buffer, startIndex, length));

            bool parsed = TryParseStandardDateFormat (tokens, buffer, out date)
                || TryParseUnknownDateFormat (tokens, buffer, out date);

            if (!parsed)
                date = new DateTimeOffset ();

            return parsed;
        }
        /// <summary>
        /// Tries to parse the given input buffer into a new <see cref="System.DateTimeOffset"/> instance.
        /// </summary>
        /// <remarks>
        /// Parses an rfc822 date and time from the supplied buffer starting at the specified index.
        /// </remarks>
        /// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
        /// <param name="buffer">The input buffer.</param>
        /// <param name="startIndex">The starting index of the input buffer.</param>
        /// <param name="date">The parsed date.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="buffer"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="startIndex"/> is not within the range of the byte array.
        /// </exception>
        public static bool TryParse (byte[] buffer, int startIndex, out DateTimeOffset date)
        {
            if (buffer == null)
                throw new ArgumentNullException ("buffer");

            if (startIndex < 0 || startIndex > buffer.Length)
                throw new ArgumentOutOfRangeException ("startIndex");

            // Parse everything from startIndex to the end of the buffer.
            return TryParse (buffer, startIndex, buffer.Length - startIndex, out date);
        }
        /// <summary>
        /// Tries to parse the given input buffer into a new <see cref="System.DateTimeOffset"/> instance.
        /// </summary>
        /// <remarks>
        /// Parses an rfc822 date and time from the specified buffer.
        /// </remarks>
        /// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
        /// <param name="buffer">The input buffer.</param>
        /// <param name="date">The parsed date.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="buffer"/> is <c>null</c>.
        /// </exception>
        public static bool TryParse (byte[] buffer, out DateTimeOffset date)
        {
            if (buffer == null)
                throw new ArgumentNullException ("buffer");

            // Parse the whole buffer.
            return TryParse (buffer, 0, buffer.Length, out date);
        }
        /// <summary>
        /// Tries to parse the given input text into a new <see cref="System.DateTimeOffset"/> instance.
        /// </summary>
        /// <remarks>
        /// Parses an rfc822 date and time from the specified text.
        /// </remarks>
        /// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
        /// <param name="text">The input text.</param>
        /// <param name="date">The parsed date.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="text"/> is <c>null</c>.
        /// </exception>
        public static bool TryParse (string text, out DateTimeOffset date)
        {
            if (text == null)
                throw new ArgumentNullException ("text");

            // The tokenizer works on bytes, so encode the text first and parse all of it.
            byte[] encoded = Encoding.UTF8.GetBytes (text);

            return TryParse (encoded, 0, encoded.Length, out date);
        }
    }
}

在我的应用程序中,我会把一些电子邮件保存到数据库,因此借助 SQL Server,我可以使用 PARSE() 以非常简单的方式完成这件事。

SELECT PARSE('Wed, 28 Sep 2022 17:04:44 -0500' AS datetime)