解析邮件头中的 MIME 日期(C#,但与语言无关)
本文关键字:语言 日期 MIME | 更新日期: 2023-09-27 18:32:42
我正在开发一个处理电子邮件的小型本地 c# 应用程序。我正在使用 S22/Imap 下载并分隔标头。当我尝试获取电子邮件的日期时,它会向我返回字符串版本,如 MIME 标头。
对于大多数日期,DateTime.TryParse 都能正常解析,但对于某些日期它会失败。下面是每一种失败格式的示例,我在 RFC 中找不到应该如何处理它们:
- Fri, 15 Jan 2016 20:21:44 -0600 -0700
- Mon, 3 Jan 2011 20:32:19 +0000 (GMT+00:00)
- Tue, 12 Jun 2012 19:22:28 0200 (+ 是隐式的吗?)
- Mon, 11 Jan 2010 17:28:39 3600 (不在 RFC 中)
- Thu, 07 Oct 2010 17:31:20 7200
- Fri, 24 Jul 2009 21:13:28 +0100 (Paris, Madrid) (我必须忽略 +0100 后面的内容吗?这样做会不会丢失信息?)
- Thu, 28 May 2015 10:58:24 +0200 (Paris, Madrid (heure d'été)) (同样的问题)
谁能告诉我前五个的含义,或者指出一个可以解释另一种格式的文档?
提前感谢您的任何帮助。
我不得不编写自己的类来解析这些日期(如果您有兴趣,我有一个与 S22.Imap 竞争的库,名为 MailKit,它使用我的 MimeKit 库来解析消息、日期等)。
我冒昧地将我的日期解析器从 MimeKit 中分离出来,以便将其作为独立粘贴到此处
using System;
using System.Text;
using System.Collections.Generic;
namespace DateParserUtils {
/// <summary>
/// Classification bits accumulated over the characters of a date token.
/// </summary>
/// <remarks>
/// The <c>Non*</c> bits are negative markers: a bit is set as soon as the token contains a
/// character that cannot belong to that kind of token, so a cleared bit means the token may
/// still be of that kind. <c>HasColon</c> and <c>HasSign</c> are positive markers.
/// </remarks>
[Flags]
enum DateTokenFlags : byte
{
    None           = 0x00,
    NonNumeric     = 0x01,
    NonWeekday     = 0x02,
    NonMonth       = 0x04,
    NonTime        = 0x08,
    NonAlphaZone   = 0x10,
    NonNumericZone = 0x20,
    HasColon       = 0x40,
    HasSign        = 0x80,
}
/// <summary>
/// A slice of the input buffer (start index and length) together with the
/// classification flags accumulated over its characters.
/// </summary>
class DateToken
{
    /// <summary>
    /// Creates a token covering <paramref name="length"/> bytes starting at <paramref name="startIndex"/>.
    /// </summary>
    /// <param name="flags">The combined classification flags of the token's characters.</param>
    /// <param name="startIndex">The index of the first byte of the token.</param>
    /// <param name="length">The number of bytes in the token.</param>
    public DateToken (DateTokenFlags flags, int startIndex, int length)
    {
        Flags = flags;
        StartIndex = startIndex;
        Length = length;
    }

    public DateTokenFlags Flags { get; private set; }

    public int StartIndex { get; private set; }

    public int Length { get; private set; }

    /// <summary>True when no character ruled the token out as a plain number.</summary>
    public bool IsNumeric {
        get { return Lacks (DateTokenFlags.NonNumeric); }
    }

    /// <summary>True when every character may occur in a weekday name.</summary>
    public bool IsWeekday {
        get { return Lacks (DateTokenFlags.NonWeekday); }
    }

    /// <summary>True when every character may occur in a month name.</summary>
    public bool IsMonth {
        get { return Lacks (DateTokenFlags.NonMonth); }
    }

    /// <summary>True when the token is made of time characters only and contains a colon.</summary>
    public bool IsTimeOfDay {
        get { return Lacks (DateTokenFlags.NonTime) && Has (DateTokenFlags.HasColon); }
    }

    /// <summary>True when the token is made of numeric-zone characters only and contains a sign.</summary>
    public bool IsNumericZone {
        get { return Lacks (DateTokenFlags.NonNumericZone) && Has (DateTokenFlags.HasSign); }
    }

    /// <summary>True when every character may occur in an alphabetic zone name.</summary>
    public bool IsAlphaZone {
        get { return Lacks (DateTokenFlags.NonAlphaZone); }
    }

    /// <summary>True when the token looks like either a numeric or an alphabetic zone.</summary>
    public bool IsTimeZone {
        get { return IsNumericZone || IsAlphaZone; }
    }

    // True when the given bit is set on this token.
    bool Has (DateTokenFlags bit)
    {
        return (Flags & bit) != 0;
    }

    // True when the given bit is clear on this token.
    bool Lacks (DateTokenFlags bit)
    {
        return (Flags & bit) == 0;
    }
}
/// <summary>
/// Utility methods to parse and format rfc822 date strings.
/// </summary>
/// <remarks>
/// Utility methods to parse and format rfc822 date strings.
/// </remarks>
public static class DateUtils
{
// Midnight, 1 January 1970 (constructed without an explicit DateTimeKind).
// Not referenced by the parsing code visible in this file.
internal static readonly DateTime UnixEpoch = new DateTime (1970, 1, 1, 0, 0, 0, 0);
// Concatenated English month names. The static constructor flags a byte as NonMonth
// when neither its upper- nor its lower-case form occurs in this string.
const string MonthCharacters = "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember";
// Concatenated English weekday names, used the same way to compute NonWeekday.
const string WeekdayCharacters = "SundayMondayTuesdayWednesdayThursdayFridaySaturday";
// Characters allowed in an alphabetic zone name (upper-case only; the lookup is case-sensitive).
const string AlphaZoneCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
// Characters allowed in a numeric zone such as "+0100".
const string NumericZoneCharacters = "+-0123456789";
// Characters allowed in a plain number.
const string NumericCharacters = "0123456789";
// Characters allowed in a time-of-day token such as "20:21:44".
const string TimeCharacters = "0123456789:";
// Three-letter month abbreviations; index + 1 is the month number.
static readonly string[] Months = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
// Three-letter weekday abbreviations; the index is cast directly to DayOfWeek.
static readonly string[] WeekDays = {
"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};
// Zone name -> offset in +/-hhmm form; populated by the static constructor.
static readonly Dictionary<string, int> timezones;
// Classification flags for each of the 256 byte values; populated by the static constructor.
static readonly DateTokenFlags[] datetok;
/// <summary>
/// Builds the zone-name table and the per-byte classification table used by the tokenizer.
/// </summary>
static DateUtils ()
{
    // Offsets are stored in +/-hhmm form (e.g. -500 means UTC-05:00).
    timezones = new Dictionary<string, int> {
        { "UT",       0 }, { "UTC",     0 }, { "GMT",     0 },
        { "EDT",   -400 }, { "EST",  -500 },
        { "CDT",   -500 }, { "CST",  -600 },
        { "MDT",   -600 }, { "MST",  -700 },
        { "PDT",   -700 }, { "PST",  -800 },
        // Note: rfc822 got the signs backwards for the military
        // timezones so some sending clients may mistakenly use the
        // wrong values.
        { "A",      100 }, { "B",     200 }, { "C",     300 },
        { "D",      400 }, { "E",     500 }, { "F",     600 },
        { "G",      700 }, { "H",     800 }, { "I",     900 },
        { "K",     1000 }, { "L",    1100 }, { "M",    1200 },
        { "N",     -100 }, { "O",    -200 }, { "P",    -300 },
        { "Q",     -400 }, { "R",    -500 }, { "S",    -600 },
        { "T",     -700 }, { "U",    -800 }, { "V",    -900 },
        { "W",    -1000 }, { "X",   -1100 }, { "Y",   -1200 },
        { "Z",        0 },
    };

    datetok = new DateTokenFlags[256];

    // Holds the upper- and lower-case forms of the byte being classified so that
    // month/weekday membership is tested case-insensitively.
    var any = new char[2];

    for (int c = 0; c < 256; c++) {
        if (c >= 0x41 && c <= 0x5a) {
            // 'A'..'Z': pair it with its lower-case form
            any[0] = (char) c;
            any[1] = (char) (c + 0x20);
        } else if (c >= 0x61 && c <= 0x7a) {
            // 'a'..'z': pair it with its upper-case form
            any[0] = (char) (c - 0x20);
            any[1] = (char) c;
        } else {
            // Not a letter: test the byte itself rather than whatever letter
            // pair was left over from a previous iteration.
            any[0] = any[1] = (char) c;
        }

        if (NumericZoneCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonNumericZone;
        if (AlphaZoneCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonAlphaZone;
        if (WeekdayCharacters.IndexOfAny (any) == -1)
            datetok[c] |= DateTokenFlags.NonWeekday;
        if (NumericCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonNumeric;
        if (MonthCharacters.IndexOfAny (any) == -1)
            datetok[c] |= DateTokenFlags.NonMonth;
        if (TimeCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonTime;
    }

    // Positive markers for the characters that identify times and signed zones.
    datetok[':'] |= DateTokenFlags.HasColon;
    datetok['+'] |= DateTokenFlags.HasSign;
    datetok['-'] |= DateTokenFlags.HasSign;
}
/// <summary>
/// Tries to interpret a token as an English weekday name.
/// </summary>
/// <remarks>
/// Only the first three letters are compared (case-insensitively) against the
/// <c>WeekDays</c> abbreviations; longer tokens are accepted as long as they start with one.
/// </remarks>
/// <returns><c>true</c> if the token names a weekday, <c>false</c> otherwise.</returns>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="weekday">The matched weekday, or <c>DayOfWeek.Sunday</c> when nothing matched.</param>
static bool TryGetWeekday (DateToken token, byte[] text, out DayOfWeek weekday)
{
    weekday = DayOfWeek.Sunday;

    if (token.Length < 3 || !token.IsWeekday)
        return false;

    // ASCII decoding maps one byte to one char, so decoding just the first
    // three bytes yields the three-letter prefix of the token.
    string abbrev = Encoding.ASCII.GetString (text, token.StartIndex, 3);
    int match = Array.FindIndex (WeekDays, candidate => candidate.Equals (abbrev, StringComparison.OrdinalIgnoreCase));

    if (match < 0)
        return false;

    weekday = (DayOfWeek) match;
    return true;
}
/// <summary>
/// Parses a run of ASCII digits as a non-negative 32-bit integer.
/// </summary>
/// <remarks>
/// Parsing stops at the first non-digit byte or at <paramref name="endIndex"/>.
/// On overflow the method returns <c>false</c>, leaving <paramref name="index"/> on the
/// digit that would have overflowed and <paramref name="value"/> at the value parsed so far.
/// </remarks>
/// <returns><c>true</c> if at least one digit was consumed without overflow.</returns>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">The position to start at; advanced past the consumed digits.</param>
/// <param name="endIndex">The exclusive upper bound of the range to read.</param>
/// <param name="value">The parsed value.</param>
static bool TryParseInt32 (byte[] text, ref int index, int endIndex, out int value)
{
    const int MaxBeforeShift = int.MaxValue / 10;
    const int MaxLastDigit = int.MaxValue % 10;
    int first = index;

    value = 0;

    for (; index < endIndex; index++) {
        byte b = text[index];

        if (b < (byte) '0' || b > (byte) '9')
            break;

        int digit = b - (byte) '0';

        // would value * 10 + digit exceed int.MaxValue?
        if (value > MaxBeforeShift || (value == MaxBeforeShift && digit > MaxLastDigit))
            return false;

        value = value * 10 + digit;
    }

    return index != first;
}
/// <summary>
/// Tries to interpret a token as a day of the month (1 through 31).
/// </summary>
/// <returns><c>true</c> if the token starts with a number in the range 1..31.</returns>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="day">The parsed day; not meaningful when the method returns <c>false</c>.</param>
static bool TryGetDayOfMonth (DateToken token, byte[] text, out int day)
{
    day = 0;

    if (!token.IsNumeric)
        return false;

    int cursor = token.StartIndex;
    int limit = cursor + token.Length;

    return TryParseInt32 (text, ref cursor, limit, out day) && day >= 1 && day <= 31;
}
/// <summary>
/// Tries to interpret a token as an English month name.
/// </summary>
/// <remarks>
/// Only the first three letters are compared (case-insensitively) against the
/// <c>Months</c> abbreviations.
/// </remarks>
/// <returns><c>true</c> if the token names a month, <c>false</c> otherwise.</returns>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="month">The month number (1..12), or 0 when nothing matched.</param>
static bool TryGetMonth (DateToken token, byte[] text, out int month)
{
    month = 0;

    if (token.Length < 3 || !token.IsMonth)
        return false;

    // ASCII decoding maps one byte to one char, so decoding just the first
    // three bytes yields the three-letter prefix of the token.
    string abbrev = Encoding.ASCII.GetString (text, token.StartIndex, 3);
    int position = Array.FindIndex (Months, candidate => candidate.Equals (abbrev, StringComparison.OrdinalIgnoreCase));

    // FindIndex yields -1 when nothing matches, which leaves month at 0.
    month = position + 1;

    return position >= 0;
}
/// <summary>
/// Tries to interpret a token as a year.
/// </summary>
/// <remarks>
/// Values below 70 are mapped to 20xx and values from 70 to 99 to 19xx.
/// The resulting year is accepted only when it is 1969 or later.
/// </remarks>
/// <returns><c>true</c> if a year of 1969 or later was parsed.</returns>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="year">The parsed year; not meaningful when the method returns <c>false</c>.</param>
static bool TryGetYear (DateToken token, byte[] text, out int year)
{
    year = 0;

    if (!token.IsNumeric)
        return false;

    int cursor = token.StartIndex;
    int limit = cursor + token.Length;

    if (!TryParseInt32 (text, ref cursor, limit, out year))
        return false;

    // expand abbreviated years
    if (year < 70)
        year += 2000;
    else if (year < 100)
        year += 1900;

    return year >= 1969;
}
/// <summary>
/// Tries to interpret a token as a time of day in <c>hh:mm</c> or <c>hh:mm:ss</c> form.
/// </summary>
/// <returns><c>true</c> if the token is a valid time of day, <c>false</c> otherwise.</returns>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="hour">The parsed hour (0..23 on success).</param>
/// <param name="minute">The parsed minute (0..59 on success).</param>
/// <param name="second">The parsed second (0..59 on success; 0 when omitted).</param>
static bool TryGetTimeOfDay (DateToken token, byte[] text, out int hour, out int minute, out int second)
{
    hour = 0;
    minute = 0;
    second = 0;

    if (!token.IsTimeOfDay)
        return false;

    int cursor = token.StartIndex;
    int limit = cursor + token.Length;

    // hours, followed by a mandatory ':'
    if (!TryParseInt32 (text, ref cursor, limit, out hour) || hour > 23)
        return false;

    if (cursor >= limit || text[cursor] != (byte) ':')
        return false;

    cursor++;

    // minutes
    if (!TryParseInt32 (text, ref cursor, limit, out minute) || minute > 59)
        return false;

    // the ":ss" part is optional
    if (cursor >= limit || text[cursor] != (byte) ':')
        return true;

    cursor++;

    if (!TryParseInt32 (text, ref cursor, limit, out second) || second > 59)
        return false;

    // nothing may follow the seconds
    return cursor == limit;
}
/// <summary>
/// Tries to interpret a token as a timezone.
/// </summary>
/// <remarks>
/// Three shapes are recognised: a signed numeric zone ("+0100", "-0600"), an alphabetic
/// zone name of at most three letters found in the <c>timezones</c> table ("GMT", "EST", "Z"),
/// and a bare unsigned number ("0200"), which is taken as a positive offset.
/// Any other token is rejected.
/// </remarks>
/// <returns><c>true</c> if the token was recognised as a timezone, <c>false</c> otherwise.</returns>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="tzone">The zone in +/-hhmm form; not meaningful when the method returns <c>false</c>.</param>
static bool TryGetTimeZone (DateToken token, byte[] text, out int tzone)
{
    tzone = 0;

    if (token.IsNumericZone) {
        int endIndex = token.StartIndex + token.Length;
        int index = token.StartIndex;
        int sign;

        // the sign must be the first character of the token
        if (text[index] == (byte) '-')
            sign = -1;
        else if (text[index] == (byte) '+')
            sign = 1;
        else
            return false;

        index++;

        // the rest of the token must consist of digits only
        if (!TryParseInt32 (text, ref index, endIndex, out tzone) || index != endIndex)
            return false;

        tzone *= sign;
        return true;
    }

    if (token.IsAlphaZone) {
        if (token.Length > 3)
            return false;

        var name = Encoding.ASCII.GetString (text, token.StartIndex, token.Length);

        return timezones.TryGetValue (name, out tzone);
    }

    if (token.IsNumeric) {
        // unsigned digits such as "0200": parsed with the local integer parser
        int endIndex = token.StartIndex + token.Length;
        int index = token.StartIndex;

        return TryParseInt32 (text, ref index, endIndex, out tzone) && index == endIndex;
    }

    // The token has none of the recognised shapes, so it is not a timezone.
    return false;
}
/// <summary>
/// Checks whether a byte is a space or a horizontal tab.
/// </summary>
/// <returns><c>true</c> if <paramref name="c"/> is ' ' or '\t', <c>false</c> otherwise.</returns>
/// <param name="c">The byte to test.</param>
static bool IsWhiteSpace (byte c)
{
    return c == (byte) ' ' || c == (byte) '\t';
}
/// <summary>
/// Checks whether a byte ends the current token: '-', '/', ',' or whitespace.
/// </summary>
/// <returns><c>true</c> if <paramref name="c"/> is a token delimiter, <c>false</c> otherwise.</returns>
/// <param name="c">The byte to test.</param>
static bool IsTokenDelimeter (byte c)
{
    switch (c) {
    case (byte) '-':
    case (byte) '/':
    case (byte) ',':
        return true;
    default:
        return IsWhiteSpace (c);
    }
}
/// <summary>
/// Advances <paramref name="index"/> past any run of whitespace.
/// </summary>
/// <returns><c>true</c> if at least one whitespace byte was skipped.</returns>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">The current position; advanced past the whitespace.</param>
/// <param name="endIndex">The exclusive upper bound of the range to read.</param>
static bool SkipWhiteSpace (byte[] text, ref int index, int endIndex)
{
    int before = index;

    for (; index < endIndex; index++) {
        if (!IsWhiteSpace (text[index]))
            break;
    }

    return index != before;
}
/// <summary>
/// Skips a parenthesised comment, honouring nesting and backslash escapes.
/// </summary>
/// <remarks>
/// The byte at <paramref name="index"/> is taken to be the opening '(' and is skipped without
/// being inspected. A parenthesis preceded by an unescaped backslash does not affect nesting.
/// </remarks>
/// <returns>
/// <c>true</c> if the matching ')' was found (with <paramref name="index"/> just past it);
/// <c>false</c> if the range ended before the comment was closed.
/// </returns>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">The position of the opening parenthesis; advanced past the comment.</param>
/// <param name="endIndex">The exclusive upper bound of the range to read.</param>
static bool SkipComment (byte[] text, ref int index, int endIndex)
{
    bool escaped = false;
    int depth = 1;

    // step over the opening '('
    index++;

    while (index < endIndex && depth > 0) {
        byte c = text[index];

        if (c == (byte) '\\') {
            // a backslash toggles the escape state, so "\\" is a literal backslash
            escaped = !escaped;
        } else {
            if (!escaped) {
                if (c == (byte) '(')
                    depth++;
                else if (c == (byte) ')')
                    depth--;
            }

            // an escape only ever applies to the single byte that follows it
            escaped = false;
        }

        index++;
    }

    return depth == 0;
}
/// <summary>
/// Advances <paramref name="index"/> past any mix of whitespace and parenthesised comments.
/// </summary>
/// <returns>
/// <c>true</c> once the next byte is neither whitespace nor the start of a comment (or the
/// range is exhausted); <c>false</c> if a comment was left unterminated.
/// </returns>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">The current position; advanced past whitespace and comments.</param>
/// <param name="endIndex">The exclusive upper bound of the range to read.</param>
static bool SkipCommentsAndWhiteSpace (byte[] text, ref int index, int endIndex)
{
    for (;;) {
        SkipWhiteSpace (text, ref index, endIndex);

        // stop as soon as the next byte does not open a comment
        if (index >= endIndex || text[index] != (byte) '(')
            return true;

        if (!SkipComment (text, ref index, endIndex))
            return false;
    }
}
/// <summary>
/// Splits a range of the buffer into date tokens.
/// </summary>
/// <remarks>
/// Whitespace and parenthesised comments between tokens are skipped. A token starts at the
/// first remaining byte and runs up to (not including) the next delimiter; its flags are the
/// union of the flags of its bytes. Tokenizing stops early at an unterminated comment.
/// </remarks>
/// <returns>The tokens, in the order they appear.</returns>
/// <param name="text">The buffer to tokenize.</param>
/// <param name="startIndex">The index of the first byte to read.</param>
/// <param name="length">The number of bytes to read.</param>
static IEnumerable<DateToken> TokenizeDate (byte[] text, int startIndex, int length)
{
    int limit = startIndex + length;
    int pos = startIndex;

    while (pos < limit && SkipCommentsAndWhiteSpace (text, ref pos, limit) && pos < limit) {
        // the first byte seeds the token's flags, even if it is itself a delimiter
        DateTokenFlags flags = datetok[text[pos]];

        if (flags != DateTokenFlags.None) {
            int tokenStart = pos;

            // extend the token up to the next delimiter
            for (pos++; pos < limit && !IsTokenDelimeter (text[pos]); pos++)
                flags |= datetok[text[pos]];

            yield return new DateToken (flags, tokenStart, pos - tokenStart);
        }

        // step over the delimiter that ended the token
        pos++;
    }
}
/// <summary>
/// Tries to parse tokens laid out as <c>[weekday] day month year time zone</c>.
/// </summary>
/// <remarks>
/// The weekday is optional, and its value is not checked against the date. A token in the
/// zone position that is not a recognised timezone is treated as an offset of zero.
/// </remarks>
/// <returns><c>true</c> if the tokens form a valid date, <c>false</c> otherwise.</returns>
/// <param name="tokens">The tokens produced by the tokenizer.</param>
/// <param name="text">The buffer the tokens refer to.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
static bool TryParseStandardDateFormat (IList<DateToken> tokens, byte[] text, out DateTimeOffset date)
{
    date = new DateTimeOffset ();

    // day, month, year, time and zone are required...
    if (tokens.Count < 5)
        return false;

    DayOfWeek ignoredWeekday;
    int position = 0;

    // ...and a leading weekday makes it six
    if (TryGetWeekday (tokens[0], text, out ignoredWeekday)) {
        if (tokens.Count < 6)
            return false;

        position = 1;
    }

    int dayOfMonth, monthNumber, fullYear;
    int hh, mm, ss;

    if (!TryGetDayOfMonth (tokens[position], text, out dayOfMonth)
        || !TryGetMonth (tokens[position + 1], text, out monthNumber)
        || !TryGetYear (tokens[position + 2], text, out fullYear)
        || !TryGetTimeOfDay (tokens[position + 3], text, out hh, out mm, out ss))
        return false;

    int zone;

    if (!TryGetTimeZone (tokens[position + 4], text, out zone))
        zone = 0;

    // wrap the +/-hhmm value into the -1400..+1400 range
    while (zone < -1400)
        zone += 2400;

    while (zone > 1400)
        zone -= 2400;

    var utcOffset = new TimeSpan (zone / 100, zone % 100, 0);

    try {
        date = new DateTimeOffset (fullYear, monthNumber, dayOfMonth, hh, mm, ss, utcOffset);
        return true;
    } catch (ArgumentOutOfRangeException) {
        return false;
    }
}
/// <summary>
/// Best-effort fallback parser for dates that do not follow the standard token order.
/// </summary>
/// <remarks>
/// Every token is examined in order and assigned to the first still-missing field it can
/// satisfy: weekday, month name, time of day, timezone, then bare numbers (year, month, day).
/// Tokens that fit nothing are ignored. Parsing succeeds only when a year, a month and a day
/// were all found and together form a valid <see cref="System.DateTimeOffset"/>.
/// </remarks>
/// <returns><c>true</c> if a date could be assembled, <c>false</c> otherwise.</returns>
/// <param name="tokens">The tokens produced by the tokenizer.</param>
/// <param name="text">The buffer the tokens refer to.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
static bool TryParseUnknownDateFormat (IList<DateToken> tokens, byte[] text, out DateTimeOffset date)
{
int? day = null, month = null, year = null, tzone = null;
int hour = 0, minute = 0, second = 0;
// true while the month was taken from a bare number (1..12) rather than a month name
bool numericMonth = false;
bool haveWeekday = false;
bool haveTime = false;
DayOfWeek weekday;
TimeSpan offset;
for (int i = 0; i < tokens.Count; i++) {
int value;
// The weekday is only recognised once; its value is not used afterwards.
if (!haveWeekday && tokens[i].IsWeekday) {
if (TryGetWeekday (tokens[i], text, out weekday)) {
haveWeekday = true;
continue;
}
}
// A month name wins over an earlier numeric guess: the number that was
// provisionally taken as the month is reinterpreted as the day.
if ((month == null || numericMonth) && tokens[i].IsMonth) {
if (TryGetMonth (tokens[i], text, out value)) {
if (numericMonth) {
numericMonth = false;
day = month;
}
month = value;
continue;
}
}
if (!haveTime && tokens[i].IsTimeOfDay) {
if (TryGetTimeOfDay (tokens[i], text, out hour, out minute, out second)) {
haveTime = true;
continue;
}
}
if (tzone == null && tokens[i].IsTimeZone) {
if (TryGetTimeZone (tokens[i], text, out value)) {
tzone = value;
continue;
}
}
if (tokens[i].IsNumeric) {
// A four-digit number is the year if none has been seen yet; otherwise it is
// tried as an unsigned timezone. Either way the token is consumed.
if (tokens[i].Length == 4) {
if (year == null) {
if (TryGetYear (tokens[i], text, out value))
year = value;
} else if (tzone == null) {
if (TryGetTimeZone (tokens[i], text, out value))
tzone = value;
}
continue;
}
// Numbers of three digits, or of more than four, are ignored.
if (tokens[i].Length > 2)
continue;
// Note: we likely have either YYYY[-/]MM[-/]DD or MM[-/]DD[-/]YY
int endIndex = tokens[i].StartIndex + tokens[i].Length;
int index = tokens[i].StartIndex;
// The token is one or two digits here, so the result of the parse is not checked.
TryParseInt32 (text, ref index, endIndex, out value);
// The first value in 1..12 is provisionally the month...
if (month == null && value > 0 && value <= 12) {
numericMonth = true;
month = value;
continue;
}
// ...the next value in 1..31 is the day...
if (day == null && value > 0 && value <= 31) {
day = value;
continue;
}
// ...and a remaining value of 69 or more is a 19xx year. Smaller leftover values
// are dropped. NOTE(review): TryGetYear maps two-digit values below 70 to 20xx,
// so 69 is treated differently here - confirm which rule is intended.
if (year == null && value >= 69) {
year = 1900 + value;
continue;
}
}
// Unrecognised token: ignore it and move on.
}
if (year == null || month == null || day == null) {
date = new DateTimeOffset ();
return false;
}
// A failed time-of-day attempt may have left partial values behind; clear them.
if (!haveTime)
hour = minute = second = 0;
// Convert the +/-hhmm value to an offset; no timezone means an offset of zero.
if (tzone != null) {
int minutes = tzone.Value % 100;
int hours = tzone.Value / 100;
offset = new TimeSpan (hours, minutes, 0);
} else {
offset = new TimeSpan (0);
}
try {
date = new DateTimeOffset (year.Value, month.Value, day.Value, hour, minute, second, offset);
} catch (ArgumentOutOfRangeException) {
date = new DateTimeOffset ();
return false;
}
return true;
}
/// <summary>
/// Attempts to turn a range of the input buffer into a <see cref="System.DateTimeOffset"/>.
/// </summary>
/// <remarks>
/// The range is tokenized once; the standard rfc822 layout is tried first and the
/// lenient fallback parser second.
/// </remarks>
/// <returns><c>true</c> if a date was parsed, <c>false</c> otherwise.</returns>
/// <param name="buffer">The input buffer.</param>
/// <param name="startIndex">The index of the first byte to parse.</param>
/// <param name="length">The number of bytes to parse.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="buffer"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> and <paramref name="length"/> do not specify
/// a valid range in the byte array.
/// </exception>
public static bool TryParse (byte[] buffer, int startIndex, int length, out DateTimeOffset date)
{
    if (buffer == null)
        throw new ArgumentNullException ("buffer");

    if (startIndex < 0 || startIndex > buffer.Length)
        throw new ArgumentOutOfRangeException ("startIndex");

    if (length < 0 || length > (buffer.Length - startIndex))
        throw new ArgumentOutOfRangeException ("length");

    var tokens = new List<DateToken> (TokenizeDate (buffer, startIndex, length));

    if (!TryParseStandardDateFormat (tokens, buffer, out date) && !TryParseUnknownDateFormat (tokens, buffer, out date)) {
        date = new DateTimeOffset ();
        return false;
    }

    return true;
}
/// <summary>
/// Attempts to turn the input buffer, from the given index to its end, into a
/// <see cref="System.DateTimeOffset"/>.
/// </summary>
/// <remarks>
/// Equivalent to parsing the range that starts at <paramref name="startIndex"/> and
/// covers the rest of the buffer.
/// </remarks>
/// <returns><c>true</c> if a date was parsed, <c>false</c> otherwise.</returns>
/// <param name="buffer">The input buffer.</param>
/// <param name="startIndex">The index of the first byte to parse.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="buffer"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> is not within the range of the byte array.
/// </exception>
public static bool TryParse (byte[] buffer, int startIndex, out DateTimeOffset date)
{
    if (buffer == null)
        throw new ArgumentNullException ("buffer");

    if (startIndex < 0 || startIndex > buffer.Length)
        throw new ArgumentOutOfRangeException ("startIndex");

    // hand the remainder of the buffer to the range-based overload
    return TryParse (buffer, startIndex, buffer.Length - startIndex, out date);
}
/// <summary>
/// Attempts to turn the whole input buffer into a <see cref="System.DateTimeOffset"/>.
/// </summary>
/// <remarks>
/// Equivalent to parsing the range that covers the entire buffer.
/// </remarks>
/// <returns><c>true</c> if a date was parsed, <c>false</c> otherwise.</returns>
/// <param name="buffer">The input buffer.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="buffer"/> is <c>null</c>.
/// </exception>
public static bool TryParse (byte[] buffer, out DateTimeOffset date)
{
    if (buffer == null)
        throw new ArgumentNullException ("buffer");

    // hand the full buffer to the range-based overload
    return TryParse (buffer, 0, buffer.Length, out date);
}
/// <summary>
/// Attempts to turn the given text into a <see cref="System.DateTimeOffset"/>.
/// </summary>
/// <remarks>
/// The text is encoded as UTF-8 and the resulting bytes are parsed as an rfc822 date.
/// </remarks>
/// <returns><c>true</c> if a date was parsed, <c>false</c> otherwise.</returns>
/// <param name="text">The input text.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="text"/> is <c>null</c>.
/// </exception>
public static bool TryParse (string text, out DateTimeOffset date)
{
    if (text == null)
        throw new ArgumentNullException ("text");

    byte[] encoded = Encoding.UTF8.GetBytes (text);

    // hand the encoded bytes to the range-based overload
    return TryParse (encoded, 0, encoded.Length, out date);
}
}
}
在我的应用程序中,我会把一些电子邮件保存到数据库里;由于使用的是 SQL Server,我可以用 PARSE() 以非常简单的方式完成这件事:
SELECT PARSE('Wed, 28 Sep 2022 17:04:44 -0500' AS datetime)