Json文件中存在重复条目.C#
本文关键字:文件 存在 Json | 更新日期: 2023-09-27 17:58:18
我正在创建一个控制台应用程序,它可以读取bbc提要,然后它必须创建带有新闻对象的json文件。它每小时按小时运行。我的问题是它复制了父对象,但我不明白为什么。奇怪的是,如果你在一小时内运行它,它就会工作,但如果你在该小时前5分钟运行它,就会创建这个重复的父元素。
public static void Main(string[] args)
{
// get the starting time of app.
DateTime startingTime = DateTime.Now;
int minute = 1;
int hoursRun = 0;
bool folderCreated = false;
int n = startingTime.AddHours(hoursRun).Hour;
//this will be the folder path for feeds.
string feedsFolderPath = Environment.GetFolderPath(
System.Environment.SpecialFolder.Desktop) + "''feeds";
// uri for feeds.
string bbcURI = "http://feeds.bbci.co.uk/news/uk/rss.xml";
while (true)
{
// check the hour and if it is more than 1 minutes past the hour wait for the next hour.
if (DateTime.Now.Hour == startingTime.AddHours(hoursRun).Hour && DateTime.Now.Minute < minute)
{
//get feeds
News bbcNewsFeed = ProcessFeedHelper.GetFeeds(bbcURI);
// if this is the first run go ahead and create a json file.
if (hoursRun == 0)
{
if (!folderCreated)
{
ProcessFeedHelper.CreateFolder(feedsFolderPath);
folderCreated = true;
}
ProcessFeedHelper.CreateJsonFile(bbcNewsFeed, feedsFolderPath);
}
else
{
//if it is the second time then we need to check for duplicates.
ProcessFeedHelper.RemoveDuplicatesFeeds(bbcNewsFeed, feedsFolderPath);
ProcessFeedHelper.CreateJsonFile(bbcNewsFeed, feedsFolderPath);
}
// if it is the 23rd hour then we need to reset the counter and detele all files in folder.
if (hoursRun == 23)
{
hoursRun = 0;
ProcessFeedHelper.DeleteFilesInDirectory(feedsFolderPath);
}
else
{
//others increment the hoursrun.
hoursRun++;
}
bbcNewsFeed = null;
}
}
}
}
帮助类
public static News GetFeeds(String aURI)
{
News newsFeed;
//instantiate xmlreader and point to uri
using (System.Xml.XmlReader reader = System.Xml.XmlReader.Create(aURI))
{
//load the feed into SyndicationFeed Object
SyndicationFeed feed = SyndicationFeed.Load(reader);
newsFeed = new News();
List<NewsItem> newsItemList = new List<NewsItem>();
foreach (var item in feed.Items)
{
// BBC Feed parent element titles change throughout the day but I have not managed to get them all.
// Could potentially break however, the logic is correct.
// Here we create the parent element object.
if (item.Title.Text == "BBC News Channel" || item.Title.Text == "BBC News at 10")
{
newsFeed.title = item.Title.Text;
newsFeed.link = item.Id;
newsFeed.description = item.Summary.Text;
}
else
{
NewsItem newsItem = new NewsItem();
newsItem.title = item.Title.Text;
newsItem.link = item.Id;
newsItem.description = item.Summary.Text;
newsItem.publishDate = FormatDate(item.PublishDate.ToString());
//Add it to parent object.
newsItemList.Add(newsItem);
}
}
newsFeed.items = newsItemList;
//close reader once we have finished reading feed and return feed object.
reader.Close();
}
return newsFeed;
}
/// <summary>
/// Creates a folder at a specified path.
/// </summary>
/// <param name="aPath"></param>
public static void CreateFolder(string aPath)
{
System.IO.Directory.CreateDirectory(aPath);
}
/// <summary>
/// Creates a Json formatted file based on a news object passed through.
/// </summary>
/// <param name="aNews"></param>
/// <param name="aPath"></param>
public static void CreateJsonFile(News aNews, string aPath)
{
string filePath = aPath + "''" + DateTime.Now.ToString("yyyy-MM-dd-HH") + ".json";
//serialises objects in news Object and appends a file.
string jsonFile = JsonConvert.SerializeObject(aNews, Newtonsoft.Json.Formatting.Indented);
aNews = JsonConvert.DeserializeObject<News>(jsonFile);
jsonFile = JsonConvert.SerializeObject(aNews, Newtonsoft.Json.Formatting.Indented);
File.AppendAllText(@filePath, jsonFile);
Console.WriteLine(jsonFile);
}
/// <summary>
/// Removes Duplicate news articles in new feeds if they are already stored in files.
/// </summary>
/// <param name="aNews"></param>
/// <param name="aPath"></param>
public static void RemoveDuplicatesFeeds(News aNews, string aPath)
{
try
{
//get paths to all files.
string[] filesInDirectory = Directory.GetFiles(aPath);
List<News> newsInFiles = new List<News>();
News newsInFile;
// loop through files in directory.
foreach (string aFile in filesInDirectory)
{
//Read files file and deserialise the news object putting it in a news collection.
StreamReader reader = new StreamReader(aFile);
string fileContent = reader.ReadToEnd();
newsInFile = Newtonsoft.Json.JsonConvert.DeserializeObject<News>(fileContent);
newsInFiles.Add(newsInFile);
reader.Close();
}
//only go in here if there is the recent feed has news items.
if (aNews.items.Count > 0)
{
foreach (News aNewsInFile in newsInFiles)
{
// put news list into new news list so the next loop doesn't crash.
List<NewsItem> tempNewsList = new List<NewsItem>(aNews.items);
foreach (NewsItem aNewsItemFromCurrentFeed in tempNewsList)
{
//check that the current news item is not already in files saved.
var newsItemAlreadyExists = from nItems in aNewsInFile.items
where nItems.title == aNewsItemFromCurrentFeed.title
where nItems.publishDate == aNewsItemFromCurrentFeed.publishDate
where nItems.link == aNewsItemFromCurrentFeed.link
where nItems.description == aNewsItemFromCurrentFeed.description
select nItems;
// if item already stored in file then we must remove it as we don't want it.
if (newsItemAlreadyExists.First() != null)
{
if (aNews.items.Contains(aNewsItemFromCurrentFeed))
{
aNews.items.Remove(aNewsItemFromCurrentFeed);
}
}
}
}
}
}
catch (Exception e)
{
Console.WriteLine("Unexpected Error");
}
}
/// <summary>
/// Deletes all the files in a directory(path specified in parameter).
/// </summary>
/// <param name="directoryPath"></param>
public static void DeleteFilesInDirectory(string directoryPath)
{
try
{
//create files collection and directory object.
List<FileInfo> importFiles = new List<FileInfo>();
DirectoryInfo tempDirectory = new DirectoryInfo(directoryPath);
//get all files in directory.
importFiles.AddRange(tempDirectory.GetFiles());
//if the number of files in the directory are greater than zero then delete them.
if (importFiles.Count > 0)
{
for (int i = 0; i < importFiles.Count; i++)
importFiles[i].Delete();
}
}
catch (Exception e)
{
Console.WriteLine("Unexpected Error");
}
}
/// <summary>
/// Formats a string to ddd, mm yyyy hh:ss gmt
/// </summary>
/// <param name="aDate"></param>
/// <returns></returns>
private static String FormatDate(String aDate)
{
try
{
//split string
char[] delimiters = { ' ', ',', ':', '/' };
string[] tokens = aDate.Split(delimiters);
int year = int.Parse(tokens[2]);
int month = int.Parse(tokens[1]);
int day = int.Parse(tokens[0]);
int hh = int.Parse(tokens[3]);
int mm = int.Parse(tokens[4]);
int ss = int.Parse(tokens[5]);
//create date time object. and add gmt to end of string.
DateTime date = new DateTime(year, month, day, hh, mm, ss);
return date.ToUniversalTime().ToString("r");
}
catch (Exception e)
{
Console.WriteLine("Unexpected Error");
}
return "";
}
}
父类
class News
{
public string title { get; set; }
public string link{ get; set; }
public string description{ get; set; }
public IList<NewsItem> items{ get; set; }
}
儿童类
class NewsItem
{
public string title { get; set; }
public string description { get; set; }
public string link { get; set; }
public string publishDate { get; set; }
}
文件示例(假设没有结束部分)
{
"title": "BBC News Channel",
"link": "http://www.bbc.co.uk/news/10318089",
"description": "Britain's most-watched news channel, delivering breaking news and analysis all day, every day.",
"items": [
{
"title": "Dover ferry port chaos leads to 14-hour traffic jams",
"description": "Delays at the Port of Dover have caused up to 14-hour tailbacks on the A20 /M20 with Kent Police warning disruption could last for another two days.",
"link": "http://www.bbc.co.uk/news/uk-england-kent-36873632",
"publishDate": "Sat, 23 Jul 2016 19:38:36 GMT"
}, ]
} {
"title": "BBC News Channel",
"link": "http://www.bbc.co.uk/news/10318089",
"description": "Britain's most-watched news channel, delivering breaking news and analysis all day, every day.",
"items": []
}
我认为问题可能是这里的竞赛条件:
if (DateTime.Now.Hour == startingTime.AddHours(hoursRun).Hour && DateTime.Now.Minute < minute)
假设你在8:59开始这个程序,正如我在上面的评论中指出的,它将小时设为8,分钟设为0。你可能认为这在23个小时左右不会发生,但是。。。
假设在8:59:59.999,此条件被检查为DateTime.Now.Hour == startingTime.AddHours(hoursRun).Hour
并返回true,因为小时当前为8。因此执行继续检查下一个条件:DateTime.Now.Minute < minute
。时间已经过去了,所以检查条件时已经是9点了。因此,这两个条件都成立,并且代码被执行。(创建一个名为2016-07-23-09.json
的文件。)
现在hoursRun
递增,所以现在是9。
循环的下一次迭代,时间大约是9:00:05。这两个条件都为true(小时为9,分钟为0),因此代码将再次运行,并附加到同一个文件(2016-07-23-09.json
)。
如果我的直觉是正确的,那么可能最小的解决方案就是这样做,这可以确保你检查的是同一时间的小时和分钟成分:
while (true)
{
var now = DateTime.Now;
if (now.Hour == startingTime.AddHours(hoursRun).Hour && now.Minute < minute)
{
我还建议在while
循环中放入sleep语句。。。在这个繁忙的循环中,您可能消耗了大量的CPU。
编辑
哦,还有,你可能一开始就不想等23个小时你可以在任何地方使用+1
作为一个简单的修复方法(尽管这意味着如果你在8:00运行程序,它将等到9:00才能写入第一个文件)。
编辑2
如果你不在乎"按小时运行",这可能是一种更简单的构建循环的方法:
DateTime lastRun = DateTime.MinValue;
while (true)
{
// sleep for 10 minutes at a time until an hour has passed
while ((DateTime.UtcNow - lastRun) < TimeSpan.FromHours(1))
{
Thread.Sleep(TimeSpan.FromMinutes(10));
}
// do work in here
// remember the last time we did work
lastRun = DateTime.UtcNow;
}