通过多个线程同时处理数据
本文关键字:处理 数据 线程 | 更新日期: 2023-09-27 18:36:55
我们有一个应用程序,它会定期接收彩信,并且需要对这些彩信逐一进行回复。
我们目前使用单个线程执行此操作,首先接收消息,然后逐个处理它们。这可以完成工作,但速度很慢。
因此,我们现在正在考虑改用多个线程来执行同样的流程。
有什么简单的方法可以允许并行处理传入的记录,同时避免两个线程错误地处理同一记录?
> 有什么简单的方法可以允许并行处理传入的记录,同时避免两个线程错误地处理同一记录?
是的,这实际上并不太难,你想做的被称为"生产者-消费者模式"
如果你的消息接收器一次只能由一个线程运行,而你的消息"处理器"可以同时处理多条消息,那么你只需要使用 BlockingCollection 来存放待处理的工作项即可。
/// <summary>
/// Queues incoming messages in a <see cref="BlockingCollection{T}"/> and processes them in
/// parallel from a dedicated background thread (producer-consumer arrangement).
/// </summary>
/// <remarks>
/// The class owns only managed resources, so it deliberately has no finalizer.
/// </remarks>
public sealed class MessageProcessor : IDisposable
{
    private readonly BlockingCollection<Message> _messages;
    private readonly CancellationTokenSource _cts;
    private readonly Thread _messageProcessorThread;
    private readonly int _maxThreadsForProcessing;
    private bool _disposed = false;

    /// <summary>
    /// Creates a processor with a maximum degree of parallelism of -1
    /// (no explicit limit for <see cref="ParallelOptions.MaxDegreeOfParallelism"/>).
    /// </summary>
    public MessageProcessor()
        : this(-1)
    {
    }

    /// <summary>
    /// Creates a processor and immediately starts its background processing thread.
    /// </summary>
    /// <param name="maxThreadsForProcessing">
    /// Value passed to <see cref="ParallelOptions.MaxDegreeOfParallelism"/>: a positive number,
    /// or -1 for no explicit limit.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="maxThreadsForProcessing"/> is 0 or less than -1.
    /// </exception>
    public MessageProcessor(int maxThreadsForProcessing)
    {
        // Validate here, on the caller's thread. ParallelOptions rejects these values, and without
        // this check that exception would be raised on the background thread where nobody catches it.
        if (maxThreadsForProcessing == 0 || maxThreadsForProcessing < -1)
        {
            throw new ArgumentOutOfRangeException(
                "maxThreadsForProcessing",
                maxThreadsForProcessing,
                "Value must be -1 (no limit) or a positive number.");
        }

        _maxThreadsForProcessing = maxThreadsForProcessing;
        _messages = new BlockingCollection<Message>();
        _cts = new CancellationTokenSource();

        _messageProcessorThread = new Thread(ProcessMessages);
        _messageProcessorThread.IsBackground = true;
        _messageProcessorThread.Name = "Message Processor Thread";
        _messageProcessorThread.Start();
    }

    /// <summary>
    /// The degree-of-parallelism limit supplied at construction time.
    /// </summary>
    public int MaxThreadsForProcessing
    {
        get { return _maxThreadsForProcessing; }
    }

    /// <summary>
    /// Add a new message to be queued up and processed in the background.
    /// </summary>
    /// <param name="message">The message to enqueue.</param>
    public void ReceiveMessage(Message message)
    {
        _messages.Add(message);
    }

    /// <summary>
    /// Signals the system to stop processing messages and blocks until the
    /// background thread has exited.
    /// </summary>
    /// <param name="finishQueue">Should the queue of messages waiting to be processed be allowed to finish</param>
    public void Stop(bool finishQueue)
    {
        // No further messages may be added; the consuming enumeration ends once the queue drains.
        _messages.CompleteAdding();

        if (!finishQueue)
        {
            _cts.Cancel();
        }

        // Wait for the message processor thread to finish its work.
        _messageProcessorThread.Join();
    }

    /// <summary>
    /// The background thread that processes messages in the system.
    /// </summary>
    private void ProcessMessages()
    {
        var options = new ParallelOptions()
        {
            CancellationToken = _cts.Token,
            MaxDegreeOfParallelism = MaxThreadsForProcessing
        };

        try
        {
            Parallel.ForEach(_messages.GetConsumingEnumerable(), options, ProcessMessage);
        }
        catch (OperationCanceledException)
        {
            // Don't care that it happened, just don't want it to bubble up as an unhandled exception.
        }
        // NOTE(review): only cancellation is handled here; any exception thrown by ProcessMessage
        // is not caught in this method.
    }

    /// <summary>
    /// Processes a single message; invoked by the parallel loop for each queued message.
    /// </summary>
    /// <param name="message">The message to process.</param>
    /// <param name="loopState">State of the enclosing parallel loop.</param>
    private void ProcessMessage(Message message, ParallelLoopState loopState)
    {
        //Here be dragons! (or your code to process a message, your choice :-))
        //Use if(_cts.Token.IsCancellationRequested || loopState.ShouldExitCurrentIteration) to test if
        // we should quit out of the function early for a graceful shutdown.
    }

    /// <summary>
    /// Stops processing (letting the queue drain) and releases the owned resources.
    /// Calling it more than once has no further effect.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        if (_cts != null && _messages != null && _messageProcessorThread != null)
        {
            // This line will block till all queued messages have been processed; if you want it
            // to be quicker you need to call `Stop(false)` before you dispose the object.
            Stop(true);
        }

        if (_cts != null)
        {
            _cts.Dispose();
        }

        if (_messages != null)
        {
            _messages.Dispose();
        }

        _disposed = true;
    }
}
我强烈建议您阅读免费书籍《并行编程模式》,它对此进行了一些详细介绍。有一整节详细解释了生产者-消费者模型。
更新:将 GetConsumingEnumerable() 与 Parallel.ForEach() 搭配使用时存在一些性能问题,建议改用 ParallelExtensionsExtras 库及其新的扩展方法 GetConsumingPartitioner():
/// <summary>
/// Wraps a blocking collection in a partitioner whose partitions consume items from it.
/// </summary>
/// <typeparam name="T">Type of the items held by the collection.</typeparam>
/// <param name="collection">The collection to consume from.</param>
/// <returns>A new partitioner over <paramref name="collection"/>.</returns>
public static Partitioner<T> GetConsumingPartitioner<T>(this BlockingCollection<T> collection)
{
    Partitioner<T> consumingPartitioner = new BlockingCollectionPartitioner<T>(collection);
    return consumingPartitioner;
}
/// <summary>
/// Partitioner whose partitions all draw items from the consuming enumerable
/// of a single <see cref="BlockingCollection{T}"/>.
/// </summary>
private class BlockingCollectionPartitioner<T> : Partitioner<T>
{
    // The collection every partition consumes from.
    private readonly BlockingCollection<T> _collection;

    /// <summary>Creates a partitioner over the given collection.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="collection"/> is null.</exception>
    internal BlockingCollectionPartitioner(BlockingCollection<T> collection)
    {
        if (collection == null)
        {
            throw new ArgumentNullException("collection");
        }

        _collection = collection;
    }

    /// <summary>Always true: this partitioner overrides <see cref="GetDynamicPartitions"/>.</summary>
    public override bool SupportsDynamicPartitions
    {
        get { return true; }
    }

    /// <summary>
    /// Returns <paramref name="partitionCount"/> enumerators, each obtained from the same
    /// consuming enumerable of the underlying collection.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="partitionCount"/> is less than 1.</exception>
    public override IList<IEnumerator<T>> GetPartitions(int partitionCount)
    {
        if (partitionCount < 1)
        {
            throw new ArgumentOutOfRangeException("partitionCount");
        }

        IEnumerable<T> sharedSource = GetDynamicPartitions();
        var partitions = new IEnumerator<T>[partitionCount];
        for (int index = 0; index < partitions.Length; index++)
        {
            partitions[index] = sharedSource.GetEnumerator();
        }

        return partitions;
    }

    /// <summary>Returns the consuming enumerable of the underlying collection.</summary>
    public override IEnumerable<T> GetDynamicPartitions()
    {
        return _collection.GetConsumingEnumerable();
    }
}