通过多个线程同时处理数据

本文关键字:处理 数据 线程 | 更新日期: 2023-09-27 18:36:55

我们有一个应用程序,它定期接收彩信,应该回复它们。

我们目前使用单个线程执行此操作,首先接收消息,然后逐个处理它们。这可以完成工作,但速度很慢。

因此,我们现在正在考虑执行相同的过程,但必须使用多个线程。

有什么简单的方法可以允许并行处理传入的记录,同时避免两个线程错误地处理同一记录?

通过多个线程同时处理数据

有什么简单的方法可以允许并行处理传入的记录,同时避免两个线程错误地处理同一记录?

是的,这实际上并不太难。你想做的事情被称为"生产者-消费者模式"。

如果你的消息接收器一次只能处理一个线程,但你的消息"处理器"可以同时处理多条消息,你只需要使用BlockingCollection来存储需要处理的工作

/// <summary>
/// Queues incoming messages and processes them concurrently on a dedicated
/// background thread, following the producer-consumer pattern: producers call
/// <see cref="ReceiveMessage"/>; a single consumer thread drains the queue via
/// Parallel.ForEach with a configurable degree of parallelism.
/// </summary>
public sealed class MessageProcessor : IDisposable
{
    /// <summary>
    /// Creates a processor with an unlimited degree of parallelism (-1).
    /// </summary>
    public MessageProcessor() 
        : this(-1)
    {   
    }

    /// <summary>
    /// Creates a processor that uses at most <paramref name="maxThreadsForProcessing"/>
    /// threads to process queued messages concurrently.
    /// </summary>
    /// <param name="maxThreadsForProcessing">
    /// Maximum degree of parallelism for message processing, or -1 for unlimited.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when the value is 0 or less than -1. Validating here is important:
    /// ParallelOptions.MaxDegreeOfParallelism rejects such values, and without this
    /// check the failure would only surface later as an unobserved exception on the
    /// background processing thread instead of at the call site.
    /// </exception>
    public MessageProcessor(int maxThreadsForProcessing)
    {
        if (maxThreadsForProcessing == 0 || maxThreadsForProcessing < -1)
            throw new ArgumentOutOfRangeException(
                "maxThreadsForProcessing",
                "Value must be a positive number, or -1 for unlimited parallelism.");
        _maxThreadsForProcessing = maxThreadsForProcessing;
        _messages = new BlockingCollection<Message>();
        _cts = new CancellationTokenSource();
        _messageProcessorThread = new Thread(ProcessMessages);
        _messageProcessorThread.IsBackground = true;
        _messageProcessorThread.Name = "Message Processor Thread";
        _messageProcessorThread.Start();
    }

    /// <summary>
    /// Maximum degree of parallelism used by the processing loop (-1 = unlimited).
    /// </summary>
    public int MaxThreadsForProcessing
    {
        get { return _maxThreadsForProcessing; }
    }

    private readonly BlockingCollection<Message> _messages;
    private readonly CancellationTokenSource _cts;
    private readonly Thread _messageProcessorThread;
    private bool _disposed = false;
    private readonly int _maxThreadsForProcessing;

    /// <summary>
    /// Add a new message to be queued up and processed in the background.
    /// </summary>
    /// <param name="message">The message to enqueue for processing.</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown by the underlying collection if called after <see cref="Stop"/>
    /// has marked the queue complete.
    /// </exception>
    public void ReceiveMessage(Message message)
    {
       _messages.Add(message);
    }

    /// <summary>
    /// Signals the system to stop processing messages and blocks until the
    /// background processing thread has exited.
    /// </summary>
    /// <param name="finishQueue">Should the queue of messages waiting to be processed be allowed to finish</param>
    public void Stop(bool finishQueue)
    {
        // No more messages can be added after this point; the consuming
        // enumerable completes once the remaining queue is drained.
        _messages.CompleteAdding();
        if(!finishQueue)
            _cts.Cancel();
        //Wait for the message processor thread to finish its work.
        _messageProcessorThread.Join();
    }

    /// <summary>
    /// The background thread that processes messages in the system.
    /// GetConsumingEnumerable() blocks while the queue is empty and finishes
    /// after CompleteAdding() has been called and the queue is drained.
    /// </summary>
    private void ProcessMessages()
    {
        try
        {
            Parallel.ForEach(_messages.GetConsumingEnumerable(),
                         new ParallelOptions()
                         {
                             CancellationToken = _cts.Token,
                             MaxDegreeOfParallelism = MaxThreadsForProcessing
                         },
                         ProcessMessage);
        }
        catch (OperationCanceledException)
        {
            //Don't care that it happened, just don't want it to bubble up as an unhandled exception.
        }
    }

    private void ProcessMessage(Message message, ParallelLoopState loopState)
    {
        //Here be dragons! (or your code to process a message, your choice :-))
        //Use if(_cts.Token.IsCancellationRequested || loopState.ShouldExitCurrentIteration) to test if 
        // we should quit out of the function early for a graceful shutdown.
    }

    /// <summary>
    /// Drains the queue (blocking until all queued messages are processed) and
    /// releases the cancellation source and the collection. For a faster,
    /// non-draining shutdown call Stop(false) before disposing.
    /// </summary>
    public void Dispose()
    {
        if(!_disposed)
        {
            if(_cts != null && _messages != null && _messageProcessorThread != null)
                Stop(true); //This line will block till all queued messages have been processed, if you want it to be quicker you need to call `Stop(false)` before you dispose the object.
            if(_cts != null)
                _cts.Dispose();
            if(_messages != null)
                _messages.Dispose();
            GC.SuppressFinalize(this);
           _disposed = true;
        }
    }

    ~MessageProcessor()
    {
        //Nothing to do, just making FXCop happy.
    }
}

我强烈建议您阅读免费书籍《并行编程模式》,它对此进行了一些详细介绍。有一整节详细解释了生产者-消费者模型。


更新:将 GetConsumingEnumerable() 与 Parallel.ForEach 一起使用存在一些性能问题(默认分区器会进行分块缓冲),建议改用 ParallelExtensionsExtras 库中新的扩展方法 GetConsumingPartitioner():

/// <summary>
/// Wraps a BlockingCollection in a partitioner suitable for Parallel.ForEach,
/// avoiding the chunk-buffering behavior of the default partitioner over
/// GetConsumingEnumerable().
/// </summary>
public static Partitioner<T> GetConsumingPartitioner<T>(
    this BlockingCollection<T> collection)
{
    var partitioner = new BlockingCollectionPartitioner<T>(collection);
    return partitioner;
}
/// <summary>
/// A Partitioner over a BlockingCollection that hands each worker items one at
/// a time via GetConsumingEnumerable(), so Parallel.ForEach does not buffer
/// chunks of items per partition.
/// </summary>
private class BlockingCollectionPartitioner<T> : Partitioner<T>
{
    // Only assigned in the constructor, so mark it readonly.
    private readonly BlockingCollection<T> _collection;

    /// <summary>
    /// Wraps the given collection.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when collection is null.</exception>
    internal BlockingCollectionPartitioner(
        BlockingCollection<T> collection)
    {
        if (collection == null)
            throw new ArgumentNullException("collection");
        _collection = collection;
    }

    /// <summary>
    /// Dynamic partitioning is supported (and is what Parallel.ForEach prefers).
    /// </summary>
    public override bool SupportsDynamicPartitions {
        get { return true; }
    }

    /// <summary>
    /// Produces the requested number of static partitions, each backed by an
    /// enumerator over the same shared dynamic partition source.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when partitionCount is less than 1.</exception>
    public override IList<IEnumerator<T>> GetPartitions(
        int partitionCount)
    {
        if (partitionCount < 1)
            throw new ArgumentOutOfRangeException("partitionCount");
        var dynamicPartitioner = GetDynamicPartitions();
        return Enumerable.Range(0, partitionCount).Select(_ =>
            dynamicPartitioner.GetEnumerator()).ToArray();
    }

    /// <summary>
    /// Each enumerator obtained from this enumerable consumes (removes) items
    /// from the collection, so concurrent enumerators never see the same item.
    /// </summary>
    public override IEnumerable<T> GetDynamicPartitions()
    {
        return _collection.GetConsumingEnumerable();
    }
}