require_relative "flatten_enumerable"
class Chef
module ChefFS
class Parallelizer
class ParallelEnumerable
include Enumerable
# options:
# :ordered [true|false] - whether the output should stay in the same order
# as the input (even though it may not actually be processed in that
# order). Default: true
# :stop_on_exception [true|false] - if true, when an exception occurs in either
# input or output, we wait for any outstanding processing to complete,
# but will not process any new inputs. Default: false
# :main_thread_processing [true|false] - whether the main thread pulling
# on each() is allowed to process inputs. Default: true
# NOTE: If you set this to false, parallelizer.kill will stop each()
# in its tracks, so you need to know for sure that won't happen.
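# Illustrative usage (a sketch, not part of the original file; it assumes
# these objects are normally built via Chef::ChefFS::Parallelizer#parallelize,
# which forwards its task queue, the input enumerable, the options, and the
# block to new):
#
#   parallelizer.parallelize([1, 2, 3], ordered: false) { |n| n * 2 }.each do |result|
#     puts result   # 2, 4, 6 in some order, since :ordered is false
#   end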
def initialize(parent_task_queue, input_enumerable, options = {}, &block)
@parent_task_queue = parent_task_queue
@input_enumerable = input_enumerable
@options = options
@block = block
@unconsumed_input = Queue.new
@in_process = {}
@unconsumed_output = Queue.new
end
attr_reader :parent_task_queue
attr_reader :input_enumerable
attr_reader :options
attr_reader :block
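# Yields just the block's output for each input, hiding index, input, and
# result type. Exception handling follows each_with_input below.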
def each
each_with_input do |output, index, input, type|
yield output
end
end
def each_with_index
each_with_input do |output, index, input|
yield output, index
end
end
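# Yields output, index, and the original input for each item. For ordered
# runs an exception is raised as soon as it is reached in input order; for
# unordered runs (:ordered => false) the first exception is remembered and
# re-raised only after all remaining outputs have been yielded.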
def each_with_input
exception = nil
each_with_exceptions do |output, index, input, type|
if type == :exception
if @options[:ordered] == false
exception ||= output
else
raise output
end
else
yield output, index, input
end
end
raise exception if exception
end
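# Yields raw [output, index, input, type] tuples, where type is :result or
# :exception and output is the exception object when type is :exception.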
def each_with_exceptions(&block)
if @options[:ordered] == false
each_with_exceptions_unordered(&block)
else
each_with_exceptions_ordered(&block)
end
end
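# Processes every input to completion without yielding results to the
# caller, then raises the first exception that was encountered, if any.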
def wait
exception = nil
each_with_exceptions_unordered do |output, index, input, type|
exception ||= output if type == :exception
end
raise exception if exception
end
# Enumerable methods
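# Builds a new ParallelEnumerable over a narrowed input (same task queue,
# options, and block); used by first/drop/take and the lazy wrapper below.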
def restricted_copy(enumerable)
ParallelEnumerable.new(@parent_task_queue, enumerable, @options, &@block)
end
alias :original_count :count
def count(*args, &block)
if args.size == 0 && block.nil?
@input_enumerable.count
else
original_count(*args, &block)
end
end
def first(n = nil)
if n
restricted_copy(@input_enumerable.first(n)).to_a
else
first(1)[0]
end
end
def drop(n)
restricted_copy(@input_enumerable.drop(n)).to_a
end
def flatten(levels = nil)
FlattenEnumerable.new(self, levels)
end
def take(n)
restricted_copy(@input_enumerable.take(n)).to_a
end
if Enumerable.method_defined?(:lazy)
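# Lazy wrapper: drop and take are applied lazily to the *input* enumerable,
# so skipped items are never queued for parallel processing, while every
# other lazy method is delegated to the real Enumerator::Lazy.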
class RestrictedLazy
def initialize(parallel_enumerable, actual_lazy)
@parallel_enumerable = parallel_enumerable
@actual_lazy = actual_lazy
end
def drop(*args, &block)
input = @parallel_enumerable.input_enumerable.lazy.drop(*args, &block)
@parallel_enumerable.restricted_copy(input)
end
def take(*args, &block)
input = @parallel_enumerable.input_enumerable.lazy.take(*args, &block)
@parallel_enumerable.restricted_copy(input)
end
def method_missing(method, *args, &block)
@actual_lazy.send(method, *args, &block)
end
end
alias :original_lazy :lazy
def lazy
RestrictedLazy.new(self, original_lazy)
end
end
private
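# Core pump. For each input it queues [input, index] and schedules
# process_one on the parent task queue, yielding any finished
# [output, index, input, type] tuples along the way; once the input is
# exhausted it keeps draining outputs (processing on the main thread when
# allowed) until finished? is true. An exception raised by the input
# enumeration itself is recorded as an :exception tuple with a nil index.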
def each_with_exceptions_unordered
if @each_running
raise "each() called on parallel enumerable twice simultaneously! Bad mojo"
end
@each_running = true
begin
# Grab all the inputs, yielding any responses during enumeration
# in case the enumeration itself takes time
begin
@input_enumerable.each_with_index do |input, index|
@unconsumed_input.push([ input, index ])
@parent_task_queue.push(method(:process_one))
stop_processing_input = false
until @unconsumed_output.empty?
output, index, input, type = @unconsumed_output.pop
yield output, index, input, type
if type == :exception && @options[:stop_on_exception]
stop_processing_input = true
break
end
end
if stop_processing_input
break
end
end
rescue
# We still want to wait for the rest of the outputs to process
@unconsumed_output.push([$!, nil, nil, :exception])
if @options[:stop_on_exception]
@unconsumed_input.clear
end
end
until finished?
# yield thread to others (for 1.8.7)
if @unconsumed_output.empty?
sleep(0.01)
end
yield @unconsumed_output.pop until @unconsumed_output.empty?
# If no one is working on our tasks and we're allowed to
# work on them in the main thread, process an input to
# move things forward.
if @in_process.size == 0 && !(@options[:main_thread_processing] == false)
process_one
end
end
rescue
# If we exited early, perhaps due to any? finding a result, we want
# to make sure and throw away any extra results (gracefully) so that
# the next enumerator can start over.
if !finished?
stop
end
raise
ensure
@each_running = false
end
end
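# Ordered variant: buffers tuples by index and yields them only once the
# next expected index is available; an exception from the input enumeration
# (stored under a nil index) is yielded last.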
def each_with_exceptions_ordered
next_to_yield = 0
unconsumed = {}
each_with_exceptions_unordered do |output, index, input, type|
unconsumed[index] = [ output, input, type ]
while unconsumed[next_to_yield]
input_output = unconsumed.delete(next_to_yield)
yield input_output[0], next_to_yield, input_output[1], input_output[2]
next_to_yield += 1
end
end
input_exception = unconsumed.delete(nil)
if input_exception
yield input_exception[0], next_to_yield, input_exception[1], input_exception[2]
end
end
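# Discards queued input, waits for in-flight work to finish, then discards
# any outputs that were produced; used when an enumeration exits early
# (e.g. when any? finds a match) so the next pass starts clean.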
def stop
@unconsumed_input.clear
sleep(0.05) while @in_process.size > 0
@unconsumed_output.clear
end
#
# This is thread safe only if called from the main thread pulling on each().
# The order of these checks is important, as well, to be thread safe.
# 1. If @unconsumed_input.empty? is true, then we will never have any more
# work legitimately picked up.
# 2. If @in_process == 0, then there is no work in process. Because check #1
#    already found the input queue empty (and this is called after the input
#    enumerator is finished), in_process will never go back up. Note that
#    switching #2 and #1 could cause a race, because in_process is
#    incremented *before* consuming input.
# 3. If @unconsumed_output.empty? is true, then we are done with outputs.
# Thus, 1+2 means no more output will ever show up, and 3 means we've passed all
# existing outputs to the user.
#
def finished?
@unconsumed_input.empty? && @in_process.size == 0 && @unconsumed_output.empty?
end
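# Runs on a worker (or the main) thread: marks the thread as in-process,
# pops one input without blocking, and processes it. ThreadError from an
# empty queue is swallowed, since another thread may have taken the last
# item after this task was scheduled.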
def process_one
@in_process[Thread.current] = true
begin
begin
input, index = @unconsumed_input.pop(true)
process_input(input, index)
rescue ThreadError
end
ensure
@in_process.delete(Thread.current)
end
end
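# Calls the user block for one input. Results and exceptions are both
# pushed to @unconsumed_output as tagged tuples rather than raised, and a
# failure clears the remaining input when :stop_on_exception is set.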
def process_input(input, index)
begin
output = @block.call(input)
@unconsumed_output.push([ output, index, input, :result ])
rescue StandardError, ScriptError
if @options[:stop_on_exception]
@unconsumed_input.clear
end
@unconsumed_output.push([ $!, index, input, :exception ])
end
index
end
end
end
end
end