# brain.rb
require 'addressable/uri'

require File.expand_path('../../lib/job', __FILE__)
require File.expand_path('../../lib/pipeline_info', __FILE__)
require File.expand_path('../summary', __FILE__)
require File.expand_path('../post_registration_hook', __FILE__)
require File.expand_path('../add_ignore_sets', __FILE__)
require File.expand_path('../job_status_generation', __FILE__)
require File.expand_path('../job_options_parser', __FILE__)
require File.expand_path('../pipeline_options', __FILE__)

# Mix the JobStatusGeneration module into Job.
Job.send(:include, JobStatusGeneration)
classBrain
includePostRegistrationHook
includeAddIgnoreSets
includePipelineOptions
attr_reader:couchdb
attr_reader:redis
attr_reader:schemes
attr_reader:url_pattern
# Stores the collaborators and builds the URL-matching pattern.
#
# schemes - collection of URI scheme strings the bot accepts.
# redis   - object used for Redis commands (exposed via the redis reader).
# couchdb - object used for CouchDB lookups (exposed via the couchdb reader).
def initialize(schemes, redis, couchdb)
  @couchdb = couchdb
  @redis = redis
  @schemes = schemes

  # Escape each scheme so one containing a regexp metacharacter
  # (e.g. "svn+ssh") is matched literally rather than as a quantifier.
  alternatives = schemes.map { |scheme| Regexp.escape(scheme) }.join('|')
  @url_pattern = %r{(?:#{alternatives})://.+}
end
# Looks up the job for ident and yields it to the block. When no job is
# found, replies with an apology instead and does not yield.
def find_job(ident, m)
  job = Job.from_ident(ident, redis)

  if job
    yield job
  else
    reply m, "Sorry, I don't know anything about #{ident}."
  end
end
# Validates an archive request and, if every check passes, registers and
# queues a new job for the target.
#
# m        - the incoming message; used for permission checks and replies.
# target   - the URL text supplied by the user.
# params   - raw option text, parsed by JobOptionsParser.
# depth    - :inf (default) or :shallow; :shallow skips the permission and
#            queue-length checks.
# url_file - passed through to Job#register; when true with depth :inf the
#            request is restricted to ops.
#
# Rejections reply with an explanation and return early, except the
# op-only url_file check, which returns silently.
def request_archive(m, target, params, depth = :inf, url_file = false)
  # Check only !a; allow !ao even without voice or op
  if depth == :inf
    return unless authorized?(m)

    # Lock !a < FILE to ops for now: it's a very niche thing.
    return if url_file && !op?(m)

    # Allow only ops to add jobs to the queue if there are 5 or more jobs pending
    if redis.llen('pending') >= 5 && !op?(m)
      reply m, "Sorry, all pipelines are currently full, and only opped users can add to the queue beyond 5 pending. Please try again later."
      return
    end
  end

  # Addressable raises on malformed input; treat that exactly like a URL we
  # cannot make sense of instead of letting the exception escape.
  uri = begin
    parsed = Addressable::URI.parse(target)
    parsed && parsed.normalize
  rescue Addressable::URI::InvalidURIError
    nil
  end

  if uri.nil? || !uri.absolute? || uri.host.nil?
    reply m, "Sorry, I don't understand the URL you provided."
    return
  end

  # Parse parameters. If we run into an unknown option, report it and don't
  # run the job.
  begin
    h = JobOptionsParser.new.parse(params)
  rescue JobOptionsParser::UnknownOptionError => e
    reply m, "Sorry, I can't parse that. The error: #{e.message}."
    return
  end

  # youtube-dl is broken: https://github.com/ArchiveTeam/ArchiveBot/issues/291
  if h[:youtube_dl]
    reply m, 'Sorry, youtube-dl is broken at the moment.'
    return
  end

  # Recursive retrieval with youtube-dl is bad juju.
  # NOTE: unreachable while the blanket youtube-dl rejection above is in
  # place; kept so it takes effect again once that rejection is removed.
  if h[:youtube_dl] && depth == :inf
    reply m, 'Sorry, recursive retrieval with youtube-dl is not supported at this time.'
    reply m, 'Please consider making a list of URLs to video pages and using !ao < URL.'
    return
  end

  if h[:large] && (h[:pipeline] || depth == :shallow)
    reply m, '--large has no effect when combined with --pipeline or !ao.'
    return
  end

  # Is the URI in our list of recognized schemes?
  unless schemes.include?(uri.scheme)
    reply m, "Sorry, I can only handle #{schemes.join(', ')}."
    return
  end

  job = Job.new(uri, redis)

  # Is the job already known?
  if job.exists?
    reply m, "Job for #{uri} already exists."

    # OK, print out its status.
    job.amplify
    status = job.to_status
    reply m, *status
    return
  end

  # Did we want a custom user-agent? If so, check that the requested
  # user-agent alias is known. If it is, set it on the job; if it isn't,
  # report the problem and stop.
  user_agent = nil
  if h[:user_agent_alias]
    ua_alias = h[:user_agent_alias]
    user_agent = couchdb.user_agent_for_alias(ua_alias)

    unless user_agent
      reply m, %Q{Sorry, I don't know what the user agent "#{ua_alias}" is.}
      return
    end
  end

  # OK, add the job.
  batch_reply(m) do
    job.register(depth, m.user.nick, m.channel.name, user_agent, url_file)

    urls_in = url_file ? 'URLs in ' : ''
    if depth == :shallow
      reply m, "Queued #{urls_in}#{uri.to_s} for archival without recursion."
    else
      reply m, "Queued #{urls_in}#{uri.to_s}."
    end

    reply m, "Using user-agent #{user_agent}." if user_agent
    reply m, "Use !status #{job.ident} for updates, !abort #{job.ident} to abort."

    add_note(m, job, h[:explain], false) if h[:explain]
    # A single --delay value is used as both the minimum and the maximum.
    set_delay(job, h[:delay], h[:delay], m) if h[:delay]
    set_concurrency(job, h[:concurrency], m) if h[:concurrency]

    run_post_registration_hooks(m, job, h)

    # Apply the global ignore set and turn off ignore reports without
    # emitting the replies those helpers normally produce.
    silence do
      add_ignore_sets(m, job, ['global'], false)
      toggle_ignores(m, job, false)
    end

    pipeline = h[:pipeline]
    large = h[:large]
    job.queue(pipeline, large)
  end
end
# Replies with a link to the archive viewer, searching for the host of url.
# If url cannot be parsed or has no host, replies with an apology instead of
# raising or linking to an empty query.
def request_status_by_url(m, url)
  host = begin
    parsed = Addressable::URI.parse(url)
    parsed && parsed.normalize.host
  rescue Addressable::URI::InvalidURIError
    nil
  end

  if host.nil?
    reply m, "Sorry, I don't understand the URL you provided."
    return
  end

  reply m, "See http://archive.fart.website/archivebot/viewer/?q=#{host}"
end
# Replies with each line of the job's status (job.to_status), one message
# per element.
def request_status(m, job)
  reply m, *job.to_status
end
# Sends the sender a private, numbered list of pending jobs (URL and ident).
# With more than 10 pending jobs only a pointer to the dashboard is sent.
def show_pending(m)
  if redis.llen('pending') > 10
    privmsg m, "Too many pending jobs to reply to !pending, please use the dashboard instead."
    return
  end

  # The list is reversed before numbering.
  idents = redis.lrange('pending', 0, -1).reverse

  # Fetch every job URL in one pipelined round trip; the block's hget calls
  # produce the array returned by pipelined.
  urls = redis.pipelined do
    idents.each { |ident| redis.hget(ident, 'url') }
  end

  privmsg m, "#{urls.length} pending jobs:"

  idents.zip(urls).each.with_index(1) do |(ident, url), position|
    privmsg m, "#{position}. #{url} (#{ident})"
  end
end
# aka !explain
#
# Attaches note to job and confirms it. With need_auth (the default) the
# sender must pass authorized? first; callers that already checked pass false.
def add_note(m, job, note, need_auth = true)
  return if need_auth && !authorized?(m)

  job.add_note(note)
  reply m, %Q{Added note "#{note}" to job #{job.ident}.}
end
# Replies with the pipeline (nickname and id) the job is on, or says that
# the job is not on a pipeline.
def whereis(m, job)
  pipeline = PipelineInfo.new(job.redis).from_pipeline_id(job.pipeline_id)

  # NOTE(review): the nil check on the lookup result is defensive; confirm
  # whether from_pipeline_id can return nil.
  if pipeline.nil? || pipeline.nickname.nil?
    reply m, %Q{Job #{job.ident} is not on a pipeline.}
  else
    reply m, %Q{Job #{job.ident} is on pipeline "#{pipeline.nickname}" (#{pipeline.id}).}
  end
end
# Aborts the job on behalf of an authorized sender and confirms it.
def initiate_abort(m, job)
  return unless authorized?(m)

  job.abort
  reply m, "Initiated abort for #{job.url}."
end
# Adds one ignore pattern to the job for an authorized sender and confirms it.
def add_ignore_pattern(m, job, pattern)
  return unless authorized?(m)

  job.add_ignore_pattern(pattern)
  reply m, "Added ignore pattern #{pattern} to job #{job.ident}."
end
# Turns ignore pattern reports for the job on or off and says which.
# No permission check is made here.
def toggle_ignores(m, job, enabled)
  job.toggle_ignores(enabled)

  if enabled
    reply m, "Showing ignore pattern reports for job #{job.ident}."
  else
    reply m, "Suppressing ignore pattern reports for job #{job.ident}."
  end
end
# Resolves the named ignore sets through couchdb and adds their patterns to
# the job, then reports what was added and which names were unknown.
#
# names              - an Array of set names, or a comma-separated String.
# need_authorization - when true (default) the sender must pass authorized?.
def add_ignore_sets(m, job, names, need_authorization = true)
  return if need_authorization && !authorized?(m)

  # Bail out on nil before trying to split it as a String.
  return if names.nil?

  unless names.respond_to?(:each)
    names = names.split(',').map(&:strip).reject(&:empty?)
  end
  return if names.empty?

  # Each pair is [set name, pattern].
  ignore_pairs = couchdb.resolve_ignore_sets(names)
  resolved = ignore_pairs.map(&:first).uniq
  patterns = ignore_pairs.map(&:last)

  job.add_ignore_patterns(patterns) unless patterns.empty?

  # Only claim an addition when at least one set actually resolved.
  if resolved.length > 1
    reply m, %Q{Added #{patterns.length} patterns from ignore sets {#{resolved.join(', ')}} to job #{job.ident}.}
  elsif resolved.length == 1
    reply m, %Q{Added #{patterns.length} patterns from ignore set #{resolved.first} to job #{job.ident}.}
  end

  unknown = names - resolved
  reply m, "The following sets are unknown: #{unknown.join(', ')}" unless unknown.empty?
end
# Expires the job, for ops only. authorized? replies to senders with neither
# op nor voice; a voiced non-op is turned away silently by op?.
def expire(m, job)
  return unless authorized?(m) && op?(m)

  # A negative ttl means no expiry timer has been set on the job yet.
  if job.ttl < 0
    reply m, "Job #{job.ident} does not yet have an expiry timer."
  else
    job.expire
    reply m, "Job #{job.ident} expired."
  end
end
# Removes one ignore pattern from the job for an authorized sender and
# confirms it.
def remove_ignore_pattern(m, job, pattern)
  return unless authorized?(m)

  job.remove_ignore_pattern(pattern)
  reply m, "Removed ignore pattern #{pattern} from job #{job.ident}."
end
# Sets the job's inter-request delay range (reported in ms) for an
# authorized sender, after delay_ok? has accepted the range.
def set_delay(job, min, max, m)
  return unless authorized?(m)
  return unless delay_ok?(min, max, m)

  job.set_delay(min, max)
  reply m, "Inter-request delay for job #{job.ident} set to [#{min}, #{max}] ms."
end
# Sets the number of workers the job uses, for an authorized sender, after
# concurrency_ok? has accepted the level.
def set_concurrency(job, level, m)
  return unless authorized?(m)
  return unless concurrency_ok?(level, m)

  job.set_concurrency(level)

  # level may arrive as a String, so compare numerically when picking the
  # noun; the space before the noun keeps the message from reading "4workers".
  noun = level.to_i == 1 ? 'worker' : 'workers'
  reply m, "Job #{job.ident} set to use #{level} #{noun}."
end
# Switches the job to Yahoo! mode (job.yahoo) for an authorized sender and
# confirms it.
def yahoo(job, m)
  return unless authorized?(m)

  job.yahoo
  reply m, "Job #{job.ident} set to Yahoo! mode."
end
# Runs a Summary over redis and replies with the Summary object itself.
def request_summary(m)
  summary = Summary.new(redis)
  summary.run
  reply m, summary
end
private
# True when the sender is opped or voiced in the message's channel.
# Otherwise replies with an explanation and returns false.
def authorized?(m)
  channel = m.channel

  # NOTE(review): the nil check covers a message with no channel
  # (presumably a private message) — confirm that can happen here.
  return true if channel && (channel.opped?(m.user) || channel.voiced?(m.user))

  reply m, "Sorry, only channel operators or voiced users may use that command."
  false
end
# True when the sender is opped in the message's channel. Sends no reply.
def op?(m)
  channel = m.channel

  # NOTE(review): a message with no channel is treated as "not an op".
  !channel.nil? && channel.opped?(m.user)
end
# True when min is not greater than max (both compared via to_f).
# Otherwise replies with an explanation and returns false.
def delay_ok?(min, max, m)
  return true if min.to_f <= max.to_f

  reply m, 'Sorry, min delay must be less than or equal to max delay.'
  false
end
# True when level (via to_i) is at least 1. Otherwise replies with an
# explanation and returns false.
def concurrency_ok?(level, m)
  return true if level.to_i >= 1

  reply m, 'Sorry, concurrency level must be at least 1.'
  false
end
# Runs the block with replies buffered in Thread.current[:buf], then sends
# everything buffered in a single reply call once the block has finished.
# If the block raises, nothing is sent and the exception propagates.
def batch_reply(m)
  c = Thread.current

  begin
    c[:batch_mode] = true
    c[:buf] = []

    yield

    # Leave batch mode first so the final reply is delivered, not buffered.
    c[:batch_mode] = false
    reply m, *c[:buf]
  ensure
    # Always leave batch mode and drop the buffer. Leaving the flag set
    # after an exception would make the next reply try to append to a
    # buffer that no longer exists.
    c[:batch_mode] = false
    c[:buf] = false
  end
end
# Runs the block with Thread.current[:silent] set, which makes reply drop
# its messages. The flag is restored afterwards, even if the block raises.
def silence
  c = Thread.current

  # Remember the flag so a nested silence does not un-silence its caller.
  previous = c[:silent] || false

  begin
    c[:silent] = true
    yield
  ensure
    c[:silent] = previous
  end
end
# Sends each of args through m.safe_reply(msg, true), unless the current
# thread is silenced (messages dropped) or in batch mode (messages appended
# to Thread.current[:buf] for batch_reply to send later).
def reply(m, *args)
  c = Thread.current
  return if c[:silent]

  # Buffer only when there really is a buffer; a batch flag left set with no
  # buffer behind it falls through to direct delivery.
  if c[:batch_mode] && c[:buf].is_a?(Array)
    c[:buf] += args
  else
    args.each { |msg| m.safe_reply(msg, true) }
  end
end
# Sends each of args to the message's sender via m.user.safe_send.
#
# TODO: privmsg should probably have a batch mode; doesn't really matter for
# now, though. Unlike reply, it ignores the batch and silent flags.
def privmsg(m, *args)
  args.each { |msg| m.user.safe_send(msg) }
end
end