Tweak the 'load balancing' algorithm
To reduce contention we randomly skipped jobs, but this caused many
jobs to end up stopping early. Now we instead keep going until we time
out, queueing skipped and errored jobs for a retry (the chance of
working on a given job also goes up from 1 in 6 to 1 in 4).
Patch by: Sverre Rabbelier
--- a/app/soc/cron/job.py Sun Apr 19 17:42:27 2009 +0000
+++ b/app/soc/cron/job.py Sun Apr 19 17:42:44 2009 +0000
@@ -185,5 +185,13 @@
db.run_in_transaction(self.failJob, job_key)
return self.ERRORED
+  def iterate(self, jobs, retry_jobs):
+    """Yields every job in jobs, then drains retry_jobs front-first; the
+    caller may append to retry_jobs while iterating to requeue a job."""
+    for job in jobs:
+      yield job
+    while retry_jobs:
+      # pop, so a handled job is not yielded again and later ones get a turn
+      yield retry_jobs.pop(0)
handler = Handler()
--- a/app/soc/views/models/cron.py Sun Apr 19 17:42:27 2009 +0000
+++ b/app/soc/views/models/cron.py Sun Apr 19 17:42:44 2009 +0000
@@ -92,15 +92,23 @@
queryGen = lambda: job_logic.getQueryForFields(filter=filter)
jobs = job_logic.entityIterator(queryGen, batchSize=10)
- for job in jobs:
- if random.randint(0, 5) > 0:
+ good = True
+ retry_jobs = []
+
+ for job in handler.iterate(jobs, retry_jobs):
+ if random.randint(0, 3) > 0:
+ retry_jobs.append(job)
continue
+
job_key = job.key().id()
- good = handler.handle(job_key)
+ status = handler.handle(job_key)
- if not good:
+ if status is handler.OUT_OF_TIME:
break
+ if status is handler.ERRORED:
+ retry_jobs.append(job)
+
jobs_completed += 1
response = 'Completed %d jobs in %d priority groups.' % (