2015-11-01 17:11:06 +01:00
|
|
|
from __future__ import print_function
|
2014-02-26 21:50:36 +01:00
|
|
|
import re
|
|
|
|
import time
|
|
|
|
|
|
|
|
def timed_ddl(db, stmt):
    """Execute ``stmt`` via ``db.execute`` and print how long it took.

    Before running, prints a blank line, the current wall-clock time and
    the statement itself; afterwards prints the elapsed seconds.
    """
    print()
    print(time.asctime())
    print(stmt)

    started_at = time.time()
    db.execute(stmt)
    elapsed = time.time() - started_at

    print('Took %.2fs' % (elapsed,))
|
2014-02-26 21:50:36 +01:00
|
|
|
|
|
|
|
def validate(sql_thingy):
    """Raise unless ``sql_thingy`` looks like a safe table/column/index name.

    Accepted names consist of a lowercase ASCII letter followed by one or
    more lowercase ASCII letters, ASCII digits or underscores. This is a
    basic sanity check for names that get interpolated into SQL text.

    Raises:
        Exception: if the name does not match that pattern.
    """
    # - Raw string: the pattern contains no string-level escapes to misread.
    # - [0-9] rather than \d: on Python 3, \d also matches non-ASCII digits.
    # - \Z rather than $: $ would accept a name with a trailing newline.
    if not re.match(r'^[a-z][a-z0-9_]+\Z', sql_thingy):
        raise Exception('Invalid SQL object: %s' % (sql_thingy,))
|
|
|
|
|
2014-03-01 17:20:04 +01:00
|
|
|
def do_batch_update(db, table, cols, vals, batch_size=10000, sleep=0.1):
    """Set ``cols`` to ``vals`` across ``table``, one id range at a time.

    The table is walked from MIN(id) to MAX(id) in half-open id ranges
    ``[lower, lower + batch_size)``. Each range is updated inside its own
    start_transaction/commit_transaction pair and is followed by a pause
    of ``sleep`` seconds. Returns without doing anything if the table has
    no rows (MIN(id) is NULL).

    Args:
        db: object providing ``execute``, ``start_transaction`` and
            ``commit_transaction``.
        table: table name; checked with ``validate``.
        cols: sequence of column names; each checked with ``validate``.
        vals: one value per entry in ``cols``, sent as query parameters.
        batch_size: width of each id range; must be positive.
        sleep: seconds to sleep after each batch.

    Raises:
        ValueError: if ``batch_size`` is not positive, or ``cols`` and
            ``vals`` differ in length.
    """
    validate(table)
    for col in cols:
        validate(col)

    # A non-positive batch size would never advance past max_id.
    if batch_size <= 0:
        raise ValueError('batch_size must be positive, got %r' % (batch_size,))

    # Materialize once so an iterator is not exhausted after the first batch.
    vals = list(vals)
    if len(vals) != len(cols):
        raise ValueError('Expected %d values for %d columns, got %d'
                         % (len(cols), len(cols), len(vals)))

    # Per-column assignments ("a = %s, b = %s") are equivalent to the
    # row form "(a, b) = (%s, %s)" and also valid for a single column.
    assignments = ', '.join(['%s = %%s' % (col,) for col in cols])
    stmt = '''
        UPDATE %s
        SET %s
        WHERE id >= %%s AND id < %%s
    ''' % (table, assignments)
    print(stmt)

    (min_id, max_id) = db.execute("SELECT MIN(id), MAX(id) FROM %s" % (table,))[0]
    if min_id is None:
        return

    # Inclusive id span; gaps in the id sequence make the real row count
    # smaller, so this is an upper bound.
    print("%s rows need updating" % (max_id - min_id + 1,))

    while min_id <= max_id:
        lower = min_id
        upper = min_id + batch_size
        print('%s about to update range [%s,%s)' % (time.asctime(), lower, upper))
        db.start_transaction()
        params = vals + [lower, upper]
        db.execute(stmt, params=params)
        db.commit_transaction()
        min_id = upper
        time.sleep(sleep)
|
|
|
|
|
2014-03-01 17:20:04 +01:00
|
|
|
def add_bool_columns(db, table, cols):
    """Add boolean columns named ``cols`` to ``table``.

    Runs these steps in order:
      1. ALTER TABLE ... ADD each column as ``boolean``.
      2. ALTER TABLE ... SET DEFAULT false on each column.
      3. Fill every column with 'false' via ``do_batch_update``.
      4. ANALYZE the table.
      5. ALTER TABLE ... SET NOT NULL on each column.

    The table name and every column name are checked with ``validate``
    before any statement is built.
    """
    validate(table)
    for name in cols:
        validate(name)

    coltype = 'boolean'
    val = 'false'
    alter_prefix = 'ALTER TABLE %s ' % (table,)

    def run_alter(clauses):
        # One ALTER TABLE statement carrying a comma-separated clause per column.
        timed_ddl(db, alter_prefix + ', '.join(clauses))

    run_alter(['ADD %s %s' % (name, coltype) for name in cols])
    run_alter(['ALTER %s SET DEFAULT %s' % (name, val) for name in cols])

    do_batch_update(db, table, cols, [val] * len(cols))

    timed_ddl(db, 'ANALYZE %s' % (table,))

    run_alter(['ALTER %s SET NOT NULL' % (name,) for name in cols])
|
2014-03-01 18:32:42 +01:00
|
|
|
|
|
|
|
def create_index_if_nonexistant(db, table, col, index):
    """Create ``index`` on ``table (col)`` unless the name is already taken.

    The existence check looks up ``index`` by ``relname`` in ``pg_class``
    with no further filtering, so any relation with that name suppresses
    the CREATE INDEX. All three names are checked with ``validate`` first.
    """
    for name in (table, col, index):
        validate(name)

    lookup = """SELECT relname FROM pg_class
              WHERE relname = %s"""
    matches = db.execute(lookup, params=[index])

    if len(matches) != 0:
        print("Not creating index '%s' because it already exists" % (index,))
        return

    timed_ddl(db, "CREATE INDEX %s ON %s (%s)" % (index, table, col))
|
2014-03-11 18:17:52 +01:00
|
|
|
|
|
|
|
def act_on_message_ranges(db, orm, tasks, batch_size=5000, sleep=0.5):
    """Apply ``tasks`` to ``zerver.Message`` rows in consecutive id ranges.

    ``tasks`` is a sequence of ``(filterer, action)`` pairs: ``filterer``
    takes a QuerySet and returns a filtered QuerySet, and ``action`` acts
    on the QuerySet it is given.

    Ids from the smallest to the largest message id are covered in
    inclusive ranges of at most ``batch_size`` ids. For each range, every
    task runs between ``db.start_transaction()`` and
    ``db.commit_transaction()``, followed by ``sleep`` seconds of pause.
    Prints a notice and returns if there are no messages.
    """
    messages = orm['zerver.Message'].objects

    try:
        first_id = messages.all().order_by('id')[0].id
    except IndexError:
        print('There is no work to do')
        return

    max_id = messages.all().order_by('-id')[0].id
    print("max_id = %d" % (max_id,))

    # Lower bound on runtime: number of batches times the per-batch sleep.
    overhead = int((max_id + 1 - first_id) / batch_size * sleep / 60)
    print("Expect this to take at least %d minutes, just due to sleeps alone." % (overhead,))

    lower = first_id
    while lower <= max_id:
        # Inclusive upper bound, clamped to the last existing id.
        upper = min(lower + batch_size - 1, max_id)

        print('%s about to update range %s to %s' % (time.asctime(), lower, upper))

        db.start_transaction()
        for filterer, action in tasks:
            in_range = messages.filter(id__range=(lower, upper))
            action(filterer(in_range))
        db.commit_transaction()

        lower = upper + 1
        time.sleep(sleep)
|
|
|
|
|