zulip/zephyr/models.py

734 lines
29 KiB
Python

from django.db import models
from django.conf import settings
from django.contrib.auth.models import User
import hashlib
import base64
import calendar
from zephyr.lib.cache import cache_with_key
from zephyr.lib.initial_password import initial_password, initial_api_key
import os
import simplejson
from django.db import transaction, IntegrityError
from zephyr.lib import bugdown
from zephyr.lib.bulk_create import batch_bulk_create
from zephyr.lib.avatar import gravatar_hash
from zephyr.lib.context_managers import lockfile
import requests
from django.contrib.auth.models import UserManager
from django.utils import timezone
from django.contrib.sessions.models import Session
import time
import subprocess
import traceback
import re
@cache_with_key(lambda self: 'display_recipient_dict:%d' % (self.id,))
def get_display_recipient(recipient):
"""
recipient: an instance of Recipient.
returns: an appropriate string describing the recipient (the stream
name, for a stream, or the email, for a user).
"""
if recipient.type == Recipient.STREAM:
stream = Stream.objects.get(id=recipient.type_id)
return stream.name
elif recipient.type == Recipient.HUDDLE:
# We don't really care what the ordering is, just that it's deterministic.
user_profile_list = (UserProfile.objects.filter(subscription__recipient=recipient)
.select_related()
.order_by('user__email'))
return [{'email': user_profile.user.email,
'full_name': user_profile.full_name,
'short_name': user_profile.short_name} for user_profile in user_profile_list]
else:
user_profile = UserProfile.objects.select_related().get(id=recipient.type_id)
return {'email': user_profile.user.email,
'full_name': user_profile.full_name,
'short_name': user_profile.short_name}
def get_log_recipient(recipient):
"""
recipient: an instance of Recipient.
returns: an appropriate string describing the recipient (the stream
name, for a stream, or the email, for a user).
"""
if recipient.type == Recipient.STREAM:
stream = Stream.objects.get(id=recipient.type_id)
return stream.name
user_profile_list = UserProfile.objects.filter(subscription__recipient=recipient).select_related()
return [{'email': user_profile.user.email,
'full_name': user_profile.full_name,
'short_name': user_profile.short_name} for user_profile in user_profile_list]
class Callbacks(object):
TYPE_RECEIVE = 0
TYPE_POINTER_UPDATE = 1
TYPE_MAX = 2
def __init__(self):
self.table = {}
def add(self, key, cb_type, callback):
if not self.table.has_key(key):
self.create_key(key)
self.table[key][cb_type].append(callback)
def call(self, key, cb_type, **kwargs):
if not self.table.has_key(key):
self.create_key(key)
for cb in self.table[key][cb_type]:
cb(**kwargs)
self.table[key][cb_type] = []
def create_key(self, key):
self.table[key] = [[] for i in range(0, Callbacks.TYPE_MAX)]
class Realm(models.Model):
domain = models.CharField(max_length=40, db_index=True, unique=True)
def __repr__(self):
return "<Realm: %s %s>" % (self.domain, self.id)
def __str__(self):
return self.__repr__()
def bulk_create_realms(realm_list):
existing_realms = set(r.domain for r in Realm.objects.select_related().all())
realms_to_create = []
for domain in realm_list:
if domain not in existing_realms:
realms_to_create.append(Realm(domain=domain))
existing_realms.add(domain)
batch_bulk_create(Realm, realms_to_create)
class UserProfile(models.Model):
user = models.OneToOneField(User)
full_name = models.CharField(max_length=100)
short_name = models.CharField(max_length=100)
pointer = models.IntegerField()
last_pointer_updater = models.CharField(max_length=64)
realm = models.ForeignKey(Realm)
api_key = models.CharField(max_length=32)
enable_desktop_notifications = models.BooleanField(default=True)
# This is class data, not instance data!
# There is one callbacks_table for the whole process.
callbacks_table = Callbacks()
# The user receives this message
# Called in the Tornado process
def receive(self, message):
self.callbacks_table.call(self.user.id, Callbacks.TYPE_RECEIVE,
messages=[message], update_types=["new_messages"])
def update_pointer(self, new_pointer, pointer_updater):
self.callbacks_table.call(self.user.id, Callbacks.TYPE_POINTER_UPDATE,
new_pointer=new_pointer,
update_types=["pointer_update"])
def add_receive_callback(self, cb):
self.callbacks_table.add(self.user.id, Callbacks.TYPE_RECEIVE, cb)
def add_pointer_update_callback(self, cb):
self.callbacks_table.add(self.user.id, Callbacks.TYPE_POINTER_UPDATE, cb)
def __repr__(self):
return "<UserProfile: %s %s>" % (self.user.email, self.realm)
def __str__(self):
return self.__repr__()
@classmethod
def create(cls, user, realm, full_name, short_name):
"""When creating a new user, make a profile for him or her."""
if not cls.objects.filter(user=user):
profile = cls(user=user, pointer=-1, realm=realm,
full_name=full_name, short_name=short_name)
profile.api_key = initial_api_key(user.email)
profile.save()
# Auto-sub to the ability to receive personals.
recipient = Recipient.objects.create(type_id=profile.id, type=Recipient.PERSONAL)
Subscription.objects.create(user_profile=profile, recipient=recipient)
return profile
class PreregistrationUser(models.Model):
email = models.EmailField(unique=True)
# status: whether an object has been confirmed.
# if confirmed, set to confirmation.settings.STATUS_ACTIVE
status = models.IntegerField(default=0)
class MitUser(models.Model):
email = models.EmailField(unique=True)
# status: whether an object has been confirmed.
# if confirmed, set to confirmation.settings.STATUS_ACTIVE
status = models.IntegerField(default=0)
# create_user_hack is the same as Django's User.objects.create_user,
# except that we don't save to the database so it can used in
# bulk_creates
def create_user_hack(username, password, email, active):
now = timezone.now()
email = UserManager.normalize_email(email)
user = User(username=username, email=email,
is_staff=False, is_active=active, is_superuser=False,
last_login=now, date_joined=now)
if active:
user.set_password(password)
else:
user.set_unusable_password()
return user
def set_default_streams(realm, stream_names):
DefaultStream.objects.filter(realm=realm).delete()
for stream_name in stream_names:
stream = create_stream_if_needed(realm, stream_name)
DefaultStream.objects.create(stream=stream, realm=realm)
def add_default_subs(user_profile):
for default in DefaultStream.objects.filter(realm=user_profile.realm):
do_add_subscription(user_profile, default.stream)
def create_user_base(email, password, active=True):
# NB: the result of Base32 + truncation is not a valid Base32 encoding.
# It's just a unique alphanumeric string.
# Use base32 instead of base64 so we don't have to worry about mixed case.
# Django imposes a limit of 30 characters on usernames.
email_hash = hashlib.sha256(settings.HASH_SALT + email).digest()
username = base64.b32encode(email_hash)[:30]
return create_user_hack(username, password, email, active)
def create_user(email, password, realm, full_name, short_name,
active=True):
user = create_user_base(email=email, password=password,
active=active)
user.save()
return UserProfile.create(user, realm, full_name, short_name)
def compute_mit_user_fullname(email):
try:
# Input is either e.g. starnine@mit.edu or user|CROSSREALM.INVALID@mit.edu
match_user = re.match(r'^([a-zA-Z0-9_.-]+)(\|.+)?@mit\.edu$', email.lower())
if match_user and match_user.group(2) is None:
dns_query = "%s.passwd.ns.athena.mit.edu" % (match_user.group(1),)
proc = subprocess.Popen(['host', '-t', 'TXT', dns_query],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, _err_unused = proc.communicate()
if proc.returncode == 0:
# Parse e.g. 'starnine:*:84233:101:Athena Consulting Exchange User,,,:/mit/starnine:/bin/bash'
# for the 4th passwd entry field, aka the person's name.
return out.split(':')[4].split(',')[0]
elif match_user:
return match_user.group(1).lower() + "@" + match_user.group(2).upper()[1:]
except:
print ("Error getting fullname for %s:" % (email,))
traceback.print_exc()
return email.lower()
def create_mit_user_if_needed(realm, email):
try:
return UserProfile.objects.get(user__email=email)
except UserProfile.DoesNotExist:
try:
# Forge a user for this person
return create_user(email, initial_password(email), realm,
compute_mit_user_fullname(email), email.split("@")[0],
active=False)
except IntegrityError:
# Unless we raced with another thread doing the same
# thing, in which case we should get the user they made
transaction.commit()
return UserProfile.objects.get(user__email=email)
def bulk_create_users(realms, users_raw):
"""
Creates and saves a User with the given email.
Has some code based off of UserManage.create_user, but doesn't .save()
"""
users = []
existing_users = set(u.email for u in User.objects.all())
for (email, full_name, short_name, active) in users_raw:
if email in existing_users:
continue
users.append((email, full_name, short_name, active))
existing_users.add(email)
users_to_create = []
for (email, full_name, short_name, active) in users:
users_to_create.append(create_user_base(email, initial_password(email),
active=active))
batch_bulk_create(User, users_to_create, 30)
users_by_email = {}
for user in User.objects.all():
users_by_email[user.email] = user
# Now create user_profiles
profiles_to_create = []
for (email, full_name, short_name, active) in users:
domain = email.split('@')[1]
profile = UserProfile(user=users_by_email[email], pointer=-1,
realm=realms[domain],
full_name=full_name, short_name=short_name)
profile.api_key = initial_api_key(email)
profiles_to_create.append(profile)
batch_bulk_create(UserProfile, profiles_to_create, 50)
profiles_by_email = {}
profiles_by_id = {}
for profile in UserProfile.objects.select_related().all():
profiles_by_email[profile.user.email] = profile
profiles_by_id[profile.user.id] = profile
recipients_to_create = []
for (email, _, _, _) in users:
recipients_to_create.append(Recipient(type_id=profiles_by_email[email].id,
type=Recipient.PERSONAL))
batch_bulk_create(Recipient, recipients_to_create)
recipients_by_email = {}
for recipient in Recipient.objects.filter(type=Recipient.PERSONAL):
recipients_by_email[profiles_by_id[recipient.type_id].user.email] = recipient
subscriptions_to_create = []
for (email, _, _, _) in users:
subscriptions_to_create.append(
Subscription(user_profile_id=profiles_by_email[email].id,
recipient=recipients_by_email[email]))
batch_bulk_create(Subscription, subscriptions_to_create)
def create_stream_if_needed(realm, stream_name):
(stream, created) = Stream.objects.get_or_create(
realm=realm, name__iexact=stream_name,
defaults={'name': stream_name})
if created:
Recipient.objects.create(type_id=stream.id, type=Recipient.STREAM)
return stream
def bulk_create_streams(realms, stream_list):
existing_streams = set((stream.realm.domain, stream.name.lower())
for stream in Stream.objects.select_related().all())
streams_to_create = []
for (domain, name) in stream_list:
if (domain, name.lower()) not in existing_streams:
streams_to_create.append(Stream(realm=realms[domain], name=name))
batch_bulk_create(Stream, streams_to_create)
recipients_to_create = []
for stream in Stream.objects.all():
if (stream.realm.domain, stream.name.lower()) not in existing_streams:
recipients_to_create.append(Recipient(type_id=stream.id,
type=Recipient.STREAM))
batch_bulk_create(Recipient, recipients_to_create)
class Stream(models.Model):
name = models.CharField(max_length=30, db_index=True)
realm = models.ForeignKey(Realm, db_index=True)
def __repr__(self):
return "<Stream: %s>" % (self.name,)
def __str__(self):
return self.__repr__()
class Meta:
unique_together = ("name", "realm")
@classmethod
def create(cls, name, realm):
stream = cls(name=name, realm=realm)
stream.save()
recipient = Recipient.objects.create(type_id=stream.id,
type=Recipient.STREAM)
return (stream, recipient)
class Recipient(models.Model):
type_id = models.IntegerField(db_index=True)
type = models.PositiveSmallIntegerField(db_index=True)
# Valid types are {personal, stream, huddle}
PERSONAL = 1
STREAM = 2
HUDDLE = 3
class Meta:
unique_together = ("type", "type_id")
# N.B. If we used Django's choice=... we would get this for free (kinda)
_type_names = {
PERSONAL: 'personal',
STREAM: 'stream',
HUDDLE: 'huddle' }
def type_name(self):
# Raises KeyError if invalid
return self._type_names[self.type]
def __repr__(self):
display_recipient = get_display_recipient(self)
return "<Recipient: %s (%d, %s)>" % (display_recipient, self.type_id, self.type)
class Client(models.Model):
name = models.CharField(max_length=30, db_index=True, unique=True)
@transaction.commit_on_success
def get_client(name):
try:
(client, _) = Client.objects.get_or_create(name=name)
except IntegrityError:
# If we're racing with other threads trying to create this
# client, get_or_create will throw IntegrityError (because our
# database is enforcing the no-duplicate-objects constraint);
# in this case one should just re-fetch the object. This race
# actually happens with populate_db.
#
# Much of the rest of our code that writes to the database
# doesn't handle this duplicate object on race issue correctly :(
transaction.commit()
return Client.objects.get(name=name)
return client
def bulk_create_clients(client_list):
existing_clients = set(client.name for client in Client.objects.select_related().all())
clients_to_create = []
for name in client_list:
if name not in existing_clients:
clients_to_create.append(Client(name=name))
existing_clients.add(name)
batch_bulk_create(Client, clients_to_create)
class Message(models.Model):
sender = models.ForeignKey(UserProfile)
recipient = models.ForeignKey(Recipient)
subject = models.CharField(max_length=60)
content = models.TextField()
pub_date = models.DateTimeField('date published')
sending_client = models.ForeignKey(Client)
def __repr__(self):
display_recipient = get_display_recipient(self.recipient)
return "<Message: %s / %s / %r>" % (display_recipient, self.subject, self.sender)
def __str__(self):
return self.__repr__()
@cache_with_key(lambda self, apply_markdown: 'message_dict:%d:%d' % (self.id, apply_markdown))
def to_dict(self, apply_markdown):
# Messages arrive in the Tornado process with the dicts already rendered.
# This avoids running the Markdown parser and some database queries in the single-threaded
# Tornado server.
#
# This field is not persisted to the database and will disappear if the object is re-fetched.
if hasattr(self, 'precomputed_dicts'):
return self.precomputed_dicts['text/html' if apply_markdown else 'text/x-markdown']
obj = dict(
id = self.id,
sender_email = self.sender.user.email,
sender_full_name = self.sender.full_name,
sender_short_name = self.sender.short_name,
type = self.recipient.type_name(),
display_recipient = get_display_recipient(self.recipient),
recipient_id = self.recipient.id,
subject = self.subject,
timestamp = calendar.timegm(self.pub_date.timetuple()),
gravatar_hash = gravatar_hash(self.sender.user.email))
if apply_markdown:
obj['content'] = bugdown.convert(self.content)
obj['content_type'] = 'text/html'
else:
obj['content'] = self.content
obj['content_type'] = 'text/x-markdown'
return obj
def to_log_dict(self):
return dict(
id = self.id,
sender_email = self.sender.user.email,
sender_full_name = self.sender.full_name,
sender_short_name = self.sender.short_name,
sending_client = self.sending_client.name,
type = self.recipient.type_name(),
recipient = get_log_recipient(self.recipient),
subject = self.subject,
content = self.content,
timestamp = calendar.timegm(self.pub_date.timetuple()))
@classmethod
def remove_unreachable(cls):
"""Remove all Messages that are not referred to by any UserMessage."""
cls.objects.exclude(id__in = UserMessage.objects.values('message_id')).delete()
class UserMessage(models.Model):
user_profile = models.ForeignKey(UserProfile)
message = models.ForeignKey(Message)
# We're not using the archived field for now, but create it anyway
# since this table will be an unpleasant one to do schema changes
# on later
archived = models.BooleanField()
class Meta:
unique_together = ("user_profile", "message")
def __repr__(self):
display_recipient = get_display_recipient(self.message.recipient)
return "<UserMessage: %s / %s>" % (display_recipient, self.user_profile.user.email)
user_hash = {}
def get_user_profile_by_id(uid):
if uid in user_hash:
return user_hash[uid]
return UserProfile.objects.select_related().get(id=uid)
# Store an event in the log for re-importing messages
def log_event(event):
assert("timestamp" in event)
with lockfile(settings.MESSAGE_LOG + '.lock'):
with open(settings.MESSAGE_LOG, 'a') as log:
log.write(simplejson.dumps(event) + '\n')
def log_message(message):
if not message.sending_client.name.startswith("test:"):
log_event(message.to_log_dict())
def do_send_message(message, no_log=False):
message.save()
# Log the message to our message log for populate_db to refill
if not no_log:
log_message(message)
if message.recipient.type == Recipient.PERSONAL:
recipients = list(set([get_user_profile_by_id(message.recipient.type_id),
get_user_profile_by_id(message.sender_id)]))
# For personals, you send out either 1 or 2 copies of the message, for
# personals to yourself or to someone else, respectively.
assert((len(recipients) == 1) or (len(recipients) == 2))
elif (message.recipient.type == Recipient.STREAM or
message.recipient.type == Recipient.HUDDLE):
recipients = [s.user_profile for
s in Subscription.objects.select_related().filter(recipient=message.recipient, active=True)]
else:
raise ValueError('Bad recipient type')
# Save the message receipts in the database
# TODO: Use bulk_create here
with transaction.commit_on_success():
for user_profile in recipients:
# Only deliver messages to "active" user accounts
if user_profile.user.is_active:
UserMessage(user_profile=user_profile, message=message).save()
# We can only publish messages to longpolling clients if the Tornado server is running.
if settings.TORNADO_SERVER:
# Render Markdown etc. here, so that the single-threaded Tornado server doesn't have to.
# TODO: Reduce duplication in what we send.
rendered = { 'text/html': message.to_dict(apply_markdown=True),
'text/x-markdown': message.to_dict(apply_markdown=False) }
requests.post(settings.TORNADO_SERVER + '/notify_new_message', data=dict(
secret = settings.SHARED_SECRET,
message = message.id,
rendered = simplejson.dumps(rendered),
users = ','.join(str(user.id) for user in recipients)))
class Subscription(models.Model):
user_profile = models.ForeignKey(UserProfile)
recipient = models.ForeignKey(Recipient)
active = models.BooleanField(default=True)
class Meta:
unique_together = ("user_profile", "recipient")
def __repr__(self):
return "<Subscription: %r -> %r>" % (self.user_profile, self.recipient)
def __str__(self):
return self.__repr__()
def do_add_subscription(user_profile, stream, no_log=False):
recipient = Recipient.objects.get(type_id=stream.id,
type=Recipient.STREAM)
(subscription, created) = Subscription.objects.get_or_create(
user_profile=user_profile, recipient=recipient,
defaults={'active': True})
did_subscribe = created
if not subscription.active:
did_subscribe = True
subscription.active = True
subscription.save()
if did_subscribe and not no_log:
log_event({'type': 'subscription_added',
'user': user_profile.user.email,
'name': stream.name,
'timestamp': time.time(),
'domain': stream.realm.domain})
return did_subscribe
def do_remove_subscription(user_profile, stream, no_log=False):
recipient = Recipient.objects.get(type_id=stream.id,
type=Recipient.STREAM)
maybe_sub = Subscription.objects.filter(user_profile=user_profile,
recipient=recipient)
if len(maybe_sub) == 0:
return False
subscription = maybe_sub[0]
did_remove = subscription.active
subscription.active = False
subscription.save()
if did_remove and not no_log:
log_event({'type': 'subscription_removed',
'user': user_profile.user.email,
'name': stream.name,
'timestamp': time.time(),
'domain': stream.realm.domain})
return did_remove
def do_activate_user(user, log=True):
user.is_active = True
user.set_password(initial_password(user.email))
user.save()
if log:
log_event({'type': 'user_activated',
'timestamp': time.time(),
'user': user.email})
def do_change_password(user, password, log=True):
user.set_password(password)
user.save()
if log:
log_event({'type': 'user_change_password',
'timestamp': time.time(),
'user': user.email,
'pwhash': user.password})
def do_change_full_name(user_profile, full_name, log=True):
user_profile.full_name = full_name
user_profile.save()
if log:
log_event({'type': 'user_change_full_name',
'timestamp': time.time(),
'user': user_profile.user.email,
'full_name': full_name})
def do_change_enable_desktop_notifications(user_profile, enable_desktop_notifications, log=True):
user_profile.enable_desktop_notifications = enable_desktop_notifications
user_profile.save()
if log:
log_event({'type': 'enable_desktop_notifications_changed',
'timestamp': time.time(),
'user': user_profile.user.email,
'enable_desktop_notifications': enable_desktop_notifications})
class Huddle(models.Model):
# TODO: We should consider whether using
# CommaSeparatedIntegerField would be better.
huddle_hash = models.CharField(max_length=40, db_index=True, unique=True)
def get_huddle_hash(id_list):
id_list = sorted(set(id_list))
hash_key = ",".join(str(x) for x in id_list)
return hashlib.sha1(hash_key).hexdigest()
def get_huddle(id_list):
huddle_hash = get_huddle_hash(id_list)
(huddle, created) = Huddle.objects.get_or_create(huddle_hash=huddle_hash)
if created:
recipient = Recipient.objects.create(type_id=huddle.id,
type=Recipient.HUDDLE)
# Add subscriptions
for uid in id_list:
Subscription.objects.create(recipient = recipient,
user_profile = UserProfile.objects.get(id=uid))
return huddle
def bulk_create_huddles(users, huddle_user_list):
huddles = {}
huddles_by_id = {}
huddle_set = set()
existing_huddles = set()
for huddle in Huddle.objects.all():
existing_huddles.add(huddle.huddle_hash)
for huddle_users in huddle_user_list:
user_ids = [users[email].id for email in huddle_users]
huddle_hash = get_huddle_hash(user_ids)
if huddle_hash in existing_huddles:
continue
huddle_set.add((huddle_hash, tuple(sorted(user_ids))))
huddles_to_create = []
for (huddle_hash, _) in huddle_set:
huddles_to_create.append(Huddle(huddle_hash=huddle_hash))
batch_bulk_create(Huddle, huddles_to_create)
for huddle in Huddle.objects.all():
huddles[huddle.huddle_hash] = huddle
huddles_by_id[huddle.id] = huddle
recipients_to_create = []
for (huddle_hash, _) in huddle_set:
recipients_to_create.append(Recipient(type_id=huddles[huddle_hash].id, type=Recipient.HUDDLE))
batch_bulk_create(Recipient, recipients_to_create)
huddle_recipients = {}
for recipient in Recipient.objects.filter(type=Recipient.HUDDLE):
huddle_recipients[huddles_by_id[recipient.type_id].huddle_hash] = recipient
subscriptions_to_create = []
for (huddle_hash, huddle_user_ids) in huddle_set:
for user_id in huddle_user_ids:
subscriptions_to_create.append(Subscription(active=True, user_profile_id=user_id,
recipient=huddle_recipients[huddle_hash]))
batch_bulk_create(Subscription, subscriptions_to_create)
# This function is used only by tests.
# We have faster implementations within the app itself.
def filter_by_subscriptions(messages, user):
user_profile = UserProfile.objects.get(user=user)
user_messages = []
subscriptions = [sub.recipient for sub in
Subscription.objects.filter(user_profile=user_profile, active=True)]
for message in messages:
# If you are subscribed to the personal or stream, or if you
# sent the personal, you can see the message.
if (message.recipient in subscriptions) or \
(message.recipient.type == Recipient.PERSONAL and
message.sender == user_profile):
user_messages.append(message)
return user_messages
def clear_database():
for model in [Message, Stream, UserProfile, User, Recipient,
Realm, Subscription, Huddle, UserMessage, Client,
DefaultStream]:
model.objects.all().delete()
Session.objects.all().delete()
class UserActivity(models.Model):
user_profile = models.ForeignKey(UserProfile)
client = models.ForeignKey(Client)
query = models.CharField(max_length=50, db_index=True)
count = models.IntegerField()
last_visit = models.DateTimeField('last visit')
class Meta:
unique_together = ("user_profile", "client", "query")
class DefaultStream(models.Model):
realm = models.ForeignKey(Realm)
stream = models.ForeignKey(Stream)
class Meta:
unique_together = ("realm", "stream")
class StreamColor(models.Model):
subscription = models.ForeignKey(Subscription)
color = models.CharField(max_length=10)