event_schema: Add documentation and rename a few functions.

This should help make this revised subsystem readable for more new
contributors.  We still need to make updates to the high-level
documentation.
This commit is contained in:
Tim Abbott 2020-09-25 12:53:00 -07:00
parent 5b7c9c4714
commit 94a9fa1891
5 changed files with 113 additions and 35 deletions

View File

@ -1,4 +1,14 @@
#!/usr/bin/env python3
#
# Validates that 3 data sources agree about the structure of Zulip's events API:
#
# * Node fixtures for the server_events_dispatch.js tests.
# * OpenAPI definitions in zerver/openapi/zulip.yaml
# * The schemas defined in zerver/lib/events_schema.py used for the
# Zulip server's test suite.
#
# We compare the Python and OpenAPI schemas by converting the OpenAPI data
# into the event_schema style of types and the diffing the schemas.
import argparse
import difflib
import os
@ -61,7 +71,8 @@ SKIP_LIST = [
"user_status__set_status_text",
]
# This list of exemptions represents details we should fix in Zulip's
# API structure and/or validators.
EXEMPT_OPENAPI_NAMES = [
# users field missing
"update_display_settings_event",
@ -132,6 +143,9 @@ def verify_fixtures_are_sorted(names: List[str]) -> None:
def from_openapi(node: Dict[str, Any]) -> Any:
"""Converts the OpenAPI data into event_schema.py style type
definitions for convenient comparison with the types used for backend
tests declared there."""
if "oneOf" in node:
return UnionType([from_openapi(n) for n in node["oneOf"]])
@ -172,7 +186,7 @@ def from_openapi(node: Dict[str, Any]) -> Any:
raise AssertionError("cannot handle node")
def validate_openapi() -> None:
def validate_openapi_against_event_schema() -> None:
node = openapi_spec.openapi()["paths"]["/events"]["get"]["responses"]["200"][
"content"
]["application/json"]["schema"]["properties"]["events"]["items"]["oneOf"]
@ -222,7 +236,7 @@ def run() -> None:
print(f"skip {name}")
continue
check_event(name, event)
validate_openapi()
validate_openapi_against_event_schema()
if __name__ == "__main__":

View File

@ -1,8 +1,12 @@
"""
This module sets up type classes like DictType and
ListType that define types for arbitrary objects, but
our first use case is to specify the types of Zulip
events that come from send_event calls.
"""This module sets up type classes like DictType and ListType that
define types for arbitrary objects. The main use case is to specify
the types of Zulip events that come from send_event calls for
verification in test_events.py; in most other code paths we'll want to
use a TypedDict.
This module consists of workarounds for cases where we cannot express
the level of detail we desire or do comparison with OpenAPI types
easily with the native Python type system.
"""
from dataclasses import dataclass
@ -20,6 +24,10 @@ def indent(s: str) -> str:
@dataclass
class DictType:
"""Dictionary is validated as having all required keys, all keys
accounted for in required_keys and optional_keys, and recursive
validation of types of fields.
"""
def __init__(
self,
required_keys: Sequence[Tuple[str, Any]],
@ -65,6 +73,7 @@ class DictType:
@dataclass
class EnumType:
"""An enum with the set of valid values declared."""
valid_vals: Sequence[Any]
def check_data(self, var_name: str, val: Dict[str, Any]) -> None:
@ -76,6 +85,7 @@ class EnumType:
class Equals:
"""Type requiring a specific value."""
def __init__(self, expected_value: Any) -> None:
self.expected_value = expected_value
@ -94,6 +104,8 @@ class Equals:
class NumberType:
"""A Union[float, int]; needed to align with the `number` type in
OpenAPI, because isinstance(4, float) == False"""
def check_data(self, var_name: str, val: Optional[Any]) -> None:
if isinstance(val, int) or isinstance(val, float):
return
@ -104,6 +116,7 @@ class NumberType:
class ListType:
"""List with every object having the declared sub_type."""
def __init__(self, sub_type: Any, length: Optional[int] = None) -> None:
self.sub_type = sub_type
self.length = length
@ -123,6 +136,7 @@ class ListType:
@dataclass
class StringDictType:
"""Type that validates an object is a Dict[str, str]"""
value_type: Any
def check_data(self, var_name: str, val: Dict[Any, Any]) -> None:
@ -157,6 +171,8 @@ class OptionalType:
@dataclass
class TupleType:
"""Deprecated; we'd like to avoid using tuples in our API. Validates
the tuple has the sequence of sub_types."""
sub_types: Sequence[Any]
def check_data(self, var_name: str, val: Any) -> None:
@ -223,6 +239,7 @@ def event_dict_type(
required_keys: Sequence[Tuple[str, Any]],
optional_keys: Sequence[Tuple[str, Any]] = [],
) -> DictType:
"""
This is just a tiny wrapper on DictType, but it provides
some minor benefits:
@ -252,14 +269,16 @@ def make_checker(data_type: DictType,) -> Callable[[str, Dict[str, object]], Non
def schema(
# return a YAML-like string for our data type
var_name: str,
data_type: Any,
) -> str:
"""
schema is a glorified repr of a data type, but it
also includes a var_name you pass in, plus we dumb
things down a bit to match our current openapi spec
"""Returns a YAML-like string for our data type; these are used for
pretty-printing and comparison between the OpenAPI type
definitions and these Python data types, as part of
schema is a glorified repr of a data type, but it also includes a
var_name you pass in, plus we dumb things down a bit to match our
current openapi spec.
"""
if hasattr(data_type, "schema"):
return data_type.schema(var_name)
@ -269,11 +288,11 @@ def schema(
def check_data(
# Check that data conforms to our data_type
data_type: Any,
var_name: str,
val: Any,
) -> None:
"""Check that val conforms to our data_type"""
if hasattr(data_type, "check_data"):
data_type.check_data(var_name, val)
return

View File

@ -1,3 +1,14 @@
# This module is a collection of testing helpers for validating the
# schema of "events" sent by Zulip's server-to-client push system.
#
# By policy, every event generated by Zulip's API should be validated
# by a test in test_events.py with a schema checker here (which is
# validated, in turn, against the OpenAPI documentation for GET
# /events in zulip.yaml and the fixtures used by the Zulip webapp
# frontend).
#
# See https://zulip.readthedocs.io/en/latest/subsystems/events-system.html
from typing import Dict, List, Sequence, Tuple, Union
from zerver.lib.data_types import (
@ -183,6 +194,10 @@ default_streams_event = event_dict_type(
)
check_default_streams = make_checker(default_streams_event)
# The event type has an unusual number of optional fields. The
# message_id/message_ids fields are conditional on the
# bulk_message_deletion client_capability, whereas the other fields
# are conditional on private vs. stream messages.
delete_message_event = event_dict_type(
required_keys=[
# force vertical
@ -279,9 +294,11 @@ invites_changed_event = event_dict_type(
)
check_invites_changed = make_checker(invites_changed_event)
# This type, like other instances of TupleType, is a legacy feature of
# a very old Zulip API; we plan to replace it with an object as those
# are more extensible.
muted_topic_type = TupleType(
[
# We should use objects, not tuples!
str, # stream name
str, # topic name
int, # timestamp
@ -327,6 +344,8 @@ message_event = event_dict_type(
)
check_message = make_checker(message_event)
# This legacy presence structure is intended to be replaced by a more
# sensible data structure.
presence_type = DictType(
required_keys=[
("status", EnumType(["active", "idle"])),
@ -372,14 +391,17 @@ def check_presence(
assert list(event["presence"].values())[0]["status"] == status
# We will eventually just send user_ids.
reaction_user_type = DictType(
# Type for the legacy user field; the `user_id` field is intended to
# replace this and we expect to remove this once clients have migrated
# to support the modern API.
reaction_legacy_user_type = DictType(
required_keys=[
# force vertical
("email", str),
("full_name", str),
("user_id", int),
]
# We should probably declare is_mirror_dummy as an optional field here.
)
reaction_add_event = event_dict_type(
@ -391,7 +413,7 @@ reaction_add_event = event_dict_type(
("emoji_code", str),
("reaction_type", str),
("user_id", int),
("user", reaction_user_type),
("user", reaction_legacy_user_type),
]
)
check_reaction_add = make_checker(reaction_add_event)
@ -406,7 +428,7 @@ reaction_remove_event = event_dict_type(
("emoji_code", str),
("reaction_type", str),
("user_id", int),
("user", reaction_user_type),
("user", reaction_legacy_user_type),
]
)
check_reaction_remove = make_checker(reaction_remove_event)
@ -669,29 +691,25 @@ def check_realm_export(
has_deleted_timestamp: bool,
has_failed_timestamp: bool,
) -> None:
"""
Check the overall event first, knowing it has some
optional types.
"""
# Check the overall event first, knowing it has some
# optional types.
_check_realm_export(var_name, event)
"""
Theoretically we can have multiple exports, but until
that happens in practice, we assume our tests are
gonna have one export.
"""
# It's possible to have multiple data exports, but the events tests do not
# exercise that case, so we do strict validation for a single export here.
assert isinstance(event["exports"], list)
assert len(event["exports"]) == 1
export = event["exports"][0]
"""
Now verify which fields have non-None values.
"""
# Now verify which fields have non-None values.
assert has_export_url == (export["export_url"] is not None)
assert has_deleted_timestamp == (export["deleted_timestamp"] is not None)
assert has_failed_timestamp == (export["failed_timestamp"] is not None)
# This type, like other instances of TupleType, is a legacy feature of
# a very old Zulip API; we plan to replace it with an object as those
# are more extensible.
realm_filter_type = TupleType(
[
# we should make this an object
@ -790,9 +808,10 @@ authentication_dict = DictType(
authentication_data = DictType(
required_keys=[
# this single-key dictionary is an annoying
# consequence of us using property_type of
# "default" for legacy reasons
# this single-key dictionary is an annoying consequence of us
# using property_type of "default" for legacy reasons
# (specifically, to avoid breaking old clients when we
# introduced the `update_dict` format).
("authentication_methods", authentication_dict),
]
)
@ -880,6 +899,14 @@ def check_realm_update_dict(
check_data(sub_type, f"{var_name}['data']", event["data"])
# TODO: This type is missing optional fields:
#
# * delivery_email
# * bot-related fields.
# * Users with custom profile fields, where profile_data will
# be nonempty.
#
# Only because our test_events.py tests don't cover the relevant cases.
realm_user_type = DictType(
required_keys=[
("user_id", int),
@ -936,6 +963,8 @@ custom_profile_field_type = DictType(
],
)
# This block of types, each named by the dictionary key, makes it
# possible to validate the type of all the realm_user update events.
realm_user_person_types = dict(
# Note that all flavors of person include user_id.
avatar_fields=DictType(
@ -1170,6 +1199,8 @@ subscription_remove_event = event_dict_type(
)
check_subscription_remove = make_checker(subscription_remove_event)
# TODO: Have better validation for value_type; we don't have
# test_events tests for non-bool fields like `color`.
subscription_update_event = event_dict_type(
required_keys=[
("type", Equals("subscription")),
@ -1308,7 +1339,7 @@ update_message_topic_fields = [
"propagate_mode",
EnumType(
[
# order matches openapi spec
# The order here needs to match the OpenAPI definitions
"change_one",
"change_later",
"change_all",

View File

@ -1,5 +1,9 @@
# See https://zulip.readthedocs.io/en/latest/subsystems/events-system.html for
# high-level documentation on how this system works.
#
# This module is closely integrated with zerver/lib/event_schema.py
# and zerver/lib/data_types.py systems for validating the schemas of
# events; it also uses the OpenAPI tools to validate our documentation.
import copy
import sys
import time

View File

@ -117,6 +117,16 @@ def send_notification_http(realm: Realm, data: Mapping[str, Any]) -> None:
data=dict(data=orjson.dumps(data), secret=settings.SHARED_SECRET),
)
# The core function for sending an event from Django to Tornado (which
# will then push it to web and mobile clients for the target users).
# By convention, send_event should only be called from
# zerver/lib/actions.py, which helps make it easy to find event
# generation code.
#
# Every call point should be covered by a test in `test_events.py`,
# with the schema verified in `zerver/lib/event_schema.py`.
#
# See https://zulip.readthedocs.io/en/latest/subsystems/events-system.html
def send_event(realm: Realm, event: Mapping[str, Any],
users: Union[Iterable[int], Iterable[Mapping[str, Any]]]) -> None:
"""`users` is a list of user IDs, or in the case of `message` type