# Set of helper functions to manipulate the OpenAPI files that define our REST
# API's specification.
import os
|
2020-06-13 17:59:46 +02:00
|
|
|
import re
|
2019-07-08 14:08:02 +02:00
|
|
|
from typing import Any, Dict, List, Optional, Set
|
2018-05-15 19:28:42 +02:00
|
|
|
|
2020-07-01 19:07:31 +02:00
|
|
|
from openapi_schema_validator import OAS30Validator
|
|
|
|
|
2018-05-15 19:28:42 +02:00
|
|
|
# Absolute path to the OpenAPI spec file describing Zulip's REST API,
# resolved relative to this module's location.
OPENAPI_SPEC_PATH = os.path.abspath(os.path.join(
    os.path.dirname(__file__),
    '../openapi/zulip.yaml'))

# A list of endpoint-methods such that the endpoint
# has documentation but not with this particular method.
# Entries have the form "<path>:<method>".
EXCLUDE_UNDOCUMENTED_ENDPOINTS = {"/realm/emoji/{emoji_name}:delete"}
# Consists of endpoints with some documentation remaining.
# These are skipped but return true as the validator cannot exclude objects
EXCLUDE_DOCUMENTED_ENDPOINTS = {"/events:get", "/register:post", "/settings/notifications:patch"}
|
2018-08-07 23:40:07 +02:00
|
|
|
class OpenAPISpec():
    """Lazily parsed, cached view of the OpenAPI spec file at ``path``.

    The parsed data and the derived URL-regex lookup table are
    re-created automatically whenever the file's modification time
    changes on disk.
    """

    def __init__(self, path: str) -> None:
        self.path = path
        # mtime of self.path when it was last parsed; None until the
        # first reload() so the first access always parses the file.
        self.last_update: Optional[float] = None
        # Parsed OpenAPI document, populated lazily by reload().
        self.data: Dict[str, Any] = {}
        # Maps a regex matching concrete request URLs to the
        # parameterized OpenAPI path that documents them.
        self.regex_dict: Dict[str, str] = {}

    def reload(self) -> None:
        """Parse the spec file and rebuild the regex lookup table."""
        # Because importing yamole (and in turn, yaml) takes
        # significant time, and we only use python-yaml for our API
        # docs, importing it lazily here is a significant optimization
        # to `manage.py` startup.
        #
        # There is a bit of a race here...we may have two processes
        # accessing this module level object and both trying to
        # populate self.data at the same time.  Hopefully this will
        # only cause some extra processing at startup and not data
        # corruption.
        from yamole import YamoleParser

        with open(self.path) as f:
            yaml_parser = YamoleParser(f)
        self.data = yaml_parser.data
        self.create_regex_dict()
        self.last_update = os.path.getmtime(self.path)

    def create_regex_dict(self) -> None:
        # Algorithm description:
        # We have 2 types of endpoints
        # 1.with path arguments 2. without path arguments
        # In validate_against_openapi_schema we directly check
        # if we have a without path endpoint, since it does not
        # require regex. Hence they are not part of the regex dict
        # and now we are left with only:
        # endpoint with path arguments.
        # Now for this case, the regex has been created carefully,
        # numeric arguments are matched with [0-9] only and
        # emails are matched with their regex. This is why there are zero
        # collisions. Hence if this regex matches
        # an incorrect endpoint then there is some backend problem.
        # For example if we have users/{name}/presence then it will
        # conflict with users/me/presence even in the backend.
        # Care should be taken though that if we have special strings
        # such as email they must be substituted with proper regex.

        email_regex = r'([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})'
        self.regex_dict = {}
        for key in self.data['paths']:
            if '{' not in key:
                # Argument-free paths are looked up directly; no regex needed.
                continue
            regex_key = '^' + key + '$'
            # Numeric arguments have id at their end
            # so find such arguments and replace them with numeric
            # regex
            regex_key = re.sub(r'{[^}]*id}', r'[0-9]*', regex_key)
            # Email arguments end with email
            regex_key = re.sub(r'{[^}]*email}', email_regex, regex_key)
            # All other types of arguments are supposed to be
            # all-encompassing string.
            regex_key = re.sub(r'{[^}]*}', r'[^\/]*', regex_key)
            regex_key = regex_key.replace(r'/', r'\/')
            self.regex_dict[regex_key] = key

    def _reload_if_changed(self) -> None:
        """Reload the OpenAPI file if it has been modified after the last
        time it was read.  Shared by spec() and regex_keys().
        """
        last_modified = os.path.getmtime(self.path)
        # Using != rather than < to cover the corner case of users placing an
        # earlier version than the current one
        if self.last_update != last_modified:
            self.reload()

    def spec(self) -> Dict[str, Any]:
        """Reload the OpenAPI file if it has been modified after the last time
        it was read, and then return the parsed data.
        """
        self._reload_if_changed()
        assert len(self.data) > 0
        return self.data

    def regex_keys(self) -> Dict[str, str]:
        """Reload the OpenAPI file if it has been modified after the last time
        it was read, and then return the regex dict mapping URL regexes
        to the parameterized OpenAPI paths they match.
        """
        self._reload_if_changed()
        assert len(self.regex_dict) > 0
        return self.regex_dict
|
|
|
|
|
|
|
|
|
2018-05-31 19:41:17 +02:00
|
|
|
class SchemaError(Exception):
    """Raised when content fails validation against the OpenAPI schema,
    or when the schema itself violates our documentation policies."""
    pass
|
2018-05-15 19:28:42 +02:00
|
|
|
|
2018-08-07 23:40:07 +02:00
|
|
|
# Shared module-level spec instance; parsing happens lazily on first access.
openapi_spec = OpenAPISpec(OPENAPI_SPEC_PATH)
|
|
|
|
|
2020-04-17 19:16:43 +02:00
|
|
|
def get_schema(endpoint: str, method: str, response: str) -> Dict[str, Any]:
    """Return the application/json schema for an endpoint/method/response.

    ``response`` is a status-code string like ``'200'``, optionally
    suffixed with ``'_<index>'`` to pick one variant of a ``oneOf``
    schema (e.g. ``'200_1'``).
    """
    def response_schema(code: str) -> Dict[str, Any]:
        # Look up the JSON schema for a plain three-character status code.
        responses = openapi_spec.spec()['paths'][endpoint][method.lower()]['responses']
        return responses[code]['content']['application/json']['schema']

    if len(response) == 3 and 'oneOf' in response_schema(response):
        # Currently at places where multiple schemas are defined they only
        # differ in example so either can be used.
        response += '_0'
    if len(response) == 3:
        return response_schema(response)
    # Suffixed code such as '200_1': select that entry of the oneOf list.
    variant_index = int(response[4])
    return response_schema(response[0:3])['oneOf'][variant_index]
|
|
|
|
|
2018-05-15 19:28:42 +02:00
|
|
|
def get_openapi_fixture(endpoint: str, method: str,
                        response: str='200') -> Dict[str, Any]:
    """Fetch a fixture from the full spec object.
    """
    schema = get_schema(endpoint, method, response)
    return schema['example']
|
2018-05-15 19:28:42 +02:00
|
|
|
|
2020-04-28 12:13:46 +02:00
|
|
|
def get_openapi_description(endpoint: str, method: str) -> str:
    """Fetch a description from the full spec object.
    """
    endpoint_documentation = openapi_spec.spec()['paths'][endpoint]
    return endpoint_documentation[method.lower()]['description']
|
|
|
|
|
2019-07-08 14:08:02 +02:00
|
|
|
def get_openapi_paths() -> Set[str]:
    """Return the set of all endpoint paths documented in the spec."""
    return {path for path in openapi_spec.spec()['paths']}
|
|
|
|
|
2019-08-17 01:21:08 +02:00
|
|
|
def get_openapi_parameters(endpoint: str, method: str,
                           include_url_parameters: bool=True) -> List[Dict[str, Any]]:
    """Return the declared parameters for the given endpoint and method."""
    openapi_endpoint = openapi_spec.spec()['paths'][endpoint][method.lower()]
    # We do a `.get()` for this last bit to distinguish documented
    # endpoints with no parameters (empty list) from undocumented
    # endpoints (KeyError exception).
    parameters = openapi_endpoint.get('parameters', [])
    if include_url_parameters:
        return parameters
    # Also, we skip parameters defined in the URL.
    return [parameter for parameter in parameters
            if parameter['in'] != 'path']
|
2018-05-31 19:41:17 +02:00
|
|
|
|
2020-05-20 11:57:57 +02:00
|
|
|
def get_openapi_return_values(endpoint: str, method: str,
                              include_url_parameters: bool=True) -> Dict[str, Any]:
    """Fetch the documented return values (the `properties` map of the
    200 response's schema) for an endpoint/method pair.

    The return annotation previously claimed List[...], but this
    function returns the `properties` dict of the schema; fixed here.
    `include_url_parameters` is currently unused and kept only so
    existing call sites passing it keep working.
    """
    openapi_endpoint = openapi_spec.spec()['paths'][endpoint][method.lower()]
    response = openapi_endpoint['responses']['200']['content']['application/json']['schema']
    # In cases where we have used oneOf, the schemas only differ in examples
    # So we can choose any.
    if 'oneOf' in response:
        response = response['oneOf'][0]
    return response['properties']
|
|
|
|
|
2020-06-13 17:59:46 +02:00
|
|
|
def match_against_openapi_regex(endpoint: str) -> Optional[str]:
    """Return the parameterized OpenAPI path whose regex matches the
    concrete `endpoint` URL, or None if the endpoint is undocumented.
    """
    # Iterate the (regex, documented path) pairs once, rather than
    # calling openapi_spec.regex_keys() a second time (and re-running
    # its staleness check) to look up the matched key.
    for regex, documented_path in openapi_spec.regex_keys().items():
        if re.match(regex, endpoint):
            return documented_path
    return None
|
2020-06-02 18:04:03 +02:00
|
|
|
|
2018-05-31 19:41:17 +02:00
|
|
|
def validate_against_openapi_schema(content: Dict[str, Any], endpoint: str,
                                    method: str, response: str) -> bool:
    """Compare a "content" dict with the defined schema for a specific method
    in an endpoint. Return true if validated and false if skipped.

    content: the actual response payload being validated.
    endpoint: the URL path; concrete paths are resolved to their
        parameterized OpenAPI path via match_against_openapi_regex.
    method: HTTP method name; lowercased before lookup.
    response: status-code string, e.g. '200' (get_schema also accepts
        a '_<index>' suffix for oneOf variants).

    Raises SchemaError if a 2xx response's content has a non-success
    'result' field, or (via OAS30Validator) if validation fails.
    """

    # This first set of checks are primarily training wheels that we
    # hope to eliminate over time as we improve our API documentation.

    # No 500 responses have been documented, so skip them
    if response.startswith('5'):
        return False
    if endpoint not in openapi_spec.spec()['paths'].keys():
        match = match_against_openapi_regex(endpoint)
        # If it doesn't match it hasn't been documented yet.
        if match is None:
            return False
        # Use the parameterized path for all the lookups below.
        endpoint = match
    # Excluded endpoint/methods
    if endpoint + ':' + method in EXCLUDE_UNDOCUMENTED_ENDPOINTS:
        return False
    # Return true for endpoints with only response documentation remaining
    if endpoint + ':' + method in EXCLUDE_DOCUMENTED_ENDPOINTS:
        return True
    # Check if the response matches its code
    if response.startswith('2') and (content.get('result', 'success').lower() != 'success'):
        raise SchemaError("Response is not 200 but is validating against 200 schema")
    # Code is not declared but appears in various 400 responses. If
    # common, it can be added to 400 response schema
    if response.startswith('4'):
        # This return statement should ideally be not here. But since
        # we have not defined 400 responses for various paths this has
        # been added as all 400 have the same schema. When all 400
        # response have been defined this should be removed.
        return True

    # The actual work of validating that the response matches the
    # schema is done via the third-party OAS30Validator.
    schema = get_schema(endpoint, method, response)
    validator = OAS30Validator(schema)
    validator.validate(content)
    return True
|
2020-06-02 18:04:03 +02:00
|
|
|
|
2020-07-01 19:07:31 +02:00
|
|
|
def validate_schema_array(schema: Dict[str, Any]) -> None:
    """Helper function for validate_schema: recursively check the
    `items` of an array schema, covering each `oneOf` variant when
    one is present.
    """
    items = schema['items']
    # Either every oneOf variant, or just the single items schema.
    variants = items['oneOf'] if 'oneOf' in items else [items]
    for variant in variants:
        if variant['type'] == 'array':
            validate_schema_array(variant)
        elif variant['type'] == 'object':
            validate_schema(variant)
|
2020-06-20 19:25:32 +02:00
|
|
|
|
2020-07-01 19:07:31 +02:00
|
|
|
def validate_schema(schema: Dict[str, Any]) -> None:
    """Check if opaque objects are present in the OpenAPI spec; this is an
    important part of our policy for ensuring every detail of Zulip's
    API responses is correct.

    This is done by checking for the presence of the
    `additionalProperties` attribute for all objects (dictionaries).

    Raises SchemaError for any object schema lacking
    `additionalProperties`; recurses into nested object/array schemas
    (including `oneOf` variants) via validate_schema_array.
    """
    if 'additionalProperties' not in schema:
        # Fixed: the two string halves previously concatenated without a
        # separating space, producing "...to makesure they have...".
        raise SchemaError('additionalProperties needs to be defined for objects to make '
                          'sure they have no additional properties left to be documented.')
    for key in schema.get('properties', dict()):
        if 'oneOf' in schema['properties'][key]:
            for types in schema['properties'][key]['oneOf']:
                if types['type'] == 'object':
                    validate_schema(types)
                elif types['type'] == 'array':
                    validate_schema_array(types)
        else:
            if schema['properties'][key]['type'] == 'object':
                validate_schema(schema['properties'][key])
            elif schema['properties'][key]['type'] == 'array':
                validate_schema_array(schema['properties'][key])
    # A truthy additionalProperties (a schema dict) must itself be checked.
    if schema['additionalProperties']:
        if schema['additionalProperties']['type'] == 'array':
            validate_schema_array(schema['additionalProperties'])
        elif schema['additionalProperties']['type'] == 'object':
            validate_schema(schema['additionalProperties'])
|
2018-05-31 19:41:17 +02:00
|
|
|
|
|
|
|
def to_python_type(py_type: str) -> type:
    """Transform an OpenAPI-like type to a Python one.

    https://swagger.io/docs/specification/data-models/data-types
    """
    # Mapping from OpenAPI data-type names to their Python equivalents.
    openapi_to_python = dict(
        string=str,
        number=float,
        integer=int,
        boolean=bool,
        array=list,
        object=dict,
    )
    return openapi_to_python[py_type]
|
2020-06-26 16:18:27 +02:00
|
|
|
|
|
|
|
def likely_deprecated_parameter(parameter_description: str) -> bool:
    """Heuristically detect whether a parameter's description text marks
    it as deprecated.
    """
    deprecation_markers = ('**Changes**: Deprecated', '**Deprecated**')
    return any(marker in parameter_description for marker in deprecation_markers)
|