presence: Add history_limit_days param to the API.

This param allows clients to specify how much presence history they want
to fetch. Previously, the server always returned 14 days of history.
With the recent migration of the presence API to the much more efficient
system relying on incremental fetches via the last_update_id param added
in #29999, we can now afford to provide much more history to clients
that request it - as all that historical data will only be fetched once.

There are three endpoints involved:
- `/register` - this is the main useful endpoint for this, used by API
clients to fetch initial data and register an events queue. Clients can
pass the `presence_history_limit_days` param here.
- `/users/me/presence` - this endpoint is currently used by clients to
update their presence status and fetch incremental data, making the new
functionality not particularly useful here. However, we still add the
new `history_limit_days` param here, in case in the future clients
transition to using this also for the initial presence data fetch.
- `/` - used when opening the webapp. Naturally, params aren't passed
here, so the server just assumes a value from
`settings.PRESENCE_HISTORY_LIMIT_DAYS_FOR_WEB_APP` and returns
information about this default value in page_params.
This commit is contained in:
Mateusz Mandera 2024-08-30 00:55:09 +02:00 committed by Tim Abbott
parent 6ce096c0ff
commit a36f906d1a
14 changed files with 150 additions and 24 deletions

View File

@ -20,6 +20,17 @@ format used by the Zulip server that they are interacting with.
## Changes in Zulip 10.0 ## Changes in Zulip 10.0
**Feature level 288**
* [`POST /register`](/api/register-queue):
A new `presence_history_limit_days` parameter can be given, instructing
the server to only fetch presence data more recent than the given
number of days ago.
* [`POST /users/me/presence`](/api/update-presence):
A new `history_limit_days` parameter can be given, with the
same meaning as in the `presence_history_limit_days` parameter of
[`POST /register`](/api/register-queue) above.
**Feature level 287** **Feature level 287**
* [Markdown message * [Markdown message

View File

@ -34,8 +34,7 @@ DESKTOP_WARNING_VERSION = "5.9.3"
# new level means in api_docs/changelog.md, as well as "**Changes**" # new level means in api_docs/changelog.md, as well as "**Changes**"
# entries in the endpoint's documentation in `zulip.yaml`. # entries in the endpoint's documentation in `zulip.yaml`.
API_FEATURE_LEVEL = 288 # Last bumped for presence_history_limit_days
API_FEATURE_LEVEL = 287 # original-dimensions on spinner
# Bump the minor PROVISION_VERSION to indicate that folks should provision # Bump the minor PROVISION_VERSION to indicate that folks should provision
# only when going from an old version of the code to a newer version. Bump # only when going from an old version of the code to a newer version. Bump

View File

@ -43,6 +43,7 @@ const home_params_schema = default_params_schema
narrow: z.optional(z.array(narrow_term_schema)), narrow: z.optional(z.array(narrow_term_schema)),
narrow_stream: z.optional(z.string()), narrow_stream: z.optional(z.string()),
narrow_topic: z.optional(z.string()), narrow_topic: z.optional(z.string()),
presence_history_limit_days_for_web_app: z.number(),
promote_sponsoring_zulip: z.boolean(), promote_sponsoring_zulip: z.boolean(),
// `realm_rendered_description` is only sent for spectators, because // `realm_rendered_description` is only sent for spectators, because
// it isn't displayed for logged-in users and requires markdown // it isn't displayed for logged-in users and requires markdown

View File

@ -1,7 +1,10 @@
import assert from "minimalistic-assert";
import * as hash_util from "./hash_util"; import * as hash_util from "./hash_util";
import {$t} from "./i18n"; import {$t} from "./i18n";
import * as muted_users from "./muted_users"; import * as muted_users from "./muted_users";
import * as narrow_state from "./narrow_state"; import * as narrow_state from "./narrow_state";
import {page_params} from "./page_params";
import * as people from "./people"; import * as people from "./people";
import * as presence from "./presence"; import * as presence from "./presence";
import {realm} from "./state_data"; import {realm} from "./state_data";
@ -142,13 +145,10 @@ export function user_last_seen_time_status(user_id: number): string {
return $t({defaultMessage: "Activity unknown"}); return $t({defaultMessage: "Activity unknown"});
} else if (last_active_date === undefined) { } else if (last_active_date === undefined) {
// There are situations where the client has incomplete presence // There are situations where the client has incomplete presence
// history on a user. This can happen when users are deactivated, // history on a user. This can happen when users are deactivated,
// or when they just haven't been present in a long time (and we // or when the user's last activity is older than what we fetch.
// may have queries on presence that go back only N weeks). assert(page_params.presence_history_limit_days_for_web_app === 365);
// return $t({defaultMessage: "Not active in the last year"});
// We give this vague status for such users; we will get to
// delete this code when we finish rewriting the presence API.
return $t({defaultMessage: "Active more than 2 weeks ago"});
} }
return timerender.last_seen_status_from_date(last_active_date); return timerender.last_seen_status_from_date(last_active_date);
} }

View File

@ -6,7 +6,7 @@ const _ = require("lodash");
const {mock_esm, zrequire} = require("./lib/namespace"); const {mock_esm, zrequire} = require("./lib/namespace");
const {run_test} = require("./lib/test"); const {run_test} = require("./lib/test");
const {current_user, realm, user_settings} = require("./lib/zpage_params"); const {current_user, page_params, realm, user_settings} = require("./lib/zpage_params");
mock_esm("../src/settings_data", { mock_esm("../src/settings_data", {
user_can_access_all_other_users: () => true, user_can_access_all_other_users: () => true,
@ -160,6 +160,7 @@ test("user_circle, level", () => {
}); });
test("title_data", () => { test("title_data", () => {
page_params.presence_history_limit_days_for_web_app = 365;
add_canned_users(); add_canned_users();
// Groups // Groups
@ -210,7 +211,7 @@ test("title_data", () => {
expected_data = { expected_data = {
first_line: "Old User", first_line: "Old User",
second_line: "translated: Active more than 2 weeks ago", second_line: "translated: Not active in the last year",
third_line: "", third_line: "",
show_you: false, show_you: false,
}; };
@ -398,6 +399,7 @@ test("compare_function", () => {
}); });
test("user_last_seen_time_status", ({override}) => { test("user_last_seen_time_status", ({override}) => {
page_params.presence_history_limit_days_for_web_app = 365;
set_presence(selma.user_id, "active"); set_presence(selma.user_id, "active");
set_presence(me.user_id, "active"); set_presence(me.user_id, "active");
@ -411,7 +413,7 @@ test("user_last_seen_time_status", ({override}) => {
realm.realm_is_zephyr_mirror_realm = false; realm.realm_is_zephyr_mirror_realm = false;
assert.equal( assert.equal(
buddy_data.user_last_seen_time_status(old_user.user_id), buddy_data.user_last_seen_time_status(old_user.user_id),
"translated: Active more than 2 weeks ago", "translated: Not active in the last year",
); );
presence.presence_info.set(old_user.user_id, {last_active: 1526137743}); presence.presence_info.set(old_user.user_id, {last_active: 1526137743});

View File

@ -135,6 +135,7 @@ def fetch_initial_state_data(
user_settings_object: bool = False, user_settings_object: bool = False,
slim_presence: bool = False, slim_presence: bool = False,
presence_last_update_id_fetched_by_client: int | None = None, presence_last_update_id_fetched_by_client: int | None = None,
presence_history_limit_days: int | None = None,
include_subscribers: bool = True, include_subscribers: bool = True,
include_streams: bool = True, include_streams: bool = True,
spectator_requested_language: str | None = None, spectator_requested_language: str | None = None,
@ -247,6 +248,7 @@ def fetch_initial_state_data(
realm, realm,
slim_presence, slim_presence,
last_update_id_fetched_by_client=presence_last_update_id_fetched_by_client, last_update_id_fetched_by_client=presence_last_update_id_fetched_by_client,
history_limit_days=presence_history_limit_days,
requesting_user_profile=user_profile, requesting_user_profile=user_profile,
) )
state["presences"] = presences state["presences"] = presences
@ -1676,6 +1678,7 @@ def do_events_register(
client_gravatar: bool = False, client_gravatar: bool = False,
slim_presence: bool = False, slim_presence: bool = False,
presence_last_update_id_fetched_by_client: int | None = None, presence_last_update_id_fetched_by_client: int | None = None,
presence_history_limit_days: int | None = None,
event_types: Sequence[str] | None = None, event_types: Sequence[str] | None = None,
queue_lifespan_secs: int = 0, queue_lifespan_secs: int = 0,
all_public_streams: bool = False, all_public_streams: bool = False,
@ -1736,6 +1739,7 @@ def do_events_register(
# These presence params are a noop, because presence is not included. # These presence params are a noop, because presence is not included.
slim_presence=True, slim_presence=True,
presence_last_update_id_fetched_by_client=None, presence_last_update_id_fetched_by_client=None,
presence_history_limit_days=None,
# Force include_subscribers=False for security reasons. # Force include_subscribers=False for security reasons.
include_subscribers=include_subscribers, include_subscribers=include_subscribers,
# Force include_streams=False for security reasons. # Force include_streams=False for security reasons.
@ -1784,6 +1788,7 @@ def do_events_register(
user_settings_object=user_settings_object, user_settings_object=user_settings_object,
slim_presence=slim_presence, slim_presence=slim_presence,
presence_last_update_id_fetched_by_client=presence_last_update_id_fetched_by_client, presence_last_update_id_fetched_by_client=presence_last_update_id_fetched_by_client,
presence_history_limit_days=presence_history_limit_days,
include_subscribers=include_subscribers, include_subscribers=include_subscribers,
include_streams=include_streams, include_streams=include_streams,
pronouns_field_type_supported=pronouns_field_type_supported, pronouns_field_type_supported=pronouns_field_type_supported,

View File

@ -168,6 +168,7 @@ def build_page_params_for_home_page_load(
client_gravatar=True, client_gravatar=True,
slim_presence=True, slim_presence=True,
presence_last_update_id_fetched_by_client=-1, presence_last_update_id_fetched_by_client=-1,
presence_history_limit_days=settings.PRESENCE_HISTORY_LIMIT_DAYS_FOR_WEB_APP,
client_capabilities=client_capabilities, client_capabilities=client_capabilities,
narrow=narrow, narrow=narrow,
include_streams=False, include_streams=False,
@ -224,6 +225,7 @@ def build_page_params_for_home_page_load(
# 2FA is not enabled. # 2FA is not enabled.
two_fa_enabled_user=two_fa_enabled and bool(default_device(user_profile)), two_fa_enabled_user=two_fa_enabled and bool(default_device(user_profile)),
is_spectator=user_profile is None, is_spectator=user_profile is None,
presence_history_limit_days_for_web_app=settings.PRESENCE_HISTORY_LIMIT_DAYS_FOR_WEB_APP,
# There is no event queue for spectators since # There is no event queue for spectators since
# events support for spectators is not implemented yet. # events support for spectators is not implemented yet.
no_event_queue=user_profile is None, no_event_queue=user_profile is None,

View File

@ -151,23 +151,41 @@ def get_presence_dict_by_realm(
realm: Realm, realm: Realm,
slim_presence: bool = False, slim_presence: bool = False,
last_update_id_fetched_by_client: int | None = None, last_update_id_fetched_by_client: int | None = None,
history_limit_days: int | None = None,
requesting_user_profile: UserProfile | None = None, requesting_user_profile: UserProfile | None = None,
) -> tuple[dict[str, dict[str, Any]], int]: ) -> tuple[dict[str, dict[str, Any]], int]:
two_weeks_ago = timezone_now() - timedelta(weeks=2) now = timezone_now()
if history_limit_days is not None:
fetch_since_datetime = now - timedelta(days=history_limit_days)
else:
# The original behavior for this API was to return last two weeks
# of data at most, so we preserve that when the history_limit_days
# param is not provided.
fetch_since_datetime = now - timedelta(days=14)
kwargs: dict[str, object] = dict() kwargs: dict[str, object] = dict()
if last_update_id_fetched_by_client is not None: if last_update_id_fetched_by_client is not None:
kwargs["last_update_id__gt"] = last_update_id_fetched_by_client kwargs["last_update_id__gt"] = last_update_id_fetched_by_client
query = UserPresence.objects.filter( if last_update_id_fetched_by_client is None or last_update_id_fetched_by_client <= 0:
realm_id=realm.id, # If the client already has fetched some presence data, as indicated by
user_profile__is_active=True, # last_update_id_fetched_by_client, then filtering by last_connected_time
user_profile__is_bot=False, # is redundant, as it shouldn't affect the results.
# We can consider tweaking this value when last_update_id is being used, kwargs["last_connected_time__gte"] = fetch_since_datetime
# to potentially fetch more data since such a client is expected to only
# do it once and then only do small, incremental fetches. if history_limit_days != 0:
last_connected_time__gte=two_weeks_ago, query = UserPresence.objects.filter(
**kwargs, realm_id=realm.id,
) user_profile__is_active=True,
user_profile__is_bot=False,
**kwargs,
)
else:
# If history_limit_days is 0, the client doesn't want any presence data.
# Explicitly return an empty QuerySet to avoid a query or races which
# might cause a UserPresence row to get fetched if it gets updated
# during the execution of this function.
query = UserPresence.objects.none()
if settings.CAN_ACCESS_ALL_USERS_GROUP_LIMITS_PRESENCE and not check_user_can_access_all_users( if settings.CAN_ACCESS_ALL_USERS_GROUP_LIMITS_PRESENCE and not check_user_can_access_all_users(
requesting_user_profile requesting_user_profile
@ -213,6 +231,7 @@ def get_presences_for_realm(
realm: Realm, realm: Realm,
slim_presence: bool, slim_presence: bool,
last_update_id_fetched_by_client: int | None, last_update_id_fetched_by_client: int | None,
history_limit_days: int | None,
requesting_user_profile: UserProfile, requesting_user_profile: UserProfile,
) -> tuple[dict[str, dict[str, dict[str, Any]]], int]: ) -> tuple[dict[str, dict[str, dict[str, Any]]], int]:
if realm.presence_disabled: if realm.presence_disabled:
@ -223,6 +242,7 @@ def get_presences_for_realm(
realm, realm,
slim_presence, slim_presence,
last_update_id_fetched_by_client, last_update_id_fetched_by_client,
history_limit_days,
requesting_user_profile=requesting_user_profile, requesting_user_profile=requesting_user_profile,
) )
@ -231,6 +251,7 @@ def get_presence_response(
requesting_user_profile: UserProfile, requesting_user_profile: UserProfile,
slim_presence: bool, slim_presence: bool,
last_update_id_fetched_by_client: int | None = None, last_update_id_fetched_by_client: int | None = None,
history_limit_days: int | None = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
realm = requesting_user_profile.realm realm = requesting_user_profile.realm
server_timestamp = time.time() server_timestamp = time.time()
@ -238,6 +259,7 @@ def get_presence_response(
realm, realm,
slim_presence, slim_presence,
last_update_id_fetched_by_client, last_update_id_fetched_by_client,
history_limit_days,
requesting_user_profile=requesting_user_profile, requesting_user_profile=requesting_user_profile,
) )

View File

@ -9190,6 +9190,21 @@ paths:
server sent user presence data for all users who had been active in the server sent user presence data for all users who had been active in the
last two weeks unconditionally. last two weeks unconditionally.
example: 5 example: 5
history_limit_days:
type: integer
description: |
Limits how far back in time to fetch user presence data. If not specified,
defaults to 14 days. A value of N means that the oldest presence data
fetched will be from at most N days ago.
Note that this is only useful during the initial user presence data fetch,
as subsequent fetches should use the `last_update_id` parameter, which
will act as the limit on how much presence data is returned. `history_limit_days`
is ignored if `last_update_id` is passed with a value greater than `0`,
indicating that the client already has some presence data.
**Changes**: New in Zulip 10.0 (feature level 288).
example: 365
new_user_input: new_user_input:
type: boolean type: boolean
description: | description: |
@ -13350,6 +13365,15 @@ paths:
type: boolean type: boolean
default: false default: false
example: true example: true
presence_history_limit_days:
description: |
Limits how far back in time to fetch user presence data. If not specified,
defaults to 14 days. A value of N means that the oldest presence data
fetched will be from at most N days ago.
**Changes**: New in Zulip 10.0 (feature level 288).
type: integer
example: 365
event_types: event_types:
$ref: "#/components/schemas/Event_types" $ref: "#/components/schemas/Event_types"
all_public_streams: all_public_streams:

View File

@ -57,6 +57,7 @@ class HomeTest(ZulipTestCase):
"narrow_stream", "narrow_stream",
"no_event_queue", "no_event_queue",
"page_type", "page_type",
"presence_history_limit_days_for_web_app",
"promote_sponsoring_zulip", "promote_sponsoring_zulip",
"request_language", "request_language",
"show_billing", "show_billing",
@ -362,6 +363,7 @@ class HomeTest(ZulipTestCase):
"login_page", "login_page",
"no_event_queue", "no_event_queue",
"page_type", "page_type",
"presence_history_limit_days_for_web_app",
"promote_sponsoring_zulip", "promote_sponsoring_zulip",
"realm_rendered_description", "realm_rendered_description",
"request_language", "request_language",

View File

@ -248,6 +248,53 @@ class UserPresenceTests(ZulipTestCase):
result = self.client_post("/json/users/me/presence", {"status": "foo"}) result = self.client_post("/json/users/me/presence", {"status": "foo"})
self.assert_json_error(result, "Invalid status: foo") self.assert_json_error(result, "Invalid status: foo")
def test_history_limit_days_api(self) -> None:
UserPresence.objects.all().delete()
hamlet = self.example_user("hamlet")
othello = self.example_user("othello")
iago = self.example_user("iago")
self.login_user(hamlet)
params = dict(status="idle", last_update_id=-1)
result = self.client_post("/json/users/me/presence", params)
self.assert_json_success(result)
self.login_user(othello)
params = dict(status="idle", last_update_id=-1)
result = self.client_post("/json/users/me/presence", params)
self.assert_json_success(result)
othello_presence = UserPresence.objects.get(user_profile=othello)
assert othello_presence.last_connected_time is not None
othello_presence.last_connected_time = othello_presence.last_connected_time - timedelta(
days=100
)
othello_presence.save()
# The initial presence state has been set up for the test. Now we can proceed with verifying
# the behavior of the history_limit_days parameter.
self.login_user(iago)
params = dict(status="idle", last_update_id=-1, history_limit_days=50)
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
presences = json["presences"]
self.assertEqual(set(presences.keys()), {str(hamlet.id), str(iago.id)})
params = dict(status="idle", last_update_id=-1, history_limit_days=101)
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
presences = json["presences"]
self.assertEqual(set(presences.keys()), {str(hamlet.id), str(iago.id), str(othello.id)})
# history_limit_days=0 means the client doesn't want any presence data.
params = dict(status="idle", last_update_id=-1, history_limit_days=0)
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
presences = json["presences"]
self.assertEqual(set(presences.keys()), set())
def test_last_update_id_api(self) -> None: def test_last_update_id_api(self) -> None:
UserPresence.objects.all().delete() UserPresence.objects.all().delete()

View File

@ -43,6 +43,7 @@ def events_register_backend(
apply_markdown: Json[bool] = False, apply_markdown: Json[bool] = False,
client_gravatar_raw: Annotated[Json[bool | None], ApiParamConfig("client_gravatar")] = None, client_gravatar_raw: Annotated[Json[bool | None], ApiParamConfig("client_gravatar")] = None,
slim_presence: Json[bool] = False, slim_presence: Json[bool] = False,
presence_history_limit_days: Json[int] | None = None,
all_public_streams: Json[bool] | None = None, all_public_streams: Json[bool] | None = None,
include_subscribers: Json[bool] = False, include_subscribers: Json[bool] = False,
client_capabilities: Json[ClientCapabilities] | None = None, client_capabilities: Json[ClientCapabilities] | None = None,
@ -120,6 +121,7 @@ def events_register_backend(
client_gravatar, client_gravatar,
slim_presence, slim_presence,
None, None,
presence_history_limit_days,
event_types, event_types,
queue_lifespan_secs, queue_lifespan_secs,
all_public_streams, all_public_streams,

View File

@ -154,6 +154,7 @@ def update_active_status_backend(
new_user_input: Json[bool] = False, new_user_input: Json[bool] = False,
slim_presence: Json[bool] = False, slim_presence: Json[bool] = False,
last_update_id: Json[int] | None = None, last_update_id: Json[int] | None = None,
history_limit_days: Json[int] | None = None,
) -> HttpResponse: ) -> HttpResponse:
if last_update_id is not None: if last_update_id is not None:
# This param being submitted by the client, means they want to use # This param being submitted by the client, means they want to use
@ -172,7 +173,10 @@ def update_active_status_backend(
ret: dict[str, Any] = {} ret: dict[str, Any] = {}
else: else:
ret = get_presence_response( ret = get_presence_response(
user_profile, slim_presence, last_update_id_fetched_by_client=last_update_id user_profile,
slim_presence,
last_update_id_fetched_by_client=last_update_id,
history_limit_days=history_limit_days,
) )
if user_profile.realm.is_zephyr_mirror_realm: if user_profile.realm.is_zephyr_mirror_realm:

View File

@ -574,6 +574,11 @@ PRESENCE_UPDATE_MIN_FREQ_SECONDS = 55
# we will specify ACTIVE status as long as the timedelta is within this limit and IDLE otherwise. # we will specify ACTIVE status as long as the timedelta is within this limit and IDLE otherwise.
PRESENCE_LEGACY_EVENT_OFFSET_FOR_ACTIVITY_SECONDS = 70 PRESENCE_LEGACY_EVENT_OFFSET_FOR_ACTIVITY_SECONDS = 70
# The web app doesn't pass params to / when initially loading, so it can't directly
# pick its history_limit_days value. Instead, the server chooses the value and
# passes it to the web app in page_params.
PRESENCE_HISTORY_LIMIT_DAYS_FOR_WEB_APP = 365
# How many days deleted messages data should be kept before being # How many days deleted messages data should be kept before being
# permanently deleted. # permanently deleted.
ARCHIVED_DATA_VACUUMING_DELAY_DAYS = 30 ARCHIVED_DATA_VACUUMING_DELAY_DAYS = 30