refactor: Simplify recent_senders code.
This reduces our dependency on message_list code (via
message_util), and it makes moving streams/topics and
deleting messages more performant.
For every single message that was being updated or
deleted, the previous code was basically re-computing
lots of things, including having to iterate through
every message in memory to find the messages matching
your topic.
Now everything basically happens in O(1) time.
The only O(N) computation is that we now lazily
re-compute the max message id every time you need it
for typeahead logic, and then we cache it for
subsequent use. The N here is the number of messages
that the particular sender has sent to the particular
stream/topic combination, so it should always be quite
small, except for certain spammy bots.
Once the max has been calculated, the common operation
of adding a message doesn't invalidate our cached
value. We only invalidate the cache on deletes.
The main change that we make here from a data
standpoint is that we just keep track of all
message_ids for all senders. The storage overhead here
should be negligible. By keeping track of our own
messages, we don't have to punt to other code for
update/delete situations.
There is similar code in recent_topics that I think can
be improved in similar ways, and it would allow us to
eliminate functions like this one:
export function get_messages_in_topic(stream_id, topic) {
return message_list.all
.all_messages()
.filter(
(x) =>
x.type === "stream" &&
x.stream_id === stream_id &&
x.topic.toLowerCase() === topic.toLowerCase(),
);
}
2021-03-29 19:42:44 +02:00
|
|
|
import _ from "lodash";
|
|
|
|
|
2021-02-28 00:44:37 +01:00
|
|
|
import {FoldDict} from "./fold_dict";
|
refactor: Simplify recent_senders code.
This reduces our dependency on message_list code (via
message_util), and it makes moving streams/topics and
deleting messages more performant.
For every single message that was being updated or
deleted, the previous code was basically re-computing
lots of things, including having to iterate through
every message in memory to find the messages matching
your topic.
Now everything basically happens in O(1) time.
The only O(N) computation is that we now lazily
re-compute the max message id every time you need it
for typeahead logic, and then we cache it for
subsequent use. The N here is the number of messages
that the particular sender has sent to the particular
stream/topic combination, so it should always be quite
small, except for certain spammy bots.
Once the max has been calculated, the common operation
of adding a message doesn't invalidate our cached
value. We only invalidate the cache on deletes.
The main change that we make here from a data
standpoint is that we just keep track of all
message_ids for all senders. The storage overhead here
should be negligible. By keeping track of our own
messages, we don't have to punt to other code for
update/delete situations.
There is similar code in recent_topics that I think can
be improved in similar ways, and it would allow us to
eliminate functions like this one:
export function get_messages_in_topic(stream_id, topic) {
return message_list.all
.all_messages()
.filter(
(x) =>
x.type === "stream" &&
x.stream_id === stream_id &&
x.topic.toLowerCase() === topic.toLowerCase(),
);
}
2021-03-29 19:42:44 +02:00
|
|
|
import * as message_store from "./message_store";
|
2022-11-19 07:39:00 +01:00
|
|
|
import * as people from "./people";
|
refactor: Simplify recent_senders code.
This reduces our dependency on message_list code (via
message_util), and it makes moving streams/topics and
deleting messages more performant.
For every single message that was being updated or
deleted, the previous code was basically re-computing
lots of things, including having to iterate through
every message in memory to find the messages matching
your topic.
Now everything basically happens in O(1) time.
The only O(N) computation is that we now lazily
re-compute the max message id every time you need it
for typeahead logic, and then we cache it for
subsequent use. The N here is the number of messages
that the particular sender has sent to the particular
stream/topic combination, so it should always be quite
small, except for certain spammy bots.
Once the max has been calculated, the common operation
of adding a message doesn't invalidate our cached
value. We only invalidate the cache on deletes.
The main change that we make here from a data
standpoint is that we just keep track of all
message_ids for all senders. The storage overhead here
should be negligible. By keeping track of our own
messages, we don't have to punt to other code for
update/delete situations.
There is similar code in recent_topics that I think can
be improved in similar ways, and it would allow us to
eliminate functions like this one:
export function get_messages_in_topic(stream_id, topic) {
return message_list.all
.all_messages()
.filter(
(x) =>
x.type === "stream" &&
x.stream_id === stream_id &&
x.topic.toLowerCase() === topic.toLowerCase(),
);
}
2021-03-29 19:42:44 +02:00
|
|
|
|
|
|
|
// This class is only exported for unit testing purposes.
// If we find reuse opportunities, we should just put it into
// its own module.
export class IdTracker {
    // All message ids tracked by this instance.
    ids = new Set();

    // We cache the max message id to make sure that
    // typeahead code is efficient. We don't eagerly
    // compute it, since it's plausible a spammy bot
    // could cause us to process many messages at a time
    // during fetching.
    _cached_max_id = undefined;

    // Track a message id; keeps the cached max fresh so the
    // common add path never forces a recompute.
    add(id) {
        this.ids.add(id);
        if (this._cached_max_id !== undefined && id > this._cached_max_id) {
            this._cached_max_id = id;
        }
    }

    // Stop tracking a message id. The removed id may have been
    // the max, so invalidate the cache and recompute lazily on
    // the next max_id() call.
    remove(id) {
        this.ids.delete(id);
        this._cached_max_id = undefined;
    }

    // Return the largest tracked id, or -1 if none are tracked.
    max_id() {
        if (this._cached_max_id === undefined) {
            // Recompute with a simple loop rather than
            // Math.max(...this.ids) (which can exhaust the call
            // stack for very large sets) or lodash's _.max.
            let max;
            for (const id of this.ids) {
                if (max === undefined || id > max) {
                    max = id;
                }
            }
            this._cached_max_id = max;
        }
        // Use ?? (not ||) so a legitimate id of 0 is not
        // mistaken for "no messages".
        return this._cached_max_id ?? -1;
    }

    // True when no ids are tracked.
    empty() {
        return this.ids.size === 0;
    }
}
|
|
|
|
|
|
|
|
// stream_senders[stream_id][sender_id] = IdTracker
const stream_senders = new Map();

// topic_senders[stream_id][topic_id][sender_id] = IdTracker
const topic_senders = new Map();

// pm_senders[user_ids_string][user_id] = IdTracker
const pm_senders = new Map();
|
|
|
|
|
2021-03-12 16:11:56 +01:00
|
|
|
// Reset all module state; only used by the test suite.
export function clear_for_testing() {
    stream_senders.clear();
    topic_senders.clear();
    // Also reset pm_senders; previously it was skipped, which
    // leaked direct-message sender state across tests.
    pm_senders.clear();
}
|
|
|
|
|
|
|
|
// Largest message id the given sender has sent to the given
// stream/topic, or -1 if we have none recorded.
function max_id_for_stream_topic_sender({stream_id, topic, sender_id}) {
    const id_tracker = topic_senders.get(stream_id)?.get(topic)?.get(sender_id);
    if (id_tracker === undefined) {
        return -1;
    }
    return id_tracker.max_id();
}
|
|
|
|
|
|
|
|
// Largest message id the given sender has sent to the given
// stream, or -1 if we have none recorded.
function max_id_for_stream_sender({stream_id, sender_id}) {
    const id_tracker = stream_senders.get(stream_id)?.get(sender_id);
    if (id_tracker === undefined) {
        return -1;
    }
    return id_tracker.max_id();
}
|
|
|
|
|
|
|
|
// Record that sender_id sent message_id to stream_id, lazily
// creating the per-stream and per-sender containers.
function add_stream_message({stream_id, sender_id, message_id}) {
    let sender_dict = stream_senders.get(stream_id);
    if (sender_dict === undefined) {
        sender_dict = new Map();
        stream_senders.set(stream_id, sender_dict);
    }

    let id_tracker = sender_dict.get(sender_id);
    if (id_tracker === undefined) {
        id_tracker = new IdTracker();
        sender_dict.set(sender_id, id_tracker);
    }

    id_tracker.add(message_id);
}
|
|
|
|
|
|
|
|
// Record that sender_id sent message_id to the given
// stream/topic, lazily creating the nested containers. Topics
// are keyed with a FoldDict for case-insensitive matching.
function add_topic_message({stream_id, topic, sender_id, message_id}) {
    let topic_dict = topic_senders.get(stream_id);
    if (topic_dict === undefined) {
        topic_dict = new FoldDict();
        topic_senders.set(stream_id, topic_dict);
    }

    let sender_dict = topic_dict.get(topic);
    if (sender_dict === undefined) {
        sender_dict = new Map();
        topic_dict.set(topic, sender_dict);
    }

    let id_tracker = sender_dict.get(sender_id);
    if (id_tracker === undefined) {
        id_tracker = new IdTracker();
        sender_dict.set(sender_id, id_tracker);
    }

    id_tracker.add(message_id);
}
|
|
|
|
|
2022-11-22 11:45:29 +01:00
|
|
|
// Index a newly-seen stream message under both its stream and
// its stream/topic pair.
export function process_stream_message(message) {
    const {stream_id, topic, sender_id, id: message_id} = message;

    add_stream_message({stream_id, sender_id, message_id});
    add_topic_message({stream_id, topic, sender_id, message_id});
}
|
2017-06-01 07:42:57 +02:00
|
|
|
|
refactor: Simplify recent_senders code.
This reduces our dependency on message_list code (via
message_util), and it makes moving streams/topics and
deleting messages more performant.
For every single message that was being updated or
deleted, the previous code was basically re-computing
lots of things, including having to iterate through
every message in memory to find the messages matching
your topic.
Now everything basically happens in O(1) time.
The only O(N) computation is that we now lazily
re-compute the max message id every time you need it
for typeahead logic, and then we cache it for
subsequent use. The N here is the number of messages
that the particular sender has sent to the particular
stream/topic combination, so it should always be quite
small, except for certain spammy bots.
Once the max has been calculated, the common operation
of adding a message doesn't invalidate our cached
value. We only invalidate the cache on deletes.
The main change that we make here from a data
standpoint is that we just keep track of all
message_ids for all senders. The storage overhead here
should be negligible. By keeping track of our own
messages, we don't have to punt to other code for
update/delete situations.
There is similar code in recent_topics that I think can
be improved in similar ways, and it would allow us to
eliminate functions like this one:
export function get_messages_in_topic(stream_id, topic) {
return message_list.all
.all_messages()
.filter(
(x) =>
x.type === "stream" &&
x.stream_id === stream_id &&
x.topic.toLowerCase() === topic.toLowerCase(),
);
}
2021-03-29 19:42:44 +02:00
|
|
|
// Forget that sender_id sent message_id to the given
// stream/topic, pruning now-empty containers as we go. A no-op
// if we never recorded the message.
function remove_topic_message({stream_id, topic, sender_id, message_id}) {
    const topic_dict = topic_senders.get(stream_id);
    const sender_dict = topic_dict?.get(topic);
    const id_tracker = sender_dict?.get(sender_id);

    if (id_tracker === undefined) {
        return;
    }

    id_tracker.remove(message_id);
    if (id_tracker.empty()) {
        sender_dict.delete(sender_id);
    }
    if (sender_dict.size === 0) {
        topic_dict.delete(topic);
    }
}
|
2020-05-01 08:29:08 +02:00
|
|
|
|
refactor: Simplify recent_senders code.
This reduces our dependency on message_list code (via
message_util), and it makes moving streams/topics and
deleting messages more performant.
For every single message that was being updated or
deleted, the previous code was basically re-computing
lots of things, including having to iterate through
every message in memory to find the messages matching
your topic.
Now everything basically happens in O(1) time.
The only O(N) computation is that we now lazily
re-compute the max message id every time you need it
for typeahead logic, and then we cache it for
subsequent use. The N here is the number of messages
that the particular sender has sent to the particular
stream/topic combination, so it should always be quite
small, except for certain spammy bots.
Once the max has been calculated, the common operation
of adding a message doesn't invalidate our cached
value. We only invalidate the cache on deletes.
The main change that we make here from a data
standpoint is that we just keep track of all
message_ids for all senders. The storage overhead here
should be negligible. By keeping track of our own
messages, we don't have to punt to other code for
update/delete situations.
There is similar code in recent_topics that I think can
be improved in similar ways, and it would allow us to
eliminate functions like this one:
export function get_messages_in_topic(stream_id, topic) {
return message_list.all
.all_messages()
.filter(
(x) =>
x.type === "stream" &&
x.stream_id === stream_id &&
x.topic.toLowerCase() === topic.toLowerCase(),
);
}
2021-03-29 19:42:44 +02:00
|
|
|
export function process_topic_edit({
    message_ids,
    old_stream_id,
    old_topic,
    new_stream_id,
    new_topic,
}) {
    // Move each edited message's sender bookkeeping from the old
    // stream/topic to the new one.
    //
    // We intentionally leave stream_senders untouched for the old
    // stream: a user who recently posted to a stream remains relevant
    // for typeahead even after their messages are moved elsewhere or
    // deleted.
    for (const message_id of message_ids) {
        const message = message_store.get(message_id);
        if (!message) {
            // Message not in our local store; nothing to re-file.
            continue;
        }
        const {sender_id} = message;

        remove_topic_message({stream_id: old_stream_id, topic: old_topic, sender_id, message_id});
        add_topic_message({stream_id: new_stream_id, topic: new_topic, sender_id, message_id});
        add_stream_message({stream_id: new_stream_id, sender_id, message_id});
    }
}
|
2020-05-01 08:29:08 +02:00
|
|
|
|
2021-02-28 00:44:37 +01:00
|
|
|
export function update_topics_of_deleted_message_ids(message_ids) {
    // Drop deleted messages from our per-topic sender bookkeeping so
    // topic-level recency data doesn't reference ids that no longer
    // exist. (Stream-level data is intentionally left alone; see the
    // comment in process_topic_edit.)
    for (const message_id of message_ids) {
        const message = message_store.get(message_id);
        if (!message) {
            // Already gone from the local store; nothing to clean up.
            continue;
        }
        const {stream_id, topic, sender_id} = message;
        remove_topic_message({stream_id, topic, sender_id, message_id});
    }
}
|
2020-07-15 09:36:03 +02:00
|
|
|
|
2021-02-28 00:44:37 +01:00
|
|
|
export function compare_by_recency(user_a, user_b, stream_id, topic) {
    // Comparator for sender typeahead: the user with the more recent
    // message sorts first (descending by message id).
    //
    // Primary key: each user's newest message in this stream/topic.
    // Tie-breaker: their newest message anywhere in the stream.
    //
    // Uses distinct single-assignment `const` bindings for the two
    // different quantities instead of reusing mutable `let` variables.
    const a_topic_max = max_id_for_stream_topic_sender({stream_id, topic, sender_id: user_a.user_id});
    const b_topic_max = max_id_for_stream_topic_sender({stream_id, topic, sender_id: user_b.user_id});
    if (a_topic_max !== b_topic_max) {
        return b_topic_max - a_topic_max;
    }

    const a_stream_max = max_id_for_stream_sender({stream_id, sender_id: user_a.user_id});
    const b_stream_max = max_id_for_stream_sender({stream_id, sender_id: user_b.user_id});
    return b_stream_max - a_stream_max;
}
|
2017-06-01 07:42:57 +02:00
|
|
|
|
2021-02-28 00:44:37 +01:00
|
|
|
export function get_topic_recent_senders(stream_id, topic) {
    // Return the sender ids for this stream/topic, ordered so that the
    // most recently active sender comes first. Unknown stream or topic
    // yields an empty list.
    const topic_dict = topic_senders.get(stream_id);
    if (topic_dict === undefined) {
        return [];
    }

    const sender_dict = topic_dict.get(topic);
    if (sender_dict === undefined) {
        return [];
    }

    // Sort the [sender_id, id_tracker] pairs newest-first by each
    // tracker's max message id, then keep just the sender ids.
    return [...sender_dict.entries()]
        .sort(([, tracker_a], [, tracker_b]) => tracker_b.max_id() - tracker_a.max_id())
        .map(([sender_id]) => sender_id);
}
|
2022-11-19 07:39:00 +01:00
|
|
|
|
|
|
|
export function process_private_message({to_user_ids, sender_id, id}) {
    // Record that sender_id sent message `id` in the direct-message
    // conversation keyed by the to_user_ids string.
    //
    // Allocate and insert the per-conversation and per-sender
    // containers only when they don't exist yet; the previous version
    // re-`set` the very same entries into their Maps on every single
    // message, which was redundant work.
    let sender_dict = pm_senders.get(to_user_ids);
    if (sender_dict === undefined) {
        sender_dict = new Map();
        pm_senders.set(to_user_ids, sender_dict);
    }

    let id_tracker = sender_dict.get(sender_id);
    if (id_tracker === undefined) {
        id_tracker = new IdTracker();
        sender_dict.set(sender_id, id_tracker);
    }

    id_tracker.add(id);
}
|
|
|
|
|
|
|
|
export function get_pm_recent_senders(user_ids_string) {
    // Split the direct-message conversation's users into those who have
    // sent a message in it ("participants", sorted most-recent-sender
    // first) and those who have not ("non_participants").
    const user_ids = [...people.get_participants_from_user_ids_string(user_ids_string)];
    const pm_senders_info = {participants: [], non_participants: []};

    const sender_dict = pm_senders.get(user_ids_string);
    if (!sender_dict) {
        // We have no sender data for this conversation at all.
        return pm_senders_info;
    }

    // Partition with a plain loop instead of the previous
    // side-effecting Array.prototype.filter callback (it pushed into
    // `participants` from inside filter, which is easy to misread).
    for (const user_id of user_ids) {
        if (sender_dict.get(user_id)) {
            pm_senders_info.participants.push(user_id);
        } else {
            pm_senders_info.non_participants.push(user_id);
        }
    }

    // Most recent sender first. The `|| -1` fallback is kept from the
    // original; NOTE(review): this assumes max_id() never legitimately
    // returns 0 — confirm against IdTracker.
    const max_id = (user_id) => sender_dict.get(user_id)?.max_id() || -1;
    pm_senders_info.participants.sort((user_id1, user_id2) => max_id(user_id2) - max_id(user_id1));
    return pm_senders_info;
}
|
2023-11-24 15:06:42 +01:00
|
|
|
|
|
|
|
export function get_topic_message_ids_for_sender(stream_id, topic, sender_id) {
    // All message ids we have recorded for sender_id in this
    // stream/topic; an empty Set when we have no record of them.
    const id_tracker = topic_senders?.get(stream_id)?.get(topic)?.get(sender_id);
    if (id_tracker !== undefined) {
        return id_tracker.ids;
    }
    return new Set();
}
|