# Re-use work of getting state for a given state_group (`_get_state_groups_from_groups`) (#15617)
**Changelog entry:**

```diff
@@ -0,0 +1 @@
+Make `/messages` faster by efficiently grabbing state out of the database whenever we have to backfill and process new events.
```
```diff
@@ -13,7 +13,7 @@
 # limitations under the License.

 import logging
-from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Set, Tuple, Union

 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
```
```diff
@@ -89,6 +89,18 @@ def _get_state_groups_from_groups_txn(
         groups: List[int],
         state_filter: Optional[StateFilter] = None,
     ) -> Mapping[int, StateMap[str]]:
+        """
+        Given a number of state groups, fetch the latest state for each group.
+
+        Args:
+            txn: The transaction object.
+            groups: The state groups that you want to fetch the latest state for.
+            state_filter: The state filter to apply when fetching state from the database.
+
+        Returns:
+            Map from state_group to a StateMap at that point.
+        """
+
         state_filter = state_filter or StateFilter.all()

         results: Dict[int, MutableStateMap[str]] = {group: {} for group in groups}
```
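To make the new docstring concrete, here is a rough illustration of the return shape with made-up state groups and event IDs (hypothetical values, not real Synapse data):

```python
# Hypothetical illustration only: a map from each requested state_group to the
# latest StateMap at that point, keyed by (event type, state_key).
results = {
    20: {
        ("m.room.create", ""): "$create_event_id",
        ("m.room.member", "@alice:example.com"): "$alice_join_event_id",
    },
    30: {
        # state_group 30 chains back to 20: same state, plus one new entry
        ("m.room.create", ""): "$create_event_id",
        ("m.room.member", "@alice:example.com"): "$alice_join_event_id",
        ("m.room.member", "@bob:example.com"): "$bob_join_event_id",
    },
}
```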
```diff
@@ -98,24 +110,49 @@ def _get_state_groups_from_groups_txn(
             # a temporary hack until we can add the right indices in
             txn.execute("SET LOCAL enable_seqscan=off")

-            # The below query walks the state_group tree so that the "state"
+            # The query below walks the state_group tree so that the "state"
             # table includes all state_groups in the tree. It then joins
             # against `state_groups_state` to fetch the latest state.
             # It assumes that previous state groups are always numerically
             # lesser.
             # This may return multiple rows per (type, state_key), but last_value
             # should be the same.
             sql = """
-                WITH RECURSIVE sgs(state_group) AS (
-                    VALUES(?::bigint)
+                WITH RECURSIVE sgs(state_group, state_group_reached) AS (
+                    VALUES(?::bigint, NULL::bigint)
                     UNION ALL
-                    SELECT prev_state_group FROM state_group_edges e, sgs s
-                    WHERE s.state_group = e.state_group
+                    SELECT
+                        prev_state_group,
+                        CASE
+                            /* Specify state_groups we have already done the work for */
+                            WHEN prev_state_group IN (%s /* state_groups_we_have_already_fetched_string */) THEN prev_state_group
+                            ELSE NULL
+                        END AS state_group_reached
+                    FROM
+                        state_group_edges e, sgs s
+                    WHERE
+                        s.state_group = e.state_group
+                        /* Stop when we connect up to another state_group that we already did the work for */
+                        AND s.state_group_reached IS NULL
                 )
-                %s
+                %s /* overall_select_clause */
             """

             overall_select_query_args: List[Union[int, str]] = []
+            # Make sure we always have a row that tells us if we linked up to another
+            # state_group chain that we already processed (indicated by
+            # `state_group_reached`), regardless of whether we find any state according
+            # to the state_filter.
+            #
+            # We use a `UNION ALL` to make sure it is always the first row returned.
+            # `UNION` would otherwise merge and sort it in with the rows from the
+            # next query.
+            overall_select_clause = """
+                (
+                    SELECT NULL, NULL, NULL, state_group_reached
+                    FROM sgs
+                    ORDER BY state_group ASC
+                    LIMIT 1
+                ) UNION ALL (%s /* main_select_clause */)
+            """

             # This is an optimization to create a select clause per-condition. This
             # makes the query planner a lot smarter on what rows should pull out in the
```
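To see the traversal trick in isolation: the recursive CTE walks `state_group_edges` from the requested group down through its ancestors, and the `state_group_reached` column stops the walk as soon as it touches a group we already fetched. Below is a toy, self-contained demo of that idea. It uses sqlite3 purely for portability and an invented two-edge chain; the real query runs on Postgres with `bigint` casts and the state-filter select clauses:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    CREATE TABLE state_group_edges (state_group INTEGER, prev_state_group INTEGER);
    -- A toy chain: 30 -> 20 -> 10
    INSERT INTO state_group_edges VALUES (30, 20), (20, 10);
    """
)

# Pretend a previous pass already did the work for state_group 20 (-1 is the
# harmless filler the PR uses so the IN (...) list is never empty).
already_fetched = {-1, 20}
placeholders = ", ".join("?" * len(already_fetched))

rows = conn.execute(
    f"""
    WITH RECURSIVE sgs(state_group, state_group_reached) AS (
        VALUES(?, NULL)
        UNION ALL
        SELECT
            prev_state_group,
            CASE
                WHEN prev_state_group IN ({placeholders}) THEN prev_state_group
                ELSE NULL
            END
        FROM state_group_edges e, sgs s
        WHERE s.state_group = e.state_group
          AND s.state_group_reached IS NULL
    )
    SELECT state_group, state_group_reached FROM sgs
    """,
    (30, *already_fetched),
).fetchall()

print(rows)  # [(30, None), (20, 20)]: the walk stops at 20 instead of going on to 10
```

The sentinel row that the real query prepends via `UNION ALL` (the `overall_select_clause` above) is just a way to surface that `state_group_reached` value to the caller even when the state filter matches no rows.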
```diff
@@ -154,7 +191,7 @@ def _get_state_groups_from_groups_txn(
                     f"""
                     (
                         SELECT DISTINCT ON (type, state_key)
-                            type, state_key, event_id
+                            type, state_key, event_id, state_group
                         FROM state_groups_state
                         INNER JOIN sgs USING (state_group)
                         WHERE {where_clause}
```
```diff
@@ -163,7 +200,7 @@ def _get_state_groups_from_groups_txn(
                     """
                 )

-            overall_select_clause = " UNION ".join(select_clause_list)
+            main_select_clause = " UNION ".join(select_clause_list)
         else:
             where_clause, where_args = state_filter.make_sql_filter_clause()
             # Unless the filter clause is empty, we're going to append it after an
```
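As an aside, `SELECT DISTINCT ON (type, state_key) ... ORDER BY type, state_key, state_group DESC` is Postgres-specific: for each `(type, state_key)` pair it keeps only the row from the highest (latest) state_group. A small illustrative Python equivalent of that "latest wins" behaviour, over invented rows:

```python
# Illustrative only: emulate Postgres's
#   SELECT DISTINCT ON (type, state_key) ... ORDER BY type, state_key, state_group DESC
# over invented rows. The row with the highest state_group wins per key.
from typing import Dict, List, Tuple

rows: List[Tuple[str, str, str, int]] = [
    # (type, state_key, event_id, state_group)
    ("m.room.member", "@alice:example.com", "$join", 10),
    ("m.room.member", "@alice:example.com", "$displayname_change", 25),
    ("m.room.name", "", "$name", 12),
]

latest: Dict[Tuple[str, str], str] = {}
# Sort ascending by state_group so later rows overwrite earlier ones.
for typ, state_key, event_id, _state_group in sorted(rows, key=lambda r: r[3]):
    latest[(typ, state_key)] = event_id

print(latest)
# {('m.room.member', '@alice:example.com'): '$displayname_change',
#  ('m.room.name', ''): '$name'}
```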
```diff
@@ -173,25 +210,83 @@ def _get_state_groups_from_groups_txn(
                 overall_select_query_args.extend(where_args)

-                overall_select_clause = f"""
+                main_select_clause = f"""
                     SELECT DISTINCT ON (type, state_key)
-                        type, state_key, event_id
+                        type, state_key, event_id, state_group
                     FROM state_groups_state
                     WHERE state_group IN (
                         SELECT state_group FROM sgs
                     ) {where_clause}
                     ORDER BY type, state_key, state_group DESC
                 """

-            for group in groups:
+            # We can sort from least to greatest state_group and re-use the work from a
+            # lesser state_group for a greater one if we see that the edge chain links
+            # up.
+            #
+            # What this means in practice is that if we fetch the latest state for
+            # `state_group = 20`, and then we want `state_group = 30`, it will traverse
+            # down the edge chain to `20`, see that we linked up to `20`, and bail out
+            # early to re-use the work we already did for `20`. This can have massive
+            # savings in rooms like Matrix HQ where the edge chain is 88k events long
+            # and fetching the mostly-same chain over and over isn't very efficient.
+            sorted_groups = sorted(groups)
+            state_groups_we_have_already_fetched: Set[int] = {
+                # We default to `-1` just to fill in the query with something that
+                # will have no effect but not bork our query when it would be empty
+                # otherwise
+                -1
+            }
+            for group in sorted_groups:
                 args: List[Union[int, str]] = [group]
+                args.extend(state_groups_we_have_already_fetched)
                 args.extend(overall_select_query_args)

-                txn.execute(sql % (overall_select_clause,), args)
+                state_groups_we_have_already_fetched_string = ", ".join(
+                    ["?::bigint"] * len(state_groups_we_have_already_fetched)
+                )
+
+                txn.execute(
+                    sql
+                    % (
+                        state_groups_we_have_already_fetched_string,
+                        overall_select_clause % (main_select_clause,),
+                    ),
+                    args,
+                )
+
+                # The first row is always our special `state_group_reached` row, which
+                # tells us if we linked up to any other existing state_group that we
+                # already fetched and, if so, which one we linked up to (see the
+                # `UNION ALL` above which drives this special row).
+                first_row = txn.fetchone()
+                if first_row:
+                    _, _, _, state_group_reached = first_row
+
+                partial_state_map_for_state_group: MutableStateMap[str] = {}
                 for row in txn:
-                    typ, state_key, event_id = row
+                    typ, state_key, event_id, _state_group = row
                     key = (intern_string(typ), intern_string(state_key))
-                    results[group][key] = event_id
+                    partial_state_map_for_state_group[key] = event_id
+
+                # If we see a state_group edge link to a previous state_group that we
+                # already fetched from the database, link up the base state to the
+                # partial state we retrieved from the database to build on top of.
+                if state_group_reached in results:
+                    resultant_state_map = dict(results[state_group_reached])
+                    resultant_state_map.update(partial_state_map_for_state_group)
+
+                    results[group] = resultant_state_map
+                else:
+                    # It's also completely normal for us not to have a previous
+                    # state_group to build on top of if this is the first group being
+                    # processed or we are processing a bunch of groups from different
+                    # rooms which of course will never link together (completely
+                    # different DAGs).
+                    results[group] = partial_state_map_for_state_group
+
+                state_groups_we_have_already_fetched.add(group)
```
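Stripped of SQL, the loop's re-use strategy can be modelled in plain Python. This is a simplified sketch with invented toy data: `EDGES`, `DELTAS`, and `fetch_partial_state` are stand-ins for the real tables and query, not Synapse code:

```python
from typing import Dict, Iterable, Optional, Set, Tuple

StateMap = Dict[Tuple[str, str], str]

# Toy chain 30 -> 20 -> 10 and the state delta introduced at each group.
EDGES: Dict[int, int] = {30: 20, 20: 10}
DELTAS: Dict[int, StateMap] = {
    10: {("m.room.create", ""): "$create"},
    20: {("m.room.member", "@alice:example.com"): "$alice_join"},
    30: {("m.room.member", "@bob:example.com"): "$bob_join"},
}


def fetch_partial_state(
    group: int, already_fetched: Set[int]
) -> Tuple[StateMap, Optional[int]]:
    """Stand-in for the recursive SQL query: walk the edge chain, bailing out
    early at the first group we already did the work for."""
    partial: StateMap = {}
    current: Optional[int] = group
    while current is not None:
        if current in already_fetched:
            return partial, current  # linked up to previous work
        # Merge so that state from later (greater) groups wins.
        partial = {**DELTAS.get(current, {}), **partial}
        current = EDGES.get(current)
    return partial, None


def get_state_for_groups(groups: Iterable[int]) -> Dict[int, StateMap]:
    results: Dict[int, StateMap] = {}
    already_fetched: Set[int] = set()
    # Ascending order lets greater groups build on lesser ones.
    for group in sorted(groups):
        partial, reached = fetch_partial_state(group, already_fetched)
        base = results[reached] if reached in results else {}
        results[group] = {**base, **partial}
        already_fetched.add(group)
    return results


print(get_state_for_groups([20, 30]))
# Group 20 walks 20 -> 10; group 30 stops at 20 and re-uses its result.
```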
**Contributor (author) commented:**

> This PR hasn't made as big of an impact as I thought it would for that type of request. Are we still interested in a change like this? It may work well for sequential events that we backfill. It seems like our
>
> Maybe it's more interesting to improve our
```diff
         else:
             max_entries_returned = state_filter.max_entries_returned()
```

**Contributor (author) commented** (on the SQLite `else:` branch):

> To not complicate the diff, I've held off on applying the same treatment to SQLite. We can iterate on this in another PR or just opt for people to use Postgres in order to see the performance benefits.
```diff
@@ -201,8 +296,9 @@ def _get_state_groups_from_groups_txn(
             if where_clause:
                 where_clause = " AND (%s)" % (where_clause,)

-            # We don't use WITH RECURSIVE on sqlite3 as there are distributions
-            # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
+            # XXX: We could use `WITH RECURSIVE` here since it's supported on
+            # SQLite 3.8.3 or higher and our minimum supported version is greater
+            # than that. We just haven't put in the time to refactor this.
             for group in groups:
                 next_group: Optional[int] = group
```
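For reference only, a hypothetical sketch of what the `WITH RECURSIVE` refactor mentioned in the new comment might look like on SQLite. This is not part of the PR; the identifiers simply mirror the Postgres query above:

```python
# Hypothetical sketch, not shipped by this PR: SQLite >= 3.8.3 supports
# WITH RECURSIVE, so the per-group Python walk over state_group_edges could
# in principle be collapsed into a single query per group.
sqlite_sql = """
    WITH RECURSIVE sgs(state_group) AS (
        VALUES(?)
        UNION ALL
        SELECT prev_state_group FROM state_group_edges e, sgs s
        WHERE s.state_group = e.state_group
    )
    SELECT type, state_key, event_id, state_group
    FROM state_groups_state
    WHERE state_group IN (SELECT state_group FROM sgs)
    ORDER BY state_group ASC
"""
# Iterating rows in ascending state_group order and writing them into a dict
# gives "latest wins" per (type, state_key), standing in for Postgres's
# DISTINCT ON.
```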