Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
69817ca
Typescriptify & use service worker for MSC3916 authentication
turt2live Apr 11, 2024
296c82c
appease the linter
turt2live Apr 11, 2024
8542ce2
appease jest
turt2live Apr 11, 2024
b333b29
appease linter
turt2live Apr 11, 2024
6cf7dca
Merge branch 'develop' into travis/msc3916
turt2live Apr 18, 2024
af1ba39
Get the access token directly
turt2live Apr 18, 2024
9494257
Add a bit of jitter
turt2live Apr 18, 2024
7d63b90
Merge remote-tracking branch 'origin/develop' into travis/msc3916
turt2live Apr 22, 2024
8067197
Improve legibility, use factored-out functions for pickling
turt2live Apr 22, 2024
ea7e8fb
Add docs
turt2live Apr 22, 2024
d0dcf89
Appease the linter
turt2live Apr 22, 2024
3fa2a42
Merge remote-tracking branch 'origin/develop' into travis/msc3916
turt2live Apr 23, 2024
80dd415
Document risks of postMessage
turt2live Apr 23, 2024
0395ee4
Split service worker post message handling out to function
turt2live Apr 23, 2024
2ad00c0
Move registration to async function
turt2live Apr 23, 2024
0951fe7
Use more early returns
turt2live Apr 23, 2024
c20d5f1
Thanks(?), WebStorm
turt2live Apr 23, 2024
3947d90
Handle case of no access token for /versions
turt2live Apr 23, 2024
7d9e7d6
Appease linter
turt2live Apr 23, 2024
d4efdf2
Merge branch 'develop' into travis/msc3916
turt2live May 1, 2024
37e3dfd
Apply suggestions from code review
turt2live May 1, 2024
0d5e2a9
Remove spurious try/catch
turt2live May 1, 2024
ec159a3
Factor out fetch config stuff
turt2live May 1, 2024
310284b
Merge branch 'develop' into travis/msc3916
turt2live May 13, 2024
b80adc5
Apply suggestions from code review
turt2live May 14, 2024
cdbfd80
Merge branch 'develop' into travis/msc3916
turt2live May 14, 2024
a57c111
Finish applying code review suggestions
turt2live May 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion res/sw.js

This file was deleted.

161 changes: 161 additions & 0 deletions src/serviceworker/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
Copyright 2024 New Vector Ltd

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

import { idbLoad } from "matrix-react-sdk/src/utils/StorageAccess";
import { ACCESS_TOKEN_IV, tryDecryptToken } from "matrix-react-sdk/src/utils/tokens/tokens";
import { buildAndEncodePickleKey } from "matrix-react-sdk/src/utils/tokens/pickling";

/**
 * In-memory cache of MSC3916 (authenticated media) support, keyed by the homeserver base URL that precedes
 * `/_matrix/media/v3` in an intercepted request.
 *
 * - `supportsMSC3916`: whether the server's `/versions` response advertised `org.matrix.msc3916`.
 * - `cacheExpires`: millisecond timestamp (as produced by `Date#getTime()`) after which the entry must be refreshed.
 */
const serverSupportMap: {
    [serverUrl: string]: {
        supportsMSC3916: boolean;
        cacheExpires: number;
    };
} = {};

self.addEventListener("install", function onInstall(installEvent) {
    // Calling skipWaiting() lets updated service workers take effect more frequently, which is particularly
    // helpful in development environments.
    // @ts-expect-error - service worker types are not available. See 'fetch' event handler.
    installEvent.waitUntil(skipWaiting());
});

self.addEventListener("activate", function onActivate(activateEvent) {
    // Immediately bring every client under this worker's control, which may include tabs that were opened
    // before this worker existed.
    // @ts-expect-error - service worker types are not available. See 'fetch' event handler.
    activateEvent.waitUntil(clients.claim());
});

// @ts-expect-error - the service worker types conflict with the DOM types available through TypeScript. Many hours
// have been spent trying to convince the type system that there's no actual conflict, but it has yet to work. Instead
// of trying to make it do the thing, we force-cast to something close enough where we can (and ignore errors otherwise).
self.addEventListener("fetch", (event: FetchEvent) => {
// This is the authenticated media (MSC3916) check, proxying what was unauthenticated to the authenticated variants.

if (event.request.method !== "GET") {
return; // not important to us
}

// Note: ideally we'd keep the request headers and etc, but in practice we can't even see those details.
Comment thread
turt2live marked this conversation as resolved.
Outdated
// See https://stackoverflow.com/a/59152482
let url = event.request.url;

// We only intercept v3 download and thumbnail requests as presumably everything else is deliberate.
// For example, `/_matrix/media/unstable` or `/_matrix/media/v3/preview_url` are something well within
// the control of the application, and appear to be choices made at a higher level than us.
if (url.includes("/_matrix/media/v3/download") || url.includes("/_matrix/media/v3/thumbnail")) {
Comment thread
turt2live marked this conversation as resolved.
Outdated
// We need to call respondWith synchronously, otherwise we may never execute properly. This means
// later on we need to proxy the request through if it turns out the server doesn't support authentication.
event.respondWith(
(async (): Promise<Response> => {
let fetchConfig: { headers?: { [key: string]: string } } = {};
try {
// Figure out which homeserver we're communicating with
const csApi = url.substring(0, url.indexOf("/_matrix/media/v3"));

// Add jitter to reduce request spam, particularly to `/versions` on initial page load
await new Promise<void>((resolve) => setTimeout(() => resolve(), Math.random() * 10));

// Locate our access token, and populate the fetchConfig with the authentication header.
// @ts-expect-error - service worker types are not available. See 'fetch' event handler.
const client = await self.clients.get(event.clientId);
const accessToken = await getAccessToken(client);
if (accessToken) {
fetchConfig = {
headers: {
Authorization: `Bearer ${accessToken}`,
},
};
}

// Update or populate the server support map using a (usually) authenticated `/versions` call.
if (!serverSupportMap[csApi] || serverSupportMap[csApi].cacheExpires <= new Date().getTime()) {
const versions = await (await fetch(`${csApi}/_matrix/client/versions`, fetchConfig)).json();
serverSupportMap[csApi] = {
supportsMSC3916: Boolean(versions?.unstable_features?.["org.matrix.msc3916"]),
cacheExpires: new Date().getTime() + 2 * 60 * 60 * 1000, // 2 hours from now
};
}

// If we have server support (and a means of authentication), rewrite the URL to use MSC3916 endpoints.
if (serverSupportMap[csApi].supportsMSC3916 && accessToken) {
// Currently unstable only.
// TODO: Support stable endpoints when available.
url = url.replace(/\/media\/v3\/(.*)\//, "/client/unstable/org.matrix.msc3916/media/$1/");
} // else by default we make no changes
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rewriting the URL in the depths of the serviceworker feels very, very magical. Shouldn't this happen in the js-sdk?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where in the js-sdk? Our URL generation for image elements and etc is synchronous, and making it async would require massive refactoring. This prevents us from doing a /versions check.

Instead, the idea is we let the service worker rewrite the url and if that fails or is unsupported, the natural fallback behaviour exists instead.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking of a method on MatrixClient or something for turning mxc URIs into http URLs. Cache the versions check early on, I guess.

Instead, the idea is we let the service worker rewrite the url and if that fails or is unsupported, the natural fallback behaviour exists instead.

Seems like we can't rely on this anyway, because we're going to need authentication. Or rather: if it fails often enough that we need that fallback, then the whole approach seems doomed.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The functions which convert MXC URIs to HTTP URLs are not async-capable, unfortunately, and there's nowhere in the app where we can realistically cache /versions early enough. By the time we load the MatrixClientPeg for instance, the user's own avatar is already shown - service workers allow us to intercept this (relatively) synchronous call with an async one.

While the MSC is unstable I think we can (and should) maintain the fallback, though as we get closer to freezing the unauthenticated endpoints mentioned in MSC3916, we can change the js-sdk to use the authenticated endpoints by default (or always?). The service worker would detect that route and append an access token rather than rewriting it at that point.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well: we can't use the authenticated endpoint until we get hold of an access token, and getting hold of an access token is an async operation. That seems to me a good time to decide whether we ought to use that access token in media requests.

I guess that's a bunch more work, though :(.

The fact we might be able to get rid of this code in future does little to quell my concerns. Experience strongly suggests that this code will still be here in 8 years time.

I think my concerns here are twofold:

  • We're basically saying the js-sdk is useless without this service worker. That's actually inherent in the fact we have to add authorization, but the less the service worker has to do the better. Anyway, at the very least we need to add documentation to the js-sdk so that people using it understand what their service worker has to do.

  • I feel like rewriting URLs in a service worker is subtle, unexpected behaviour. A future generation of developers is going to be extremely confused by this. For reasons I can't entirely explain, it feels worse than "just" adding an Authorization header. Hence, I'm pretty keen we do our best to find alternatives.

At the end of the day, if we can't do better then we can't do better. We can at least do our best to document our way out of it though.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tldr - js-sdk PR for documentation is here: matrix-org/matrix-js-sdk#4185

Well: we can't use the authenticated endpoint until we get hold of an access token, and getting hold of an access token is an async operation. That seems to me a good time to decide whether we ought to use that access token in media requests.

I guess that's a bunch more work, though :(.

The issue is that the call stack is not capable of having any async code in it. The getHttpUriForMxc function in the js-sdk (and client.mxcUrlToHttp, which just calls getHttpUriForMxc) is not async - introducing a promise of any kind would be a breaking change. We could add secondary functions (getAuthenticatedHttpUriForMxc, for example), though then we reach the react-sdk layer where the Media class exists and is ingrained throughout the entire layer, and of course doesn't support async operations in its general cases either. This same problem propagates outwards to the remaining components, functions, classes, etc. of the react-sdk.

It is indeed a bunch more work to fix this, and matrix-org/matrix-react-sdk@5cdd706 only covers some of it (as an alternative I wrote to avoid using service workers). Even if we replace the URL rewrite with a js-sdk function call, we're still collecting blobs and filling up our memory buffers. The hardest things to fix are the hidden uses of non-image elements, like in the case of the composer where avatars are applied using CSS.

Service workers cover all of these nuanced cases, though ideally I think the js-sdk would indeed be producing authentication-first URLs (which the service worker can detect and append an Authorization header to). This is however a pretty breaking change to the app, so at least while things are unstable I think it's best to keep the fallback.

The fact we might be able to get rid of this code in future does little to quell my concerns. Experience strongly suggests that this code will still be here in 8 years time.

This is my experience too, fwiw. I'm hopeful our rollout plan (internal only) will help actually make this code temporary and work to eradicate the unauthenticated option, though the service worker itself will need to be maintained. We can at least remove/reduce some of the magic as the MSC progresses towards total stability.

  • We're basically saying the js-sdk is useless without this service worker. That's actually inherent in the fact we have to add authorization, but the less the service worker has to do the better. Anyway, at the very least we need to add documentation to the js-sdk so that people using it understand what their service worker has to do.

I'm not sure I'd classify it as "useless" given servers aren't likely to turn on an unstable feature just yet, but it is something worth documenting. I've done so here: matrix-org/matrix-js-sdk#4185

I suspect it's possible for the js-sdk to support these sorts of operations when it's running in a NodeJS environment, though in a browser environment it's almost certainly a service worker problem given the consumer's (react-sdk) usage.

  • I feel like rewriting URLs in a service worker is subtle, unexpected behaviour. A future generation of developers is going to be extremely confused by this. For reasons I can't entirely explain, it feels worse than "just" adding an Authorization header. Hence, I'm pretty keen we do our best to find alternatives.

As mentioned above, it is pretty magical. With the browser devtools making it clear what's happening and the rollout strategy though, I think we'll be okay. At the very least we can switch to using stable endpoints in the js-sdk pretty quickly by default (once the spec process allows us to), and convert the service worker to 'just' adding the Authorization header in place. This would break compatibility with older servers, but at least on T&S we consider that to be a feature (once the MSC is stable and/or released - doing so right now would require servers to advertise unstable functionality, which effectively bypasses the MSC process and leads to a de facto spec).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tldr - js-sdk PR for documentation is here: matrix-org/matrix-js-sdk#4185

Thanks for starting to document this. It certainly helps quell some of my concerns here.

The issue is that the call stack is not capable of having any async code in it. The getHttpUriForMxc function in the js-sdk (and client.mxcUrlToHttp, which just calls getHttpUriForMxc) is not async - introducing a promise of any kind would be a breaking change.

I don't really follow this. Why can't we make the /versions request before we construct a MatrixClient, just as we currently make a /login request? That would mean that client.mxcUrlToHttp could still be synchronous, but do the right thing.

So is the problem that there are some places that we construct media links where there is no access to a MatrixClient? Or before a MatrixClient is even constructed? It feels like such things probably need to work without authentication anyway?

... which is not to say it's not a bunch of work, but I'm still keen to understand if we're doing this for pragmatic reasons or because it's the only way it can possibly work.

At the very least we can switch to using stable endpoints in the js-sdk pretty quickly by default (once the spec process allows us to), and convert the service worker to 'just' adding the Authorization header in place.

Well, I'll believe it when I see it...

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moving comment into thread

@t3chguy says:

I don't really follow this. Why can't we make the /versions request before we construct a MatrixClient, just as we currently make a /login request? That would mean that client.mxcUrlToHttp could still be synchronous, but do the right thing.

This would break the offline mode in Element Web

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really follow this. Why can't we make the /versions request before we construct a MatrixClient, just as we currently make a /login request? That would mean that client.mxcUrlToHttp could still be synchronous, but do the right thing.

So is the problem that there are some places that we construct media links where there is no access to a MatrixClient? Or before a MatrixClient is even constructed? It feels like such things probably need to work without authentication anyway?

... which is not to say it's not a bunch of work, but I'm still keen to understand if we're doing this for pragmatic reasons or because it's the only way it can possibly work.

/login I think is trivial to support. If a user has an existing session though, we seem to show their avatar very early in the cycle, making some code a bit racy. The earliest we can check /versions is sometimes too late, at least as far as my browser is concerned: matrix-org/matrix-react-sdk@0a1d45c (this commit has other problems, like lack of re-checking server support, linting, etc).

The service worker avoids the lack of MatrixClient by reaching into idb directly for the token. The postMessage for user information also reaches directly into localstorage, avoiding MatrixClient too.

This is out of pragmatism imo, though I'm still open to suggestions/hints on where to put this code otherwise.

} catch (err) {
console.error("SW: Error in request rewrite.", err);
}

// Add authentication and send the request. We add authentication even if MSC3916 endpoints aren't
// being used to ensure patches like this work:
// https://github.com/matrix-org/synapse/commit/2390b66bf0ec3ff5ffb0c7333f3c9b239eeb92bb
return fetch(url, fetchConfig);
})(),
);
}
});

/**
 * Loads the stored access token and decrypts it using the session's pickle key.
 *
 * Ideally we'd use the `Client` interface for `client`, but since it's not available (see 'fetch' listener), we use
 * unknown for now and force-cast it to something close enough later.
 *
 * @param client - The service worker client (tab) to ask for the user ID and device ID.
 * @returns The decrypted access token, or `undefined` if the stored pickle key is malformed or decryption fails.
 * @throws Rejects if the storage reads fail or the client does not answer the user ID request.
 */
async function getAccessToken(client: unknown): Promise<string | undefined> {
    // Access tokens are encrypted at rest, so while we can grab the "access token", we'll need to do work to get the
    // real thing.
    const encryptedAccessToken = await idbLoad("account", "mx_access_token");

    // We need to extract a user ID and device ID from localstorage, which means calling WebPlatform for the
    // read operation. Service workers can't access localstorage.
    const { userId, deviceId } = await askClientForUserIdParams(client);

    // ... and this is why we need the user ID and device ID: they're index keys for the pickle key table.
    const pickleKeyData = await idbLoad("pickleKey", [userId, deviceId]);
    if (pickleKeyData && (!pickleKeyData.encrypted || !pickleKeyData.iv || !pickleKeyData.cryptoKey)) {
        console.error("SW: Invalid pickle key loaded - ignoring");
        return undefined;
    }

    // Finally, try decrypting the thing and return that. This may fail, but that's okay.
    try {
        const pickleKey = await buildAndEncodePickleKey(pickleKeyData, userId, deviceId);
        // The `await` matters: returning the promise un-awaited would let a decryption rejection escape this
        // try/catch instead of being logged and turned into `undefined`.
        return await tryDecryptToken(pickleKey, encryptedAccessToken, ACCESS_TOKEN_IV);
    } catch (e) {
        console.error("SW: Error decrypting access token.", e);
        return undefined;
    }
}

/**
 * Asks a client (tab) for the user ID and device ID via `postMessage`, and waits for the matching reply.
 *
 * Ideally we'd use the `Client` interface for `client`, but since it's not available (see 'fetch' listener), we use
 * unknown for now and force-cast it to something close enough inside the function.
 *
 * @param client - The service worker client to send the `userinfo` request to.
 * @returns The data of the reply message carrying our request ID, expected to hold `userId` and `deviceId`.
 * @throws Rejects with "timeout in postMessage" if no reply arrives within one second, or with the underlying
 *   error if the request cannot be posted at all.
 */
async function askClientForUserIdParams(client: unknown): Promise<{ userId: string; deviceId: string }> {
    return new Promise((resolve, reject) => {
        // We don't need particularly good randomness here - we just use this to generate a request ID, so we know
        // which postMessage reply is for our active request.
        const responseKey = Math.random().toString(36);

        // Every exit path (reply, timeout, failure to post) must release both the timer and the listener,
        // otherwise each failed request would leave a listener registered on the worker.
        const cleanup = (): void => {
            clearTimeout(timeoutId);
            self.removeEventListener("message", listener);
        };

        // Avoid stalling the tab in case something goes wrong.
        const timeoutId = setTimeout(() => {
            cleanup();
            reject(new Error("timeout in postMessage"));
        }, 1000);

        const listener = (event: MessageEvent): void => {
            if (event.data?.responseKey !== responseKey) return; // not for us
            cleanup(); // do this as soon as possible, avoiding a race between resolve and reject.
            resolve(event.data);
        };

        // Add the listener first, just in case the tab is *really* fast.
        self.addEventListener("message", listener);

        // Ask the tab for the information we need. This is handled by WebPlatform.
        try {
            (client as Window).postMessage({ responseKey, type: "userinfo" });
        } catch (e) {
            // For example, `client` may be undefined. Fail now rather than leaving the timer and listener behind.
            cleanup();
            reject(e);
        }
    });
}
33 changes: 31 additions & 2 deletions src/vector/platform/WebPlatform.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
Copyright 2016 Aviral Dasgupta
Copyright 2016 OpenMarket Ltd
Copyright 2017-2020 New Vector Ltd
Copyright 2017-2020, 2024 New Vector Ltd

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -46,7 +46,36 @@ export default class WebPlatform extends VectorBasePlatform {
super();
// Register service worker if available on this platform
if ("serviceWorker" in navigator) {
navigator.serviceWorker.register("sw.js");
// sw.js is exported by webpack, sourced from `/src/serviceworker/index.ts`
const swPromise = navigator.serviceWorker.register("sw.js");

// Jest causes `register()` to return undefined, so swallow that case.
if (swPromise) {
swPromise
.then(async (r) => {
// always ask the browser to update. The browser might not actually do it, but at least we asked.
await r.update();
return r;
})
.then((r) => {
navigator.serviceWorker.addEventListener("message", (e) => {
try {
if (e.data?.["type"] === "userinfo" && e.data?.["responseKey"]) {
const userId = localStorage.getItem("mx_user_id");
const deviceId = localStorage.getItem("mx_device_id");
r.active!.postMessage({
responseKey: e.data["responseKey"],
userId,
deviceId,
});
}
Comment thread
turt2live marked this conversation as resolved.
Outdated
} catch (e) {
console.error("Error responding to service worker: ", e);
}
});
})
.catch((e) => console.error("Error registering/updating service worker:", e));
}
Comment thread
turt2live marked this conversation as resolved.
Outdated
}
}

Expand Down
7 changes: 5 additions & 2 deletions webpack.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ module.exports = (env, argv) => {
mobileguide: "./src/vector/mobile_guide/index.ts",
jitsi: "./src/vector/jitsi/index.ts",
usercontent: "./node_modules/matrix-react-sdk/src/usercontent/index.ts",
serviceworker: {
import: "./src/serviceworker/index.ts",
filename: "sw.js", // update WebPlatform if this changes
},
...(useHMR ? {} : cssThemes),
},

Expand Down Expand Up @@ -666,7 +670,7 @@ module.exports = (env, argv) => {
// HtmlWebpackPlugin will screw up our formatting like the names
// of the themes and which chunks we actually care about.
inject: false,
excludeChunks: ["mobileguide", "usercontent", "jitsi"],
excludeChunks: ["mobileguide", "usercontent", "jitsi", "serviceworker"],
minify: false,
templateParameters: {
og_image_url: ogImageUrl,
Expand Down Expand Up @@ -739,7 +743,6 @@ module.exports = (env, argv) => {
"res/jitsi_external_api.min.js",
"res/jitsi_external_api.min.js.LICENSE.txt",
"res/manifest.json",
"res/sw.js",
"res/welcome.html",
{ from: "welcome/**", context: path.resolve(__dirname, "res") },
{ from: "themes/**", context: path.resolve(__dirname, "res") },
Expand Down