vojo/src/app/plugins/call/CallWidgetDriver.ts

483 lines
17 KiB
TypeScript

import {
type Capability,
type ISendDelayedEventDetails,
type ISendEventDetails,
type IReadEventRelationsResult,
type IRoomEvent,
WidgetDriver,
type IWidgetApiErrorResponseDataDetails,
type ISearchUserDirectoryResult,
type IGetMediaConfigResult,
type UpdateDelayedEventAction,
OpenIDRequestState,
SimpleObservable,
IOpenIDUpdate,
} from 'matrix-widget-api';
import {
ClientPrefix,
EventType,
type IContent,
MatrixError,
type MatrixEvent,
Direction,
Method,
type SendDelayedEventResponse,
type StateEvents,
type TimelineEvents,
MatrixClient,
} from 'matrix-js-sdk';
import { getCallCapabilities } from './utils';
import { downloadMedia, mxcUrlToHttp } from '../../utils/matrix';
// Cleartext ring metadata for Android FCM classification — NOT "unencrypted calls".
//
// Why this exists. matrix-js-sdk encrypts every room event in an e2ee room before
// PUT /send. Server stores it as `m.room.encrypted`; Sygnal/HS push rules can only
// match outer cleartext fields, so an `EventMatch type=org.matrix.msc4075.rtc.notification`
// override never fires for encrypted DMs. Push falls back to the default message rule,
// FCM payload arrives with `type=m.room.encrypted, cn_type=null`, the Java classifier
// in `VojoFirebaseMessagingService.onMessageReceived` cannot recognise it as a ring,
// and the user sees a generic message banner instead of CallStyle. Element Web mitigates
// this by fetching+decrypting in the SW; Element X does it via native matrix-rust-sdk.
// Vojo Android has neither path, so the only short-term fix is to send the ring signal in cleartext.
//
// Threat model. The signaling fact "an incoming ring exists in this room at time T"
// (together with the ring's parent event ID, sender timestamp, lifetime, mentions and
// call intent) becomes server- and push-gateway-visible. Message bodies, LiveKit tokens, media
// encryption keys, SDP/session secrets, and call audio remain end-to-end encrypted —
// MSC4075 keeps those out of `m.rtc.notification` content by design. Equivalent to
// legacy `m.call.invite` cleartext signaling in WebRTC-over-Matrix. Stricter threat
// models that require hiding even the call-existence fact need the native-decrypt path
// tracked in `docs/plans/dm_calls_techdebt.md` §5.51.
//
// Reconstruct the ring payload field-by-field from validated primitives.
// We do NOT use a top-level allowlist + shallow copy: nested objects like
// `m.relates_to`, `m.mentions`, and a future opaque `m.call.intent` would
// then leak any sub-fields Element Call upstream might add. Each accepted
// field below is rebuilt from primitive-typed inputs only, so unknown
// sub-fields cannot ride along.
/** Largest `lifetime` (in milliseconds) a cleartext ring may advertise: five minutes. */
const RTC_RING_LIFETIME_MAX_MS = 5 * 60 * 1000;
/** Maximum number of `m.mentions.user_ids` entries copied into a cleartext ring. */
const RTC_RING_MENTIONS_MAX_USERS = 64;
/**
 * Longest event ID / user ID accepted into a cleartext ring. The Matrix spec
 * caps both identifier kinds at 255 bytes, so a 255 code-unit ceiling never
 * rejects a spec-compliant identifier.
 */
const RTC_RING_MATRIX_ID_MAX_LENGTH = 255;

/** True for plain objects only: rejects `null`, arrays and primitives. */
const isObject = (v: unknown): v is Record<string, unknown> =>
  typeof v === 'object' && v !== null && !Array.isArray(v);

/**
 * True when `v` is a string that starts with the given Matrix sigil, has at
 * least one character after it, and is no longer than
 * `RTC_RING_MATRIX_ID_MAX_LENGTH`. This bounds the size and shape of identifier
 * fields; it does not (and cannot) prove the identifier refers to a real
 * event or user.
 */
const isBoundedMatrixId = (v: unknown, sigil: '$' | '@'): v is string =>
  typeof v === 'string' &&
  v.length > 1 &&
  v.length <= RTC_RING_MATRIX_ID_MAX_LENGTH &&
  v.startsWith(sigil);

/**
 * Validates a widget-provided ring payload before it is sent in cleartext and
 * rebuilds it from primitives, so that no field the widget added (at the top
 * level or nested) can ride along.
 *
 * Guards against a hostile or buggy widget leaking arbitrary cleartext under
 * the guise of "ring metadata": every forwarded value is either a number within
 * a fixed range, a boolean, or a string with a fixed upper length.
 *
 * @param content - Event content as supplied by the widget.
 * @returns A freshly built content object containing only `notification_type`,
 *   `m.relates_to` (`rel_type` + `event_id`), `sender_ts`, `lifetime`, and —
 *   when present and well-formed — `m.mentions` and `m.call.intent`. Returns
 *   `null` when a required field is missing or malformed; the caller is then
 *   expected to fall back to the regular (encrypted) send path.
 */
function sanitizeRingContent(content: IContent): IContent | null {
  // Required: an `m.reference` relation to a plausible, size-bounded event ID.
  const relates = content['m.relates_to'];
  if (!isObject(relates)) return null;
  if (relates.rel_type !== 'm.reference') return null;
  const parentEventId = relates.event_id;
  if (!isBoundedMatrixId(parentEventId, '$')) return null;

  // Required: a positive finite timestamp and a positive, capped lifetime.
  const { sender_ts: senderTs, lifetime } = content;
  if (typeof senderTs !== 'number' || !Number.isFinite(senderTs) || senderTs <= 0) return null;
  if (
    typeof lifetime !== 'number' ||
    !Number.isFinite(lifetime) ||
    lifetime <= 0 ||
    lifetime > RTC_RING_LIFETIME_MAX_MS
  ) {
    return null;
  }

  const out: IContent = {
    notification_type: 'ring',
    'm.relates_to': {
      rel_type: 'm.reference',
      event_id: parentEventId,
    },
    sender_ts: senderTs,
    lifetime,
  };

  // m.mentions per Matrix spec: { user_ids?: string[]; room?: boolean }.
  // Reconstruct from primitives so an upstream-added field can't leak. Entries
  // that are not size-bounded `@…` strings are dropped rather than failing the
  // whole payload; the entry count is capped after filtering.
  const mentions = content['m.mentions'];
  if (isObject(mentions)) {
    const sanitizedMentions: { user_ids?: string[]; room?: boolean } = {};
    if (Array.isArray(mentions.user_ids)) {
      sanitizedMentions.user_ids = mentions.user_ids
        .filter((u): u is string => isBoundedMatrixId(u, '@'))
        .slice(0, RTC_RING_MENTIONS_MAX_USERS);
    }
    if (typeof mentions.room === 'boolean') {
      sanitizedMentions.room = mentions.room;
    }
    out['m.mentions'] = sanitizedMentions;
  }

  // m.call.intent per MSC4310 is a free-form string hint ('audio' / 'video').
  // Forward only if it's a short, non-empty primitive string.
  const intent = content['m.call.intent'];
  if (typeof intent === 'string' && intent.length > 0 && intent.length < 64) {
    out['m.call.intent'] = intent;
  }
  return out;
}
/**
 * Widget driver for the call widget. Extends `WidgetDriver` and serves every
 * request through the `MatrixClient` handed to the constructor. Methods that
 * accept an optional target room fall back to the room ID given at construction.
 */
export class CallWidgetDriver extends WidgetDriver {
// Capabilities this driver will grant; computed once in the constructor.
private allowedCapabilities: Set<Capability>;
private readonly mx: MatrixClient;
/**
 * @param mx - Client used for every request this driver performs.
 * @param inRoomId - Room used when a request does not name a target room.
 * @throws Error when the client reports no device ID.
 */
public constructor(mx: MatrixClient, private inRoomId: string) {
super();
this.mx = mx;
const deviceId = mx.getDeviceId();
if (!deviceId) throw new Error('Failed to initialize CallWidgetDriver! Device ID not found.');
this.allowedCapabilities = getCallCapabilities(inRoomId, mx.getSafeUserId(), deviceId);
}
/**
 * Grants the intersection of what the widget requested and what this driver
 * allows; no user prompt is involved.
 *
 * @param requested - Capabilities the widget asked for.
 * @returns The requested capabilities that are also in `allowedCapabilities`.
 */
public async validateCapabilities(requested: Set<Capability>): Promise<Set<Capability>> {
const allow = Array.from(requested).filter((cap) => this.allowedCapabilities.has(cap));
return new Set(allow);
}
/**
 * Sends an event on behalf of the widget. Routing, in order:
 *  1. timeline event of type `EventType.CallNotify` -> nothing is sent, a
 *     sentinel event ID is returned;
 *  2. string `stateKey` -> `sendStateEvent`;
 *  3. `EventType.RoomRedaction` -> `redactEvent` with `content.redacts`;
 *  4. timeline `EventType.RTCNotification` with `notification_type === 'ring'`
 *     that passes `sanitizeRingContent` -> direct authenticated PUT to the
 *     `/send` endpoint carrying the sanitized content;
 *  5. everything else -> `client.sendEvent`.
 *
 * @param eventType - Event type to send.
 * @param content - Event content supplied by the widget.
 * @param stateKey - State key for state events; `null` for timeline events.
 * @param targetRoomId - Room to send to; falls back to the constructor's room.
 * @returns The room ID used and the resulting event ID.
 * @throws Error when there is no client or no room ID to send to.
 */
public async sendEvent(
eventType: string,
content: IContent,
stateKey: string | null = null,
targetRoomId: string | null = null
): Promise<ISendEventDetails> {
const client = this.mx;
const roomId = targetRoomId || this.inRoomId;
if (!client || !roomId) throw new Error('Not in a room or not attached to a client');
let r: { event_id: string } | null;
// Non-null only for a timeline ring notification whose payload passed
// validation; `null` both for other events and for rings that failed it.
const sanitizedRing =
stateKey === null &&
eventType === EventType.RTCNotification &&
content.notification_type === 'ring'
? sanitizeRingContent(content)
: null;
// Defense-in-depth against the legacy `EventType.CallNotify`
// (`org.matrix.msc4075.call.notify`) sibling event that
// `MatrixRTCSession.sendCallNotify` upstream still emits in parallel
// with `m.rtc.notification`. `getCallCapabilities` already omits Send
// for it, so the widget capability check should reject the request
// before it reaches us — but if a future widget release bypasses
// capability validation, the encrypted send would surface as a
// default-rule message banner alongside the real ring (Vojo's ring
// listener watches RTCNotification, not CallNotify). A push-rule
// suppression cannot fix this: the homeserver only sees
// `type=m.room.encrypted` for encrypted DMs and `EventMatch` against
// the inner type silently no-ops. So we silently no-op the legacy send
// here, returning a sentinel event_id. Throwing is tempting but would
// poison `MatrixRTCSession.sendCallNotify`'s `Promise.all` umbrella —
// its `.then` doesn't fire on reject, `DidSendCallNotification` never
// emits, and an "Unhandled promise rejection" log flickers per ring.
// Vojo doesn't consume `DidSendCallNotification`, but a clean resolve
// keeps the sibling new-RTCNotification ack path identical to upstream.
if (stateKey === null && eventType === EventType.CallNotify) {
return { roomId, eventId: '$vojo:suppressed-legacy-call-notify' };
}
if (typeof stateKey === 'string') {
r = await client.sendStateEvent(
roomId,
eventType as keyof StateEvents,
content as StateEvents[keyof StateEvents],
stateKey
);
} else if (eventType === EventType.RoomRedaction) {
// special case: extract the `redacts` property and call redact
r = await client.redactEvent(roomId, content.redacts);
} else if (sanitizedRing) {
// Bypass the encryption pipeline for ring signaling. See the rationale comment
// above `sanitizeRingContent` for full reasoning. Only `notification_type === 'ring'`
// is special-cased — group-call `notification_type === 'notification'` keeps
// the default encrypted path because it isn't routed through the Android
// CallStyle classifier. If the payload fails shape validation we fall through
// to the regular encrypted send below so the in-app strip still gets the
// event via /sync; only Android background CallStyle is lost on that branch.
const txnId = client.makeTxnId();
const path = `/rooms/${encodeURIComponent(roomId)}/send/${encodeURIComponent(
eventType
)}/${encodeURIComponent(txnId)}`;
// Only the sanitized copy is put on the wire, never the widget's original content.
r = await client.http.authedRequest<{ event_id: string }>(
Method.Put,
path,
undefined,
sanitizedRing,
{ prefix: ClientPrefix.V3 }
);
} else {
r = await client.sendEvent(
roomId,
eventType as keyof TimelineEvents,
content as TimelineEvents[keyof TimelineEvents]
);
}
return { roomId, eventId: r.event_id };
}
/**
 * Schedules a delayed event through the client's `_unstable_` delayed-event API.
 *
 * @param delay - Delay value forwarded as `delay`; `null` to omit it.
 * @param parentDelayId - Forwarded as `parent_delay_id`; `null` to omit it.
 * @param eventType - Event type to send.
 * @param content - Event content supplied by the widget.
 * @param stateKey - State key for state events; `null` for timeline events.
 * @param targetRoomId - Room to send to; falls back to the constructor's room.
 * @returns The room ID used and the `delay_id` returned by the client.
 * @throws Error when there is no client/room, or when both `delay` and
 *   `parentDelayId` are `null`.
 */
public async sendDelayedEvent(
delay: number | null,
parentDelayId: string | null,
eventType: string,
content: IContent,
stateKey: string | null = null,
targetRoomId: string | null = null
): Promise<ISendDelayedEventDetails> {
const client = this.mx;
const roomId = targetRoomId || this.inRoomId;
if (!client || !roomId) throw new Error('Not in a room or not attached to a client');
// Build the options from whichever of `delay` / `parentDelayId` is present;
// at least one of them is required.
let delayOpts;
if (delay !== null) {
delayOpts = {
delay,
...(parentDelayId !== null && { parent_delay_id: parentDelayId }),
};
} else if (parentDelayId !== null) {
delayOpts = {
parent_delay_id: parentDelayId,
};
} else {
throw new Error('Must provide at least one of delay or parentDelayId');
}
let r: SendDelayedEventResponse | null;
if (stateKey !== null) {
// state event
r = await client._unstable_sendDelayedStateEvent(
roomId,
delayOpts,
eventType as keyof StateEvents,
content as StateEvents[keyof StateEvents],
stateKey
);
} else {
// message event
r = await client._unstable_sendDelayedEvent(
roomId,
delayOpts,
null,
eventType as keyof TimelineEvents,
content as TimelineEvents[keyof TimelineEvents]
);
}
return {
roomId,
delayId: r.delay_id,
};
}
/**
 * Forwards an update for a previously scheduled delayed event to the client.
 *
 * @param delayId - Identifier of the delayed event.
 * @param action - Action to apply to it.
 * @throws Error when no client is attached.
 */
public async updateDelayedEvent(
delayId: string,
action: UpdateDelayedEventAction
): Promise<void> {
const client = this.mx;
if (!client) throw new Error('Not in a room or not attached to a client');
await client._unstable_updateDelayedEvent(delayId, action);
}
/**
 * Queues to-device messages for the given users and devices.
 *
 * @param eventType - To-device event type.
 * @param encrypted - When true, payloads are passed through
 *   `crypto.encryptToDeviceMessages` before being queued.
 * @param contentMap - Payload per user ID, per device ID.
 * @throws Error when `encrypted` is true but the client has no crypto backend.
 */
public async sendToDevice(
eventType: string,
encrypted: boolean,
contentMap: { [userId: string]: { [deviceId: string]: object } }
): Promise<void> {
const client = this.mx;
if (encrypted) {
const crypto = client.getCrypto();
if (!crypto) throw new Error('E2EE not enabled');
// attempt to re-batch these up into a single request
// Recipients are grouped by the JSON serialization of their payload, so
// devices receiving identical content share one encrypt call.
const invertedContentMap: { [content: string]: { userId: string; deviceId: string }[] } = {};
// eslint-disable-next-line no-restricted-syntax
for (const userId of Object.keys(contentMap)) {
const userContentMap = contentMap[userId];
// eslint-disable-next-line no-restricted-syntax
for (const deviceId of Object.keys(userContentMap)) {
const content = userContentMap[deviceId];
const stringifiedContent = JSON.stringify(content);
invertedContentMap[stringifiedContent] = invertedContentMap[stringifiedContent] || [];
invertedContentMap[stringifiedContent].push({ userId, deviceId });
}
}
// One encrypt + queue per distinct payload; the groups are processed concurrently.
await Promise.all(
Object.entries(invertedContentMap).map(async ([stringifiedContent, recipients]) => {
const batch = await crypto.encryptToDeviceMessages(
eventType,
recipients,
JSON.parse(stringifiedContent)
);
await client.queueToDevice(batch);
})
);
} else {
// Unencrypted: flatten the nested map into a single batch and queue it as-is.
await client.queueToDevice({
eventType,
batch: Object.entries(contentMap).flatMap(([userId, userContentMap]) =>
Object.entries(userContentMap).map(([deviceId, content]) => ({
userId,
deviceId,
payload: content,
}))
),
});
}
}
/**
 * Reads matching non-state events from the room's live timeline, scanning
 * from the last timeline entry towards the first.
 *
 * @param roomId - Room to read from.
 * @param eventType - Event type to match.
 * @param msgtype - For `EventType.RoomMessage` only: required `msgtype`, if set.
 * @param stateKey - Optional state key filter (see the note in the loop).
 * @param limit - Maximum number of events; a non-positive value means no
 *   practical limit.
 * @param since - Event ID at which the scan stops; that event is not included.
 * @returns Matching events in scan order (last timeline entry first); an empty
 *   array when the client does not know the room.
 */
public async readRoomTimeline(
roomId: string,
eventType: string,
msgtype: string | undefined,
stateKey: string | undefined,
limit: number,
since: string | undefined
): Promise<IRoomEvent[]> {
const safeLimit =
limit > 0 ? Math.min(limit, Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER; // relatively arbitrary
const room = this.mx.getRoom(roomId);
if (room === null) return [];
const results: MatrixEvent[] = [];
const events = room.getLiveTimeline().getEvents();
for (let i = events.length - 1; i >= 0; i -= 1) {
const ev = events[i];
if (results.length >= safeLimit) break;
if (since !== undefined && ev.getId() === since) break;
// NOTE(review): state events are already excluded by `!ev.isState()`, so the
// trailing state-key clause presumably never filters anything — confirm
// against the SDK's `isState()` / `getStateKey()` semantics.
if (
ev.getType() === eventType &&
!ev.isState() &&
(eventType !== EventType.RoomMessage || !msgtype || msgtype === ev.getContent().msgtype) &&
(ev.getStateKey() === undefined || stateKey === undefined || ev.getStateKey() === stateKey)
) {
results.push(ev);
}
}
return results.map((e) => e.getEffectiveEvent() as IRoomEvent);
}
/**
 * Answers an OpenID request by updating the observer with the `Allowed` state
 * and a token obtained from `getOpenIdToken()`. This method performs no
 * confirmation step of its own.
 *
 * @param observer - Observable the result is published to.
 */
public async askOpenID(observer: SimpleObservable<IOpenIDUpdate>): Promise<void> {
return observer.update({
state: OpenIDRequestState.Allowed,
token: await this.mx.getOpenIdToken(),
});
}
/**
 * Reads current state events from the room's live timeline state.
 *
 * @param roomId - Room to read from.
 * @param eventType - State event type.
 * @param stateKey - Specific state key, or `undefined` for all events of the type.
 * @returns Matching state events; an empty array when the room, its forward
 *   state, or the requested event is unavailable.
 */
public async readRoomState(
roomId: string,
eventType: string,
stateKey: string | undefined
): Promise<IRoomEvent[]> {
const room = this.mx.getRoom(roomId);
if (room === null) return [];
const state = room.getLiveTimeline().getState(Direction.Forward);
if (state === undefined) return [];
if (stateKey === undefined)
return state.getStateEvents(eventType).map((e) => e.getEffectiveEvent() as IRoomEvent);
const event = state.getStateEvents(eventType, stateKey);
return event === null ? [] : [event.getEffectiveEvent() as IRoomEvent];
}
/**
 * Fetches events related to `eventId` via `client.relations`.
 *
 * @param eventId - Parent event whose relations are requested.
 * @param roomId - Room of the parent event; falls back to the constructor's room.
 * @param relationType - Relation type filter; omitted -> `null` (no filter passed).
 * @param eventType - Event type filter; omitted -> `null` (no filter passed).
 * @param from - Pagination token, forwarded unchanged.
 * @param to - Pagination token, forwarded unchanged.
 * @param limit - Page size, forwarded unchanged.
 * @param direction - `'f'` or `'b'`, forwarded as the SDK `Direction`.
 * @returns The related events plus `nextBatch` / `prevBatch`, with `null`
 *   tokens mapped to `undefined`.
 * @throws Error when no room ID string can be determined.
 */
public async readEventRelations(
eventId: string,
roomId?: string,
relationType?: string,
eventType?: string,
from?: string,
to?: string,
limit?: number,
direction?: 'f' | 'b'
): Promise<IReadEventRelationsResult> {
const client = this.mx;
const dir = direction as Direction;
const targetRoomId = roomId ?? this.inRoomId ?? undefined;
if (typeof targetRoomId !== 'string') {
throw new Error('Error while reading the current room');
}
const { events, nextBatch, prevBatch } = await client.relations(
targetRoomId,
eventId,
relationType ?? null,
eventType ?? null,
{ from, to, limit, dir }
);
return {
chunk: events.map((e) => e.getEffectiveEvent() as IRoomEvent),
nextBatch: nextBatch ?? undefined,
prevBatch: prevBatch ?? undefined,
};
}
/**
 * Searches the user directory and converts each hit from the client's
 * snake_case fields to the widget API's camelCase fields.
 *
 * @param searchTerm - Term to search for.
 * @param limit - Optional maximum number of results, forwarded unchanged.
 * @returns The `limited` flag and the mapped results.
 */
public async searchUserDirectory(
searchTerm: string,
limit?: number
): Promise<ISearchUserDirectoryResult> {
const client = this.mx;
const { limited, results } = await client.searchUserDirectory({ term: searchTerm, limit });
return {
limited,
results: results.map((r) => ({
userId: r.user_id,
displayName: r.display_name,
avatarUrl: r.avatar_url,
})),
};
}
/** Returns the media configuration reported by `client.getMediaConfig()`. */
public async getMediaConfig(): Promise<IGetMediaConfigResult> {
const client = this.mx;
return client.getMediaConfig();
}
/**
 * Uploads a file through `client.uploadContent`.
 *
 * @param file - Body to upload.
 * @returns The `content_uri` of the upload, exposed as `contentUri`.
 */
public async uploadFile(file: XMLHttpRequestBodyInit): Promise<{ contentUri: string }> {
const client = this.mx;
const uploadResult = await client.uploadContent(file);
return { contentUri: uploadResult.content_uri };
}
/**
 * Downloads the media behind a content URI.
 *
 * @param contentUri - Content URI to resolve via `mxcUrlToHttp`.
 * @returns The downloaded data as `file`.
 * @throws Error when the URI cannot be converted to an HTTP URL.
 */
public async downloadFile(contentUri: string): Promise<{ file: XMLHttpRequestBodyInit }> {
const httpUrl = mxcUrlToHttp(this.mx, contentUri, true);
if (!httpUrl) {
throw new Error('Call widget failed to download file! No http url!');
}
const blob = await downloadMedia(httpUrl);
return { file: blob };
}
/** Returns the IDs of every room reported by `client.getVisibleRooms()`. */
public getKnownRooms(): string[] {
return this.mx.getVisibleRooms().map((r) => r.roomId);
}
/**
 * Converts an error into widget API error details.
 *
 * @param error - The thrown value.
 * @returns `matrix_api_error` details for a `MatrixError`; `undefined` for
 *   anything else.
 */
// eslint-disable-next-line class-methods-use-this
public processError(error: unknown): IWidgetApiErrorResponseDataDetails | undefined {
return error instanceof MatrixError
? { matrix_api_error: error.asWidgetApiErrorData() }
: undefined;
}
}