From 156570826a495fa940edca820b2f04b936165756 Mon Sep 17 00:00:00 2001 From: heaven Date: Tue, 5 May 2026 01:02:36 +0300 Subject: [PATCH] feat(bots-telegram): land QR-code login flow rendered client-side from m.image body via qrcode-generator with bridge-race-tolerant state machine --- apps/widget-telegram/package-lock.json | 9 +- apps/widget-telegram/package.json | 3 +- apps/widget-telegram/src/App.tsx | 423 ++++++++++++++++-- .../src/bridge-protocol/dialects/go_v2604.ts | 200 ++++++++- .../src/bridge-protocol/parser.ts | 19 +- .../src/bridge-protocol/types.ts | 36 ++ apps/widget-telegram/src/i18n/en.ts | 18 +- apps/widget-telegram/src/i18n/ru.ts | 42 +- apps/widget-telegram/src/state.ts | 414 ++++++++++++++++- apps/widget-telegram/src/styles.css | 84 ++++ apps/widget-telegram/src/widget-api.ts | 35 +- src/app/features/bots/BotWidgetDriver.ts | 140 +++++- src/app/features/bots/BotWidgetEmbed.ts | 33 +- 13 files changed, 1358 insertions(+), 98 deletions(-) diff --git a/apps/widget-telegram/package-lock.json b/apps/widget-telegram/package-lock.json index 1b1d7515..13c15bec 100644 --- a/apps/widget-telegram/package-lock.json +++ b/apps/widget-telegram/package-lock.json @@ -8,7 +8,8 @@ "name": "@vojo/widget-telegram", "version": "0.0.1", "dependencies": { - "preact": "10.22.1" + "preact": "10.22.1", + "qrcode-generator": "^1.4.4" }, "devDependencies": { "@preact/preset-vite": "2.9.0", @@ -1756,6 +1757,12 @@ "url": "https://opencollective.com/preact" } }, + "node_modules/qrcode-generator": { + "version": "1.4.4", + "resolved": "https://registry.npmjs.org/qrcode-generator/-/qrcode-generator-1.4.4.tgz", + "integrity": "sha512-HM7yY8O2ilqhmULxGMpcHSF1EhJJ9yBj8gvDEuZ6M+KGJ0YY2hKpnXvRD+hZPLrDVck3ExIGhmPtSdcjC+guuw==", + "license": "MIT" + }, "node_modules/resolve": { "version": "1.22.12", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.12.tgz", diff --git a/apps/widget-telegram/package.json b/apps/widget-telegram/package.json index d11f9685..503f0703 100644 
--- a/apps/widget-telegram/package.json +++ b/apps/widget-telegram/package.json @@ -10,7 +10,8 @@ "preview": "vite preview" }, "dependencies": { - "preact": "10.22.1" + "preact": "10.22.1", + "qrcode-generator": "^1.4.4" }, "devDependencies": { "@preact/preset-vite": "2.9.0", diff --git a/apps/widget-telegram/src/App.tsx b/apps/widget-telegram/src/App.tsx index d7a21188..45b6eb6d 100644 --- a/apps/widget-telegram/src/App.tsx +++ b/apps/widget-telegram/src/App.tsx @@ -1,10 +1,11 @@ import { useCallback, useEffect, useMemo, useReducer, useRef, useState } from 'preact/hooks'; import type { Dispatch } from 'preact/hooks'; import type { ComponentChildren } from 'preact'; +import qrcodeGenerator from 'qrcode-generator'; import type { WidgetBootstrap } from './bootstrap'; import { WidgetApi, type RoomEvent } from './widget-api'; import { createT, type T } from './i18n'; -import { parseReply } from './bridge-protocol/parser'; +import { parseEvent } from './bridge-protocol/parser'; import { hydrateFromTimeline, initialLoginState, @@ -280,7 +281,16 @@ const PhoneForm = ({ inputmode="tel" placeholder={t('auth-card.phone.placeholder')} value={value} - onInput={(e) => setValue((e.currentTarget as HTMLInputElement).value)} + onInput={(e) => { + // Auto-prepend `+` so the user never has to remember to type + // it — bridgev2 rejects anything without a leading `+` per + // its E.164 input validator. Skipping the special-case + // formatting (8→+7 etc.) on purpose: keeping the rule at one + // line of logic means there's nothing to misinterpret a + // pasted international number as a Russian trunk number. + const raw = (e.currentTarget as HTMLInputElement).value; + setValue(raw.length > 0 && !raw.startsWith('+') ? 
`+${raw}` : raw); + }} disabled={submitting} /> + + + ); +}; + // -------------------------------------------------------------------------- // About card + modal // -------------------------------------------------------------------------- @@ -672,6 +840,16 @@ export function App({ bootstrap, api }: Props) { const seenEventIds = useRef(new Set()); const [state, dispatch] = useReducer(loginReducer, initialLoginState); + // stateRef mirrors the latest reducer state so async callbacks (live + // event listeners attached once at mount) can read current state + // without their stale closure capturing the initial `unknown` snapshot. + // Used by the transcript diag gate for `qr_redacted` (only show + // «QR использован» when the redaction targets the active QR). + const stateRef = useRef(state); + useEffect(() => { + stateRef.current = state; + }, [state]); + const t = useMemo(() => createT(bootstrap.clientLanguage), [bootstrap.clientLanguage]); useEffect(() => { @@ -748,50 +926,120 @@ export function App({ bootstrap, api }: Props) { // a list-logins probe replies «You're not logged in» and the UI // helpfully offers a fresh login button, losing the SMS code. // Reading the timeline lets us restore the form the user actually - // had open. Hydrate is read-only and never sends commands; only - // bot m.notice events drive state, never user m.text. + // had open. M13 extends this to also pick up active QR-login + // flows: m.image events carry the rotating tg://login URL, and + // m.room.redaction events signal a successful scan. All three + // streams are merged chronologically and fed to the same hydrate + // reducer. let hydrated = false; try { - const events = await api.readTimeline({ limit: 30, msgtype: 'm.notice' }); + // Promise.allSettled (not all): if a single readTimeline rejects + // — driver capability denied, network blip, transport timeout — + // we still want to feed whatever we got into the hydrate + // reducer. 
With Promise.all, one failed read takes down the + // notice-only path that worked since M12.5 (phone/code/password + // resume) just because the QR-extension read couldn't satisfy. + const settled = await Promise.allSettled([ + api.readTimeline({ limit: 30, type: 'm.room.message', msgtype: 'm.notice' }), + // QR images: bridgev2 edits the original event with each token + // rotation (~30 s per Telegram MTProto), so a full 10-minute + // login attempt produces up to ~20 events. We pull 50 to give + // headroom for slower rotations and out-of-order delivery, + // and so the redaction's `redacts: $original` target is in + // our scan window even if the user reloads near the timeout + // boundary. + api.readTimeline({ limit: 50, type: 'm.room.message', msgtype: 'm.image' }), + api.readTimeline({ limit: 10, type: 'm.room.redaction' }), + ]); if (disposed) return; + const pickValue = (s: PromiseSettledResult): RoomEvent[] => + s.status === 'fulfilled' ? s.value : []; + const notices = pickValue(settled[0]); + const qrImages = pickValue(settled[1]); + const redactions = pickValue(settled[2]); - // Driver returns events newest-first; reverse to chronological so - // the hydrate reducer can walk past→present. - const fromBot = events.filter((e) => e.sender === bootstrap.botMxid).reverse(); - const inputs: HydrateInput[] = fromBot.map((e) => ({ - ev: parseReply(e.content.body ?? ''), + // Driver returns events newest-first; reverse each stream and + // merge by origin_server_ts ascending so the hydrate reducer + // walks past→present in true chronological order across event + // types (a redaction landing between two image edits must be + // applied at the right point in the chain). + const fromBot = (events: RoomEvent[]) => + events.filter((e) => e.sender === bootstrap.botMxid); + // Sort by origin_server_ts ascending, tie-breaking on event_id + // lexicographically. 
Without the tie-break, equal-timestamp + // events from different streams (notice vs image vs redaction) + // could process in nondeterministic order — e.g. a redaction + // landing AFTER the image edit it cleans up versus before + // would change which qr_displayed our hydrate latches onto. + // event_id is opaque but stable, which is enough for + // determinism even if not semantically meaningful. + const merged = [...fromBot(notices), ...fromBot(qrImages), ...fromBot(redactions)].sort( + (a, b) => { + const tsDiff = a.origin_server_ts - b.origin_server_ts; + if (tsDiff !== 0) return tsDiff; + return a.event_id < b.event_id ? -1 : a.event_id > b.event_id ? 1 : 0; + } + ); + + const inputs: HydrateInput[] = merged.map((e) => ({ + ev: parseEvent(e), ts: e.origin_server_ts, })); const restored = hydrateFromTimeline(inputs); if (restored) { - // Conservative transcript replay: ONLY bot m.notice lines, plus - // a trailing marker. User m.text is intentionally NOT replayed — - // bridgev2 does not redact 2FA codes server-side, and replaying - // user echoes would surface a code from history that the - // original submit had masked locally. Bot prompts alone give - // enough context for the user to know what step they're on. + // Conservative transcript replay: bot m.notice lines + a + // history marker. m.image events are replaced with a generic + // «QR обновлён» diag — never replay the raw `tg://login?token=...` + // body, that would persist the login token in DOM history past + // the bridge's redaction. Redactions get a «QR consumed» diag. + // User m.text is intentionally NOT replayed — bridgev2 does + // not redact 2FA codes server-side, and replaying user echoes + // would surface a code from history that the original submit + // had masked locally. // - // Dedupe via seenEventIds: a live event for the same notice may - // already have arrived during the readTimeline await (the host - // pushes new bot replies via send_event as soon as they hit the - // room). 
Skipping seen ids in this loop avoids the duplicate - // line, AND the .add side-effect simultaneously pre-seeds the - // set so any post-hydrate live replay of these same notices - // (matrix-js-sdk timeline forward-push) is suppressed too. - // - // Append order: bot lines first, THEN the marker. With the - // newest-at-top render reversal, the marker visually sits ABOVE - // the historical bot block (acting as a divider between live - // activity and replayed history). The marker is gated on at - // least one bot line actually being appended — otherwise it - // would float on its own labelling nothing. + // Dedupe via seenEventIds: a live event for the same notice/ + // image/redaction may already have arrived during the + // readTimeline await. Skipping seen ids in this loop avoids + // the duplicate line, AND the .add side-effect simultaneously + // pre-seeds the set so any post-hydrate live replay of these + // same events is suppressed too. let appendedAnyHistory = false; - for (const e of fromBot) { + // Track which QR event ids we've seen during this scan so a + // `qr_redacted` diag fires only for redactions that match a + // QR we actually replayed. Without this, a stale redaction + // from a previous flow (or an unrelated bot redaction) would + // print a misleading «QR использован» line in history. + const seenQrIds = new Set(); + for (const e of merged) { if (seenEventIds.current.has(e.event_id)) continue; seenEventIds.current.add(e.event_id); - append({ kind: 'from-bot', text: `← ${e.content.body ?? ''}` }); - appendedAnyHistory = true; + const parsed = parseEvent(e); + // Only flip `appendedAnyHistory` when an actual line is + // emitted. Otherwise the trailing «─── история ───» + // marker would float above an empty pane (e.g. timeline + // had only m.image events that parsed to `unknown`, + // skipped silently below). 
+ if (parsed.kind === 'qr_displayed') { + seenQrIds.add(parsed.eventId); + if (parsed.replacesEventId) seenQrIds.add(parsed.replacesEventId); + append({ kind: 'diag', text: t('diag.qr-issued') }); + appendedAnyHistory = true; + } else if (parsed.kind === 'qr_redacted') { + if (seenQrIds.has(parsed.redactsEventId)) { + append({ kind: 'diag', text: t('diag.qr-consumed') }); + appendedAnyHistory = true; + } + } else if (e.type === 'm.room.message' && e.content.msgtype !== 'm.image') { + // m.text / m.notice — body is safe to replay verbatim. + // m.image without tg URL falls into `unknown` upstream + // and we silently skip it (no diag, no body) — there's + // no scenario where the bridge sends arbitrary images + // to the control DM and an opaque «image» line would + // just be noise. + append({ kind: 'from-bot', text: `← ${e.content.body ?? ''}` }); + appendedAnyHistory = true; + } } if (appendedAnyHistory) { append({ kind: 'diag', text: t('diag.history-marker') }); @@ -837,13 +1085,38 @@ export function App({ bootstrap, api }: Props) { // (b) ignore any third-party noise that somehow slips past the // 1:1 invariant. if (ev.sender !== bootstrap.botMxid) return; - const body = ev.content.body ?? ''; - append({ kind: 'from-bot', text: `← ${body}` }); - // Bot reply → LoginEvent → state machine. Ignore msgtype-specific - // routing — bridgev2 sends every login reply as m.notice; the host - // driver already filters to m.text/m.notice on the receive path. - const event = parseReply(body); + // Bot reply → LoginEvent → state machine. parseEvent dispatches by + // event type — m.text/m.notice keep the existing body-regex flow, + // m.image becomes qr_displayed, m.room.redaction becomes qr_redacted. + const event = parseEvent(ev); + + // Transcript routing is GATED on the parser's verdict, not on the + // raw event type. That way: + // * an unrelated bot-side redaction (e.g. 
of an old text reply) + // doesn't print «QR использован» — only a redaction parsed as + // `qr_redacted` against the CURRENTLY-ACTIVE QR does (checked + // against stateRef.current); + // * an `m.image` whose body lacks `tg://login?` doesn't print + // «QR обновлён» — the diag now corresponds to a real QR + // payload our reducer is going to act on; + // * the body of an `m.image` is NEVER appended verbatim — the + // body IS the `tg://login?token=...` secret. + if (event.kind === 'qr_displayed') { + append({ kind: 'diag', text: t('diag.qr-issued') }); + } else if (event.kind === 'qr_redacted') { + const liveState = stateRef.current; + if ( + liveState.kind === 'awaiting_qr_scan' && + liveState.qrEventId === event.redactsEventId + ) { + append({ kind: 'diag', text: t('diag.qr-consumed') }); + } + } else if (ev.type === 'm.room.message' && ev.content.msgtype !== 'm.image') { + const body = ev.content.body ?? ''; + append({ kind: 'from-bot', text: `← ${body}` }); + } + dispatch({ kind: 'event', event }); // After a fresh login_success the bridge doesn't send the loginId in @@ -924,15 +1197,47 @@ export function App({ bootstrap, api }: Props) { } }, [sendBare]); + + // In-flight guard against double-tap. The button is on the disconnected + // screen which unmounts as soon as state advances, BUT a rapid second + // click can fire in the microtask window between dispatch and the next + // React commit (especially on Android WebView, where a tap-rebound can + // synthesise a second click). For phone login, a duplicate `!tg login + // phone` would burn an extra SMS; for QR it's just a redundant + // round-trip but the bridge replies `login_in_progress` and we'd surface + // a confusing yellow warning. Sync ref closes the gap. + const loginInFlight = useRef(false); + // Optimistic awaiting_phone + rollback to disconnected on send failure. // Without rollback the user would see the phone form open with no command // ever delivered to the bot. 
const onClickLogin = useCallback(async () => { + if (loginInFlight.current) return; + loginInFlight.current = true; dispatch({ kind: 'start_login' }); try { await sendBare('login phone'); } catch { dispatch({ kind: 'cancel_pending' }); + } finally { + loginInFlight.current = false; + } + }, [sendBare]); + + // Same optimistic pattern for the QR-login flow. The reducer's + // start_qr_login transitions us into a placeholder awaiting_qr_scan + // (tgUrl='', qrEventId=''); the live `qr_displayed` event shortly after + // overwrites both with the real bridge payload. + const onClickLoginQr = useCallback(async () => { + if (loginInFlight.current) return; + loginInFlight.current = true; + dispatch({ kind: 'start_qr_login' }); + try { + await sendBare('login qr'); + } catch { + dispatch({ kind: 'cancel_pending' }); + } finally { + loginInFlight.current = false; } }, [sendBare]); @@ -1033,6 +1338,19 @@ export function App({ bootstrap, api }: Props) { › + {/* QR-login peer to phone-login. Telegram-side flow is the + * cleanest path on a phone — no SMS, no code-typing — so we + * surface it as a primary action rather than burying it in a + * sub-menu. Same card vocabulary as login-by-phone. */} + {/* Refresh as a peer card to login (same size + style). The * `refreshing` class + disabled attribute drive the in-flight * spinner state — disabled gates :hover/:focus via :not(:disabled) @@ -1074,6 +1392,33 @@ export function App({ bootstrap, api }: Props) { ) : null} + {state.kind === 'awaiting_qr_scan' ? ( +
+ +
+ ) : null} + {state.kind === 'qr_verifying' ? ( +
+
+ + + {t('status.qr-verifying')} + + {/* If the bridge stalls between «scan accepted» and the + * follow-up (twofa_required / login_success), there's no + * other affordance to dig out — the QR panel is gone, no + * form is open. The recovery button refires `list-logins`, + * which routes the reducer back to disconnected/connected + * via not_logged_in / logins_listed. Without it, a network + * split mid-handshake would freeze the user on the «check» + * pill until they reload the whole page. */} + +
+
+ ) : null} {state.kind === 'logging_out' ? (
diff --git a/apps/widget-telegram/src/bridge-protocol/dialects/go_v2604.ts b/apps/widget-telegram/src/bridge-protocol/dialects/go_v2604.ts index e4ef4d27..ea5095ba 100644 --- a/apps/widget-telegram/src/bridge-protocol/dialects/go_v2604.ts +++ b/apps/widget-telegram/src/bridge-protocol/dialects/go_v2604.ts @@ -11,7 +11,7 @@ // (Phase 2 contract), so the widget only ever sees the markdown source — // backticks, asterisks, escaped angle-brackets stay literal. -import type { LoginEvent, ListedLogin } from '../types'; +import type { LoginEvent, ListedLogin, ParsableEvent } from '../types'; // --- Regex table ---------------------------------------------------------- @@ -91,6 +91,11 @@ const INVALID_VALUE_RE = /^invalid value:\s*(.*)$/i; const SUBMIT_FAILED_RE = /^failed to submit input:\s*(.*)$/i; const PREPARE_FAILED_RE = /^failed to prepare login process:\s*(.*)$/i; const START_FAILED_RE = /^failed to start login:\s*(.*)$/i; +// bridgev2/commands/login.go:366 — `Login failed: %v` from +// doLoginDisplayAndWait Wait error path. Captures both the 10-minute +// LoginTimeout (`login process timed out`) and post-cancel +// (`context canceled`) cases. +const LOGIN_FAILED_RE = /^login failed:\s*(.*)$/i; // --- Parser --------------------------------------------------------------- @@ -171,6 +176,9 @@ export const parseGoV2604 = (rawBody: string): LoginEvent => { const startFailedMatch = START_FAILED_RE.exec(body); if (startFailedMatch) return { kind: 'start_failed', reason: startFailedMatch[1].trim() }; + const loginFailedMatch = LOGIN_FAILED_RE.exec(body); + if (loginFailedMatch) return { kind: 'login_failed', reason: loginFailedMatch[1].trim() }; + // Fall-through to login-list AFTER the error traps so a row that happens to // start with `* ` mid-error-message doesn't get mistaken for a login list. 
const logins = parseLoginList(body); @@ -179,6 +187,94 @@ export const parseGoV2604 = (rawBody: string): LoginEvent => { return { kind: 'unknown' }; }; +// --- Full-event parser ---------------------------------------------------- +// +// `parseEventGoV2604` dispatches on `event.type` and routes: +// +// * `m.room.redaction` → `qr_redacted`. We don't need to verify the redacted +// target here; the state machine pairs the redaction's `redacts` against +// the active QR event id and decides whether it's a meaningful signal or +// an unrelated cleanup. +// +// * `m.room.message` + `msgtype=m.image` → `qr_displayed` when the body +// contains a `tg://login?token=...` URL. The bridge sets that as the +// image's text body explicitly (mautrix/go bridgev2 commands/login.go +// sendQR sets `Body: qr` where `qr` is the token URL string). Anything +// else on m.image we don't recognise falls through to `unknown`; the +// widget transcript skips such events silently (no diag, no body). +// +// * `m.room.message` + `msgtype=m.text|m.notice` → existing +// `parseGoV2604(body)` path. + +// Telegram QR-login URLs encode the token in `tg://login?token=...`. The +// bridge wraps it in markdown backticks inside `formatted_body` (we never +// see formatted_body — driver strips it), but `body` carries the raw URL +// per upstream `bridgev2/commands/login.go::sendQR` line 297 (`Body: qr`). +// The regex tolerates surrounding whitespace and a possible markdown +// backtick wrap on either side as defence-in-depth, even though the +// current wire shape doesn't include backticks in the plain body. 
+const TG_LOGIN_URL_RE = /tg:\/\/login\?[^\s`<>]+/i; + +const isObject = (value: unknown): value is Record => + typeof value === 'object' && value !== null && !Array.isArray(value); + +export const parseEventGoV2604 = (event: ParsableEvent): LoginEvent => { + if (event.type === 'm.room.redaction') { + // `redacts` is mirrored at the top level by the host sanitizer (see + // `sanitizeBotWidgetRedactionEvent` in BotWidgetDriver.ts), but check + // both spots for forward-compat with future drivers / SDK shapes. + const target = + typeof event.redacts === 'string' + ? event.redacts + : isObject(event.content) && typeof event.content.redacts === 'string' + ? event.content.redacts + : undefined; + if (!target) return { kind: 'unknown' }; + return { kind: 'qr_redacted', redactsEventId: target }; + } + + if (event.type !== 'm.room.message') return { kind: 'unknown' }; + + const msgtype = event.content?.msgtype; + + if (msgtype === 'm.image') { + // Edits replace `body` by spec; bridgev2 ALSO mirrors the new URL into + // `m.new_content.body`. Prefer `m.new_content.body` when present (so an + // older SDK pre-flattening edit content still lets us extract the new + // token) and fall back to `body`. + const newContent = isObject(event.content['m.new_content']) + ? (event.content['m.new_content'] as { body?: unknown }) + : undefined; + const editedBody = + typeof newContent?.body === 'string' ? newContent.body : undefined; + const directBody = typeof event.content.body === 'string' ? event.content.body : ''; + const body = editedBody ?? directBody; + + const match = body.match(TG_LOGIN_URL_RE); + if (!match) return { kind: 'unknown' }; + + const relatesTo = isObject(event.content['m.relates_to']) + ? (event.content['m.relates_to'] as { rel_type?: unknown; event_id?: unknown }) + : undefined; + const replacesEventId = + relatesTo?.rel_type === 'm.replace' && typeof relatesTo.event_id === 'string' + ? 
relatesTo.event_id + : undefined; + + return { + kind: 'qr_displayed', + tgUrl: match[0], + eventId: event.event_id, + replacesEventId, + }; + } + + if (msgtype !== 'm.text' && msgtype !== 'm.notice') return { kind: 'unknown' }; + + const body = typeof event.content.body === 'string' ? event.content.body : ''; + return parseGoV2604(body); +}; + // --- DEV sanity assertions ------------------------------------------------ // Vite tree-shakes this branch in production builds: `import.meta.env.DEV` // is replaced with the literal `false` and the call site collapses, so the @@ -234,6 +330,14 @@ function runSanityChecks(): void { 'Failed to start login: telegram connect timeout', { kind: 'start_failed', reason: 'telegram connect timeout' }, ], + [ + 'Login failed: login process timed out', + { kind: 'login_failed', reason: 'login process timed out' }, + ], + [ + 'Login failed: context canceled', + { kind: 'login_failed', reason: 'context canceled' }, + ], ['Invalid value: must start with +', { kind: 'invalid_value', reason: 'must start with +' }], [ 'Please specify a login flow, e.g. `login phone`.\n\n* `phone` - Login using your Telegram phone number\n* `qr` - Login by scanning a QR code from your phone\n* `bot` - Log in as a bot using the bot token provided by BotFather.\n', @@ -298,6 +402,100 @@ function runSanityChecks(): void { ); } } + + // parseEventGoV2604 — exercises the full-event dispatch (m.image, + // m.room.redaction, m.notice fall-through). Same throw-on-mismatch + // pattern as the body-only parser cases above. + const eventCases: Array<[ParsableEvent, LoginEvent]> = [ + [ + { + type: 'm.room.message', + event_id: '$qr1', + sender: '@telegrambot:vojo.chat', + content: { msgtype: 'm.image', body: 'tg://login?token=ABCDEF' }, + }, + { kind: 'qr_displayed', tgUrl: 'tg://login?token=ABCDEF', eventId: '$qr1' }, + ], + [ + // QR rotation edit — `m.relates_to.rel_type=m.replace` + new body + // inside `m.new_content.body`. 
The edited token must take precedence + // over the literal `body` (which the sender SDK may keep as the + // original to satisfy clients that don't render edits). + { + type: 'm.room.message', + event_id: '$qr2', + sender: '@telegrambot:vojo.chat', + content: { + msgtype: 'm.image', + body: 'tg://login?token=OLD', + 'm.relates_to': { rel_type: 'm.replace', event_id: '$qr1' }, + 'm.new_content': { msgtype: 'm.image', body: 'tg://login?token=ROTATED' }, + }, + }, + { + kind: 'qr_displayed', + tgUrl: 'tg://login?token=ROTATED', + eventId: '$qr2', + replacesEventId: '$qr1', + }, + ], + [ + // Bare m.image without a tg URL — the bridge has no business sending + // these to the control DM, but if it does we parse the event as + // unknown (transcript skips it silently, no QR-state mutation). + { + type: 'm.room.message', + event_id: '$rand', + sender: '@telegrambot:vojo.chat', + content: { msgtype: 'm.image', body: 'random non-tg image caption' }, + }, + { kind: 'unknown' }, + ], + [ + // Redaction — top-level `redacts` (host sanitizer mirrors at top-level). + { + type: 'm.room.redaction', + event_id: '$red1', + sender: '@telegrambot:vojo.chat', + content: { redacts: '$qr1' }, + redacts: '$qr1', + }, + { kind: 'qr_redacted', redactsEventId: '$qr1' }, + ], + [ + // Redaction missing target — the sanitizer should already reject this, + // but defence-in-depth: parser declines to invent a target. + { + type: 'm.room.redaction', + event_id: '$red2', + sender: '@telegrambot:vojo.chat', + content: {}, + }, + { kind: 'unknown' }, + ], + [ + // m.notice fall-through — preserves existing behaviour for plain + // text replies that already had body-side parser coverage. 
+ { + type: 'm.room.message', + event_id: '$n1', + sender: '@telegrambot:vojo.chat', + content: { msgtype: 'm.notice', body: "You're not logged in" }, + }, + { kind: 'not_logged_in' }, + ], + ]; + + for (const [event, expected] of eventCases) { + const actual = parseEventGoV2604(event); + if (!sameEvent(actual, expected)) { + // eslint-disable-next-line no-console + console.error('[go_v2604 event sanity] mismatch', { event, actual, expected }); + throw new Error( + `go_v2604 event-parser sanity failed for type=${event.type} msgtype=${event.content?.msgtype ?? ''}` + ); + } + } } function sameEvent(a: LoginEvent, b: LoginEvent): boolean { diff --git a/apps/widget-telegram/src/bridge-protocol/parser.ts b/apps/widget-telegram/src/bridge-protocol/parser.ts index d6bb32a0..7f3745a3 100644 --- a/apps/widget-telegram/src/bridge-protocol/parser.ts +++ b/apps/widget-telegram/src/bridge-protocol/parser.ts @@ -1,14 +1,17 @@ -// Parser shim. The widget consumes a single `parseReply(body)` from -// elsewhere; this file picks the active dialect. M12 ships exactly one — -// `go_v2604` — for the operator's current bridge image. When bridgev2 -// strings drift in a future Go release, add a sibling dialect file and -// switch the import below. +// Parser shim. The widget consumes a single `parseEvent(rawEvent)` and +// the dialect handles the full event surface — m.text, m.notice, m.image +// (QR broadcasts), m.room.redaction (post-scan cleanup). M13 ships one +// dialect, `go_v2604`, for the operator's current bridge image. When +// bridgev2 strings drift in a future Go release, add a sibling dialect +// file and switch the import below. // // The dialects/ subdirectory is kept as a seam for that swap; we don't // implement runtime autodetect (the operator owns one bridge image at a // time and a parser pin is honest about that). 
-import type { LoginEvent } from './types'; -import { parseGoV2604 } from './dialects/go_v2604'; +import type { LoginEvent, ParsableEvent } from './types'; +import { parseEventGoV2604 } from './dialects/go_v2604'; -export const parseReply = (body: string): LoginEvent => parseGoV2604(body); +export type { ParsableEvent }; + +export const parseEvent = (event: ParsableEvent): LoginEvent => parseEventGoV2604(event); diff --git a/apps/widget-telegram/src/bridge-protocol/types.ts b/apps/widget-telegram/src/bridge-protocol/types.ts index 0ea8b863..6d480176 100644 --- a/apps/widget-telegram/src/bridge-protocol/types.ts +++ b/apps/widget-telegram/src/bridge-protocol/types.ts @@ -17,6 +17,19 @@ export type ListedLogin = { state: string; }; +// Shape of an inbound event the dialect parser needs to look at. Matches +// the wire shape produced by the host's BotWidgetDriver sanitizer; declared +// here (not in widget-api.ts) so the dialect doesn't import from the +// transport layer. +export type ParsableEvent = { + type: string; + event_id: string; + sender: string; + origin_server_ts?: number; + content: { msgtype?: string; body?: string; [k: string]: unknown }; + redacts?: string; +}; + export type LoginEvent = | { kind: 'logins_listed'; logins: ListedLogin[] } | { kind: 'not_logged_in' } @@ -44,4 +57,27 @@ export type LoginEvent = | { kind: 'submit_failed'; reason?: string } | { kind: 'prepare_failed'; reason?: string } | { kind: 'start_failed'; reason?: string } + // bridgev2/commands/login.go:366 — `Login failed: ` after a + // display-and-wait branch returns an error from `login.Wait()`. Most + // common reasons: server-side `login process timed out` (10-min + // LoginTimeout in pkg/connector/loginqr.go:43) and `context canceled` + // when the user cancelled mid-QR (we've usually already moved to + // disconnected via cancel_pending in that case — see reducer). + | { kind: 'login_failed'; reason?: string } + // QR-login lifecycle (M13). 
The bridge ships `m.image` events whose + // `body` carries the raw `tg://login?token=...` URL; the widget renders + // the QR client-side from that URL and never touches the uploaded PNG. + // `replacesEventId` is set when this event is an `m.replace` edit of a + // prior QR event — the bridge rotates the token roughly every 30 s + // (anti-replay per Telegram MTProto spec) and edits the original event + // each time, so subsequent rotations carry the original event_id in + // `m.relates_to.event_id`. The widget treats that as «same QR-flow, + // updated payload» and just repaints; without it, every rotation would + // re-issue the «awaiting_qr_scan» state and reset transient form state. + | { kind: 'qr_displayed'; tgUrl: string; eventId: string; replacesEventId?: string } + // Bridge redacted the QR event after a successful scan. NOT terminal — + // a 2FA prompt or login success line typically follows; the state + // machine moves us into a `qr_verifying` interstitial until the next + // signal lands. 
+ | { kind: 'qr_redacted'; redactsEventId: string } | { kind: 'unknown' }; diff --git a/apps/widget-telegram/src/i18n/en.ts b/apps/widget-telegram/src/i18n/en.ts index c95396dd..accd21e9 100644 --- a/apps/widget-telegram/src/i18n/en.ts +++ b/apps/widget-telegram/src/i18n/en.ts @@ -9,8 +9,11 @@ export const EN: Record = { 'status.connected': 'Telegram linked', 'status.connected-as': 'Telegram linked as {handle}', 'status.logging-out': 'Signing out…', - 'card.login.name': 'Sign in to Telegram', - 'card.login.desc': 'By phone number, with an SMS code', + 'status.qr-verifying': 'Verifying sign-in…', + 'card.login.name': 'Sign in by phone number', + 'card.login.desc': 'Code arrives in Telegram or via SMS', + 'card.login-qr.name': 'Sign in with QR code', + 'card.login-qr.desc': 'Scan a QR code from the Telegram app on your phone', 'card.refresh.aria': 'Refresh status', 'card.refresh.label': 'Refresh status', 'card.refresh.name': 'Refresh status', @@ -54,6 +57,15 @@ export const EN: Record = { 'auth-card.waiting-hint': 'The bot is still thinking… replies may take up to 30 seconds.', 'auth-card.code.countdown': 'Code arriving in {seconds}s', 'auth-card.code.countdown-done': 'No code yet — tap Cancel and try again.', + 'auth-card.qr.title': 'QR code sign-in', + 'auth-card.qr.hint': 'Open Telegram on your phone and scan this QR code.', + 'auth-card.qr.preparing': 'Preparing QR code…', + 'auth-card.qr.aria': 'QR code for Telegram sign-in. Scan it with your phone.', + 'auth-card.qr.countdown': 'Time left to scan: {minutes}:{seconds}', + 'auth-card.qr.expired': 'Sign-in window expired. Tap Cancel and try again.', + 'auth-card.qr.step-1': 'Open Settings → Devices in the Telegram app.', + 'auth-card.qr.step-2': 'Tap “Link Device” and scan this QR code.', + 'auth-card.qr.step-3': 'If two-step verification is on, enter your cloud password on the next step.', 'auth-error.invalid-code': 'Code is invalid. Please try again.', 'auth-error.wrong-password': 'Password is incorrect. 
Please try again.', 'auth-error.invalid-value': 'Value not accepted: {reason}', @@ -77,6 +89,8 @@ export const EN: Record = { 'diag.send-failed': 'send failed: {message}', 'diag.history-marker': '─── history ───', 'diag.history-unavailable': 'Could not read history — re-checking status.', + 'diag.qr-issued': 'QR code refreshed.', + 'diag.qr-consumed': 'QR code consumed — bridge confirmed the scan.', 'bootstrap.failed': 'Widget failed to start', 'bootstrap.missing-params': 'Missing required URL params: {names}.', 'bootstrap.embedded-only': 'This page is meant to be embedded by Vojo at {route}.', diff --git a/apps/widget-telegram/src/i18n/ru.ts b/apps/widget-telegram/src/i18n/ru.ts index b2b9ea92..a9dd9424 100644 --- a/apps/widget-telegram/src/i18n/ru.ts +++ b/apps/widget-telegram/src/i18n/ru.ts @@ -22,13 +22,22 @@ export const RU = { 'status.connected': 'Telegram привязан', 'status.connected-as': 'Telegram привязан как {handle}', 'status.logging-out': 'Завершение сеанса…', + // QR-вход: после успешного скана мост стирает QR и переходит к 2FA или + // подтверждению логина. Это короткий промежуточный pill между скан-моментом + // и реальным результатом — обычно секунды. + 'status.qr-verifying': 'Проверяем вход…', // --- Section headers --------------------------------------------------- // Human-readable name; bridgev2's `!tg login` is sent under the hood, but // surfacing «/login» on the button makes the UI read like a CLI. - 'card.login.name': 'Войти в Telegram', + 'card.login.name': 'Войти по номеру', // Card desc is descriptive (noun-style), not a third call-to-action — the - // section status carries state, the card carries action + how-to. - 'card.login.desc': 'По номеру телефона, через SMS-код', + // section status carries state, the card carries action + how-to. 
The + // mention of «приложение или SMS» reflects Telegram's actual delivery: + // for users already logged in on another device the OTP arrives as a + // Telegram-app push first, only falling back to SMS if no other session. + 'card.login.desc': 'Код придёт в Telegram или по SMS', + 'card.login-qr.name': 'Войти по QR-коду', + 'card.login-qr.desc': 'Отсканировать QR из приложения Telegram на телефоне', 'card.refresh.aria': 'Обновить статус', 'card.refresh.label': 'Обновить статус', // Refresh-as-card variant for the disconnected state where it sits in @@ -81,6 +90,27 @@ export const RU = { 'auth-card.waiting-hint': 'Бот ещё думает… ответ может идти до 30 секунд.', 'auth-card.code.countdown': 'Код придёт через {seconds} сек', 'auth-card.code.countdown-done': 'Не пришло — нажмите «Отмена» и попробуйте снова.', + // --- QR form ----------------------------------------------------------- + // Заголовок и подсказка над самим QR. Шаги ниже расписывают, где открыть + // сканер в приложении Telegram — без этого у пользователя без опыта + // обычно теряется минута на поиски пункта меню. + 'auth-card.qr.title': 'Вход по QR-коду', + 'auth-card.qr.hint': 'Откройте Telegram на телефоне и отсканируйте этот QR-код.', + 'auth-card.qr.preparing': 'Готовим QR-код…', + 'auth-card.qr.aria': 'QR-код для входа в Telegram. Отсканируйте его телефоном.', + // Обратный отсчёт до серверного таймаута моста (10 минут). Сам QR + // ротируется ~раз в 30 секунд (Telegram-серверный пуш через MTProto), + // и тут отображается всегда свежий — отсчёт показывает оставшееся + // окно ВСЕГО ВХОДА, а не валидность конкретного отображаемого QR. + // Формат «MM:SS» нагляднее «через N секунд» при минутном масштабе. + 'auth-card.qr.countdown': 'На сканирование осталось {minutes}:{seconds}', + 'auth-card.qr.expired': 'Окно входа истекло. Нажмите «Отмена» и попробуйте снова.', + // Шаги для пользователя — соответствуют пути в актуальной версии Telegram + // на момент M13. 
Если Telegram перенесёт пункт меню, это правится тут + // одной строкой; код кнопок не зависит от текста шагов. + 'auth-card.qr.step-1': 'Откройте «Настройки → Устройства» в Telegram.', + 'auth-card.qr.step-2': 'Нажмите «Подключить устройство» и отсканируйте этот QR-код.', + 'auth-card.qr.step-3': 'Если включён облачный пароль — введите его в следующем шаге.', // --- Inline errors ----------------------------------------------------- 'auth-error.invalid-code': 'Код неверный. Попробуйте снова.', 'auth-error.wrong-password': 'Пароль неверный. Попробуйте снова.', @@ -109,6 +139,12 @@ export const RU = { 'diag.send-failed': 'ошибка отправки: {message}', 'diag.history-marker': '─── история ───', 'diag.history-unavailable': 'Не удалось прочитать историю — проверяю статус заново.', + // QR-сообщения никогда не выводятся целиком в transcript — body содержит + // токен `tg://login?token=…`, который мост стирает после скана; сохранять + // его в DOM-логе виджета означало бы пережить эту защиту. Поэтому в логе + // только нейтральные диагностические строки. + 'diag.qr-issued': 'QR-код обновлён.', + 'diag.qr-consumed': 'QR-код использован — мост подтверждает скан.', // --- Bootstrap failure ------------------------------------------------- 'bootstrap.failed': 'Widget не запустился', 'bootstrap.missing-params': 'Отсутствуют обязательные параметры URL: {names}.', diff --git a/apps/widget-telegram/src/state.ts b/apps/widget-telegram/src/state.ts index 9fd012e3..d454bc7d 100644 --- a/apps/widget-telegram/src/state.ts +++ b/apps/widget-telegram/src/state.ts @@ -31,12 +31,34 @@ export type LoginErrorFlag = | { kind: 'unknown_command' }; // A live form is open and waiting for user input. M12.5's hydrate path -// can ONLY ever produce one of these — every other final state falls -// through to live `list-logins` reconciliation. +// can produce a phone/code/password form OR a QR-scan state — every other +// final state falls through to live `list-logins` reconciliation. 
+// +// `awaiting_qr_scan` carries: +// tgUrl — `tg://login?token=...` to render as a QR matrix. +// qrEventId — current event id of the QR `m.image`. The bridge +// rotates the token ~every 30 s and edits the original +// event; rotations carry the original id in +// `m.relates_to.event_id` and the state machine matches +// on this field to decide between «same flow, repaint» +// and «something else replaced our QR» (the latter is a +// no-op — we keep the current qrEventId until the bridge +// redacts or sends a new top-level QR). +// firstShownAt — wall-clock ts of the first QR render in this flow. +// Drives the UX countdown to the bridge's 10-min server- +// side LoginTimeout. NOT a hard kill — when the timer +// expires we just show «попробуйте снова». export type PendingFormState = | { kind: 'awaiting_phone'; lastError?: LoginErrorFlag } | { kind: 'awaiting_code'; lastError?: LoginErrorFlag } - | { kind: 'awaiting_password'; lastError?: LoginErrorFlag }; + | { kind: 'awaiting_password'; lastError?: LoginErrorFlag } + | { + kind: 'awaiting_qr_scan'; + tgUrl: string; + qrEventId: string; + firstShownAt: number; + lastError?: LoginErrorFlag; + }; export type LoginState = // Pre-handshake / pre-list-logins. Status pill: --faint. @@ -49,7 +71,16 @@ export type LoginState = // After code submit (when the bot decided 2fa is needed) — waiting for // password submission. lastError carries `wrong_password` after a failed // password retry. Status pill: --amber for all three. + // `awaiting_qr_scan` is the QR-login analog of `awaiting_phone` — the + // bridge has fired its first `m.image` carrying a `tg://login?token=…` + // URL and we're waiting for the user to scan it on their phone. | PendingFormState + // QR was redacted (i.e. the bridge accepted a scan), but we don't yet + // know whether 2FA is required or login succeeded outright. Held as an + // intermediate spinner until the next bridge signal arrives. Status + // pill: --amber. 
NOT terminal — `twofa_required` lifts us into + // `awaiting_password`, `login_success` into `connected`. + | { kind: 'qr_verifying' } // logout in flight — waiting for `Logged out`. Status pill: --amber. | { kind: 'logging_out'; loginId: string } // Live session. login carries the parsed handle/numericId from @@ -62,6 +93,14 @@ export type LoginState = loginId?: string; }; +// States that the hydrate path can restore after a reload. Equals +// PendingFormState (live forms waiting for input) plus `qr_verifying` +// (the brief interstitial after a successful QR scan but before the bot +// emits twofa_required / login_success). Without `qr_verifying` here a +// reload during that ~1 s gap reads the bridge's empty list-logins and +// routes the user to disconnected, losing the scanned QR. +export type HydrateRestoredState = PendingFormState | { kind: 'qr_verifying' }; + // Outbound user actions the App dispatches. Form-submit actions clear any // pending lastError; structural transitions (start_login, request_logout, // cancel_pending) optimistically advance state — the App rolls them back @@ -69,12 +108,13 @@ export type LoginState = export type LoginAction = | { kind: 'event'; event: LoginEvent } | { kind: 'start_login' } // user clicked "Войти по номеру" + | { kind: 'start_qr_login' } // user clicked "Войти по QR-коду" | { kind: 'submit_phone' } // user clicked submit on phone form | { kind: 'submit_code' } // user clicked submit on code form | { kind: 'submit_password' } // user clicked submit on 2fa form | { kind: 'request_logout'; loginId: string } // user clicked "Выйти" | { kind: 'cancel_pending' } // user clicked "Отмена" - | { kind: 'hydrate'; state: PendingFormState }; // M12.5 timeline-resume seed + | { kind: 'hydrate'; state: HydrateRestoredState }; // M12.5 timeline-resume seed export const initialLoginState: LoginState = { kind: 'unknown' }; @@ -111,7 +151,17 @@ const acceptsTwofa = (s: LoginState): boolean => s.kind === 'awaiting_code'; // when no form 
is open. Shared by the live reducer and the hydrate path — // the predicate body and the resulting type are identical. const isFormState = (s: LoginState): s is PendingFormState => - s.kind === 'awaiting_phone' || s.kind === 'awaiting_code' || s.kind === 'awaiting_password'; + s.kind === 'awaiting_phone' || + s.kind === 'awaiting_code' || + s.kind === 'awaiting_password' || + s.kind === 'awaiting_qr_scan'; + +// Whether `twofa_required` is plausible from the current state. After a code +// submit, after a successful QR scan (which enters qr_verifying), and as a +// late re-entry from awaiting_qr_scan if the bridge skips its redaction +// step (shouldn't happen against bridgev2 v0.2604.0, but the path exists). +const acceptsQrScanTwofa = (s: LoginState): boolean => + s.kind === 'awaiting_qr_scan' || s.kind === 'qr_verifying'; export const loginReducer = (state: LoginState, action: LoginAction): LoginState => { if (action.kind === 'hydrate') { @@ -130,6 +180,19 @@ export const loginReducer = (state: LoginState, action: LoginAction): LoginState if (action.kind === 'start_login') { return { kind: 'awaiting_phone' }; } + if (action.kind === 'start_qr_login') { + // Optimistic transition into a placeholder QR-scan state. The actual QR + // payload arrives as a `qr_displayed` live event and overwrites tgUrl + // / qrEventId / firstShownAt then; until then the panel renders a + // spinner («Готовим QR-код…»). If the `!tg login qr` send fails, the + // App rolls back to `disconnected`. + return { + kind: 'awaiting_qr_scan', + tgUrl: '', + qrEventId: '', + firstShownAt: Date.now(), + }; + } if (action.kind === 'submit_phone') { // Stay on the phone form until the bot confirms with `awaiting_code`. 
// Optimistic transition to awaiting_code would mis-surface a phone-side @@ -172,10 +235,14 @@ export const loginReducer = (state: LoginState, action: LoginAction): LoginState // `You're not logged in` from a list-logins fired before the user // started a fresh login flow would otherwise wipe an active form. // Accept only from states where flipping to disconnected is correct. + // `qr_verifying` is included because the App fires `list-logins` + // as a recovery probe after long QR-verifying stalls — the answer + // there means «scan didn't actually take», back to disconnected. if ( state.kind === 'unknown' || state.kind === 'disconnected' || - state.kind === 'logging_out' + state.kind === 'logging_out' || + state.kind === 'qr_verifying' ) { return { kind: 'disconnected' }; } @@ -204,9 +271,12 @@ export const loginReducer = (state: LoginState, action: LoginAction): LoginState return state; case 'twofa_required': - // First of the two-reply 2fa transition. Only valid after a code - // submit. Ignored from disconnected/connected/etc. - if (!acceptsTwofa(state)) return state; + // First of the two-reply 2fa transition. Valid after a code submit + // (phone-flow path) AND after a successful QR scan (the bridge + // skips straight from QR redaction to «You have two-factor + // authentication enabled.»). Ignored from disconnected/connected + // and from awaiting_phone (where it'd indicate a bridge bug). + if (!acceptsTwofa(state) && !acceptsQrScanTwofa(state)) return state; return { kind: 'awaiting_password' }; case 'invalid_code': @@ -298,6 +368,26 @@ export const loginReducer = (state: LoginState, action: LoginAction): LoginState case 'start_failed': return { kind: 'disconnected', lastError: { kind: 'start_failed', reason: event.reason } }; + case 'login_failed': + // bridgev2/commands/login.go:366 sends `Login failed: ` after + // the display-and-wait branch's `login.Wait()` returns. The error + // string we get here splits cleanly in two: + // + // 1. 
`context canceled` — fires whenever a `!tg cancel` tears + // down a running login flow. ALWAYS a no-op for our state: + // it's an echo of OUR cancel (or of an auto-cancel during a + // cancel-race recovery). If we transitioned to disconnected + // here, a stale «context canceled» from a previous flow + // could clobber a brand-new QR flow the user just started — + // observed in prod 2026-05-04 logs as a state-flapping loop. + // + // 2. anything else (most commonly `login process timed out` + // after the 10-min server-side LoginTimeout) — real failure + // of the live flow; route to disconnected with the warning. + if (event.reason === 'context canceled') return state; + if (state.kind === 'disconnected') return state; + return { kind: 'disconnected', lastError: { kind: 'start_failed', reason: event.reason } }; + case 'flow_required': case 'flow_invalid': // We always send `login phone` so this shouldn't happen. If it does, @@ -311,6 +401,92 @@ export const loginReducer = (state: LoginState, action: LoginAction): LoginState // loudly on the disconnected screen so the misconfig is visible. return { kind: 'disconnected', lastError: { kind: 'unknown_command' } }; + case 'qr_displayed': { + // `qrEventId` tracks the ORIGINAL bridge event — bridgev2 emits the + // QR as a single `m.image`, then on each token rotation (every ~30 s + // per Telegram MTProto QR-auth spec) edits the SAME event with + // `m.relates_to.rel_type=m.replace` + `m.relates_to.event_id=`. + // The eventual redaction also targets the original. So we only ever + // bind to the original id and repaint tgUrl on edits. + + // Defence-in-depth: an inbound `qr_displayed` MUST carry a non-empty + // event id (otherwise an adversarial bridge / spoofed event could + // land in the placeholder slot and never be dislodged because every + // subsequent check would also see empty ids). 
The parser produces + // `eventId: event.event_id` and the host driver rejects events with + // empty event_id at the sanitizer; this is a redundant guard. + if (event.eventId.length === 0) return state; + + // Initial QR for this flow — set both anchors. We accept from: + // * `unknown` — cold-start before list-logins resolves; + // * placeholder `awaiting_qr_scan{qrEventId=''}` set optimistically + // by `start_qr_login`; + // * `disconnected` — handles bridgev2's startup race. If the user + // clicks Cancel while bridge is still in `auth_key generation` + // (~2 s), the cancel arrives BEFORE the bridge's CommandState + // is registered, so it replies «No ongoing command» (cancel_no_op, + // state→disconnected via cancel_pending). Bridge then continues + // with the original login as if cancel never happened, and a + // few seconds later emits the m.image. Accepting from + // `disconnected` re-surfaces that QR so the user can either scan + // it or click Cancel again (this time the bridge has a real + // CommandState and the cancel will actually take). REJECTING + // here causes the user to be stuck on a disconnected screen + // while the bridge is happily hosting a 10-min QR-display-and- + // wait — bad UX, observed on production 2026-05-04. + if ( + state.kind === 'unknown' || + state.kind === 'disconnected' || + (state.kind === 'awaiting_qr_scan' && state.qrEventId === '') + ) { + return { + kind: 'awaiting_qr_scan', + tgUrl: event.tgUrl, + qrEventId: event.eventId, + firstShownAt: + state.kind === 'awaiting_qr_scan' && state.firstShownAt + ? state.firstShownAt + : Date.now(), + }; + } + + if (state.kind !== 'awaiting_qr_scan') return state; + + // Rotation edit pointing at our original — repaint tgUrl, keep id. + if (event.replacesEventId === state.qrEventId) { + return { ...state, tgUrl: event.tgUrl }; + } + + // A fresh non-edit qr_displayed while we're already tracking one. + // Could be the bridge restarting the QR-login internally (rare). 
+ // Adopt the new event as the new anchor — the old one will be + // either redacted or simply abandoned by the bridge. + if (!event.replacesEventId) { + return { + kind: 'awaiting_qr_scan', + tgUrl: event.tgUrl, + qrEventId: event.eventId, + firstShownAt: Date.now(), + }; + } + + // Edit pointing at something we don't track — ignore. Don't let + // foreign edits or stale-on-redacted events destabilise the panel. + return state; + } + + case 'qr_redacted': { + // Bridge cleaned up the QR after a successful scan. Held as + // `qr_verifying` until the next signal (twofa_required or + // login_success) lands. Only honour from awaiting_qr_scan with a + // matching event id — a redaction targeting some unrelated event + // (or a redaction arriving while we're already past the QR step) + // must not destabilise the current state. + if (state.kind !== 'awaiting_qr_scan') return state; + if (state.qrEventId !== event.redactsEventId) return state; + return { kind: 'qr_verifying' }; + } + case 'unknown': return state; @@ -373,22 +549,32 @@ type HydrateAccumulator = { // Apply one event with permissive rules. Unlike the live reducer, every // transition is allowed from any predecessor — we're rebuilding past truth, // not protecting against late races. -const stepHydrate = (acc: HydrateAccumulator, input: HydrateInput): HydrateAccumulator => { +const stepHydrate = ( + prevAcc: HydrateAccumulator, + input: HydrateInput +): HydrateAccumulator => { const { ev, ts } = input; // After a terminal event (cancel_ok / logout_ok / login_success / …) we // normally stop tracking — anything that follows is by definition a fresh - // flow that the live `list-logins` will reconcile. EXCEPT for one case: + // flow that the live `list-logins` will reconcile. EXCEPT for two cases: // if `awaiting_phone` shows up, that IS the bridgev2 signature of `!tg - // login phone` being re-issued. 
The user cancelled (or finished) and is - // now logging in again; the chain should resume tracking from the new + // login phone` being re-issued; if `qr_displayed` shows up, that's the + // same pattern for `!tg login qr`. The user cancelled (or finished) and + // is now logging in again; the chain should resume tracking from the new // start. Without this re-entry, sequences like // [awaiting_code, cancel_ok, awaiting_phone, awaiting_code] // (cancel-then-restart, mid-code) would return null and regress the very // M12.5 bug we set out to fix. - if (acc.terminated && ev.kind !== 'awaiting_phone') { - return acc; + if (prevAcc.terminated && ev.kind !== 'awaiting_phone' && ev.kind !== 'qr_displayed') { + return prevAcc; } + // Restart-on-re-entry: clear the terminated bit AND any prior tracked + // state so the new flow's first event becomes the new anchor without + // inheriting the old QR's eventId. + const acc: HydrateAccumulator = prevAcc.terminated + ? { state: { kind: 'unknown' }, pendingTs: null, terminated: false } + : prevAcc; switch (ev.kind) { case 'awaiting_phone': @@ -403,6 +589,62 @@ const stepHydrate = (acc: HydrateAccumulator, input: HydrateInput): HydrateAccum case 'twofa_required': return { state: { kind: 'awaiting_password' }, pendingTs: ts, terminated: false }; + case 'qr_displayed': { + // Same anchor logic as the live reducer: qrEventId tracks the + // ORIGINAL event, edits repaint tgUrl. In hydrate we always start + // from `unknown` and walk past→present, so the original is the + // first qr_displayed without a `replacesEventId` we've already + // adopted. 
+ if (acc.state.kind !== 'awaiting_qr_scan') { + return { + state: { + kind: 'awaiting_qr_scan', + tgUrl: ev.tgUrl, + qrEventId: ev.eventId, + firstShownAt: ts, + }, + pendingTs: ts, + terminated: false, + }; + } + if (ev.replacesEventId === acc.state.qrEventId) { + return { + state: { ...acc.state, tgUrl: ev.tgUrl }, + pendingTs: ts, + terminated: false, + }; + } + if (!ev.replacesEventId) { + return { + state: { + kind: 'awaiting_qr_scan', + tgUrl: ev.tgUrl, + qrEventId: ev.eventId, + firstShownAt: ts, + }, + pendingTs: ts, + terminated: false, + }; + } + return acc; + } + + case 'qr_redacted': { + // QR was consumed by a successful scan in the past. NOT terminal — + // a 2FA prompt or login_success typically follows in the same + // scan window, and reload-after-scan-but-before-2FA-submit MUST + // restore the password form (otherwise the user reloads, sees + // `disconnected`, list-logins replies «You're not logged in» + // because the bridge hasn't completed login yet, and the user + // has to restart the QR flow from scratch — losing the scan). + // Move into `qr_verifying` (interstitial) and keep the chain + // open so subsequent twofa_required / awaiting_password can lift + // us into the password form. + if (acc.state.kind !== 'awaiting_qr_scan') return acc; + if (acc.state.qrEventId !== ev.redactsEventId) return acc; + return { state: { kind: 'qr_verifying' }, pendingTs: ts, terminated: false }; + } + case 'invalid_code': return { state: { kind: 'awaiting_code', lastError: { kind: 'invalid_code' } }, @@ -434,6 +676,14 @@ const stepHydrate = (acc: HydrateAccumulator, input: HydrateInput): HydrateAccum terminated: false, }; + case 'login_failed': + // `context canceled` is an echo of a previous cancel — never a + // terminal signal for the chain we're hydrating, since the chain + // can immediately re-enter via a fresh `qr_displayed` / `awaiting_phone` + // for a new flow. Treat as a no-op so the chain keeps walking. 
+ if (ev.reason === 'context canceled') return acc; + return { state: acc.state, pendingTs: null, terminated: true }; + // Terminal events — collapse the chain. The state at this point is // whatever-the-bot-confirmed-last; we don't care which, the caller // returns null and lets `list-logins` reconcile. @@ -475,7 +725,7 @@ const stepHydrate = (acc: HydrateAccumulator, input: HydrateInput): HydrateAccum export const hydrateFromTimeline = ( inputs: ReadonlyArray, now: number = Date.now() -): PendingFormState | null => { +): HydrateRestoredState | null => { const acc = inputs.reduce(stepHydrate, { state: { kind: 'unknown' }, pendingTs: null, @@ -483,9 +733,10 @@ export const hydrateFromTimeline = ( }); if (acc.terminated) return null; - if (!isFormState(acc.state)) return null; if (acc.pendingTs === null) return null; if (now - acc.pendingTs > HYDRATE_FRESHNESS_MS) return null; + if (acc.state.kind === 'qr_verifying') return acc.state; + if (!isFormState(acc.state)) return null; return acc.state; }; @@ -659,6 +910,135 @@ function runHydrateSanity(): void { inputs: [{ ev: { kind: 'unknown' }, ts: recent(0) }], expected: null, }, + // QR-login hydrate cases (M13) + { + name: 'lone qr_displayed → awaiting_qr_scan', + inputs: [ + { + ev: { kind: 'qr_displayed', tgUrl: 'tg://login?token=A', eventId: '$qrA' }, + ts: recent(0), + }, + ], + expected: { + kind: 'awaiting_qr_scan', + tgUrl: 'tg://login?token=A', + qrEventId: '$qrA', + firstShownAt: recent(0), + }, + }, + { + name: 'qr rotation edits → repaint url, keep original event id', + inputs: [ + { + ev: { kind: 'qr_displayed', tgUrl: 'tg://login?token=A', eventId: '$qrA' }, + ts: recent(0), + }, + { + ev: { + kind: 'qr_displayed', + tgUrl: 'tg://login?token=B', + eventId: '$qrEdit1', + replacesEventId: '$qrA', + }, + ts: recent(30000), + }, + { + ev: { + kind: 'qr_displayed', + tgUrl: 'tg://login?token=C', + eventId: '$qrEdit2', + replacesEventId: '$qrA', + }, + ts: recent(60000), + }, + ], + expected: { + kind: 
'awaiting_qr_scan', + tgUrl: 'tg://login?token=C', + qrEventId: '$qrA', + firstShownAt: recent(0), + }, + }, + { + name: 'qr_redacted with mismatched target → ignored', + inputs: [ + { + ev: { kind: 'qr_displayed', tgUrl: 'tg://login?token=A', eventId: '$qrA' }, + ts: recent(0), + }, + { ev: { kind: 'qr_redacted', redactsEventId: '$other' }, ts: recent(30000) }, + ], + expected: { + kind: 'awaiting_qr_scan', + tgUrl: 'tg://login?token=A', + qrEventId: '$qrA', + firstShownAt: recent(0), + }, + }, + { + name: 'qr scan → twofa pair → awaiting_password (mid-flow reload restores the password form)', + inputs: [ + { + ev: { kind: 'qr_displayed', tgUrl: 'tg://login?token=A', eventId: '$qrA' }, + ts: recent(0), + }, + { ev: { kind: 'qr_redacted', redactsEventId: '$qrA' }, ts: recent(30000) }, + { ev: { kind: 'twofa_required' }, ts: recent(31000) }, + { ev: { kind: 'awaiting_password' }, ts: recent(31100) }, + ], + // qr_redacted moves into qr_verifying without terminating the + // chain — twofa_required + awaiting_password follow and lift the + // chain into the password form. Without this the user would + // reload mid-2FA and lose the QR scan progress. + expected: { kind: 'awaiting_password' }, + }, + { + name: 'qr scan → no follow-up → qr_verifying (reload during the 1 s gap)', + inputs: [ + { + ev: { kind: 'qr_displayed', tgUrl: 'tg://login?token=A', eventId: '$qrA' }, + ts: recent(0), + }, + { ev: { kind: 'qr_redacted', redactsEventId: '$qrA' }, ts: recent(30000) }, + ], + // No twofa_required / login_success in this scan — bridge hasn't + // emitted the next signal yet. Restore qr_verifying so the user + // sees the «Проверяем вход…» pill on reload instead of a flash + // of disconnected. 
+ expected: { kind: 'qr_verifying' }, + }, + { + name: 'qr scan → login_success → null (terminal — let list-logins reconcile)', + inputs: [ + { + ev: { kind: 'qr_displayed', tgUrl: 'tg://login?token=A', eventId: '$qrA' }, + ts: recent(0), + }, + { ev: { kind: 'qr_redacted', redactsEventId: '$qrA' }, ts: recent(30000) }, + { + ev: { kind: 'login_success', handle: '@x', numericId: '1' }, + ts: recent(31000), + }, + ], + expected: null, + }, + { + name: 'cancel-then-qr-restart → awaiting_qr_scan', + inputs: [ + { ev: { kind: 'awaiting_phone' }, ts: recent(0) }, + { ev: { kind: 'cancel_ok' }, ts: recent(1000) }, + { + ev: { kind: 'qr_displayed', tgUrl: 'tg://login?token=Z', eventId: '$qrZ' }, + ts: recent(2000), + }, + ], + expected: { + kind: 'awaiting_qr_scan', + tgUrl: 'tg://login?token=Z', + qrEventId: '$qrZ', + firstShownAt: recent(2000), + }, + }, ]; for (const c of cases) { diff --git a/apps/widget-telegram/src/styles.css b/apps/widget-telegram/src/styles.css index 4e57b782..1cce97bd 100644 --- a/apps/widget-telegram/src/styles.css +++ b/apps/widget-telegram/src/styles.css @@ -636,6 +636,78 @@ body { color: var(--amber); } +/* ── QR-login panel ─────────────────────────────────────────────── */ + +/* Override the auth-card row layout — QR panel stacks vertically with the + * matrix as the visual anchor. Keeps the same outer chrome (border, radius, + * padding) so it reads as a sibling to the phone/code/password forms. */ +.auth-card-qr { + align-items: stretch; +} + +/* The QR matrix sits on a hard #fff plate regardless of theme — phone + * camera scanners need maximum contrast, and the bridge's PNG fallback + * also bakes in a white background. The frame is centered, fixed-size, + * with a soft inner padding so the quiet zone (already 4 modules in the + * SVG itself) is reinforced visually for low-contrast displays. 
*/ +.auth-card-qr-frame { + align-self: center; + background: #fff; + border-radius: 12px; + padding: 14px; + display: inline-flex; + align-items: center; + justify-content: center; + /* Lock the inner box to the SVG's rendered size so the placeholder + * variant doesn't collapse to zero height while the matrix is being + * computed (`buildQrModules` is synchronous but the first React commit + * after `start_qr_login` flips state with tgUrl='', and we want the + * placeholder to occupy the same footprint). */ + min-width: 260px; + min-height: 260px; + /* Drop a subtle outer shadow so the white plate visually separates from + * the surrounding dark surface — without this the corners look + * paste-on-paper. */ + box-shadow: 0 1px 0 rgba(255, 255, 255, 0.06), 0 12px 24px rgba(0, 0, 0, 0.32); +} + +/* Placeholder while we wait for the bridge's first qr_displayed event. + * Same visual vocabulary as `.section-status.checking`: amber dot + muted + * text — but inverted onto the white plate so the colors work. */ +.auth-card-qr-placeholder { + display: inline-flex; + align-items: center; + gap: 8px; + color: rgba(26, 26, 29, 0.62); + font-size: 13px; + line-height: 20px; + padding: 96px 16px; +} +.auth-card-qr-placeholder .dot { + width: 8px; + height: 8px; + border-radius: 50%; + background: var(--amber); + flex-shrink: 0; +} + +/* Step list under the QR — explicit phone-side instructions matter more + * here than for SMS, because Telegram's «Link Device» menu isn't a place + * users hit often (vs the typing-an-SMS-code muscle memory). 
*/ +.auth-card-qr-steps { + margin: 0; + padding-left: 1.4em; + display: flex; + flex-direction: column; + gap: 6px; + font-size: 13px; + line-height: 19px; + color: var(--muted); +} +.auth-card-qr-steps li::marker { + color: var(--faint); +} + @media (max-width: 600px) { .auth-card-row { flex-direction: column; @@ -664,6 +736,18 @@ body { .command-grid { grid-template-columns: minmax(0, 1fr); } + + /* Mobile QR plate — keep edge-to-edge readable. The 232px SVG matches + * desktop, but the surrounding plate gets a smaller min-size to fit + * narrower viewports without horizontal scroll. */ + .auth-card-qr-frame { + min-width: 232px; + min-height: 232px; + padding: 10px; + } + .auth-card-qr-placeholder { + padding: 80px 12px; + } } /* ── Linkified transcript bodies ─────────────────────────────────── */ diff --git a/apps/widget-telegram/src/widget-api.ts b/apps/widget-telegram/src/widget-api.ts index 80f433a5..9730e1a0 100644 --- a/apps/widget-telegram/src/widget-api.ts +++ b/apps/widget-telegram/src/widget-api.ts @@ -22,6 +22,10 @@ export type RoomEvent = { origin_server_ts: number; content: { msgtype?: string; body?: string; [k: string]: unknown }; unsigned: Record; + // `m.room.redaction` events carry `redacts` at the top level (room v < 11) + // and/or inside `content.redacts` (v11+). The host driver mirrors at both + // for forward-compat; the widget-side parser reads either. + redacts?: string; }; type ToWidgetMessage = { @@ -120,12 +124,17 @@ export class WidgetApi { // `readRoomTimeline` (single-room cap-checked) rather than the deprecated // `readRoomEvents` fallback. Driver returns events newest-first; reversing // to chronological order is the caller's job. + // + // `type` defaults to `m.room.message`; pass `m.room.redaction` to scan QR + // post-scan cleanup events. `msgtype` is honoured only for m.room.message + // (matches the driver's `readRoomTimeline` semantics). 
public async readTimeline(opts: { limit: number; - msgtype?: 'm.text' | 'm.notice'; + type?: 'm.room.message' | 'm.room.redaction'; + msgtype?: 'm.text' | 'm.notice' | 'm.image'; }): Promise { const data: Record = { - type: 'm.room.message', + type: opts.type ?? 'm.room.message', limit: opts.limit, room_ids: [this.bootstrap.roomId], }; @@ -220,11 +229,17 @@ export class WidgetApi { return; } case 'send_event': { - // Live event push from host. We forward only m.room.message — - // m.room.member state updates also arrive here but we don't - // surface them in M11. + // Live event push from host. Forward `m.room.message` (carries the + // bot's notices / errors / `m.image` QR-login broadcasts) AND + // `m.room.redaction` (post-scan QR cleanup, see BotWidgetDriver + // `sanitizeBotWidgetRedactionEvent`). State events (m.room.member) + // also arrive on this channel — we still ignore them here. const data = msg.data as Partial | undefined; - if (data && data.type === 'm.room.message' && data.event_id) { + if ( + data && + data.event_id && + (data.type === 'm.room.message' || data.type === 'm.room.redaction') + ) { this.emit('liveEvent', data as RoomEvent); } this.replyTo(msg, {}); @@ -270,10 +285,18 @@ export class WidgetApi { // Capability set must match docs/plans/bots_tab.md (Phase 3 contract) and // the host's BotWidgetDriver.getBotWidgetCapabilities. Anything else is // silently dropped by the host's validateCapabilities — keep this aligned. +// +// `m.image` and `m.room.redaction` are the QR-login additions (M13). The +// host sanitizer for `m.image` strips `url` / `file` / `info`, leaving only +// `body` (the bridge encodes `tg://login?token=...` there) plus +// `m.relates_to` / `m.new_content` for QR rotation edits. Redactions +// signal that the QR was consumed by a successful scan. 
export const buildCapabilities = (roomId: string): Capability[] => [ `org.matrix.msc2762.timeline:${roomId}`, 'org.matrix.msc2762.send.event:m.room.message#m.text', 'org.matrix.msc2762.receive.event:m.room.message#m.text', 'org.matrix.msc2762.receive.event:m.room.message#m.notice', + 'org.matrix.msc2762.receive.event:m.room.message#m.image', + 'org.matrix.msc2762.receive.event:m.room.redaction', 'org.matrix.msc2762.receive.state_event:m.room.member', ]; diff --git a/src/app/features/bots/BotWidgetDriver.ts b/src/app/features/bots/BotWidgetDriver.ts index cb695be3..f6d2d212 100644 --- a/src/app/features/bots/BotWidgetDriver.ts +++ b/src/app/features/bots/BotWidgetDriver.ts @@ -28,8 +28,14 @@ const BOT_WIDGET_TIMELINE_LIMIT = 100; const isObject = (value: unknown): value is Record => typeof value === 'object' && value !== null && !Array.isArray(value); -const isAllowedMessageType = (msgtype: unknown): msgtype is 'm.text' | 'm.notice' => - msgtype === 'm.text' || msgtype === 'm.notice'; +// `m.image` is allowed strictly so the widget can drive a Telegram QR-login +// flow: the bridge's `m.image` event carries the raw `tg://login?token=...` +// URL inside `content.body`, which is enough for client-side QR rendering +// without ever shipping the uploaded PNG bytes through the widget API. +// `sanitizeBotWidgetMessageEvent` strictly drops `url` / `file` / `info` / +// `thumbnail_url` etc. for `m.image`, so no media payload escapes the host. +const isAllowedMessageType = (msgtype: unknown): msgtype is 'm.text' | 'm.notice' | 'm.image' => + msgtype === 'm.text' || msgtype === 'm.notice' || msgtype === 'm.image'; export const isSafeBotWidgetRoom = ( mx: MatrixClient, @@ -54,6 +60,40 @@ export const isSafeBotWidgetRoom = ( ); }; +// Pull the relate metadata from a content object. 
We only forward two narrow +// shapes the QR-login flow needs: +// `m.relates_to` STRICTLY for `rel_type=m.replace` edit pointers; +// `m.new_content` STRICTLY for `msgtype=m.image` replacement bodies. +// Anything else — reactions (`m.annotation`), threads (`m.thread`), references, +// custom relate types, or edits whose new_content claims a non-image +// msgtype — is dropped at the boundary. The widget never opted into reactions +// or threads, and forwarding those would be unexpected privilege creep. +type EditMetadata = { + 'm.relates_to'?: { rel_type: 'm.replace'; event_id: string }; + 'm.new_content'?: { msgtype: 'm.image'; body: string }; +}; + +const pickEditMetadata = (content: Record): EditMetadata => { + const out: EditMetadata = {}; + const relatesTo = content['m.relates_to']; + if ( + isObject(relatesTo) && + relatesTo.rel_type === 'm.replace' && + typeof relatesTo.event_id === 'string' + ) { + out['m.relates_to'] = { rel_type: 'm.replace', event_id: relatesTo.event_id }; + } + const newContent = content['m.new_content']; + if ( + isObject(newContent) && + newContent.msgtype === 'm.image' && + typeof newContent.body === 'string' + ) { + out['m.new_content'] = { msgtype: 'm.image', body: newContent.body }; + } + return out; +}; + export const sanitizeBotWidgetMessageEvent = (rawEvent: IRoomEvent): IRoomEvent | undefined => { if (rawEvent.type !== EventType.RoomMessage) return undefined; if (rawEvent.state_key !== undefined) return undefined; @@ -70,23 +110,85 @@ export const sanitizeBotWidgetMessageEvent = (rawEvent: IRoomEvent): IRoomEvent // a future SDK upgrade could attach extra top-level keys (decryption hints, // local-echo flags, custom org.matrix.* fields) and we never want those to // ride out to a widget that only asked for {msgtype, body}. 
+ const baseContent: Record = { msgtype, body }; + + // For `m.image` the widget needs edit-relate metadata so the QR-rotation + // event-replace stream is recognisable, but it must NEVER receive `url` / + // `file` / `info` / `thumbnail_*` — those would either leak the bridge's + // mxc:// authenticated media URL or open an authenticated-media fetch path + // we have not security-reviewed. Allowlist body+relate-only at the + // boundary; m.text / m.notice keep the strict {msgtype, body} shape. + if (msgtype === 'm.image') { + Object.assign(baseContent, pickEditMetadata(rawEvent.content)); + } + return { type: EventType.RoomMessage, event_id: rawEvent.event_id, room_id: rawEvent.room_id, sender: rawEvent.sender, origin_server_ts: rawEvent.origin_server_ts, - content: { msgtype, body }, + content: baseContent, unsigned: {}, }; }; +// QR-login flow: the bridge redacts the QR `m.image` event after a successful +// scan. The widget consumes the redaction as a signal that the QR is no +// longer valid and the flow has progressed (typically straight into 2FA or +// `Successfully logged in`). We forward only event identification + the +// `redacts` target — no `reason`, no other content fields. +export const sanitizeBotWidgetRedactionEvent = (rawEvent: IRoomEvent): IRoomEvent | undefined => { + if (rawEvent.type !== EventType.RoomRedaction) return undefined; + if (rawEvent.state_key !== undefined) return undefined; + if (typeof rawEvent.event_id !== 'string' || rawEvent.event_id.length === 0) return undefined; + if (typeof rawEvent.room_id !== 'string' || rawEvent.room_id.length === 0) return undefined; + if (typeof rawEvent.sender !== 'string') return undefined; + if (typeof rawEvent.origin_server_ts !== 'number') return undefined; + + // Pre-room-v11 servers carry `redacts` at the top level; v11+ moved it + // into `content.redacts`. Tolerate both — the widget reads only one + // resolved value and doesn't care which wire shape it arrived in. 
+ const topLevelRedacts = (rawEvent as IRoomEvent & { redacts?: unknown }).redacts; + const contentRedacts = isObject(rawEvent.content) + ? (rawEvent.content as { redacts?: unknown }).redacts + : undefined; + let redacts: string | undefined; + if (typeof topLevelRedacts === 'string') redacts = topLevelRedacts; + else if (typeof contentRedacts === 'string') redacts = contentRedacts; + if (!redacts) return undefined; + + return { + type: EventType.RoomRedaction, + event_id: rawEvent.event_id, + room_id: rawEvent.room_id, + sender: rawEvent.sender, + origin_server_ts: rawEvent.origin_server_ts, + content: { redacts }, + // Mirror at the top level so widget-side parsers that read either spot + // still pick it up regardless of room version. + redacts, + unsigned: {}, + } as IRoomEvent; +}; + export const getBotWidgetCapabilities = (roomId: string): Set => new Set([ `org.matrix.msc2762.timeline:${roomId}`, WidgetEventCapability.forRoomMessageEvent(EventDirection.Send, 'm.text').raw, WidgetEventCapability.forRoomMessageEvent(EventDirection.Receive, 'm.text').raw, WidgetEventCapability.forRoomMessageEvent(EventDirection.Receive, 'm.notice').raw, + // Telegram QR login: the bridge ships QR as `m.image` with the raw + // `tg://login?token=...` URL inside `content.body`. The driver + // sanitizer strips `url` / `file` / `info` so the widget gets the + // login-token string for client-side QR rendering and nothing else — + // no mxc resolution, no PNG bytes through postMessage. + WidgetEventCapability.forRoomMessageEvent(EventDirection.Receive, 'm.image').raw, + // `m.room.redaction` lets the widget see the post-scan QR cleanup the + // bridge fires after a successful login. Without this the widget would + // keep re-rendering a stale QR until the 2FA prompt or success line + // arrived (which is not guaranteed in the no-2FA branch). 
+ WidgetEventCapability.forRoomEvent(EventDirection.Receive, EventType.RoomRedaction).raw, WidgetEventCapability.forStateEvent(EventDirection.Receive, EventType.RoomMember).raw, ]); @@ -154,34 +256,36 @@ export class BotWidgetDriver extends WidgetDriver { if (eventType === EventType.RoomMember) { return this.readRoomState(targetRoomId, eventType, stateKey); } - if (eventType !== EventType.RoomMessage) return []; - if (msgtype !== undefined && !isAllowedMessageType(msgtype)) return []; const safeLimit = limit > 0 ? Math.min(limit, BOT_WIDGET_TIMELINE_LIMIT) : BOT_WIDGET_TIMELINE_LIMIT; - const events: MatrixEvent[] = []; + const collected: { ev: MatrixEvent; sanitized: IRoomEvent }[] = []; const timelineEvents = room.getLiveTimeline().getEvents(); for (let i = timelineEvents.length - 1; i >= 0; i -= 1) { - if (events.length >= safeLimit) break; + if (collected.length >= safeLimit) break; const ev = timelineEvents[i]; if (since !== undefined && ev.getId() === since) break; - if (!ev.isState() && ev.getType() === EventType.RoomMessage) { - const content = ev.getContent(); - if ( - isAllowedMessageType(content.msgtype) && - (msgtype === undefined || content.msgtype === msgtype) - ) { - events.push(ev); + + if (!ev.isState() && ev.getType() === eventType) { + if (eventType === EventType.RoomMessage) { + const content = ev.getContent(); + if ( + isAllowedMessageType(content.msgtype) && + (msgtype === undefined || content.msgtype === msgtype) + ) { + const sanitized = sanitizeBotWidgetMessageEvent(ev.getEffectiveEvent() as IRoomEvent); + if (sanitized) collected.push({ ev, sanitized }); + } + } else if (eventType === EventType.RoomRedaction) { + const sanitized = sanitizeBotWidgetRedactionEvent(ev.getEffectiveEvent() as IRoomEvent); + if (sanitized) collected.push({ ev, sanitized }); } } } - return events.flatMap((ev) => { - const sanitized = sanitizeBotWidgetMessageEvent(ev.getEffectiveEvent() as IRoomEvent); - return sanitized ? 
[sanitized] : []; - }); + return collected.map((c) => c.sanitized); } public async readRoomState( diff --git a/src/app/features/bots/BotWidgetEmbed.ts b/src/app/features/bots/BotWidgetEmbed.ts index 87886313..fce5ac5d 100644 --- a/src/app/features/bots/BotWidgetEmbed.ts +++ b/src/app/features/bots/BotWidgetEmbed.ts @@ -18,7 +18,11 @@ import { } from 'matrix-widget-api'; import { Theme } from '../../hooks/useTheme'; import type { BotPreset } from './catalog'; -import { BotWidgetDriver, sanitizeBotWidgetMessageEvent } from './BotWidgetDriver'; +import { + BotWidgetDriver, + sanitizeBotWidgetMessageEvent, + sanitizeBotWidgetRedactionEvent, +} from './BotWidgetDriver'; export type BotWidgetEmbedOptions = { mx: MatrixClient; @@ -95,6 +99,24 @@ const createBotIframe = (preset: BotPreset): HTMLIFrameElement => { // login flow), as a per-preset opt-in — not as a default. Element-Web's // wider default exists because their widget set includes Element Call; // Phase 2 bot widgets are text-protocol management surfaces. + // + // Threat-model honesty: the sandbox here is STRUCTURAL, not adversarial. + // (a) The widget is served cross-origin (widgets.vojo.chat in prod, + // localhost:8081 in dev) so the documented `allow-scripts` + + // `allow-same-origin` same-origin-escape doesn't apply — same-origin + // refers to the iframe's OWN origin, not the host's. The widget can't + // read host (vojo.chat) localStorage / cookies because it's a + // different origin entirely. + // (b) The actual security boundary against a compromised widget bundle + // is BotWidgetDriver — capability allowlist, sanitizer (only + // `m.text`/`m.notice`/`m.image` fields the bridge needs, no mxc / + // file / info), strict 1:1 room invariant in `isSafeBotWidgetRoom`. + // A hostile bundle that somehow shipped would still see only the + // events the driver hands it. + // If we ever serve the widget same-origin (e.g. 
inlined as a static + // bundle under /widgets/ on vojo.chat), drop `allow-same-origin` here — + // the postMessage transport doesn't need it, and the same-origin + // sandbox-escape becomes real once the iframe shares the host's origin. iframe.setAttribute('sandbox', 'allow-scripts allow-forms allow-same-origin'); iframe.allow = 'clipboard-write'; iframe.referrerPolicy = 'no-referrer'; @@ -258,7 +280,14 @@ export class BotWidgetEmbed { if (typeof raw.event_id !== 'string' || raw.event_id.length === 0) return; if (this.fedEventIds.has(raw.event_id)) return; - const sanitized = sanitizeBotWidgetMessageEvent(raw as IRoomEvent); + // Dispatch by event type — m.room.message and m.room.redaction take + // different sanitizer paths (the QR-login flow needs both: edits arrive + // as m.image with `m.relates_to`, the post-scan cleanup arrives as a + // separate m.room.redaction targeting the QR event id). + const sanitized = + raw.type === EventType.RoomRedaction + ? sanitizeBotWidgetRedactionEvent(raw as IRoomEvent) + : sanitizeBotWidgetMessageEvent(raw as IRoomEvent); if (!sanitized) return; this.fedEventIds.add(raw.event_id);