feat(room): add voice messages with an in-composer recorder, playback bubble, and per-room disable toggle

This commit is contained in:
heaven 2026-06-05 18:08:03 +03:00
parent d1d2c68393
commit 7e7630bba4
30 changed files with 1665 additions and 46 deletions

7
package-lock.json generated
View file

@ -54,6 +54,7 @@
"matrix-js-sdk": "41.4.0", "matrix-js-sdk": "41.4.0",
"matrix-widget-api": "1.17.0", "matrix-widget-api": "1.17.0",
"millify": "6.1.0", "millify": "6.1.0",
"opus-recorder": "8.0.5",
"pdfjs-dist": "4.2.67", "pdfjs-dist": "4.2.67",
"prismjs": "1.30.0", "prismjs": "1.30.0",
"react": "18.2.0", "react": "18.2.0",
@ -13256,6 +13257,12 @@
"node": ">= 0.8.0" "node": ">= 0.8.0"
} }
}, },
"node_modules/opus-recorder": {
"version": "8.0.5",
"resolved": "https://registry.npmjs.org/opus-recorder/-/opus-recorder-8.0.5.tgz",
"integrity": "sha512-tBRXc9Btds7i3bVfA7d5rekAlyOcfsivt5vSIXHxRV1Oa+s6iXFW8omZ0Lm3ABWotVcEyKt96iIIUcgbV07YOw==",
"license": "MIT"
},
"node_modules/ora": { "node_modules/ora": {
"version": "5.4.1", "version": "5.4.1",
"resolved": "https://registry.npmjs.org/ora/-/ora-5.4.1.tgz", "resolved": "https://registry.npmjs.org/ora/-/ora-5.4.1.tgz",

View file

@ -96,6 +96,7 @@
"matrix-js-sdk": "41.4.0", "matrix-js-sdk": "41.4.0",
"matrix-widget-api": "1.17.0", "matrix-widget-api": "1.17.0",
"millify": "6.1.0", "millify": "6.1.0",
"opus-recorder": "8.0.5",
"pdfjs-dist": "4.2.67", "pdfjs-dist": "4.2.67",
"prismjs": "1.30.0", "prismjs": "1.30.0",
"react": "18.2.0", "react": "18.2.0",

View file

@ -295,6 +295,19 @@
}, },
"Search": { "Search": {
"search": "Search", "search": "Search",
"people": "People",
"by_address": "By address",
"address_hint": "To message someone new, type their address — @name:server",
"dm_rate_limited": "Too many requests. Please try again in a moment.",
"dm_failed": "Couldn't start the chat.",
"start_dm_title": "New chat",
"checking": "Checking…",
"user_found": "User found",
"found_on_server": "Found · {{server}}",
"user_not_found": "Not found on {{server}}",
"user_unreachable": "{{server}} isn't responding — can't verify",
"encrypt_label": "Encrypt messages",
"start_dm_action": "Message",
"no_match_found": "No Match Found", "no_match_found": "No Match Found",
"no_rooms": "No Rooms", "no_rooms": "No Rooms",
"no_match_for_query": "No match found for \"{{query}}\".", "no_match_for_query": "No match found for \"{{query}}\".",
@ -540,6 +553,18 @@
"send_message_alt_12": "The placeholder stares back...", "send_message_alt_12": "The placeholder stares back...",
"drop_files": "Drop Files in \"{{name}}\"", "drop_files": "Drop Files in \"{{name}}\"",
"drag_drop_desc": "Drag and drop files here or click for selection dialog", "drag_drop_desc": "Drag and drop files here or click for selection dialog",
"voice_record": "Record voice message",
"voice_close": "Close recorder",
"voice_delete": "Delete recording",
"voice_play": "Play",
"voice_pause": "Pause",
"voice_stop": "Stop recording",
"voice_send": "Send voice message",
"voice_dismiss_error": "Dismiss",
"voice_mic_error": "Couldn't access the microphone.",
"voice_send_error": "Couldn't send the voice message.",
"voice_disabled": "{{name}} disabled voice messages in this chat.",
"voice_disabled_generic": "Voice messages are disabled in this chat.",
"pinned_messages": "Pinned Messages", "pinned_messages": "Pinned Messages",
"no_pinned_messages": "No Pinned Messages", "no_pinned_messages": "No Pinned Messages",
"no_pinned_messages_desc": "Users with sufficient permissions can pin messages from the message context menu.", "no_pinned_messages_desc": "Users with sufficient permissions can pin messages from the message context menu.",
@ -753,6 +778,8 @@
"visibility_after_join": "After Join", "visibility_after_join": "After Join",
"visibility_all_messages": "All Messages", "visibility_all_messages": "All Messages",
"visibility_all_messages_guests": "All Messages (Guests)", "visibility_all_messages_guests": "All Messages (Guests)",
"voice_messages": "Voice messages",
"voice_messages_desc": "Allow voice messages in this chat. When off, others can't send them here.",
"room_encryption": "Room Encryption", "room_encryption": "Room Encryption",
"encryption_enabled_desc": "Messages in this room are protected by end-to-end encryption.", "encryption_enabled_desc": "Messages in this room are protected by end-to-end encryption.",
"encryption_disabled_desc": "Once enabled, encryption cannot be disabled!", "encryption_disabled_desc": "Once enabled, encryption cannot be disabled!",

View file

@ -295,6 +295,19 @@
}, },
"Search": { "Search": {
"search": "Поиск", "search": "Поиск",
"people": "Люди",
"by_address": "По адресу",
"address_hint": "Чтобы написать новому человеку, введите его адрес — @имя:сервер",
"dm_rate_limited": "Слишком часто. Попробуйте чуть позже.",
"dm_failed": "Не удалось создать чат.",
"start_dm_title": "Новый чат",
"checking": "Проверяем…",
"user_found": "Пользователь найден",
"found_on_server": "Найден · {{server}}",
"user_not_found": "Не найден на {{server}}",
"user_unreachable": "{{server}} не отвечает — не удалось проверить",
"encrypt_label": "Шифровать переписку",
"start_dm_action": "Написать",
"no_match_found": "Совпадений не найдено", "no_match_found": "Совпадений не найдено",
"no_rooms": "Нет комнат", "no_rooms": "Нет комнат",
"no_match_for_query": "Совпадений для «{{query}}» не найдено.", "no_match_for_query": "Совпадений для «{{query}}» не найдено.",
@ -550,6 +563,18 @@
"send_message_alt_12": "Плейсхолдер смотрит на вас...", "send_message_alt_12": "Плейсхолдер смотрит на вас...",
"drop_files": "Перетащите файлы в \"{{name}}\"", "drop_files": "Перетащите файлы в \"{{name}}\"",
"drag_drop_desc": "Перетащите файлы сюда или нажмите для выбора", "drag_drop_desc": "Перетащите файлы сюда или нажмите для выбора",
"voice_record": "Записать голосовое сообщение",
"voice_close": "Закрыть запись",
"voice_delete": "Удалить запись",
"voice_play": "Воспроизвести",
"voice_pause": "Пауза",
"voice_stop": "Остановить запись",
"voice_send": "Отправить голосовое сообщение",
"voice_dismiss_error": "Скрыть",
"voice_mic_error": "Не удалось получить доступ к микрофону.",
"voice_send_error": "Не удалось отправить голосовое сообщение.",
"voice_disabled": "{{name}} отключил голосовые сообщения в этом чате.",
"voice_disabled_generic": "Голосовые сообщения отключены в этом чате.",
"pinned_messages": "Закреплённые сообщения", "pinned_messages": "Закреплённые сообщения",
"no_pinned_messages": "Нет закреплённых сообщений", "no_pinned_messages": "Нет закреплённых сообщений",
"no_pinned_messages_desc": "Пользователи с достаточным уровнем прав могут закреплять сообщения через контекстное меню.", "no_pinned_messages_desc": "Пользователи с достаточным уровнем прав могут закреплять сообщения через контекстное меню.",
@ -771,6 +796,8 @@
"visibility_after_join": "После вступления", "visibility_after_join": "После вступления",
"visibility_all_messages": "Все сообщения", "visibility_all_messages": "Все сообщения",
"visibility_all_messages_guests": "Все сообщения (гости)", "visibility_all_messages_guests": "Все сообщения (гости)",
"voice_messages": "Голосовые сообщения",
"voice_messages_desc": "Разрешить голосовые сообщения в этом чате. Если выключено, другие не смогут их отправлять.",
"room_encryption": "Шифрование комнаты", "room_encryption": "Шифрование комнаты",
"encryption_enabled_desc": "Сообщения в этой комнате защищены сквозным шифрованием.", "encryption_enabled_desc": "Сообщения в этой комнате защищены сквозным шифрованием.",
"encryption_disabled_desc": "После включения шифрование невозможно отключить!", "encryption_disabled_desc": "После включения шифрование невозможно отключить!",

View file

@ -17,6 +17,7 @@ import {
MNotice, MNotice,
MText, MText,
MVideo, MVideo,
MVoice,
ReadPdfFile, ReadPdfFile,
ReadTextFile, ReadTextFile,
RenderBody, RenderBody,
@ -28,6 +29,7 @@ import {
ThumbnailContent, ThumbnailContent,
UnsupportedContent, UnsupportedContent,
VideoContent, VideoContent,
VoiceContent,
} from './message'; } from './message';
import { UrlPreviewCard, UrlPreviewHolder } from './url-preview'; import { UrlPreviewCard, UrlPreviewHolder } from './url-preview';
import { Image, MediaControl, Video } from './media'; import { Image, MediaControl, Video } from './media';
@ -35,7 +37,7 @@ import { ImageViewer } from './image-viewer';
import { PdfViewer } from './Pdf-viewer'; import { PdfViewer } from './Pdf-viewer';
import { TextViewer } from './text-viewer'; import { TextViewer } from './text-viewer';
import { testMatrixTo } from '../plugins/matrix-to'; import { testMatrixTo } from '../plugins/matrix-to';
import { IImageContent } from '../../types/matrix/common'; import { IAudioContent, IImageContent, isVoiceMessageContent } from '../../types/matrix/common';
import { logMedia } from './message/attachment/streamMediaDebug'; import { logMedia } from './message/attachment/streamMediaDebug';
// Threads the StreamLayout's mediaMode info from Message.tsx down to the // Threads the StreamLayout's mediaMode info from Message.tsx down to the
@ -54,6 +56,13 @@ export const useStreamMediaContext = (): StreamMediaContextValue | null =>
type RenderMessageContentProps = { type RenderMessageContentProps = {
displayName: string; displayName: string;
// Voice bubble: the sender's id + resolved avatar URL so VoiceContent can draw
// the avatar. Non-timeline callers (pin-menu, search) may omit them.
senderId?: string;
senderAvatarUrl?: string;
// True when the surrounding layout ALREADY draws a per-message avatar (channel
// layout / thread drawer) — VoiceContent then skips its own to avoid doubling.
hideVoiceAvatar?: boolean;
msgType: string; msgType: string;
ts: number; ts: number;
edited?: boolean; edited?: boolean;
@ -73,6 +82,9 @@ type RenderMessageContentProps = {
}; };
export function RenderMessageContent({ export function RenderMessageContent({
displayName, displayName,
senderId,
senderAvatarUrl,
hideVoiceAvatar,
msgType, msgType,
ts, ts,
edited, edited,
@ -300,6 +312,26 @@ export function RenderMessageContent({
} }
if (msgType === MsgType.Audio) { if (msgType === MsgType.Audio) {
// Voice notes (MSC3245) — both Vojo-native and Telegram-bridged — render as
// the Dawn voice bubble; plain audio files keep the generic player.
const audioContent = getContent<IAudioContent & Record<string, unknown>>();
if (isVoiceMessageContent(audioContent)) {
return (
<MVoice
content={getContent()}
renderAsFile={renderFile}
renderVoiceContent={(props) => (
<VoiceContent
{...props}
senderId={senderId}
senderAvatarUrl={senderAvatarUrl}
senderName={displayName}
hideAvatar={hideVoiceAvatar}
/>
)}
/>
);
}
return ( return (
<> <>
<MAudio <MAudio

View file

@ -72,6 +72,11 @@ type CustomEditorProps = {
bottom?: ReactNode; bottom?: ReactNode;
before?: ReactNode; before?: ReactNode;
after?: ReactNode; after?: ReactNode;
// When set, renders in place of the text-input row (the Editable) while
// keeping the composer card, the Slate context and the top/bottom slots
// mounted. Used by the voice recorder so the input morphs inline instead of
// the whole composer being swapped out. See docs/plans/voice_messages.md.
replaceEditable?: ReactNode;
maxHeight?: string; maxHeight?: string;
editor: Editor; editor: Editor;
placeholder?: string; placeholder?: string;
@ -88,6 +93,7 @@ export const CustomEditor = forwardRef<HTMLDivElement, CustomEditorProps>(
bottom, bottom,
before, before,
after, after,
replaceEditable,
maxHeight = '50vh', maxHeight = '50vh',
editor, editor,
placeholder, placeholder,
@ -136,6 +142,7 @@ export const CustomEditor = forwardRef<HTMLDivElement, CustomEditorProps>(
<div className={css.Editor} ref={ref}> <div className={css.Editor} ref={ref}>
<Slate editor={editor} initialValue={initialValue} onChange={onChange}> <Slate editor={editor} initialValue={initialValue} onChange={onChange}>
{top} {top}
{replaceEditable ?? (
<Box alignItems="Start"> <Box alignItems="Start">
{before && ( {before && (
<Box className={css.EditorOptions} alignItems="Center" gap="100" shrink="No"> <Box className={css.EditorOptions} alignItems="Center" gap="100" shrink="No">
@ -168,6 +175,7 @@ export const CustomEditor = forwardRef<HTMLDivElement, CustomEditorProps>(
</Box> </Box>
)} )}
</Box> </Box>
)}
{bottom} {bottom}
</Slate> </Slate>
</div> </div>

View file

@ -22,6 +22,7 @@ import {
IThumbnailContent, IThumbnailContent,
IVideoContent, IVideoContent,
IVideoInfo, IVideoInfo,
MATRIX_AUDIO_PROPERTY_NAME,
MATRIX_SPOILER_PROPERTY_NAME, MATRIX_SPOILER_PROPERTY_NAME,
MATRIX_SPOILER_REASON_PROPERTY_NAME, MATRIX_SPOILER_REASON_PROPERTY_NAME,
} from '../../../types/matrix/common'; } from '../../../types/matrix/common';
@ -339,6 +340,45 @@ export function MAudio({ content, renderAsFile, renderAudioContent, outlined }:
); );
} }
/** Arguments handed to the `renderVoiceContent` render-prop once the event content passed validation. */
type RenderVoiceContentProps = {
  /** The event's `info` object (carries mimetype and, per the consumer, the duration). */
  info: IAudioInfo;
  /** Mime type after passing through `getBlobSafeMimeType`; always starts with `audio`. */
  mimeType: string;
  /** Media URL taken from `content.file.url` (encrypted) or `content.url` (plain). */
  url: string;
  /** Encryption descriptor, present only when the event carries a `file` object. */
  encInfo?: IEncryptedFile;
  /** Waveform samples from the audio property, or undefined when absent or malformed. */
  waveform?: number[];
};
type MVoiceProps = {
  content: IAudioContent;
  /** Fallback renderer used when a URL exists but the audio cannot be played inline. */
  renderAsFile: () => ReactNode;
  renderVoiceContent: (props: RenderVoiceContentProps) => ReactNode;
};

// Event content is authored by remote clients and bridges, so the declared
// `number[]` type is only a hint. Accept the waveform when it really is an
// array of finite numbers; anything else is dropped (undefined). A valid
// array is returned as-is so its identity stays stable for memoising consumers.
const toSafeWaveform = (raw: unknown): number[] | undefined =>
  Array.isArray(raw) && raw.every((sample) => typeof sample === 'number' && Number.isFinite(sample))
    ? raw
    : undefined;

// Voice notes (MSC3245). Unlike `MAudio`, this drops the `FileHeader` so the
// voice bubble is a clean full-width rectangle (no "Voice message.ogg" row) and
// owns its own background/padding via `VoiceContent`. Falls back to a plain file
// card when the audio is unplayable, and to `BrokenContent` when there is not
// even a URL to offer. See docs/plans/voice_messages.md §1 (D4).
export function MVoice({ content, renderAsFile, renderVoiceContent }: MVoiceProps) {
  const audioInfo = content?.info;
  // Encrypted events keep the URL inside `file`; unencrypted ones use `url`.
  const mxcUrl = content.file?.url ?? content.url;
  const safeMimeType = getBlobSafeMimeType(audioInfo?.mimetype ?? '');

  if (!audioInfo || !safeMimeType.startsWith('audio') || typeof mxcUrl !== 'string') {
    if (mxcUrl) {
      return renderAsFile();
    }
    return <BrokenContent />;
  }

  return renderVoiceContent({
    info: audioInfo,
    mimeType: safeMimeType,
    url: mxcUrl,
    encInfo: content.file,
    waveform: toSafeWaveform(content[MATRIX_AUDIO_PROPERTY_NAME]?.waveform),
  });
}
type RenderFileContentProps = { type RenderFileContentProps = {
body: string; body: string;
info: IFileInfo & IThumbnailContent; info: IFileInfo & IThumbnailContent;

View file

@ -0,0 +1,154 @@
import { keyframes, style } from '@vanilla-extract/css';
import { color, config, toRem } from 'folds';
// Dawn voice-note bubble — identical on native and web. The sender avatar sits
// OUTSIDE the bubble (bare, no background). Both own and other have a filled
// bubble with visible edges; the difference is the FILL TONE: others get a
// light subtle fill (SurfaceVariant.ContainerHover #21232b), own gets the
// darker fill of the composer / input form (Surface.Container #0d0e11 — the
// `${ChatComposer} .${Editor}` override in RoomView.css.ts, NOT the lighter
// SurfaceVariant). Same 1px edge on both. See docs/plans/voice_messages.md §5.
// Entrance animation for a voice row: fade in while settling 3px upward.
const fadeIn = keyframes({
  from: { transform: 'translateY(3px)', opacity: 0 },
  to: { transform: 'translateY(0)', opacity: 1 },
});

// Outermost flex row of the voice bubble; stretches to the full available width
// and plays the entrance animation once when it is first rendered.
export const Row = style({
  boxSizing: 'border-box',
  width: '100%',
  display: 'flex',
  alignItems: 'center',
  gap: config.space.S300,
  animation: `${fadeIn} 180ms ease`,
});
// Avatar holder: a fixed 40px square that never shrinks. The background is
// forced transparent so only the avatar image / fallback is visible, whatever
// container fill the avatar component would otherwise apply.
const AVATAR_SLOT_SIZE = toRem(40);
export const AvatarSlot = style({
  backgroundColor: 'transparent',
  flexShrink: 0,
  height: AVATAR_SLOT_SIZE,
  width: AVATAR_SLOT_SIZE,
});
// The bubble itself. This base style is the look used for messages from other
// people: a light, subtle fill with a visible 1px edge. `BubbleOwn` (below)
// only swaps the fill tone for the current user's messages.
export const Bubble = style({
  boxSizing: 'border-box',
  display: 'flex',
  alignItems: 'center',
  gap: config.space.S300,
  flexGrow: 1,
  minWidth: 0,
  maxWidth: toRem(400),
  // Height is fixed so own and other bubbles stay pixel-identical whatever the fill.
  height: toRem(56),
  padding: `0 ${config.space.S400}`,
  borderRadius: config.radii.R400,
  border: `${config.borderWidth.B300} solid ${color.SurfaceVariant.ContainerLine}`,
  backgroundColor: color.SurfaceVariant.ContainerHover,
  color: color.SurfaceVariant.OnContainer,
});

// Modifier for the current user's messages: the darker composer / input-form
// fill (Surface.Container). The edge is inherited unchanged from `Bubble`.
export const BubbleOwn = style({
  backgroundColor: color.Surface.Container,
});
// Expanding ring that radiates from the play button while audio is playing.
const playPulse = keyframes({
  '0%': { boxShadow: `0 0 0 0 ${color.Primary.Main}` },
  '70%': { boxShadow: `0 0 0 ${toRem(6)} rgba(0,0,0,0)` },
  '100%': { boxShadow: `0 0 0 0 rgba(0,0,0,0)` },
});

// Round 40px play / pause button on the left of the bubble.
export const PlayButton = style({
  flexShrink: 0,
  width: toRem(40),
  height: toRem(40),
  display: 'flex',
  alignItems: 'center',
  justifyContent: 'center',
  borderRadius: config.radii.Pill,
  border: 'none',
  padding: 0,
  cursor: 'pointer',
  backgroundColor: color.Primary.Main,
  color: color.Primary.OnMain,
  transition: 'transform 120ms ease, background-color 120ms ease',
  selectors: {
    // Interactive feedback is gated on :not(:disabled): the button is disabled
    // while the audio downloads, and the pointer is usually still resting on it
    // right after the click — without the gate the disabled button would keep
    // the hover colour and scale.
    '&:hover:not(:disabled)': {
      backgroundColor: color.Primary.MainHover,
      transform: 'scale(1.06)',
    },
    '&:active:not(:disabled)': { transform: 'scale(0.92)' },
    '&:disabled': { cursor: 'default', opacity: 0.6 },
  },
});

// Added to the button while playback is running: loops the pulse ring. The
// loop is switched off for users who asked the system to reduce motion.
export const PlayButtonPlaying = style({
  animation: `${playPulse} 1.6s ease-out infinite`,
  '@media': {
    '(prefers-reduced-motion: reduce)': {
      animation: 'none',
    },
  },
});
// Track element for the waveform variant of the seek bar: a 32px-tall flex row
// of bars that takes all remaining width inside the bubble.
export const Waveform = style({
  position: 'relative',
  display: 'flex',
  alignItems: 'center',
  gap: toRem(2),
  flexGrow: 1,
  minWidth: 0,
  height: toRem(32),
  // Clip anything that would paint outside the track, so the react-range thumb
  // can never spill toward the time readout (at most a 1px transparent overhang).
  overflow: 'hidden',
  cursor: 'pointer',
});
// One-shot entrance for each bar: grow vertically from a quarter of its height.
const barGrow = keyframes({
  from: { transform: 'scaleY(0.25)' },
  to: { transform: 'scaleY(1)' },
});

// A single waveform bar. Bars share the track width equally; the minimum
// width/height keep even the quietest sample visible. The height itself is set
// inline by the component.
export const WaveBar = style({
  flex: '1 1 0',
  minHeight: toRem(3),
  minWidth: toRem(2),
  borderRadius: toRem(2),
  transformOrigin: 'center',
  transition: 'background-color 120ms linear, transform 120ms ease',
  animation: `${barGrow} 220ms ease`,
});

// Colour of bars located before the playback position.
export const WaveBarPlayed = style({
  backgroundColor: color.Primary.Main,
});

// Colour of bars that have not been reached yet.
export const WaveBarRest = style({
  backgroundColor: color.SurfaceVariant.ContainerLine,
});
// Seek-bar thumb: a 2px-wide, full-height, fully transparent strip — the
// position is conveyed by the bar / fill colours instead of a visible handle.
// NOTE(review): with a transparent fill and `outline: none` the thumb shows no
// keyboard-focus indicator — confirm this is intended.
export const WaveThumb = style({
  height: '100%',
  width: toRem(2),
  outline: 'none',
  backgroundColor: 'transparent',
});
// Fallback seek bar used when there are no waveform bars to draw: a thin
// pill-shaped track that takes the remaining width.
export const ProgressTrack = style({
  position: 'relative',
  flexGrow: 1,
  height: toRem(4),
  cursor: 'pointer',
  borderRadius: config.radii.Pill,
  backgroundColor: color.SurfaceVariant.ContainerLine,
});

// Played portion of the fallback track, anchored to its top-left corner. The
// width is set inline by the component and eased over 120ms.
export const ProgressFill = style({
  position: 'absolute',
  top: 0,
  left: 0,
  height: '100%',
  borderRadius: config.radii.Pill,
  backgroundColor: color.Primary.Main,
  transition: 'width 120ms linear',
});
// Time readout at the right edge of the bubble. Tabular digits plus a minimum
// width keep the text from jittering as the numbers change.
export const Time = style({
  fontVariantNumeric: 'tabular-nums',
  textAlign: 'right',
  minWidth: toRem(40),
  flexShrink: 0,
});

View file

@ -0,0 +1,202 @@
/* eslint-disable jsx-a11y/media-has-caption */
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { Avatar, Icon, Icons, Spinner, Text } from 'folds';
import { EncryptedAttachmentInfo } from 'browser-encrypt-attachment';
import { Range } from 'react-range';
import { useTranslation } from 'react-i18next';
import { useMatrixClient } from '../../../hooks/useMatrixClient';
import { AsyncStatus, useAsyncCallback } from '../../../hooks/useAsyncCallback';
import { IAudioInfo } from '../../../../types/matrix/common';
import {
PlayTimeCallback,
useMediaLoading,
useMediaPlay,
useMediaPlayTimeCallback,
useMediaSeek,
} from '../../../hooks/media';
import { useThrottle } from '../../../hooks/useThrottle';
import { secondsToMinutesAndSeconds } from '../../../utils/common';
import {
decryptFile,
downloadEncryptedMedia,
downloadMedia,
mxcUrlToHttp,
} from '../../../utils/matrix';
import { useMediaAuthentication } from '../../../hooks/useMediaAuthentication';
import { normalizeWaveform } from '../../../utils/audioWaveform';
import { UserAvatar } from '../../user-avatar';
import * as css from './VoiceContent.css';
// Options passed to `useThrottle` for the playback-time callback: a 200 ms
// window. `immediate` presumably fires the first call right away — confirm
// against the `useThrottle` implementation.
const PLAY_TIME_THROTTLE_OPS = { immediate: true, wait: 200 };

// Bar count requested from `normalizeWaveform` for the drawn waveform.
const TARGET_BARS = 40;
/** Props accepted by {@link VoiceContent}. */
export type VoiceContentProps = {
  /** Mime type used for the `<source>` element and for decrypting the blob. */
  mimeType: string;
  /** Media URL of the recording (converted to HTTP before download). */
  url: string;
  /** Audio info of the event; `duration` (milliseconds) seeds the time readout. */
  info: IAudioInfo;
  /** Present for encrypted media: the download is decrypted with it. */
  encInfo?: EncryptedAttachmentInfo;
  /** Raw waveform samples; resampled to a fixed number of bars before drawing. */
  waveform?: number[];
  /**
   * The message sender. Drives the avatar on the left (unless `hideAvatar` is
   * set) and the own-vs-other bubble fill. Non-timeline callers (pin-menu,
   * search) may omit these, in which case the avatar shows its fallback icon.
   */
  senderId?: string;
  senderAvatarUrl?: string;
  senderName?: string;
  /**
   * Set when the surrounding layout already draws a per-message avatar (channel
   * layout / thread drawer) so the row does not end up with two avatars.
   */
  hideAvatar?: boolean;
};
/**
 * Voice-note bubble: optional sender avatar, play/pause button, a seekable
 * waveform (or a plain progress track when no bars are available) and a time
 * readout.
 *
 * The audio is fetched lazily: the first press on the play button downloads
 * (and, for encrypted media, decrypts) the file into an object URL; the hidden
 * `<audio autoPlay>` element then starts as soon as the `<source>` appears.
 * Later presses toggle play / pause.
 */
export function VoiceContent({
  mimeType,
  url,
  info,
  encInfo,
  waveform,
  senderId,
  senderAvatarUrl,
  senderName,
  hideAvatar,
}: VoiceContentProps) {
  const { t } = useTranslation();
  const mx = useMatrixClient();
  const useAuthentication = useMediaAuthentication();

  // Download the media and expose it as an object URL.
  const fetchObjectUrl = useCallback(async () => {
    const httpUrl = mxcUrlToHttp(mx, url, useAuthentication);
    if (!httpUrl) throw new Error('Invalid media URL');
    const blob = encInfo
      ? await downloadEncryptedMedia(httpUrl, (encBuf) => decryptFile(encBuf, mimeType, encInfo))
      : await downloadMedia(httpUrl);
    return URL.createObjectURL(blob);
  }, [mx, url, useAuthentication, mimeType, encInfo]);
  const [srcState, loadSrc] = useAsyncCallback(fetchObjectUrl);

  // Release the object URL whenever the load state changes and on unmount.
  useEffect(() => {
    if (srcState.status !== AsyncStatus.Success) return undefined;
    const objectUrl = srcState.data;
    return () => {
      if (objectUrl) URL.revokeObjectURL(objectUrl);
    };
  }, [srcState]);

  const audioRef = useRef<HTMLAudioElement | null>(null);
  const [currentTime, setCurrentTime] = useState(0);

  // `info.duration` is in milliseconds; the component state is in seconds.
  // Anything that is not a finite, non-negative number seeds the state with 0.
  const seedMs = info.duration ?? 0;
  const seedSeconds = Number.isFinite(seedMs) && seedMs >= 0 ? seedMs / 1000 : 0;
  const [duration, setDuration] = useState(seedSeconds);

  const getAudioRef = useCallback(() => audioRef.current, []);
  const { loading } = useMediaLoading(getAudioRef);
  const { playing, setPlaying } = useMediaPlay(getAudioRef);
  const { seek } = useMediaSeek(getAudioRef);

  const handlePlayTimeCallback: PlayTimeCallback = useCallback((d, ct) => {
    // Only trust a finite, positive duration from the element. Per the original
    // note, ogg/opus often reports Infinity until fully buffered; overwriting
    // the seeded value would flatten the waveform and show 0:00.
    if (Number.isFinite(d) && d > 0) setDuration(d);
    setCurrentTime(ct);
  }, []);
  const throttledPlayTime = useThrottle(handlePlayTimeCallback, PLAY_TIME_THROTTLE_OPS);
  useMediaPlayTimeCallback(getAudioRef, throttledPlayTime);

  const isOwn = Boolean(senderId) && senderId === mx.getUserId();
  const bars = useMemo(() => normalizeWaveform(waveform, TARGET_BARS), [waveform]);

  // The slider needs a non-zero upper bound even when the duration is unknown.
  const max = duration || 1;
  const progress = duration > 0 ? Math.min(1, currentTime / duration) : 0;
  const clampedValue = Math.min(Math.max(currentTime, 0), max);

  // First press (or a press after a failed load) starts the download; once the
  // source is ready, presses toggle playback. Presses while loading are ignored.
  const handlePlay = () => {
    if (srcState.status === AsyncStatus.Success) {
      setPlaying(!playing);
      return;
    }
    if (srcState.status === AsyncStatus.Loading) return;
    loadSrc();
  };

  const isLoading = srcState.status === AsyncStatus.Loading || loading;

  // Elapsed time once playback has started, total length before that.
  const playbackStarted = playing || currentTime > 0;
  const displayTime = playbackStarted ? currentTime : duration;

  const playLabel = playing ? t('Room.voice_pause') : t('Room.voice_play');
  const bubbleClassName = `${css.Bubble} ${isOwn ? css.BubbleOwn : ''}`;
  const playButtonClassName = `${css.PlayButton} ${playing ? css.PlayButtonPlaying : ''}`;

  const avatar = hideAvatar ? null : (
    <Avatar className={css.AvatarSlot} size="300">
      <UserAvatar
        userId={senderId ?? ''}
        src={senderAvatarUrl}
        alt={senderName ?? senderId ?? ''}
        renderFallback={() => <Icon size="200" src={Icons.User} filled />}
      />
    </Avatar>
  );

  const playGlyph = isLoading ? (
    <Spinner variant="Secondary" size="200" />
  ) : (
    <Icon src={playing ? Icons.Pause : Icons.Play} size="300" filled />
  );

  return (
    <div className={css.Row}>
      {avatar}
      <div className={bubbleClassName}>
        <button
          type="button"
          className={playButtonClassName}
          onClick={handlePlay}
          disabled={srcState.status === AsyncStatus.Loading}
          aria-label={playLabel}
        >
          {playGlyph}
        </button>
        <Range
          step={0.05}
          min={0}
          max={max}
          values={[clampedValue]}
          onChange={(values) => seek(values[0])}
          renderTrack={({ props, children }) => {
            // No bars to draw: fall back to a plain progress track.
            if (bars.length === 0) {
              return (
                <div {...props} className={css.ProgressTrack}>
                  <div className={css.ProgressFill} style={{ width: `${progress * 100}%` }} />
                  {children}
                </div>
              );
            }
            const barCount = bars.length;
            return (
              <div {...props} className={css.Waveform}>
                {bars.map((amp, index) => {
                  // A bar counts as played once its centre is behind the playhead.
                  const isPlayed = duration > 0 && (index + 0.5) / barCount <= progress;
                  const heightPercent = Math.max(10, Math.round(amp * 100));
                  return (
                    <span
                      // eslint-disable-next-line react/no-array-index-key
                      key={index}
                      className={`${css.WaveBar} ${isPlayed ? css.WaveBarPlayed : css.WaveBarRest}`}
                      style={{ height: `${heightPercent}%` }}
                    />
                  );
                })}
                {children}
              </div>
            );
          }}
          renderThumb={({ props }) => <div {...props} className={css.WaveThumb} />}
        />
        <Text className={css.Time} size="T200">
          {secondsToMinutesAndSeconds(displayTime)}
        </Text>
        <audio controls={false} autoPlay ref={audioRef} style={{ display: 'none' }}>
          {srcState.status === AsyncStatus.Success && (
            <source src={srcState.data} type={mimeType} />
          )}
        </audio>
      </div>
    </div>
  );
}

View file

@ -2,6 +2,7 @@ export * from './ThumbnailContent';
export * from './ImageContent'; export * from './ImageContent';
export * from './VideoContent'; export * from './VideoContent';
export * from './AudioContent'; export * from './AudioContent';
export * from './VoiceContent';
export * from './FileContent'; export * from './FileContent';
export * from './FallbackContent'; export * from './FallbackContent';
export * from './EventContent'; export * from './EventContent';

View file

@ -0,0 +1,69 @@
import React, { useCallback } from 'react';
import { Box, color, Spinner, Switch, Text } from 'folds';
import { useTranslation } from 'react-i18next';
import { MatrixError } from 'matrix-js-sdk';
import type { StateEvents } from 'matrix-js-sdk';
import { SettingTile } from '../../../components/setting-tile';
import { useRoom } from '../../../hooks/useRoom';
import { AsyncStatus, useAsyncCallback } from '../../../hooks/useAsyncCallback';
import { StateEvent } from '../../../../types/matrix/room';
import { useMatrixClient } from '../../../hooks/useMatrixClient';
import { useStateEvent } from '../../../hooks/useStateEvent';
import { RoomPermissionsAPI } from '../../../hooks/useRoomPermissions';
type RoomVoiceMessagesProps = {
  permissions: RoomPermissionsAPI;
};

/**
 * Room-settings tile with the per-room "allow voice messages" switch.
 *
 * Reads and writes the `StateEvent.VoiceMessages` state event (per the original
 * note, the `in.vojo.room.voice_messages` type). Voice is default-on: only an
 * explicit `enabled: false` counts as disabled. The switch is locked when the
 * current user lacks permission for that state event or while a save is in
 * flight; a failed save shows its error message under the tile.
 *
 * Original design notes: in a 1:1 both parties are PL 100 (TrustedPrivateChat)
 * so either can flip it; in a group it requires `state_default` (admin). The
 * restriction is soft, client-side, Vojo↔Vojo. See docs/plans/voice_messages.md §4.
 */
export function RoomVoiceMessages({ permissions }: RoomVoiceMessagesProps) {
  const { t } = useTranslation();
  const mx = useMatrixClient();
  const room = useRoom();

  const canEdit = permissions.stateEvent(StateEvent.VoiceMessages, mx.getSafeUserId());

  const voiceEvent = useStateEvent(room, StateEvent.VoiceMessages);
  const voiceContent = voiceEvent?.getContent<{
    enabled?: boolean;
    disabled_by?: string;
  }>();
  // Missing event or missing flag both mean "allowed".
  const allowed = voiceContent?.enabled !== false;

  const persistAllowed = useCallback(
    async (nextAllowed: boolean) => {
      // `disabled_by` is stamped only when disabling: on re-enable it would
      // misleadingly name the re-enabler, and readers consult it only when
      // `enabled === false`. See review notes in docs/plans/voice_messages.md.
      await mx.sendStateEvent(room.roomId, StateEvent.VoiceMessages as keyof StateEvents, {
        enabled: nextAllowed,
        ...(nextAllowed ? {} : { disabled_by: mx.getSafeUserId() }),
      });
    },
    [mx, room.roomId]
  );
  const [saveState, saveAllowed] = useAsyncCallback(persistAllowed);

  const saving = saveState.status === AsyncStatus.Loading;

  const control = (
    <Box gap="200" alignItems="Center">
      {saving && <Spinner variant="Secondary" />}
      <Switch value={allowed} onChange={saveAllowed} disabled={!canEdit || saving} />
    </Box>
  );

  const errorLine =
    saveState.status === AsyncStatus.Error ? (
      <Text style={{ color: color.Critical.Main }} size="T200">
        {(saveState.error as MatrixError).message}
      </Text>
    ) : null;

  return (
    <SettingTile
      title={t('RoomSettings.voice_messages')}
      description={t('RoomSettings.voice_messages_desc')}
      after={control}
    >
      {errorLine}
    </SettingTile>
  );
}

View file

@ -5,3 +5,4 @@ export * from './RoomJoinRules';
export * from './RoomProfile'; export * from './RoomProfile';
export * from './RoomPublish'; export * from './RoomPublish';
export * from './RoomUpgrade'; export * from './RoomUpgrade';
export * from './RoomVoiceMessages';

View file

@ -62,6 +62,9 @@ export function CreateChat({ defaultUserId, onCreated, gap = '500' }: CreateChat
visibility: Visibility.Private, visibility: Visibility.Private,
preset: Preset.TrustedPrivateChat, preset: Preset.TrustedPrivateChat,
initial_state: initialState, initial_state: initialState,
// TrustedPrivateChat already gives both parties PL 100, so either can
// toggle the voice-messages state event; no events override needed
// (it would replace Synapse's default events map). See voice_messages.md §4.
}); });
addRoomIdToMDirect(mx, result.room_id, userId); addRoomIdToMDirect(mx, result.room_id, userId);

View file

@ -34,6 +34,10 @@ export function useCreateDirect(): (userId: string, opts?: CreateDirectOptions)
visibility: Visibility.Private, visibility: Visibility.Private,
preset: Preset.TrustedPrivateChat, preset: Preset.TrustedPrivateChat,
initial_state: initialState, initial_state: initialState,
// No power_level override: TrustedPrivateChat already gives both parties
// PL 100, so either can write the voice-messages state event. (A custom
// `events` override would shallow-REPLACE Synapse's default events map
// and weaken the room's state-event protections.) See voice_messages.md §4.
}); });
addRoomIdToMDirect(mx, result.room_id, userId); addRoomIdToMDirect(mx, result.room_id, userId);

View file

@ -15,6 +15,7 @@ import {
RoomPublishedAddresses, RoomPublishedAddresses,
RoomPublish, RoomPublish,
RoomUpgrade, RoomUpgrade,
RoomVoiceMessages,
} from '../../common-settings/general'; } from '../../common-settings/general';
import { useRoomCreators } from '../../../hooks/useRoomCreators'; import { useRoomCreators } from '../../../hooks/useRoomCreators';
import { useRoomPermissions } from '../../../hooks/useRoomPermissions'; import { useRoomPermissions } from '../../../hooks/useRoomPermissions';
@ -54,6 +55,7 @@ export function General({ requestClose }: GeneralProps) {
<RoomJoinRules permissions={permissions} /> <RoomJoinRules permissions={permissions} />
<RoomHistoryVisibility permissions={permissions} /> <RoomHistoryVisibility permissions={permissions} />
<RoomEncryption permissions={permissions} /> <RoomEncryption permissions={permissions} />
<RoomVoiceMessages permissions={permissions} />
<RoomPublish permissions={permissions} /> <RoomPublish permissions={permissions} />
</SettingsSection> </SettingsSection>
<Box direction="Column" gap="200"> <Box direction="Column" gap="200">

View file

@ -28,6 +28,7 @@ import {
PopOut, PopOut,
Scroll, Scroll,
Text, Text,
color,
config, config,
toRem, toRem,
} from 'folds'; } from 'folds';
@ -102,7 +103,12 @@ import {
getFileMsgContent, getFileMsgContent,
getImageMsgContent, getImageMsgContent,
getVideoMsgContent, getVideoMsgContent,
getVoiceMsgContent,
} from './msgContent'; } from './msgContent';
import { VoiceRecording, VoiceRecordingResult } from '../../utils/voiceRecording';
import { VoiceRecorder } from './VoiceRecorderForm';
import { useStateEvent } from '../../hooks/useStateEvent';
import { StateEvent } from '../../../types/matrix/room';
import { getMemberDisplayName, getMentionContent, trimReplyFromBody } from '../../utils/room'; import { getMemberDisplayName, getMentionContent, trimReplyFromBody } from '../../utils/room';
import { CommandAutocomplete } from './CommandAutocomplete'; import { CommandAutocomplete } from './CommandAutocomplete';
import { Command, SHRUG, TABLEFLIP, UNFLIP, useCommands } from '../../hooks/useCommands'; import { Command, SHRUG, TABLEFLIP, UNFLIP, useCommands } from '../../hooks/useCommands';
@ -188,6 +194,18 @@ const StreamComposerIcons = {
/> />
</> </>
), ),
Mic: () => (
<>
<rect x="9" y="2" width="6" height="11" rx="3" stroke="currentColor" strokeWidth="1.6" />
<path
d="M5 11a7 7 0 0 0 14 0M12 18v3"
stroke="currentColor"
strokeWidth="1.6"
strokeLinecap="round"
strokeLinejoin="round"
/>
</>
),
}; };
interface RoomInputProps { interface RoomInputProps {
@ -221,6 +239,24 @@ export const RoomInput = forwardRef<HTMLDivElement, RoomInputProps>(
const [hideActivity] = useSetting(settingsAtom, 'hideActivity'); const [hideActivity] = useSetting(settingsAtom, 'hideActivity');
const isOneOnOne = useIsOneOnOne(); const isOneOnOne = useIsOneOnOne();
const commands = useCommands(mx, room); const commands = useCommands(mx, room);
// Voice messages — per-room "allowed" preference (default-on) + recorder
// state. `voiceDisabledBy` is set (mxid, possibly empty) when the room has
// `enabled: false`; undefined ⇒ allowed. See docs/plans/voice_messages.md.
const voiceMsgEvent = useStateEvent(room, StateEvent.VoiceMessages);
const voiceMsgContent = voiceMsgEvent?.getContent<{
enabled?: boolean;
disabled_by?: string;
}>();
const voiceDisabledBy =
voiceMsgContent?.enabled === false ? voiceMsgContent.disabled_by ?? '' : undefined;
const voiceSupported = useMemo(() => VoiceRecording.isSupported(), []);
const [voiceMode, setVoiceMode] = useState(false);
const [voiceError, setVoiceError] = useState<string | null>(null);
// Drop the stale "voice disabled" banner once the room re-enables voice.
useEffect(() => {
if (voiceDisabledBy === undefined) setVoiceError(null);
}, [voiceDisabledBy]);
const emojiBtnRef = useRef<HTMLButtonElement>(null); const emojiBtnRef = useRef<HTMLButtonElement>(null);
const screenSize = useScreenSizeContext(); const screenSize = useScreenSizeContext();
// On native / narrow screens the emoji-sticker board is docked inline at the // On native / narrow screens the emoji-sticker board is docked inline at the
@ -440,6 +476,84 @@ export const RoomInput = forwardRef<HTMLDivElement, RoomInputProps>(
); );
}; };
// Resolves a human-readable name for whoever disabled voice messages in this
// room: the member display name, falling back to the mxid localpart. Yields
// undefined when no disabling user id is known or no name can be derived.
const voiceBlockedName = useCallback(() => {
  if (!voiceDisabledBy) return undefined;
  const blockerName =
    getMemberDisplayName(room, voiceDisabledBy) ?? getMxIdLocalPart(voiceDisabledBy);
  return blockerName || undefined;
}, [voiceDisabledBy, room]);
// Switch the composer into voice mode (the recorder form takes the place of
// the text input). No-op for text-only composers and when voice mode is
// already active, so a pointerdown followed by the matching click cannot
// double-open. If the room has voice messages disabled, the inline error is
// shown instead of opening the recorder.
// NOTE(review): the original note says opening from this handler keeps
// getUserMedia / AudioContext.resume tied to the user gesture — confirm.
const handleOpenVoice = useCallback(() => {
  const nothingToDo = textOnly || voiceMode;
  if (nothingToDo) return;

  const voiceAllowed = voiceDisabledBy === undefined;
  if (voiceAllowed) {
    setVoiceError(null);
    setVoiceMode(true);
    return;
  }

  const blockerName = voiceBlockedName();
  const message = blockerName
    ? t('Room.voice_disabled', { name: blockerName })
    : t('Room.voice_disabled_generic');
  setVoiceError(message);
}, [textOnly, voiceMode, voiceDisabledBy, voiceBlockedName, t]);
// Leave voice mode so the composer falls back to the regular text input.
const handleCloseVoice = useCallback(() => setVoiceMode(false), []);
// Send action of the recorder: encrypt the recording when the room is
// encrypted, upload it, then post it as a voice message. Voice mode is left
// first, which removes the recorder form from the composer. Any failure along
// the way ends up as the inline voice error. See docs/plans/voice_messages.md.
const handleVoiceSend = useCallback(
  async (result: VoiceRecordingResult) => {
    setVoiceMode(false);

    const { blob, durationMs, waveform } = result;

    // Under half a second is treated as an accidental tap and dropped quietly.
    if (durationMs < 500) return;

    // Long enough to be intentional but no audio came out (encode failed or
    // timed out): tell the user rather than losing the message silently.
    if (blob.size === 0) {
      setVoiceError(t('Room.voice_send_error'));
      return;
    }

    // The room may have disabled voice while the recording was in progress,
    // so the gate is evaluated again right before sending.
    if (voiceDisabledBy !== undefined) {
      const blockerName = voiceBlockedName();
      const message = blockerName
        ? t('Room.voice_disabled', { name: blockerName })
        : t('Room.voice_disabled_generic');
      setVoiceError(message);
      return;
    }

    try {
      const voiceFile = safeFile(new File([blob], 'Voice message.ogg', { type: 'audio/ogg' }));
      const uploadItem: TUploadItem = room.hasEncryptionStateEvent()
        ? { ...(await encryptFile(voiceFile)), metadata: { markedAsSpoiler: false } }
        : {
            file: voiceFile,
            originalFile: voiceFile,
            encInfo: undefined,
            metadata: { markedAsSpoiler: false },
          };

      const uploadResponse = await mx.uploadContent(uploadItem.file);
      const contentUri = uploadResponse?.content_uri;
      if (!contentUri) throw new Error('Failed to upload voice message');

      const voiceContent = getVoiceMsgContent(uploadItem, contentUri, durationMs, waveform);
      await mx.sendMessage(roomId, threadId ?? null, voiceContent as RoomMessageEventContent);
    } catch {
      setVoiceError(t('Room.voice_send_error'));
    }
  },
  [voiceDisabledBy, voiceBlockedName, room, mx, roomId, threadId, t]
);
const submit = useCallback(() => { const submit = useCallback(() => {
uploadBoardHandlers.current?.handleSend(); uploadBoardHandlers.current?.handleSend();
@ -620,6 +734,22 @@ export const RoomInput = forwardRef<HTMLDivElement, RoomInputProps>(
</IconButton> </IconButton>
); );
// Voice-record trigger — sits left of the emoji button. A tap opens the
// recorder (which morphs the input into the audio form). Hidden when the
// engine can't record or voice is disabled for the room.
const micButton = (
<IconButton
onClick={handleOpenVoice}
variant="SurfaceVariant"
fill="None"
size="300"
radii="300"
aria-label={t('Room.voice_record')}
>
<Icon src={StreamComposerIcons.Mic} />
</IconButton>
);
// The native dock renders the board in the composer's top slot, so its // The native dock renders the board in the composer's top slot, so its
// open/close state lives here (only read on native). Desktop keeps its // open/close state lives here (only read on native). Desktop keeps its
// state isolated inside the UseStateProvider below so opening the pop-out // state isolated inside the UseStateProvider below so opening the pop-out
@ -833,9 +963,34 @@ export const RoomInput = forwardRef<HTMLDivElement, RoomInputProps>(
onKeyDown={handleKeyDown} onKeyDown={handleKeyDown}
onKeyUp={handleKeyUp} onKeyUp={handleKeyUp}
onPaste={handlePaste} onPaste={handlePaste}
replaceEditable={
voiceMode ? (
<VoiceRecorder onSend={handleVoiceSend} onClose={handleCloseVoice} />
) : undefined
}
top={ top={
<> <>
{dockedEmojiBoard} {dockedEmojiBoard}
{voiceError && (
<Box
alignItems="Center"
gap="200"
style={{ padding: `${config.space.S200} ${config.space.S300} 0` }}
>
<IconButton
onClick={() => setVoiceError(null)}
variant="SurfaceVariant"
size="300"
radii="300"
aria-label={t('Room.voice_dismiss_error')}
>
<Icon src={Icons.Cross} size="50" />
</IconButton>
<Text size="T300" style={{ color: color.Critical.Main }}>
{voiceError}
</Text>
</Box>
)}
{replyDraft && ( {replyDraft && (
<div> <div>
<Box <Box
@ -876,6 +1031,7 @@ export const RoomInput = forwardRef<HTMLDivElement, RoomInputProps>(
</> </>
} }
bottom={ bottom={
voiceMode ? null : (
<Box <Box
alignItems="Center" alignItems="Center"
gap="200" gap="200"
@ -883,9 +1039,11 @@ export const RoomInput = forwardRef<HTMLDivElement, RoomInputProps>(
> >
{!textOnly && plusButton} {!textOnly && plusButton}
<Box grow="Yes" /> <Box grow="Yes" />
{!textOnly && voiceSupported && voiceDisabledBy === undefined && micButton}
{!textOnly && emojiButton} {!textOnly && emojiButton}
{sendButton} {sendButton}
</Box> </Box>
)
} }
/> />
</div> </div>

View file

@ -32,7 +32,7 @@ import { isKeyHotkey } from 'is-hotkey';
import { Opts as LinkifyOpts } from 'linkifyjs'; import { Opts as LinkifyOpts } from 'linkifyjs';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import { useParams } from 'react-router-dom'; import { useParams } from 'react-router-dom';
import { getMxIdLocalPart } from '../../utils/matrix'; import { getMxIdLocalPart, mxcUrlToHttp } from '../../utils/matrix';
import { useMatrixClient } from '../../hooks/useMatrixClient'; import { useMatrixClient } from '../../hooks/useMatrixClient';
import { useVirtualPaginator, ItemRange } from '../../hooks/useVirtualPaginator'; import { useVirtualPaginator, ItemRange } from '../../hooks/useVirtualPaginator';
import { useAlive } from '../../hooks/useAlive'; import { useAlive } from '../../hooks/useAlive';
@ -66,6 +66,7 @@ import {
getEditedEvent, getEditedEvent,
getEventReactions, getEventReactions,
getLatestEditableEvt, getLatestEditableEvt,
getMemberAvatarMxc,
getMemberDisplayName, getMemberDisplayName,
isBridgedRoom, isBridgedRoom,
isMembershipChanged, isMembershipChanged,
@ -1537,6 +1538,10 @@ export function RoomTimeline({
const senderId = mEvent.getSender() ?? ''; const senderId = mEvent.getSender() ?? '';
const senderDisplayName = const senderDisplayName =
getMemberDisplayName(room, senderId) ?? getMxIdLocalPart(senderId) ?? senderId; getMemberDisplayName(room, senderId) ?? getMxIdLocalPart(senderId) ?? senderId;
const senderAvatarMxc = getMemberAvatarMxc(room, senderId);
const senderAvatarUrl = senderAvatarMxc
? mxcUrlToHttp(mx, senderAvatarMxc, useAuthentication, 96, 96, 'crop') ?? undefined
: undefined;
return ( return (
<Message <Message
@ -1607,6 +1612,9 @@ export function RoomTimeline({
) : ( ) : (
<RenderMessageContent <RenderMessageContent
displayName={senderDisplayName} displayName={senderDisplayName}
senderId={senderId}
senderAvatarUrl={senderAvatarUrl}
hideVoiceAvatar={messageLayout === 'channel'}
msgType={mEvent.getContent().msgtype ?? ''} msgType={mEvent.getContent().msgtype ?? ''}
ts={mEvent.getTs()} ts={mEvent.getTs()}
edited={!!editedEvent} edited={!!editedEvent}
@ -1748,9 +1756,17 @@ export function RoomTimeline({
getMemberDisplayName(room, senderId) ?? getMemberDisplayName(room, senderId) ??
getMxIdLocalPart(senderId) ?? getMxIdLocalPart(senderId) ??
senderId; senderId;
const senderAvatarMxc = getMemberAvatarMxc(room, senderId);
const senderAvatarUrl = senderAvatarMxc
? mxcUrlToHttp(mx, senderAvatarMxc, useAuthentication, 96, 96, 'crop') ??
undefined
: undefined;
return ( return (
<RenderMessageContent <RenderMessageContent
displayName={senderDisplayName} displayName={senderDisplayName}
senderId={senderId}
senderAvatarUrl={senderAvatarUrl}
hideVoiceAvatar={messageLayout === 'channel'}
msgType={mEvent.getContent().msgtype ?? ''} msgType={mEvent.getContent().msgtype ?? ''}
ts={mEvent.getTs()} ts={mEvent.getTs()}
edited={!!editedEvent} edited={!!editedEvent}

View file

@ -30,11 +30,12 @@ import { useEditor } from '../../components/editor';
import { import {
getEditedEvent, getEditedEvent,
getEventReactions, getEventReactions,
getMemberAvatarMxc,
getMemberDisplayName, getMemberDisplayName,
isBridgedRoom, isBridgedRoom,
reactionOrEditEvent, reactionOrEditEvent,
} from '../../utils/room'; } from '../../utils/room';
import { getMxIdLocalPart } from '../../utils/matrix'; import { getMxIdLocalPart, mxcUrlToHttp } from '../../utils/matrix';
import { import {
ImageContent, ImageContent,
MessageNotDecryptedContent, MessageNotDecryptedContent,
@ -1007,6 +1008,10 @@ export function ThreadDrawer({
const senderId = mEvent.getSender() ?? ''; const senderId = mEvent.getSender() ?? '';
const senderDisplayName = const senderDisplayName =
getMemberDisplayName(room, senderId) ?? getMxIdLocalPart(senderId) ?? senderId; getMemberDisplayName(room, senderId) ?? getMxIdLocalPart(senderId) ?? senderId;
const senderAvatarMxc = getMemberAvatarMxc(room, senderId);
const senderAvatarUrl = senderAvatarMxc
? mxcUrlToHttp(mx, senderAvatarMxc, useAuthentication, 96, 96, 'crop') ?? undefined
: undefined;
const eventType = mEvent.getType(); const eventType = mEvent.getType();
const body = (() => { const body = (() => {
@ -1020,6 +1025,9 @@ export function ThreadDrawer({
return ( return (
<RenderMessageContent <RenderMessageContent
displayName={senderDisplayName} displayName={senderDisplayName}
senderId={senderId}
senderAvatarUrl={senderAvatarUrl}
hideVoiceAvatar
msgType={mEvent.getContent().msgtype ?? ''} msgType={mEvent.getContent().msgtype ?? ''}
ts={mEvent.getTs()} ts={mEvent.getTs()}
edited={!!editedEvent} edited={!!editedEvent}
@ -1091,6 +1099,10 @@ export function ThreadDrawer({
const senderId = mEvent.getSender() ?? ''; const senderId = mEvent.getSender() ?? '';
const senderDisplayName = const senderDisplayName =
getMemberDisplayName(room, senderId) ?? getMxIdLocalPart(senderId) ?? senderId; getMemberDisplayName(room, senderId) ?? getMxIdLocalPart(senderId) ?? senderId;
const senderAvatarMxc = getMemberAvatarMxc(room, senderId);
const senderAvatarUrl = senderAvatarMxc
? mxcUrlToHttp(mx, senderAvatarMxc, useAuthentication, 96, 96, 'crop') ?? undefined
: undefined;
const { replyEventId, threadRootId } = mEvent; const { replyEventId, threadRootId } = mEvent;
// matrix-js-sdk auto-injects a fallback `m.in_reply_to` into every // matrix-js-sdk auto-injects a fallback `m.in_reply_to` into every
@ -1205,6 +1217,9 @@ export function ThreadDrawer({
return ( return (
<RenderMessageContent <RenderMessageContent
displayName={senderDisplayName} displayName={senderDisplayName}
senderId={senderId}
senderAvatarUrl={senderAvatarUrl}
hideVoiceAvatar
msgType={mEvent.getContent().msgtype ?? ''} msgType={mEvent.getContent().msgtype ?? ''}
ts={mEvent.getTs()} ts={mEvent.getTs()}
edited={!!editedEvent} edited={!!editedEvent}

View file

@ -0,0 +1,127 @@
import { globalStyle, keyframes, style } from '@vanilla-extract/css';
import { color, config, toRem } from 'folds';
import { ChatComposer } from './RoomView.css';
// The recorder renders into the composer's `replaceEditable` slot as two stacked
// rows (wave + controls), morphing the input in place. See VoiceRecorder and
// docs/plans/voice_messages.md.
const fadeIn = keyframes({
from: { opacity: 0, transform: 'translateY(2px)' },
to: { opacity: 1, transform: 'translateY(0)' },
});
export const Recorder = style({
display: 'flex',
flexDirection: 'column',
width: '100%',
boxSizing: 'border-box',
animation: `${fadeIn} 160ms ease`,
});
export const WaveRow = style({
display: 'flex',
alignItems: 'center',
gap: config.space.S300,
width: '100%',
boxSizing: 'border-box',
minHeight: toRem(44),
padding: `${config.space.S200} ${config.space.S400}`,
});
export const Controls = style({
display: 'flex',
alignItems: 'center',
// A touch more breathing room than the text action row so the controls are
// harder to mis-tap — but still compact.
gap: config.space.S400,
width: '100%',
boxSizing: 'border-box',
padding: `${toRem(2)} ${config.space.S300} ${toRem(4)}`,
});
export const Grow = style({ flexGrow: 1 });
// Circular violet play/pause beside the waveform — reads as a playback control
// (and is visually + positionally distinct from the Send action below it).
export const PlayBtn = style({
flexShrink: 0,
width: toRem(32),
height: toRem(32),
display: 'flex',
alignItems: 'center',
justifyContent: 'center',
border: 'none',
padding: 0,
borderRadius: config.radii.Pill,
cursor: 'pointer',
backgroundColor: color.Primary.Main,
color: color.Primary.OnMain,
transition: 'transform 120ms ease, background-color 120ms ease',
selectors: {
'&:hover': { backgroundColor: color.Primary.MainHover, transform: 'scale(1.06)' },
'&:active': { transform: 'scale(0.92)' },
},
});
// The composer touch-hover gate (RoomView.css) zeroes the background of EVERY
// `${ChatComposer} button` on a stuck :hover/:focus-visible after an Android
// WebView tap — which made this DELIBERATELY-filled play button vanish (violet
// fill → transparent, dark icon on dark = invisible). Re-assert the fill with a
// more specific selector (button.PlayBtn beats plain button) so it survives.
globalStyle(
`:root[data-input="touch"] ${ChatComposer} button.${PlayBtn}:hover, :root[data-input="touch"] ${ChatComposer} button.${PlayBtn}:focus-visible`,
{ backgroundColor: color.Primary.Main }
);
const pulse = keyframes({
'0%': { opacity: 1, transform: 'scale(1)' },
'50%': { opacity: 0.35, transform: 'scale(0.82)' },
'100%': { opacity: 1, transform: 'scale(1)' },
});
// Red "recording" indicator dot shown next to the live waveform. It pulses
// (see the `pulse` keyframes) for as long as it is rendered.
export const RecDot = style({
  width: toRem(10),
  height: toRem(10),
  borderRadius: config.radii.Pill,
  flexShrink: 0,
  backgroundColor: color.Critical.Main,
  animation: `${pulse} 1.2s ease-in-out infinite`,
  // The pulse never stops on its own, so honour the OS-level reduced-motion
  // preference and show a steady dot instead.
  '@media': {
    '(prefers-reduced-motion: reduce)': {
      animation: 'none',
    },
  },
});
export const Time = style({
flexShrink: 0,
minWidth: toRem(42),
textAlign: 'right',
fontVariantNumeric: 'tabular-nums',
color: color.SurfaceVariant.OnContainer,
});
export const ErrorText = style({
color: color.Critical.Main,
});
export const Waveform = style({
position: 'relative',
flexGrow: 1,
minWidth: 0,
display: 'flex',
alignItems: 'center',
gap: toRem(2),
height: toRem(28),
overflow: 'hidden',
});
export const WaveBar = style({
flex: '1 1 0',
minWidth: toRem(2),
minHeight: toRem(2),
borderRadius: config.radii.Pill,
// Glide between samples (cadence ~80ms) so the live meter flows continuously
// rather than snapping — and the played/unplayed boundary slides on playback.
transition: 'height 90ms linear, background-color 120ms linear',
});
export const WaveBarPlayed = style({ backgroundColor: color.Primary.Main });
export const WaveBarRest = style({ backgroundColor: color.SurfaceVariant.ContainerLine });
export const WaveBarLive = style({ backgroundColor: color.Primary.Main });

View file

@ -0,0 +1,315 @@
/* eslint-disable jsx-a11y/media-has-caption */
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { Icon, IconButton, Spinner, Text } from 'folds';
import { useTranslation } from 'react-i18next';
import { VoiceRecording, VoiceRecordingResult } from '../../utils/voiceRecording';
import { secondsToMinutesAndSeconds } from '../../utils/common';
import { normalizeWaveform } from '../../utils/audioWaveform';
import { PlayTimeCallback, useMediaPlay, useMediaPlayTimeCallback } from '../../hooks/media';
import { useThrottle } from '../../hooks/useThrottle';
import * as css from './VoiceRecorderForm.css';
const LIVE_BARS = 48;
const PREVIEW_BARS = 48;
const PLAY_TIME_THROTTLE_OPS = { wait: 80, immediate: true };
// Hard cap on a single recording. Telegram itself has no duration limit (it's
// only storage/file-size bounded), so a "press and forget" would record forever
// — we auto-stop into the preview state at this point so nothing is lost and the
// mic is released. Tune freely; 5 minutes is generous for a chat voice note.
const MAX_RECORDING_MS = 5 * 60 * 1000;
type Phase = 'starting' | 'recording' | 'recorded' | 'error';
// Composer recorder icons — stroke-based outline style from the Dawn design
// canon (docs/design/new-direct-messages-design/project/shared.jsx), NOT folds'
// default icons. folds wraps these in <svg viewBox="0 0 24 24" fill="none">.
export const RecIcons = {
Close: () => (
<path d="M6 6l12 12M18 6L6 18" stroke="currentColor" strokeWidth="1.8" strokeLinecap="round" />
),
Trash: () => (
<>
<path d="M4 7h16" stroke="currentColor" strokeWidth="1.6" strokeLinecap="round" />
<path
d="M9 7V5a1 1 0 0 1 1-1h4a1 1 0 0 1 1 1v2"
stroke="currentColor"
strokeWidth="1.6"
strokeLinejoin="round"
/>
<path
d="M6 7l1 13a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1l1-13"
stroke="currentColor"
strokeWidth="1.6"
strokeLinejoin="round"
/>
</>
),
Stop: () => (
<rect x="6" y="6" width="12" height="12" rx="2.5" stroke="currentColor" strokeWidth="1.6" />
),
Play: () => (
<path
d="M8 5.5l11 6.5-11 6.5V5.5z"
stroke="currentColor"
strokeWidth="1.6"
strokeLinejoin="round"
/>
),
Pause: () => (
<path d="M9 5v14M15 5v14" stroke="currentColor" strokeWidth="1.8" strokeLinecap="round" />
),
Send: () => (
<path
d="M4 12l16-8-6 18-3-7-7-3z"
stroke="currentColor"
strokeWidth="1.6"
strokeLinejoin="round"
/>
),
};
export type VoiceRecorderProps = {
// Fired with the finished recording when the user taps Send.
onSend: (result: VoiceRecordingResult) => void;
// Fired when the user closes the recorder (discard + back to text input).
onClose: () => void;
};
// Self-contained recorder that renders into the composer's `replaceEditable`
// slot (so the input morphs in place; the composer card / Slate context / drafts
// stay mounted). It owns ALL recorder state internally — high-frequency updates
// (timer, live levels, preview position) re-render ONLY this component, never the
// big RoomInput. The hidden <audio> lives here too, so on Send/Close the whole
// subtree unmounts and any preview playback stops. See docs/plans/voice_messages.md.
//
// Phases: 'starting' (recorder is being started) → 'recording' → 'recorded'
// (preview with play/pause, delete, send); 'error' when start/stop throws.
//
// Props:
// - onSend: called with the finished recording when the user taps Send.
// - onClose: called when the user cancels/discards (back to the text input).
export function VoiceRecorder({ onSend, onClose }: VoiceRecorderProps) {
  const { t } = useTranslation();
  // The recorder currently owned by this component; null when none is active.
  const recorderRef = useRef<VoiceRecording | null>(null);
  const mountedRef = useRef(true);
  const [phase, setPhase] = useState<Phase>('starting');
  const [elapsedMs, setElapsedMs] = useState(0);
  const [liveLevels, setLiveLevels] = useState<number[]>(() => new Array(LIVE_BARS).fill(0));
  const [result, setResult] = useState<VoiceRecordingResult | null>(null);
  const [previewUrl, setPreviewUrl] = useState<string | null>(null);

  const audioRef = useRef<HTMLAudioElement | null>(null);
  const [currentTime, setCurrentTime] = useState(0);
  const getAudioRef = useCallback(() => audioRef.current, []);
  const { playing, setPlaying } = useMediaPlay(getAudioRef);
  const handlePlayTime: PlayTimeCallback = useCallback((_d, ct) => setCurrentTime(ct), []);
  useMediaPlayTimeCallback(getAudioRef, useThrottle(handlePlayTime, PLAY_TIME_THROTTLE_OPS));

  // Create a recorder and start it. No-op while another recorder is owned.
  const begin = useCallback(async () => {
    if (recorderRef.current) return;
    const rec = new VoiceRecording();
    recorderRef.current = rec;
    setResult(null);
    setLiveLevels(new Array(LIVE_BARS).fill(0));
    setElapsedMs(0);
    setCurrentTime(0);
    setPhase('starting');
    try {
      await rec.start((level) => {
        // Fixed-width sliding window — shift one out, push the newest in, so the
        // waveform glides continuously instead of growing then freezing.
        setLiveLevels((prev) => {
          const next = prev.slice(1);
          next.push(level);
          return next;
        });
      });
      // `rec` may have been cancelled (unmount) or replaced by a newer recorder
      // (effect re-run after a remount) while start() was pending. Only the
      // recorder we still own may drive the phase.
      if (recorderRef.current !== rec) return;
      setPhase('recording');
    } catch {
      // Same ownership check: a late failure of a stale recorder must not drop
      // the reference to the live one (it could then never be stopped or
      // cancelled) nor show an error over a working recording.
      if (recorderRef.current !== rec) return;
      recorderRef.current = null;
      setPhase('error');
    }
  }, []);

  // Stop the active recorder and move to the preview ('recorded') phase.
  const handleStop = useCallback(async () => {
    const rec = recorderRef.current;
    if (!rec) return;
    // Release ownership first so repeated calls (Stop tap + auto-stop timer)
    // are no-ops while stop() is pending.
    recorderRef.current = null;
    try {
      const res = await rec.stop();
      // If the composer unmounted (room switch) during the await, don't create a
      // preview URL no one is left to revoke, and skip the state writes.
      if (!mountedRef.current) return;
      setResult(res);
      setPreviewUrl(URL.createObjectURL(res.blob));
      setPhase('recorded');
    } catch {
      if (mountedRef.current) setPhase('error');
    }
  }, []);

  // Auto-start on mount; cancel any in-flight recorder on unmount (mic release).
  useEffect(() => {
    mountedRef.current = true;
    begin();
    return () => {
      mountedRef.current = false;
      recorderRef.current?.cancel();
      recorderRef.current = null;
    };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  // Recording timer + max-duration auto-stop.
  useEffect(() => {
    if (phase !== 'recording') return undefined;
    const startedAt = performance.now();
    const id = window.setInterval(() => {
      const elapsed = performance.now() - startedAt;
      setElapsedMs(elapsed);
      if (elapsed >= MAX_RECORDING_MS) handleStop();
    }, 200);
    return () => window.clearInterval(id);
  }, [phase, handleStop]);

  // Revoke the preview object URL when it changes / on unmount. This effect is
  // the single owner of the URL's revocation.
  useEffect(
    () => () => {
      if (previewUrl) URL.revokeObjectURL(previewUrl);
    },
    [previewUrl]
  );

  // Discard the recording and close the whole recorder (back to text input) —
  // this does NOT start a new take. Closing unmounts the component, which
  // cancels any owned recorder and revokes the preview URL via the effects above.
  const handleDelete = useCallback(() => {
    setPlaying(false);
    onClose();
  }, [onClose, setPlaying]);

  // Hand the finished recording to the parent; with no result, just close.
  const handleSend = useCallback(() => {
    setPlaying(false);
    if (result) onSend(result);
    else onClose();
  }, [result, onSend, onClose, setPlaying]);

  const previewBars = useMemo(() => normalizeWaveform(result?.waveform, PREVIEW_BARS), [result]);
  const durationSec = (result?.durationMs ?? 0) / 1000;
  // Fraction of the preview already played, clamped to 1.
  const progress = durationSec > 0 ? Math.min(1, currentTime / durationSec) : 0;
  const recording = phase === 'recording' || phase === 'starting';
  // While recording: elapsed time. In preview: playback position once playback
  // has started, otherwise the total duration.
  const timeLabel = recording
    ? secondsToMinutesAndSeconds(elapsedMs / 1000)
    : secondsToMinutesAndSeconds(playing || currentTime > 0 ? currentTime : durationSec);

  return (
    <div className={css.Recorder}>
      <div className={css.WaveRow}>
        {phase === 'error' ? (
          <Text className={css.ErrorText} size="T300">
            {t('Room.voice_mic_error')}
          </Text>
        ) : (
          <>
            {recording && <span className={css.RecDot} aria-hidden />}
            {phase === 'recorded' && (
              <button
                type="button"
                className={css.PlayBtn}
                onClick={() => setPlaying(!playing)}
                aria-label={playing ? t('Room.voice_pause') : t('Room.voice_play')}
              >
                <Icon src={playing ? RecIcons.Pause : RecIcons.Play} size="200" />
              </button>
            )}
            <div className={css.Waveform} aria-hidden>
              {phase === 'recorded'
                ? previewBars.map((amp, index) => {
                    const played = (index + 0.5) / previewBars.length <= progress;
                    return (
                      <span
                        // eslint-disable-next-line react/no-array-index-key
                        key={index}
                        className={`${css.WaveBar} ${played ? css.WaveBarPlayed : css.WaveBarRest}`}
                        style={{ height: `${Math.max(12, Math.round(amp * 100))}%` }}
                      />
                    );
                  })
                : liveLevels.map((level, index) => (
                    <span
                      // eslint-disable-next-line react/no-array-index-key
                      key={index}
                      className={`${css.WaveBar} ${css.WaveBarLive}`}
                      style={{ height: `${Math.max(8, Math.round(Math.min(1, level) * 100))}%` }}
                    />
                  ))}
            </div>
            <Text className={css.Time} size="T300">
              {timeLabel}
            </Text>
          </>
        )}
      </div>
      <div className={css.Controls}>
        {/* Left action: cancel while recording (X) or discard+close in preview
            (trash). Both exit the recorder entirely. */}
        {phase === 'recorded' ? (
          <IconButton
            onClick={handleDelete}
            variant="SurfaceVariant"
            fill="None"
            size="300"
            radii="300"
            aria-label={t('Room.voice_delete')}
          >
            <Icon src={RecIcons.Trash} />
          </IconButton>
        ) : (
          <IconButton
            onClick={onClose}
            variant="SurfaceVariant"
            fill="None"
            size="300"
            radii="300"
            aria-label={t('Room.voice_close')}
          >
            <Icon src={RecIcons.Close} />
          </IconButton>
        )}
        <div className={css.Grow} />
        {phase === 'starting' && <Spinner size="200" variant="Secondary" />}
        {phase === 'recording' && (
          <IconButton
            onClick={handleStop}
            variant="Primary"
            fill="Soft"
            size="300"
            radii="300"
            aria-label={t('Room.voice_stop')}
          >
            <Icon src={RecIcons.Stop} />
          </IconButton>
        )}
        {phase === 'recorded' && (
          <IconButton
            onClick={handleSend}
            variant="Primary"
            fill="Soft"
            size="300"
            radii="300"
            aria-label={t('Room.voice_send')}
          >
            <Icon src={RecIcons.Send} />
          </IconButton>
        )}
      </div>
      <audio
        controls={false}
        ref={audioRef}
        src={previewUrl ?? undefined}
        style={{ display: 'none' }}
      />
    </div>
  );
}

View file

@ -40,6 +40,7 @@ import type { StateEvents } from 'matrix-js-sdk';
import { Relations } from 'matrix-js-sdk/lib/models/relations'; import { Relations } from 'matrix-js-sdk/lib/models/relations';
import classNames from 'classnames'; import classNames from 'classnames';
import { RoomPinnedEventsEventContent } from 'matrix-js-sdk/lib/types'; import { RoomPinnedEventsEventContent } from 'matrix-js-sdk/lib/types';
import { isVoiceMessageContent } from '../../../../types/matrix/common';
import { import {
CHANNEL_MESSAGE_SPACING, CHANNEL_MESSAGE_SPACING,
ChannelLayout, ChannelLayout,
@ -942,6 +943,12 @@ const MessageInner = as<'div', MessageProps>(
// decryption fires between render and listener attach. // decryption fires between render and listener attach.
const isMediaMessage = msgType === MsgType.Image || msgType === MsgType.Video; const isMediaMessage = msgType === MsgType.Image || msgType === MsgType.Video;
const mediaMode = isMediaMessage && !edit; const mediaMode = isMediaMessage && !edit;
// Voice notes are self-chromed cards — VoiceContent draws its own avatar +
// bubble. Collapse the asymmetric Stream bubble (DM) and drop the channel
// avatar (group) so a voice note renders identically for own/other, the only
// difference being the bubble fill. See docs/plans/voice_messages.md §5.
const isVoiceMessage = msgType === MsgType.Audio && isVoiceMessageContent(mEvent.getContent());
const voiceMode = isVoiceMessage && !edit;
if (msgType === MsgType.Image || msgType === MsgType.Video || msgType === MsgType.File) { if (msgType === MsgType.Image || msgType === MsgType.Video || msgType === MsgType.File) {
logMedia('Message', { logMedia('Message', {
@ -1362,7 +1369,7 @@ const MessageInner = as<'div', MessageProps>(
railStart={streamRailStart} railStart={streamRailStart}
railEnd={streamRailEnd} railEnd={streamRailEnd}
railHidden={railHidden} railHidden={railHidden}
mediaMode={mediaMode} mediaMode={mediaMode || voiceMode}
reactions={reactions} reactions={reactions}
threadSummary={threadSummary} threadSummary={threadSummary}
header={ header={

View file

@ -2,8 +2,10 @@ import { IContent, MatrixClient, MsgType } from 'matrix-js-sdk';
import to from 'await-to-js'; import to from 'await-to-js';
import { import {
IThumbnailContent, IThumbnailContent,
MATRIX_AUDIO_PROPERTY_NAME,
MATRIX_BLUR_HASH_PROPERTY_NAME, MATRIX_BLUR_HASH_PROPERTY_NAME,
MATRIX_SPOILER_PROPERTY_NAME, MATRIX_SPOILER_PROPERTY_NAME,
MATRIX_VOICE_PROPERTY_NAME,
} from '../../../types/matrix/common'; } from '../../../types/matrix/common';
import { import {
getImageFileUrl, getImageFileUrl,
@ -155,6 +157,55 @@ export const getAudioMsgContent = (item: TUploadItem, mxc: string): IContent =>
return content; return content;
}; };
// Voice message (MSC3245). Unlike `getAudioMsgContent`, this stamps the voice
// marker + the MSC1767 audio block (duration + waveform) so the message renders
// as a voice note and bridges to Telegram as a real voice note.
// See docs/plans/voice_messages.md.
//
// - item: the upload item; `encInfo` is set when the file was encrypted.
// - mxc: content URI returned by the media upload.
// - durationMs: recording length in milliseconds.
// - waveform: amplitude samples, expected as integers 0..1024.
//
// Returns the `m.room.message` content. For encrypted uploads the media is
// referenced via `file` (encryption info + url), otherwise via `url`.
export const getVoiceMsgContent = (
  item: TUploadItem,
  mxc: string,
  durationMs: number,
  waveform: number[]
): IContent => {
  const { file, encInfo } = item;

  // Matrix canonical JSON only permits integers, so never put a fractional or
  // non-finite number on the wire: a duration derived from a high-resolution
  // timer can be fractional, and NaN would serialize as `null`. Valid input
  // (non-negative integer ms, integer samples in 0..1024) passes unchanged.
  const duration = Number.isFinite(durationMs) ? Math.max(0, Math.round(durationMs)) : 0;
  const wireWaveform = waveform.map((sample) =>
    Number.isFinite(sample) ? Math.min(1024, Math.max(0, Math.round(sample))) : 0
  );

  const content: IContent = {
    msgtype: MsgType.Audio,
    body: 'Voice message',
    filename: file.name,
    info: {
      mimetype: file.type,
      size: file.size,
      duration,
    },
    // MSC1767 extensible-event fallbacks (matches element-web's
    // createVoiceMessageContent) so strict third-party MSC1767 clients still
    // render the voice note. Harmless for the bridge / our own renderer.
    'org.matrix.msc1767.text': 'Voice message',
    [MATRIX_VOICE_PROPERTY_NAME]: {},
    [MATRIX_AUDIO_PROPERTY_NAME]: {
      duration,
      waveform: wireWaveform,
    },
  };

  // The MSC1767 file block mirrors the legacy url/file fields set below.
  const msc1767File: Record<string, unknown> = {
    name: file.name,
    mimetype: file.type,
    size: file.size,
  };
  if (encInfo) {
    content.file = {
      ...encInfo,
      url: mxc,
    };
    msc1767File.file = { ...encInfo, url: mxc };
  } else {
    content.url = mxc;
    msc1767File.url = mxc;
  }
  content['org.matrix.msc1767.file'] = msc1767File;
  return content;
};
export const getFileMsgContent = (item: TUploadItem, mxc: string): IContent => { export const getFileMsgContent = (item: TUploadItem, mxc: string): IContent => {
const { file, encInfo } = item; const { file, encInfo } = item;
const content: IContent = { const content: IContent = {

View file

@ -32,10 +32,14 @@ export const useMediaPlay = (
targetEl?.addEventListener('playing', handleChange); targetEl?.addEventListener('playing', handleChange);
targetEl?.addEventListener('play', handleChange); targetEl?.addEventListener('play', handleChange);
targetEl?.addEventListener('pause', handleChange); targetEl?.addEventListener('pause', handleChange);
// Reaching the end sets `paused = true` but does NOT always fire a `pause`
// event — without this the button would stay stuck on "Pause" after playback.
targetEl?.addEventListener('ended', handleChange);
return () => { return () => {
targetEl?.removeEventListener('playing', handleChange); targetEl?.removeEventListener('playing', handleChange);
targetEl?.removeEventListener('play', handleChange); targetEl?.removeEventListener('play', handleChange);
targetEl?.removeEventListener('pause', handleChange); targetEl?.removeEventListener('pause', handleChange);
targetEl?.removeEventListener('ended', handleChange);
}; };
}, [getTargetElement]); }, [getTargetElement]);

View file

@ -220,6 +220,9 @@ export const useCommands = (mx: MatrixClient, room: Room): CommandRecord => {
visibility: Visibility.Private, visibility: Visibility.Private,
preset: Preset.TrustedPrivateChat, preset: Preset.TrustedPrivateChat,
initial_state: [createRoomEncryptionState()], initial_state: [createRoomEncryptionState()],
// TrustedPrivateChat gives both parties PL 100, so either can toggle
// the voice-messages state event; no events override (which would
// replace Synapse's default events map). See voice_messages.md §4.
}); });
addRoomIdToMDirect(mx, result.room_id, userIds[0]); addRoomIdToMDirect(mx, result.room_id, userIds[0]);
navigateRoom(result.room_id); navigateRoom(result.room_id);

View file

@ -0,0 +1,21 @@
/**
 * Resample an arbitrary-length waveform into `bars` peak-per-bucket values
 * normalized to 0..1.
 *
 * Handles both the on-wire MSC1767/Telegram integers (0..1024) and a 0..1 float
 * sender via max-detection: when the overall peak exceeds 1 every value is
 * divided by that peak, otherwise values are used as-is. Used by the voice-note
 * bubble and the recorder preview.
 *
 * The input may originate from remote event content, so it is sanitized first:
 * any entry that is not a finite number is treated as silence (0). Without
 * this a single NaN/Infinity/non-number would turn bars into NaN.
 *
 * @param waveform Raw samples; sign is ignored (absolute value is used).
 * @param bars Number of output buckets. Non-finite or non-positive ⇒ `[]`.
 * @returns One value in 0..1 per bucket, or `[]` for missing/empty input.
 */
export const normalizeWaveform = (waveform: number[] | undefined, bars: number): number[] => {
  if (!waveform || waveform.length === 0) return [];
  // An infinite bar count would never terminate the loop below.
  if (!Number.isFinite(bars) || bars <= 0) return [];

  const samples = waveform.map((value) =>
    typeof value === 'number' && Number.isFinite(value) ? Math.abs(value) : 0
  );
  const peak = samples.reduce((max, value) => Math.max(max, value), 0);
  // peak <= 1 ⇒ the sender already used 0..1 floats; keep them unscaled.
  const divisor = peak > 1 ? peak : 1;

  const out: number[] = [];
  const bucket = samples.length / bars;
  for (let i = 0; i < bars; i += 1) {
    const start = Math.floor(i * bucket);
    // Always cover at least one sample so upsampling (bars > length) repeats
    // the nearest value instead of emitting empty buckets.
    const end = Math.max(start + 1, Math.floor((i + 1) * bucket));
    let localMax = 0;
    for (let j = start; j < end && j < samples.length; j += 1) {
      localMax = Math.max(localMax, samples[j] ?? 0);
    }
    out.push(Math.min(1, localMax / divisor));
  }
  return out;
};

View file

@ -0,0 +1,259 @@
import Recorder from 'opus-recorder';
import encoderPath from 'opus-recorder/dist/encoderWorker.min.js?url';
// In-browser voice-message recorder. Produces real OGG/Opus (so it bridges to
// Telegram as a genuine voice note — MediaRecorder's webm/opus would NOT) plus
// a waveform sampled live off an AnalyserNode. Encode-only: we drop element-web's
// Safari WAV-decode fallback and AudioWorklet metering. See
// docs/plans/voice_messages.md §1 (D1, D2, D3) and §3 (Phase 2).
// Value resolved by VoiceRecording.stop().
export type VoiceRecordingResult = {
// encoded audio; assembled from the recorder's chunks with MIME type audio/ogg
blob: Blob;
// recorded length in milliseconds
durationMs: number;
// MSC1767 waveform: integers 0..1024 (stop() returns [] when no recorder was active)
waveform: number[];
};
// Live amplitude sampling cadence (~12.5 Hz). Enough resolution for a readable
// waveform without flooding the array on long recordings. Used as the period of
// the setInterval that polls the AnalyserNode while recording.
const SAMPLE_INTERVAL_MS = 80;
// Number of buckets in the emitted waveform (the `points` argument passed to
// buildWaveform when a recording is stopped).
const WAVEFORM_POINTS = 100;
/**
 * Create a fresh AudioContext, preferring the standard constructor and falling
 * back to the `webkit`-prefixed one exposed on `window` by some engines.
 */
const getAudioContext = (): AudioContext => {
  type PrefixedWindow = { webkitAudioContext: typeof AudioContext };

  const StandardContext = window.AudioContext;
  if (StandardContext) {
    return new StandardContext();
  }

  const PrefixedContext = (window as unknown as PrefixedWindow).webkitAudioContext;
  return new PrefixedContext();
};
/**
 * Collapse a list of 0..1 amplitudes into `points` buckets, averaging the
 * samples that fall into each bucket, and scale every bucket to an integer in
 * 0..1024 (the MSC1767 waveform range).
 *
 * With no samples at all the result is `points` zeros. When there are fewer
 * samples than buckets, each bucket still reads at least one sample, so
 * neighbouring buckets repeat the same value.
 */
const buildWaveform = (amplitudes: number[], points: number): number[] => {
  const sampleCount = amplitudes.length;
  if (sampleCount === 0) return new Array(points).fill(0);

  const samplesPerPoint = sampleCount / points;
  const scaled: number[] = [];

  let point = 0;
  while (point < points) {
    const from = Math.floor(point * samplesPerPoint);
    const to = Math.max(from + 1, Math.floor((point + 1) * samplesPerPoint));
    // slice() clamps `to` to the array length, mirroring the bounds check.
    const members = amplitudes.slice(from, to);
    const total = members.reduce((acc, level) => acc + level, 0);
    const mean = members.length > 0 ? total / members.length : 0;
    scaled.push(Math.min(1024, Math.round(mean * 1024)));
    point += 1;
  }

  return scaled;
};
/**
 * One voice-message recording session.
 *
 * Captures the microphone with getUserMedia, feeds it to opus-recorder through
 * a MediaStreamAudioSourceNode, and samples an AnalyserNode on a timer to build
 * the waveform. `start()` begins capture, `stop()` resolves with the encoded
 * blob + duration + waveform, `cancel()` discards everything. Every exit path
 * funnels through `cleanup()`, which is the only place the mic track is
 * stopped (opus-recorder is handed a sourceNode and does not own the stream).
 */
export class VoiceRecording {
  private recorder: Recorder | null = null;

  // Set by cancel() AND stop() so a start() still awaiting getUserMedia /
  // AudioContext resume / encoder-worker init releases anything it acquires
  // late instead of leaking the mic or the sampling timer. Reset by start().
  private cancelled = false;

  private stream: MediaStream | null = null;

  private audioContext: AudioContext | null = null;

  private source: MediaStreamAudioSourceNode | null = null;

  private analyser: AnalyserNode | null = null;

  // Live 0..1 peak levels, one entry per sampling tick.
  private amplitudes: number[] = [];

  private sampleTimer: number | undefined;

  // performance.now() at the moment the encoder started; 0 while not recording.
  private startedAt = 0;

  // Encoded OGG data delivered by the recorder's ondataavailable callback.
  private chunks: Uint8Array[] = [];

  private onAmplitude: ((level: number) => void) | null = null;

  /**
   * Whether this environment exposes everything the recorder needs:
   * getUserMedia, WebAssembly and an (optionally webkit-prefixed) AudioContext.
   */
  static isSupported(): boolean {
    return (
      typeof navigator !== 'undefined' &&
      !!navigator.mediaDevices?.getUserMedia &&
      // opus-recorder needs WebAssembly — mirror its own gate so the mic button
      // is hidden (not failed-into-error) on a WASM-less engine.
      typeof WebAssembly !== 'undefined' &&
      (typeof window.AudioContext !== 'undefined' ||
        typeof (window as unknown as { webkitAudioContext?: unknown }).webkitAudioContext !==
          'undefined')
    );
  }

  /**
   * Acquire the mic and start encoding.
   *
   * @param onAmplitude Optional callback invoked with the 0..1 peak level on
   *   every sampling tick.
   * @throws Re-throws whatever getUserMedia / the recorder rejected with, after
   *   releasing any resource acquired so far. If the session is torn down
   *   (cancel()/stop()) while starting, the promise resolves without recording.
   */
  async start(onAmplitude?: (level: number) => void): Promise<void> {
    this.onAmplitude = onAmplitude ?? null;
    this.cancelled = false;
    // Drop data left over from a previous session up front, so a stop() that
    // arrives before the encoder exists cannot hand back the old recording.
    this.chunks = [];
    this.amplitudes = [];
    try {
      await this.startInternal();
    } catch (err) {
      // If we acquired the mic before a later step failed (e.g. the encoder
      // worker didn't load), release it so the mic never stays on.
      this.cleanup();
      throw err;
    }
  }

  /**
   * The actual start sequence. After every await it re-checks `cancelled` and
   * bails out through cleanup() if the session was torn down meanwhile.
   */
  private async startInternal(): Promise<void> {
    this.stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: 1,
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
      },
    });
    // Torn down (Close / room switch) while getUserMedia was resolving — release
    // the just-acquired mic instead of leaking it.
    if (this.cancelled) {
      this.cleanup();
      return;
    }

    this.audioContext = getAudioContext();
    // Started from a click (mic button) so the gesture lets us resume a
    // context that some engines spawn suspended.
    if (this.audioContext.state === 'suspended') {
      await this.audioContext.resume();
    }
    if (this.cancelled) {
      this.cleanup();
      return;
    }

    this.source = this.audioContext.createMediaStreamSource(this.stream);
    this.analyser = this.audioContext.createAnalyser();
    this.analyser.fftSize = 1024;
    this.source.connect(this.analyser);

    this.recorder = new Recorder({
      encoderPath,
      sourceNode: this.source,
      encoderSampleRate: 48000,
      numberOfChannels: 1,
      encoderApplication: 2048, // VOIP / voice
      encoderBitRate: 24000,
      encoderFrameSize: 20,
      maxFramesPerPage: 40,
      resampleQuality: 3,
      streamPages: false,
    });
    this.chunks = [];
    this.amplitudes = [];
    this.recorder.ondataavailable = (data) => {
      // Copy: keep our own buffer regardless of what the recorder does with its.
      this.chunks.push(new Uint8Array(data));
    };

    await this.recorder.start();
    if (this.cancelled) {
      this.cleanup();
      return;
    }

    this.startedAt = performance.now();
    const buf = new Uint8Array(this.analyser.fftSize);
    this.sampleTimer = window.setInterval(() => {
      if (!this.analyser) return;
      this.analyser.getByteTimeDomainData(buf);
      // Byte time-domain samples are centred on 128; take the largest swing.
      let peak = 0;
      for (let i = 0; i < buf.length; i += 1) {
        peak = Math.max(peak, Math.abs(buf[i] - 128));
      }
      const level = peak / 128;
      this.amplitudes.push(level);
      this.onAmplitude?.(level);
    }, SAMPLE_INTERVAL_MS);
  }

  /**
   * Finish the recording and release the mic.
   *
   * @returns The encoded blob, the recorded length in ms (0 if the encoder
   *   never started) and the waveform (`[]` when no recorder was active).
   */
  async stop(): Promise<VoiceRecordingResult> {
    // Invalidate a start() that is still in flight: without this it would keep
    // going after we return and acquire the mic / sampling timer with no owner.
    this.cancelled = true;
    this.clearTimer();
    const rec = this.recorder;
    // startedAt is 0 until the encoder is running; measuring from 0 would
    // report the time since page load instead of a recording length.
    const durationMs =
      this.startedAt > 0 ? Math.max(0, Math.round(performance.now() - this.startedAt)) : 0;

    try {
      if (!rec) {
        return { blob: this.buildBlob(), durationMs, waveform: [] };
      }

      // opus-recorder delivers the final ogg pages via ondataavailable then calls
      // onstop; its stop() promise resolves on the same "done". Resolve on
      // whichever of {onstop, stop()-settles, a safety timeout} comes first so we
      // ALWAYS fall through to cleanup() and release the mic — even if a callback
      // misfires on some WebView. opus-recorder does NOT own our getUserMedia
      // stream (we pass a sourceNode), so cleanup() is the only thing that stops
      // the mic track.
      await new Promise<void>((resolve) => {
        let done = false;
        const finish = () => {
          if (done) return;
          done = true;
          resolve();
        };
        rec.onstop = finish;
        const timer = window.setTimeout(finish, 1500);
        try {
          rec
            .stop()
            .then(finish)
            .catch(finish)
            .finally(() => window.clearTimeout(timer));
        } catch {
          // stop() threw synchronously — nothing more will arrive.
          window.clearTimeout(timer);
          finish();
        }
      });

      const blob = this.buildBlob();
      const waveform = buildWaveform(this.amplitudes, WAVEFORM_POINTS);
      return { blob, durationMs, waveform };
    } finally {
      // Runs after the result above has been built, on success and on failure.
      this.cleanup();
    }
  }

  /** Abort the session: discard recorded data and release the mic right away. */
  cancel(): void {
    // Mark cancelled so an in-flight startInternal() (awaiting getUserMedia /
    // encoder-worker init) releases whatever it acquires late instead of
    // leaking it.
    this.cancelled = true;
    this.clearTimer();
    // Release the mic immediately. opus-recorder's own stop() is fire-and-forget
    // here (it never touches our stream); cleanup() stops the track right away.
    try {
      this.recorder?.stop().catch(() => undefined);
    } catch {
      // already stopped — ignore
    }
    this.chunks = [];
    this.cleanup();
  }

  /** Concatenate the collected chunks into a single `audio/ogg` blob. */
  private buildBlob(): Blob {
    const total = this.chunks.reduce((n, c) => n + c.length, 0);
    const merged = new Uint8Array(total);
    let offset = 0;
    this.chunks.forEach((c) => {
      merged.set(c, offset);
      offset += c.length;
    });
    return new Blob([merged], { type: 'audio/ogg' });
  }

  /** Stop the amplitude sampling interval, if one is running. */
  private clearTimer(): void {
    if (this.sampleTimer !== undefined) {
      window.clearInterval(this.sampleTimer);
      this.sampleTimer = undefined;
    }
  }

  /**
   * Release every browser resource held by the session (timer, audio graph,
   * mic tracks, AudioContext) and reset the references. Safe to call more than
   * once. Recorded chunks/amplitudes are intentionally left in place so stop()
   * can still read them.
   */
  private cleanup(): void {
    this.clearTimer();
    this.source?.disconnect();
    this.analyser?.disconnect();
    this.stream?.getTracks().forEach((track) => track.stop());
    if (this.audioContext && this.audioContext.state !== 'closed') {
      this.audioContext.close().catch(() => undefined);
    }
    this.recorder = null;
    this.source = null;
    this.analyser = null;
    this.stream = null;
    this.audioContext = null;
    this.onAmplitude = null;
    this.startedAt = 0;
  }
}

35
src/ext.d.ts vendored
View file

@ -35,3 +35,38 @@ declare module '*.svg' {
const content: string; const content: string;
export default content; export default content;
} }
// opus-recorder ships no types. We only touch a small slice of the API: the
// constructor config, start/stop, and the data/stop callbacks. See
// docs/plans/voice_messages.md §3 (Phase 2).
// NOTE(review): this is a hand-written partial declaration, not the library's
// full surface — confirm against the upstream README before relying on a
// member that is not used by the recorder in this codebase.
declare module 'opus-recorder' {
// Options accepted by the Recorder constructor; every field is optional.
export interface OpusRecorderConfig {
// URL of the encoder worker script (imported as a `?url` asset by the caller).
encoderPath?: string;
// Caller-supplied source node to record from.
sourceNode?: MediaStreamAudioSourceNode;
encoderSampleRate?: number;
numberOfChannels?: number;
// The voice recorder passes 2048, annotated there as "VOIP / voice".
encoderApplication?: number;
encoderBitRate?: number;
encoderFrameSize?: number;
maxFramesPerPage?: number;
resampleQuality?: number;
streamPages?: boolean;
mediaTrackConstraints?: boolean | MediaTrackConstraints;
}
export default class Recorder {
constructor(config?: OpusRecorderConfig);
// Receives encoded output; the voice recorder copies each payload into its chunk list.
ondataavailable: ((data: Uint8Array) => void) | null;
onstart: (() => void) | null;
// The voice recorder treats this as one of the "recording finished" signals.
onstop: (() => void) | null;
start(): Promise<void>;
stop(): Promise<void>;
readonly audioContext?: AudioContext;
}
}

View file

@ -6,6 +6,21 @@ export const MATRIX_SPOILER_PROPERTY_NAME = 'page.codeberg.everypizza.msc4193.sp
export const MATRIX_SPOILER_REASON_PROPERTY_NAME = export const MATRIX_SPOILER_REASON_PROPERTY_NAME =
'page.codeberg.everypizza.msc4193.spoiler.reason'; 'page.codeberg.everypizza.msc4193.spoiler.reason';
// Content keys that identify and describe a voice note. Both Vojo-native and
// Telegram-bridged voice notes carry them. See docs/plans/voice_messages.md §2.1.

// MSC3245 marker: when set, the m.audio event is rendered as a voice note.
export const MATRIX_VOICE_PROPERTY_NAME = 'org.matrix.msc3245.voice';

// Older unstable marker that some clients used before msc3245.voice.
export const MATRIX_VOICE_LEGACY_PROPERTY_NAME = 'org.matrix.msc2516.voice';

// MSC1767 extensible-audio block carrying duration + waveform.
export const MATRIX_AUDIO_PROPERTY_NAME = 'org.matrix.msc1767.audio';

/**
 * Single source of truth for "is this m.audio a voice note?". Shared by the
 * renderer (RenderMessageContent → VoiceContent) and the layout (Message.tsx
 * bubble-collapse) so the two never disagree. See docs/plans/voice_messages.md §5.
 *
 * @param content Event content, if any.
 * @returns `true` when either voice marker key holds a truthy value.
 */
export const isVoiceMessageContent = (content: Record<string, unknown> | undefined): boolean => {
  if (!content) return false;
  const markerKeys = [MATRIX_VOICE_PROPERTY_NAME, MATRIX_VOICE_LEGACY_PROPERTY_NAME];
  return markerKeys.some((key) => Boolean(content[key]));
};
export type IImageInfo = { export type IImageInfo = {
w?: number; w?: number;
h?: number; h?: number;
@ -28,6 +43,12 @@ export type IAudioInfo = {
duration?: number; duration?: number;
}; };
// MSC1767 `org.matrix.msc1767.audio`: duration (ms) + waveform (integers 0..1024).
// Both fields are optional, so readers must tolerate either being absent.
export type IMSC1767Audio = {
// length of the audio in milliseconds
duration?: number;
// amplitude samples, integers 0..1024
waveform?: number[];
};
export type IFileInfo = { export type IFileInfo = {
mimetype?: string; mimetype?: string;
size?: number; size?: number;
@ -72,6 +93,9 @@ export type IAudioContent = {
url?: string; url?: string;
info?: IAudioInfo; info?: IAudioInfo;
file?: IEncryptedFile; file?: IEncryptedFile;
// Voice-note keys — present when this audio is a voice message.
[MATRIX_VOICE_PROPERTY_NAME]?: Record<string, never>;
[MATRIX_AUDIO_PROPERTY_NAME]?: IMSC1767Audio;
}; };
export type IFileContent = { export type IFileContent = {

View file

@ -49,6 +49,11 @@ export enum StateEvent {
PoniesRoomEmotes = 'im.ponies.room_emotes', PoniesRoomEmotes = 'im.ponies.room_emotes',
PowerLevelTags = 'in.vojo.room.power_level_tags', PowerLevelTags = 'in.vojo.room.power_level_tags',
// Vojo per-room "voice messages allowed" preference. Soft, client-side,
// Vojo↔Vojo: a participant (1:1) or admin (group) writes `{ enabled: false,
// disabled_by }` to block voice messages in the room. Absent OR
// `enabled !== false` ⇒ allowed (default-on). See docs/plans/voice_messages.md.
VoiceMessages = 'in.vojo.room.voice_messages',
} }
export enum MessageEvent { export enum MessageEvent {

View file

@ -25,6 +25,7 @@ declare module 'matrix-js-sdk' {
interface StateEvents { interface StateEvents {
'im.ponies.room_emotes': unknown; 'im.ponies.room_emotes': unknown;
'in.vojo.room.power_level_tags': unknown; 'in.vojo.room.power_level_tags': unknown;
'in.vojo.room.voice_messages': unknown;
// MSC2346 bridge metadata — stable + unstable variants. Mautrix-* and // MSC2346 bridge metadata — stable + unstable variants. Mautrix-* and
// similar bridges write at least one; SDK ships no built-in typing. // similar bridges write at least one; SDK ships no built-in typing.
'm.bridge': unknown; 'm.bridge': unknown;