Commit a609db6

feat(webapp): virtual background

1 parent be0e994

8 files changed, +222 -4 lines changed

package-lock.json (+6)

Generated file; diff not rendered.

package.json (+1)

@@ -11,6 +11,7 @@
     "preview": "vite preview -c webapp/vite.config.ts"
   },
   "dependencies": {
+    "@mediapipe/tasks-vision": "^0.10.21",
     "copy-to-clipboard": "^3.3.3",
     "jotai": "^2.10.2",
     "jotai-devtools": "^0.10.1",

webapp/components/device.tsx (+26)

@@ -13,6 +13,7 @@ import SvgSpeaker from './svg/speaker'
 import SvgAudio from './svg/audio'
 import SvgVideo from './svg/video'
 import { SvgPresentCancel, SvgPresentToAll } from './svg/present'
+import { SvgBackgroundCancel, SvgBackground } from './svg/background'

 function toDevice(info: MediaDeviceInfo): Device {
   const deviceId = info.deviceId

@@ -31,11 +32,13 @@ export default function DeviceBar(props: { streamId: string }) {
   const [loadingAudio, setLoadingAudio] = useState(false)
   const [loadingVideo, setLoadingVideo] = useState(false)
   const [loadingScreen, setLoadingScreen] = useState(false)
+  const [loadingBackground, setLoadingBackground] = useState(false)

   const [currentDeviceSpeaker, setCurrentDeviceSpeaker] = useAtom(deviceSpeakerAtom)
   const [speakerStatus, setSpeakerStatus] = useAtom(speakerStatusAtom)

   const [settingsEnabledScreen] = useAtom(settingsEnabledScreenAtom)
+  const [virtualBackgroundEnabled, setVirtualBackgroundEnabled] = useState(false)

   const {
     userStatus,

@@ -45,6 +48,7 @@
     setCurrentDeviceVideo,
     toggleEnableAudio,
     toggleEnableVideo,
+    toggleEnableVirtualBackground,
   } = useWhipClient(props.streamId)

   const [deviceSpeaker, setDeviceSpeaker] = useState<Device[]>([deviceNone])

@@ -162,6 +166,9 @@
   const onChangedDeviceVideo = async (current: string) => {
     setLoadingVideo(true)
     await setCurrentDeviceVideo(current)
+    if (userStatus.screen) {
+      setVirtualBackgroundEnabled(false)
+    }
     setLoadingVideo(false)
   }

@@ -243,6 +250,9 @@
       <button className="text-rose-400 rounded-md w-8 h-8" onClick={async () => {
         setLoadingVideo(true)
         await toggleEnableVideo()
+        if (!userStatus.video && virtualBackgroundEnabled) {
+          setVirtualBackgroundEnabled(false)
+        }
         setLoadingVideo(false)
       }}>
         <center>{ loadingVideo ? <Loading/> : <SvgVideo/> }</center>

@@ -272,6 +282,22 @@
         )}
       </select>
     </section>
+
+    <section className="m-1 p-1 flex flex-row justify-center rounded-md border-1 border-indigo-500">
+      <button className="text-rose-400 rounded-md w-8 h-8" disabled={!userStatus.video || userStatus.screen} onClick={async () => {
+        setLoadingBackground(true)
+        await toggleEnableVirtualBackground()
+        setVirtualBackgroundEnabled(s => !s)
+        setLoadingBackground(false)
+      }}>
+        <center>
+          { loadingBackground
+            ? <Loading/>
+            : virtualBackgroundEnabled ? <SvgBackgroundCancel/> : <SvgBackground/>
+          }
+        </center>
+      </button>
+    </section>
   </center>
   {!settingsEnabledScreen && (
     <center>

webapp/components/svg/background.tsx (+15)

@@ -0,0 +1,15 @@
+export function SvgBackground() {
+  return (
+    <svg width="24" height="24" viewBox="0 0 24 24">
+      <rect x="2" y="2" width="20" height="20" stroke="currentColor" fill="none" strokeWidth="2"/>
+      <path fill="currentColor" d="M12 10c1.66 0 3-1.34 3-3s-1.34-3-3-3-3 1.34-3 3 1.34 3 3 3zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"/>
+    </svg>
+  )
+}
+export function SvgBackgroundCancel() {
+  return (
+    <svg width="24" height="24" viewBox="0 0 24 24">
+      <path fill="currentColor" d="M12 10c1.66 0 3-1.34 3-3s-1.34-3-3-3-3 1.34-3 3 1.34 3 3 3zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"/>
+    </svg>
+  )
+}
webapp/components/use/imageSegmentation.ts (+149)
@@ -0,0 +1,149 @@
+import { ImageSegmenter, FilesetResolver, ImageSegmenterResult } from '@mediapipe/tasks-vision'
+
+let imageSegmenter: ImageSegmenter
+let webcamRunning: boolean = false
+let streamForVirtualBackground: MediaStream | null = null
+
+const videoWidth = 480
+const videoHeight = 360
+
+// Create the background image element
+const backgroundImage = new Image()
+backgroundImage.src = './background.jpg'
+
+// Initialize the video element
+const video = document.createElement('video')
+const canvas = document.createElement('canvas')
+const canvasCtx = canvas.getContext('2d')!
+
+// Set the canvas size
+canvas.width = videoWidth
+canvas.height = videoHeight
+
+// Create a temporary canvas for processing video frames
+const tempCanvas = document.createElement('canvas')
+tempCanvas.width = videoWidth
+tempCanvas.height = videoHeight
+const tempCtx = tempCanvas.getContext('2d')!
+
+// Create the ImageSegmenter
+async function createImageSegmenter() {
+  try {
+    const vision = await FilesetResolver.forVisionTasks(
+      "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    )
+
+    imageSegmenter = await ImageSegmenter.createFromOptions(vision, {
+      baseOptions: {
+        modelAssetPath: "https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_multiclass_256x256/float32/latest/selfie_multiclass_256x256.tflite",
+        delegate: "GPU"
+      },
+      outputCategoryMask: true,
+      runningMode: "VIDEO"
+    })
+    console.log("Image segmenter created")
+  } catch (error) {
+    console.error("Failed to create image segmenter:", error)
+  }
+}
+
+function callbackForVideo(segmentationResult: ImageSegmenterResult) {
+  if (!segmentationResult || !segmentationResult.categoryMask) return
+  const imageData = tempCtx.getImageData(0, 0, videoWidth, videoHeight).data
+  // Read the segmentation result; categories are:
+  // 0 - background
+  // 1 - hair
+  // 2 - body-skin
+  // 3 - face-skin
+  // 4 - clothes
+  // 5 - others (accessories)
+  const maskData = segmentationResult.categoryMask.getAsUint8Array()
+
+  for (let i = 0; i < maskData.length; ++i) {
+    const maskVal = maskData[i]
+    const j = i * 4
+    // Make background-category pixels transparent
+    if (maskVal == 0) {
+      imageData[j + 3] = 0 // alpha channel: transparent
+    }
+  }
+
+  // Clear the main canvas
+  canvasCtx.clearRect(0, 0, videoWidth, videoHeight)
+
+  // Draw the background image
+  if (backgroundImage.complete && backgroundImage.naturalHeight !== 0) {
+    canvasCtx.drawImage(backgroundImage, 0, 0, videoWidth, videoHeight)
+  }
+
+  const uint8Array = new Uint8ClampedArray(imageData.buffer)
+  const dataNew = new ImageData(
+    uint8Array,
+    video.videoWidth,
+    video.videoHeight
+  )
+
+  // Draw the processed video frame onto the main canvas
+  tempCtx.putImageData(dataNew, 0, 0)
+  canvasCtx.drawImage(tempCanvas, 0, 0)
+
+  // Release resources
+  // segmentationResult.close();
+
+  window.requestAnimationFrame(predictWebcam)
+}
+
+// Process a video frame
+async function predictWebcam() {
+  if (!imageSegmenter || !webcamRunning) return
+  try {
+    // Draw the current video frame onto the temporary canvas
+    tempCtx.drawImage(video, 0, 0, videoWidth, videoHeight)
+    imageSegmenter.segmentForVideo(video, performance.now(), callbackForVideo)
+  } catch (error) {
+    console.error("Error while processing a video frame:", error)
+  }
+}
+
+async function enableSegmentation(deviceId: string) {
+  try {
+    if (!imageSegmenter) {
+      await createImageSegmenter()
+    }
+    // Start image segmentation
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: false, video: { width: 480, height: 360, deviceId: deviceId } })
+    video.srcObject = stream
+    video.onloadeddata = async () => {
+      video.play()
+      webcamRunning = true
+      await predictWebcam()
+      streamForVirtualBackground = canvas.captureStream()
+    }
+  } catch (error) {
+    console.error("Failed to start the camera:", error)
+  }
+}
+
+async function disableSegmentation() {
+  if (streamForVirtualBackground === null) return
+  webcamRunning = false
+  const stream = video.srcObject as MediaStream
+  const tracks = stream.getTracks()
+  tracks.forEach(track => track.stop())
+  video.srcObject = null
+  canvasCtx.clearRect(0, 0, videoWidth, videoHeight)
+  streamForVirtualBackground = null
+}
+
+async function asyncGetStreamForVirtualBackground(deviceId: string): Promise<MediaStream> {
+  await enableSegmentation(deviceId)
+  while (streamForVirtualBackground === null) {
+    await new Promise(resolve => setTimeout(resolve, 100)) // poll every 100 ms
+  }
+  return streamForVirtualBackground
+}
+
+export {
+  asyncGetStreamForVirtualBackground,
+  disableSegmentation,
+}
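For context, these two exports are the only surface the WHIP client (next file) touches. A minimal, hypothetical usage sketch (the helper names and calling code are illustrative, not part of the commit):

  import { asyncGetStreamForVirtualBackground, disableSegmentation } from './imageSegmentation'

  // Hypothetical consumer: publish the composited canvas stream instead of the raw camera feed.
  async function enableEffect(cameraDeviceId: string): Promise<MediaStream> {
    // Resolves once canvas.captureStream() is available (the module polls every 100 ms).
    return asyncGetStreamForVirtualBackground(cameraDeviceId)
  }

  async function disableEffect(): Promise<void> {
    // Stops the hidden camera, halts the segmentation loop, and clears the canvas.
    await disableSegmentation()
  }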

webapp/components/use/whip.ts (+19, -4)

@@ -5,9 +5,11 @@ import { WHIPClient } from 'whip-whep/whip'
 import {
   deviceNone,
   deviceScreen,
+  deviceSegmenter,
   asyncGetAudioStream,
   asyncGetVideoStream,
 } from '../../lib/device'
+import { disableSegmentation, asyncGetStreamForVirtualBackground } from './imageSegmentation'

 interface WHIPData extends Data {
   setUserName: (name: string) => void,

@@ -19,6 +21,8 @@
   setCurrentDeviceVideo: (current: string) => Promise<void>,
   toggleEnableAudio: () => Promise<void>,
   toggleEnableVideo: () => Promise<void>,
+
+  toggleEnableVirtualBackground: () => Promise<void>
 }

 class WHIPContext extends Context {

@@ -29,6 +33,8 @@
   currentDeviceVideo = deviceNone.deviceId
   toggleEnableAudio = async () => this.setCurrentDeviceAudio(this.userStatus.audio ? deviceNone.deviceId : this.currentDeviceAudio)
   toggleEnableVideo = async () => this.setCurrentDeviceVideo(this.userStatus.video ? deviceNone.deviceId : this.currentDeviceVideo)
+  toggleEnableVirtualBackground = async () => this.setCurrentDeviceVideo(this.virtualBackgroundEnabled ? this.currentDeviceVideo : deviceSegmenter.deviceId)
+  virtualBackgroundEnabled = false

   constructor(id: string) {
     super(id)

@@ -70,6 +76,8 @@
       setCurrentDeviceVideo: (current: string) => this.setCurrentDeviceVideo(current),
       toggleEnableAudio: () => this.toggleEnableAudio(),
       toggleEnableVideo: () => this.toggleEnableVideo(),
+
+      toggleEnableVirtualBackground: () => this.toggleEnableVirtualBackground(),
     }
   }

@@ -148,22 +156,29 @@
   async setCurrentDeviceVideo(current: string) {
     const { stream, setStream, userStatus, currentDeviceVideo } = this

-    if (current !== currentDeviceVideo || !userStatus.video) {
+    if (current !== currentDeviceVideo || !userStatus.video || this.virtualBackgroundEnabled) {
       // Closed old tracks
       stream.getVideoTracks().map(track => {
         track.stop()
         stream.removeTrack(track)
       })
-
-      const mediaStream = await asyncGetVideoStream(current)
+      let mediaStream: MediaStream
+      if (current === deviceSegmenter.deviceId) {
+        this.virtualBackgroundEnabled = true
+        mediaStream = await asyncGetStreamForVirtualBackground(current)
+      } else {
+        this.virtualBackgroundEnabled = false
+        await disableSegmentation()
+        mediaStream = await asyncGetVideoStream(current)
+      }
       const audioTracks = stream.getAudioTracks()
       const videoTracks = mediaStream.getVideoTracks()

       setStream(new MediaStream([...audioTracks, ...videoTracks]))
       userStatus.video = current === deviceNone.deviceId ? false : true
       // NOTE: screen share
       userStatus.screen = current !== deviceScreen.deviceId ? false : true
-      this.currentDeviceVideo = current === deviceNone.deviceId ? this.currentDeviceVideo : current
+      this.currentDeviceVideo = (current === deviceNone.deviceId || current === deviceSegmenter.deviceId) ? this.currentDeviceVideo : current

       this.sync()
       this.syncUserStatus(userStatus)
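Design note: the segmenter pseudo-device is never written to currentDeviceVideo, so turning the virtual background off returns to whichever real camera was selected before it was enabled. A rough illustration of the toggle flow, assuming a WHIPContext instance named ctx (illustrative only, not part of the commit):

  // Off -> on: select the segmenter pseudo-device; currentDeviceVideo still points at the real camera.
  await ctx.setCurrentDeviceVideo(deviceSegmenter.deviceId)

  // On -> off: re-select the remembered camera; this path calls disableSegmentation() and
  // reopens that device. toggleEnableVirtualBackground() switches between these two calls.
  await ctx.setCurrentDeviceVideo(ctx.currentDeviceVideo)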

webapp/lib/device.ts (+6)

@@ -13,6 +13,11 @@ const deviceScreen = {
   label: 'screen',
 }

+const deviceSegmenter = {
+  deviceId: 'segmenter',
+  label: 'segmenter'
+}
+
 async function asyncGetAudioStream(deviceId: string): Promise<MediaStream> {
   let stream: MediaStream = new MediaStream()
   if (deviceId !== 'none') {

@@ -38,6 +43,7 @@
   asyncGetVideoStream,
   deviceNone,
   deviceScreen,
+  deviceSegmenter,
 }

 export type {

webapp/public/background.jpg (168 KB, image not rendered)