feat: webchat 支持语音输入

2025-01-11 18:54:40 +08:00
parent be662b913c
commit a09998f910
3 changed files with 148 additions and 28 deletions
@@ -5,7 +5,7 @@ import os
 from typing import Awaitable, Any
 from astrbot.api.platform import Platform, AstrBotMessage, MessageMember, MessageType, PlatformMetadata
 from astrbot.api.event import MessageChain
-from astrbot.api.message_components import Plain, Image  # noqa: F403
+from astrbot.api.message_components import Plain, Image, Record  # noqa: F403
 from astrbot.api import logger
 from astrbot.core import web_chat_queue, web_chat_back_queue
 from .webchat_event import WebChatMessageEvent
@@ -70,6 +70,14 @@ class WebChatAdapter(Platform):
                    abm.message.append(Image.fromFileSystem(os.path.join(self.imgs_dir, img)))
            else:
                abm.message.append(Image.fromFileSystem(os.path.join(self.imgs_dir, payload['image_url'])))
+        if payload['audio_url']:
+            if isinstance(payload['audio_url'], list):
+                for audio in payload['audio_url']:
+                    path = os.path.join(self.imgs_dir, audio)
+                    abm.message.append(Record(file=path, path=path))
+            else:
+                path = os.path.join(self.imgs_dir, payload['audio_url'])
+                abm.message.append(Record(file=path, path=path))
            
        logger.debug(f"WebChatAdapter: {abm.message}")
        
@@ -17,11 +17,14 @@ class ChatRoute(Route):
            '/chat/get_conversation': ('GET', self.get_conversation),
            '/chat/delete_conversation': ('GET', self.delete_conversation),
            '/chat/get_file': ('GET', self.get_file),
-            '/chat/post_image': ('POST', self.post_image)
+            '/chat/post_image': ('POST', self.post_image),
+            '/chat/post_file': ('POST', self.post_file)
        }
        self.db = db
        self.register_routes()
        self.imgs_dir = "data/webchat/imgs"
+        
+        self.supported_imgs = ['jpg', 'jpeg', 'png', 'gif', 'webp']
    
    async def get_file(self):
        filename = request.args.get('filename')
@@ -30,7 +33,13 @@ class ChatRoute(Route):
        
        try:
            with open(os.path.join(self.imgs_dir, filename), "rb") as f:
-                return QuartResponse(f.read(), mimetype="image/jpeg")
+                if filename.endswith(".wav"):
+                    return QuartResponse(f.read(), mimetype="audio/wav")
+                elif filename.split('.')[-1] in self.supported_imgs:
+                    return QuartResponse(f.read(), mimetype="image/jpeg")
+                else:
+                    return QuartResponse(f.read())
+                
        except FileNotFoundError:
            return Response().error("File not found").__dict__
        
@@ -47,6 +56,25 @@ class ChatRoute(Route):
        return Response().ok(data={
            'filename': filename
        }).__dict__
+        
+    async def post_file(self):
+        post_data = await request.files
+        if 'file' not in post_data:
+            return Response().error("Missing key: file").__dict__
+        
+        file = post_data['file']
+        filename = f"{str(uuid.uuid4())}"
+        print(file)
+        # 通过文件格式判断文件类型
+        if file.content_type.startswith('audio'):
+            filename += ".wav"
+        
+        path = os.path.join(self.imgs_dir, filename)
+        await file.save(path)
+        
+        return Response().ok(data={
+            'filename': filename
+        }).__dict__

    async def chat(self):
        username = g.get('username', 'guest')
@@ -61,14 +89,16 @@ class ChatRoute(Route):
        message = post_data['message']
        conversation_id = post_data['conversation_id']
        image_url = post_data.get('image_url')
-        if not message and not image_url:
-            return Response().error("Message and image_url are empty").__dict__
+        audio_url = post_data.get('audio_url')
+        if not message and not image_url and not audio_url:
+            return Response().error("Message and image_url and audio_url are empty").__dict__
        if not conversation_id:
            return Response().error("conversation_id is empty").__dict__
        
        await web_chat_queue.put((username, conversation_id, {
            'message': message,
-            'image_url': image_url # list
+            'image_url': image_url, # list
+            'audio_url': audio_url
        }))
        
        async def stream():
@@ -98,6 +128,8 @@ class ChatRoute(Route):
            }
            if image_url:
                new_his['image_url'] = image_url
+            if audio_url:
+                new_his['audio_url'] = audio_url
            history.append(new_his)
            for r in ret:
                history.append({
@@ -58,13 +58,21 @@ marked.setOptions({
                                    <div
                                        style="padding: 12px; border-radius: 8px; background-color: rgba(94, 53, 177, 0.15)">
                                        <span>{{ msg.message }}</span>
-                                        <div style="display: flex; gap: 8px; margin-top: 8px;" v-if="msg.image_url && msg.image_url.length > 0">
+                                        <div style="display: flex; gap: 8px; margin-top: 8px;"
+                                            v-if="msg.image_url && msg.image_url.length > 0">
                                            <div v-for="(img, index) in msg.image_url" :key="index"
                                                style="position: relative; display: inline-block;">
                                                <img :src="img"
                                                    style="width: 100px; height: 100px; border-radius: 8px; box-shadow: 0 0 5px rgba(0, 0, 0, 0.1);" />
                                            </div>
                                        </div>
+                                        <!-- audio -->
+                                        <div>
+                                            <audio controls v-if="msg.audio_url && msg.audio_url.length > 0">
+                                                <source :src="msg.audio_url" type="audio/wav">
+                                                Your browser does not support the audio element.
+                                            </audio>
+                                        </div>
                                    </div>
                                </div>
                                <div v-else style="display: flex; justify-content: flex-start; gap: 16px;">
@@ -79,26 +87,28 @@ marked.setOptions({

                        <div
                            style="width: 100%; justify-content: center; align-items: center; display: flex; flex-direction: column; margin-top: 8px;">
-                            
-                            <v-text-field id="input-field" variant="outlined" v-model="prompt" label="聊天吧!"
+
+                            <v-text-field id="input-field" variant="outlined" v-model="prompt" :label="inputFieldLabel"
                                placeholder="Start typing..." loading clear-icon="mdi-close-circle" clearable
                                @click:clear="clearMessage" @keyup.enter="sendMessage"
-                                style="width: 100%; max-width: 930px;">
+                                style="width: 100%; max-width: 850px;">
                                <template v-slot:loader>
-                                    <v-progress-linear
-                                    :active="loadingChat"
-                                    :color="color"
-                                    height="6"
-                                    indeterminate
-                                    ></v-progress-linear>
+                                    <v-progress-linear :active="loadingChat" :color="color" height="6"
+                                        indeterminate></v-progress-linear>
                                </template>

                                <template v-slot:append>
                                    <v-icon @click="sendMessage" size="35" icon="mdi-arrow-up-circle" />
+                                    <v-tooltip text="语音输入">
+                                        <template v-slot:activator="{ props }">
+                                            <v-icon :color="isRecording ? 'error' : ''" v-bind="props" @click="isRecording ? stopRecording() : startRecording()" size="35" icon="mdi-record-circle" />
+                                        </template>
+                                    </v-tooltip>
+                                    
                                </template>
                            </v-text-field>

-                            <div>
+                            <div style="display: flex; gap: 8px; margin-top: -8px;">
                                <div v-for="(img, index) in stagedImagesUrl" :key="index"
                                    style="position: relative; display: inline-block;">
                                    <img :src="img"
@@ -106,6 +116,14 @@ marked.setOptions({
                                    <v-icon @click="removeImage(index)" size="20" color="red"
                                        style="position: absolute; top: 0; right: 0; cursor: pointer;">mdi-close-circle</v-icon>
                                </div>
+                                <div style="display: inline-block; width: 50px; height: 50px;">
+                                    <div v-if="stagedAudioUrl" style="position: relative; padding: 6px; border-radius: 8px; background-color: rgba(94, 53, 177, 0.15); display: inline-block;">
+                                        新录音
+                                        <v-icon @click="removeAudio" size="20" color="red"
+                                            style="position: absolute; top: 0; right: 0; cursor: pointer;">mdi-close-circle</v-icon>
+                                    </div>
+                                    
+                                </div>
                            </div>
                        </div>
                    </div>
@@ -128,7 +146,14 @@ export default {
            conversations: [],
            currCid: '',
            stagedImagesUrl: [],
-            loadingChat: false
+            loadingChat: false,
+
+            inputFieldLabel: '聊天吧!',
+
+            isRecording: false,
+            audioChunks: [],
+            stagedAudioUrl: "",
+            mediaRecorder: null
        }
    },

@@ -136,10 +161,54 @@ export default {
        this.getConversations();
        let inputField = document.getElementById('input-field');
        inputField.addEventListener('paste', this.handlePaste);
-
    },

    methods: {
+
+        removeAudio() {
+            this.stagedAudioUrl = null;
+        },
+
+        async startRecording() {
+            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+            this.mediaRecorder = new MediaRecorder(stream);
+            this.mediaRecorder.ondataavailable = (event) => {
+                this.audioChunks.push(event.data);
+            };
+            this.mediaRecorder.start();
+            this.isRecording = true;
+            this.inputFieldLabel = "录音中，请说话...";
+        },
+
+        async stopRecording() {
+            this.isRecording = false;
+            this.inputFieldLabel = "聊天吧!";
+            this.mediaRecorder.stop();
+            this.mediaRecorder.onstop = async () => {
+                const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
+                this.audioChunks = [];
+
+                const formData = new FormData();
+                formData.append('file', audioBlob);
+
+                try {
+                    const response = await axios.post('/api/chat/post_file', formData, {
+                        headers: {
+                            'Content-Type': 'multipart/form-data',
+                            'Authorization': 'Bearer ' + localStorage.getItem('token')
+                        }
+                    });
+
+                    const audio = response.data.data.filename;
+                    console.log('Audio uploaded:', audio);
+
+                    this.stagedAudioUrl = `/api/chat/get_file?filename=${audio}`;
+                } catch (err) {
+                    console.error('Error uploading audio:', err);
+                }
+            };
+        },
+
        async handlePaste(event) {
            console.log('Pasting image...');
            const items = event.clipboardData.items;
@@ -198,6 +267,9 @@ export default {
                            message[i].image_url[j] = `/api/chat/get_file?filename=${message[i].image_url[j]}`;
                        }
                    }
+                    if (message[i].audio_url) {
+                        message[i].audio_url = `/api/chat/get_file?filename=${message[i].audio_url}`;
+                    }
                }
                this.messages = message;
            }).catch(err => {
@@ -250,24 +322,26 @@ export default {
            this.messages.push({
                type: 'user',
                message: this.prompt,
-                image_url: this.stagedImagesUrl
+                image_url: this.stagedImagesUrl,
+                audio_url: this.stagedAudioUrl
            });

-            // let bot_resp = {
-            //     type: 'bot',
-            //     message: ref('')
-            // }
-
-            // this.messages.push(bot_resp);
-
            this.scrollToBottom();

+            // images
            let image_filenames = [];
            for (let i = 0; i < this.stagedImagesUrl.length; i++) {
                let img = this.stagedImagesUrl[i].replace('/api/chat/get_file?filename=', '');
                image_filenames.push(img);
            }

+            // audio
+            let audio_filenames = [];
+            if (this.stagedAudioUrl) {
+                let audio = this.stagedAudioUrl.replace('/api/chat/get_file?filename=', '');
+                audio_filenames.push(audio);
+            }
+
            this.loadingChat = true;


@@ -277,11 +351,17 @@ export default {
                    'Content-Type': 'application/json',
                    'Authorization': 'Bearer ' + localStorage.getItem('token')
                },
-                body: JSON.stringify({ message: this.prompt, conversation_id: this.currCid, image_url: image_filenames })  // 发送请求体
+                body: JSON.stringify({ 
+                    message: this.prompt, 
+                    conversation_id: this.currCid, 
+                    image_url: image_filenames,
+                    audio_url: audio_filenames
+                })  // 发送请求体
            })
                .then(response => {
                    this.prompt = '';
                    this.stagedImagesUrl = [];
+                    this.stagedAudioUrl = "";

                    this.loadingChat = false;