我正在使用 Node.js 和 React 中的 OpenAI Whisper API 创建一个转录器。我希望用户能够在浏览器中录制音频并转录他们的录音。我的做法是:把录制到的音频 blob 的缓冲区数据保存为一个 mp3 文件,然后在调用 createTranscription() API 时传入 fs.createReadStream(recorded_audio_file.mp3),结果它返回 400 错误。而当我用 Windows 录音机录制一个音频文件并传入该文件时,API 调用就可以正常工作。以下是我在 React 中的录音组件:
import React, { useState, useEffect, useRef } from "react";
import Microphone from "./Microphone/Microphone";
const TSST = () => {
const BASE_URL = process.env.REACT_APP_SERVER_URL || "http://localhost:5000";
const mediaRecorder = useRef(null);
const [stream, setStream] = useState(null);
const [audioChunks, setAudioChunks] = useState([]);
const [audio, setAudio] = useState(null);
const [audioFile, setAudioFile] = useState(null);
const [transcribtion, setTranscription] = useState("");
const [audioBlob, setAudioBlob] = useState("");
const [audioBuffer, setAudioBuffer] = useState("");
useEffect(() => {
const initializeMediaRecorder = async () => {
if ("MediaRecorder" in window) {
try {
const streamData = await navigator.mediaDevices.getUserMedia({ audio: true });
setStream(streamData);
} catch (err) {
console.log(err.message);
}
} else {
console.log("The MediaRecorder API is not supported in your browser.");
}
}
initializeMediaRecorder();
}, [])
const handleStartRecording = () => {
const media = new MediaRecorder(stream, { type: "audio/mp3" });
mediaRecorder.current = media;
mediaRecorder.current.start();
let chunks = [];
mediaRecorder.current.ondataavailable = (e) => {
chunks.push(e.data);
};
setAudioChunks(chunks);
}
const handleStopRecording = () => {
mediaRecorder.current.stop();
mediaRecorder.current.onstop = () => {
const audioBlob = new Blob(audioChunks, { type: "audio/mp3" });
const audioUrl = URL.createObjectURL(audioBlob);
setAudioBlob(audioBlob)
setAudio(audioUrl);
setAudioChunks([]);
let file = new File([audioUrl], "recorded_audio.mp3",{type:"audio/mp3", lastModified:new Date().getTime()});
let container = new DataTransfer();
container.items.add(file);
document.getElementById("audioFile").files = container.files;
setAudioFile(container.files[0]);
console.log(file);
};
}
const handleSubmitRecording = async () => {
try {
// Assuming you have an audio blob called 'audioBlob'
// Convert the audio blob to a base64 string
const reader = new FileReader();
reader.onloadend = async () => {
const base64String = reader.result.split(',')[1]; // Extract base64 data from the result
const res = await fetch(`${BASE_URL}/api/openai/transcriber`, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ audioBuffer: base64String, lang: "en" })
})
const data = await res.json();
setTranscription(data);
};
reader.readAsDataURL(audioBlob);
} catch (error) {
console.log(error);
} finally {
}
}
return (
<div className="h-[calc(100vh-73px)] flex justify-center items-center">
<div className="w-[40%] flex justify-between items-center">
<div className="flex flex-col">
<Microphone startFunction={ handleStartRecording } stopFunction={ handleStopRecording } />
<button onClick={handleStartRecording} className="w-fit my-10 p-5 bg-gray-200 rounded-lg">Start Recording</button>
<button onClick={handleStopRecording} className="w-fit mb-10 p-5 bg-gray-200 rounded-lg">Stop Recording</button>
<audio className="mb-10" src={audio && audio} controls></audio>
<input id="audioFile" type="file" onChange={ (e) => {setAudioFile(e.target.files[0])}}/>
</div>
<div>
<button className="p-10 bg-yellow-500 rounded-xl" onClick={ handleSubmitRecording } >Submit</button>
</div>
</div>
<div className="w-[40%] flex justify-center items-center">
<textarea value={transcribtion} readOnly className="w-[60%] aspect-square resize-none shadow-lg shadow-black"></textarea>
</div>
</div>
);
};
export default TSST;
以下是API:
/**
 * POST /api/openai/transcriber handler.
 * Decodes a base64 audio payload from the JSON body, writes it to disk,
 * and streams it to OpenAI Whisper (`whisper-1`) for transcription.
 *
 * Body: { audioBuffer: string (base64), lang?: string, mimeType?: string }
 * Responds 200 { chatResponse } on success, 400/500 { message } otherwise.
 *
 * Fixes vs. the original:
 *  - The file write is now awaited before reading. Previously
 *    `writableStream.write()` (async) was followed immediately by
 *    `createReadStream()` on the same path, and the write stream was never
 *    `end()`ed — the API often received an empty/partial file → 400.
 *  - The debug `readStream.on('data', ...)` listener switched the stream
 *    into flowing mode and consumed its bytes BEFORE the SDK could read
 *    them; it has been removed.
 *  - The extension now follows the real container (browsers record
 *    webm/mp4, not mp3); Whisper infers the format from the extension,
 *    so webm bytes in a ".mp3" file were rejected. Defaults to mp3 for
 *    backward compatibility with old clients.
 *  - Errors respond with `error.message` — a raw Error does not survive
 *    JSON.stringify, so the client used to receive `{ message: {} }`.
 *  - `lang` is now forwarded to Whisper (6th positional argument of the
 *    openai v3 SDK's createTranscription).
 */
export const transcribe = async (req, res) => {
  const { audioBuffer, lang, mimeType } = req.body;
  if (!audioBuffer) {
    return res.status(400).json({ message: "Missing audioBuffer in request body" });
  }
  try {
    const audioData = Buffer.from(audioBuffer, "base64");
    // Derive the extension from the reported MIME type, e.g. "audio/webm;codecs=opus" -> "webm".
    const ext = (mimeType || "audio/mp3").split("/")[1].split(";")[0];
    const filePath = `./audio/recording.${ext}`;
    // Ensure the folder exists and the file is FULLY written before reading it back.
    await fs.promises.mkdir("./audio", { recursive: true });
    await fs.promises.writeFile(filePath, audioData);
    const whisperRes = await openai.createTranscription(
      fs.createReadStream(filePath),
      "whisper-1",
      undefined, // prompt
      undefined, // response_format
      undefined, // temperature
      lang,      // language hint, e.g. "en"
    );
    const chatResponse = whisperRes.data.text;
    console.log(chatResponse);
    res.status(200).json({ chatResponse: chatResponse });
  } catch (error) {
    // Surface the OpenAI error body when present; it explains 400s precisely.
    console.error(error?.response?.data ?? error);
    res.status(500).json({ message: error.message });
  }
};
下面是服务器调用:
// ---------------------------------------------------------------------------
// Express server bootstrap: CORS + JSON body parsing, MongoDB Atlas
// connection, and the OpenAI / image-showcase / user route wiring.
// ---------------------------------------------------------------------------
import express from "express";
import cors from "cors";
import * as dotenv from "dotenv";
import mongoose from "mongoose";
import multer from "multer";
import { dalle, chatGPT, summarize, translate, transcribe } from "./api/openai.js";
import { getImages, postImage } from "./api/imageShowcase.js";
import { login, signup } from "./api/user.js";
// Load .env before any process.env reads below.
dotenv.config();
const app = express();
// NOTE(review): `upload` and `uploadMiddleware` are created but never
// attached to any route — multipart uploads are not actually handled;
// the transcriber route receives base64 JSON instead.
const upload = multer();
const storage = multer.memoryStorage();
const uploadMiddleware = multer({ storage: storage });
app.use(cors());
// 50mb limit: base64-encoded audio payloads easily exceed the 100kb default.
app.use(express.json({limit: '50mb'}));
const atlasURL = process.env.MONGODB_URL;
const PORT = process.env.PORT || 5000;
// Only start listening once the database connection succeeds.
mongoose.connect(atlasURL)
    .then(() => app.listen(PORT, () => console.log(`Successfully connected to port ${PORT}`)))
    .catch(error => console.log("There was an error: ", error));
// Health-check endpoint.
app.get("/", async (req, res) => {
    res.send("Server is RUNNING");
})
// Transcription endpoint — expects JSON { audioBuffer (base64), lang }.
app.post("/api/openai/transcriber",(req, res) => transcribe(req, res));
保存下来的 MP3 文件可以正常播放,API key 也是正确的:当我用 Windows 录音机自己录制一个 mp3 并用 createReadStream 读取它时,一切都工作正常。保存的文件数据是缓冲区(Buffer)形式。
我尝试过改变保存文件的方式,对缓冲区使用不同的编码方式(二进制、十六进制、base64);尝试过把缓冲区直接上传到 Whisper API;尝试过用 axios 直接 POST 到 API 的 URL;还尝试过把保存 mp3 文件的过程包装成一个 Promise,等它完成后再调用 createReadStream,以及许多其他的小改动;也试过直接从缓冲区中读取。我找到的所有类似问题的答案都没有帮助。
1条答案
按热度按时间ghg1uchk1#
只需在 transcribe 函数中用 try/catch 包裹对转录逻辑(transcribeAudio)的调用即可。此外,请确保您能够在本地正常创建该 .mp3 文件,并尝试播放它。有时音频文件本身就不正确,这会在执行代码时导致问题。