NodeJS Whisper API from a recorded audio blob

rslzwgfq posted on 2023-06-22 in Node.js

I am building a transcriber with Node.js and React using the OpenAI Whisper API. I want users to be able to record an audio file in the browser and have their recording transcribed. I do this by saving the buffer data of their recorded audio blob to an mp3 file and then passing fs.createReadStream(recorded_audio_file.mp3) to the createTranscription() API call, but that call returns a 400 error. When I record an audio file with the Windows voice recorder and pass that file in instead, the API call works fine. A sketch of that working call is below, followed by my recorder component in React.
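
For reference, the working case with a file saved by the Windows voice recorder is essentially the following sketch. It assumes the openai v3 Node SDK and a valid OPENAI_API_KEY environment variable; the client setup shown here is illustrative and not part of my actual code.

import fs from "fs";
import { Configuration, OpenAIApi } from "openai";

// Assumption: openai v3 SDK with a valid OPENAI_API_KEY in the environment.
const openai = new OpenAIApi(new Configuration({ apiKey: process.env.OPENAI_API_KEY }));

const transcribeLocalFile = async () => {
  // recorded_audio_file.mp3 is the file saved with the Windows voice recorder.
  const whisperRes = await openai.createTranscription(
    fs.createReadStream("recorded_audio_file.mp3"),
    "whisper-1"
  );
  console.log(whisperRes.data.text);
};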

import React, { useState, useEffect, useRef } from "react";

import Microphone from "./Microphone/Microphone";
const TSST = () => {
  const BASE_URL = process.env.REACT_APP_SERVER_URL || "http://localhost:5000";

  const mediaRecorder = useRef(null);
  const [stream, setStream] = useState(null);
  const [audioChunks, setAudioChunks] = useState([]);
  const [audio, setAudio] = useState(null);
  const [audioFile, setAudioFile] = useState(null);
  const [transcribtion, setTranscription] = useState("");
  const [audioBlob, setAudioBlob] = useState("");
  const [audioBuffer, setAudioBuffer] = useState("");

  useEffect(() => {
    const initializeMediaRecorder = async () => {
      if ("MediaRecorder" in window) {
        try {
            const streamData = await navigator.mediaDevices.getUserMedia({ audio: true });
            setStream(streamData);
        } catch (err) {
            console.log(err.message);
        }
      } else {
          console.log("The MediaRecorder API is not supported in your browser.");
      }
    }

    initializeMediaRecorder();
  }, [])

  const handleStartRecording = () => {
    const media = new MediaRecorder(stream, { type: "audio/mp3" });

    mediaRecorder.current = media;
    mediaRecorder.current.start();

    let chunks = [];
    mediaRecorder.current.ondataavailable = (e) => {
       chunks.push(e.data);
    };
    setAudioChunks(chunks);
  }
  const handleStopRecording = () => {
    mediaRecorder.current.stop();
    mediaRecorder.current.onstop = () => {
      const audioBlob = new Blob(audioChunks, { type: "audio/mp3" });
      const audioUrl = URL.createObjectURL(audioBlob);

      setAudioBlob(audioBlob)
      setAudio(audioUrl);
      setAudioChunks([]);

      let file = new File([audioUrl], "recorded_audio.mp3",{type:"audio/mp3", lastModified:new Date().getTime()});
      let container = new DataTransfer();
      container.items.add(file);
      document.getElementById("audioFile").files = container.files;
      setAudioFile(container.files[0]);

      console.log(file);
    };
  }

  const handleSubmitRecording = async () => {
    try {
      // Assuming you have an audio blob called 'audioBlob'

      // Convert the audio blob to a base64 string
      const reader = new FileReader();
      reader.onloadend = async () => {
        const base64String = reader.result.split(',')[1]; // Extract base64 data from the result
        const res = await fetch(`${BASE_URL}/api/openai/transcriber`, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify({ audioBuffer: base64String, lang: "en" })
        })
        const data = await res.json();
        setTranscription(data);
      };
      reader.readAsDataURL(audioBlob);

    } catch (error) {
      console.log(error);

    } finally {
    }
  }

    return (
      <div className="h-[calc(100vh-73px)] flex justify-center items-center">
        <div className="w-[40%] flex justify-between items-center">
          <div className="flex flex-col">
            <Microphone startFunction={ handleStartRecording } stopFunction={ handleStopRecording } />
            <button onClick={handleStartRecording} className="w-fit my-10 p-5 bg-gray-200 rounded-lg">Start Recording</button>
            <button onClick={handleStopRecording} className="w-fit mb-10 p-5 bg-gray-200 rounded-lg">Stop Recording</button>

            <audio className="mb-10" src={audio && audio} controls></audio>
            <input id="audioFile" type="file" onChange={ (e) => {setAudioFile(e.target.files[0])}}/>
          </div>
          
          <div>
            <button className="p-10 bg-yellow-500 rounded-xl" onClick={ handleSubmitRecording } >Submit</button>
          </div>
        </div>

        <div className="w-[40%] flex justify-center items-center">
          <textarea value={transcribtion} readOnly className="w-[60%] aspect-square resize-none shadow-lg shadow-black"></textarea>
        </div>
      </div>
    );
};
export default TSST;

Here is the API:

export const transcribe = async (req, res) => {
    // const lang = JSON.parse(req.body.json).lang;
    // const audioBuffer = req.file;
    const { audioBuffer, lang} = req.body;

    const audioBufferBase64 = Buffer.from(audioBuffer, 'base64');

    const fileName = "test.mp3";
    const folderName = `./audio/${fileName}`

    const writableStream = fs.createWriteStream(folderName); // Replace with your desired file path and extension
    writableStream.write(audioBufferBase64);

    const readStream = fs.createReadStream(folderName);

    readStream.on('data', (data) => {
        console.log('Read stream data:', data);
    });

    try {
        const whisperRes = await openai.createTranscription(
            readStream,
            "whisper-1",
        )

        const chatResponse = whisperRes.data.text;
        console.log(chatResponse)

        res.status(200).json({ chatResponse: chatResponse });
    } catch (error) {
        //console.log(error);
        res.status(500).json({ message: error });
    }
}

And here is the server:

import express from "express";
import cors from "cors";
import * as dotenv from "dotenv";
import mongoose from "mongoose";
import multer from "multer";

import { dalle, chatGPT, summarize, translate, transcribe } from "./api/openai.js";
import { getImages, postImage } from "./api/imageShowcase.js";
import { login, signup } from "./api/user.js";

dotenv.config();

const app = express();
const upload = multer();
const storage = multer.memoryStorage();
const uploadMiddleware = multer({ storage: storage });

app.use(cors());
app.use(express.json({limit: '50mb'}));

const atlasURL = process.env.MONGODB_URL;    
const PORT = process.env.PORT || 5000;

mongoose.connect(atlasURL)
    .then(() => app.listen(PORT, () => console.log(`Successfully connected to port ${PORT}`)))
    .catch(error => console.log("There was an error: ", error));

app.get("/", async (req, res) => {
    res.send("Server is RUNNING");
})

app.post("/api/openai/transcriber",(req, res) => transcribe(req, res));

The mp3 file itself is saved correctly, and the API key is valid: when I record an mp3 myself with the Windows voice recorder and pass it to createReadStream, everything works fine. The saved file data is a buffer of the following form.
I have tried changing how the file is saved and using different encodings for the buffer (binary, hex, base64). I have tried uploading the buffer directly to the Whisper API and posting directly to the API URL with axios. I have also tried wrapping the mp3 save in a promise before calling createReadStream, reading straight from the buffer, and many other small variations. None of the similar questions and answers I have found has helped.
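
For reference, the promise-wrapped save mentioned above looked roughly like the sketch below; the variable names (audioBufferBase64, folderName) follow the transcribe handler shown earlier, and it did not make a difference for the 400 error.

import fs from "fs";

// Wrap the mp3 save in a promise so the read stream is only created
// after all data has been flushed to disk.
const writeAudioFile = (filePath, buffer) =>
  new Promise((resolve, reject) => {
    const writableStream = fs.createWriteStream(filePath);
    writableStream.on("finish", resolve); // fires once the file is fully written
    writableStream.on("error", reject);
    writableStream.end(buffer); // write the buffer and close the stream
  });

// Inside the handler:
// await writeAudioFile(folderName, audioBufferBase64);
// const readStream = fs.createReadStream(folderName);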

Answer (ghg1uchk):

Just call a transcribeAudio function inside the try/catch of your transcribe function.
Also, make sure you can actually create the .mp3 file locally and try playing it back. Sometimes the audio file itself is not valid, and that causes problems when the code runs.

try {
    // Pass the path of the saved mp3 file (folderName in the handler above);
    // transcribeAudio opens its own read stream from that path.
    const whisperRes = await transcribeAudio(folderName);

    const chatResponse = whisperRes.data.text;
    console.log(chatResponse);

    res.status(200).json({ chatResponse: chatResponse });
} catch (error) {
    //console.log(error);
    res.status(500).json({ message: error });
}

import fs from "fs";
import FormData from "form-data";
import axios from "axios";

const transcribeAudio = async (file) => {
  let data = new FormData();

  data.append("file", fs.createReadStream(file));
  data.append("model", "whisper-1");
  data.append("language", "en");

  let config = {
    method: "post",
    maxBodyLength: Infinity,
    url: "https://api.openai.com/v1/audio/transcriptions",
    headers: {
      Authorization:
        `Bearer ${process.env.OPENAI_API_KEY}`,
      "Content-Type": "multipart/form-data",
      ...data.getHeaders(),
    },
    data: data,
  };

  try {
    const response = await axios.request(config);
    const data = response.data;

    return { data };
  } catch (error) {
    return {};
  }
};
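
Following up on the advice about making sure the .mp3 file is actually created locally, a minimal sanity check before calling transcribeAudio might look like the sketch below. fileExistsAndNonEmpty is a hypothetical helper added here for illustration and is not part of the code above.

import { promises as fsp } from "fs";

// Hypothetical helper: confirm the saved recording exists and is not empty
// before handing its path to transcribeAudio.
const fileExistsAndNonEmpty = async (filePath) => {
  try {
    const stats = await fsp.stat(filePath);
    return stats.size > 0;
  } catch {
    return false; // file does not exist or is not readable
  }
};

// Usage inside the transcribe handler, where folderName is the saved file's path:
// if (await fileExistsAndNonEmpty(folderName)) {
//   const whisperRes = await transcribeAudio(folderName);
// }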
