NodeJS 将URL中的视频保存为Firebase存储中的MP3

brtdzjyr  于 11个月前  发布在  Node.js
关注(0)|答案(3)|浏览(120)

我正在解决一个过去几天一直困扰我的问题。我使用Node.js with Express(v4.18.2),目标是最终创建一个Firebase部署,它可以接收视频URL并将音频mp3输出到Firebase Storage。我已经取得了一些进展,但在某些方面仍然不成功。
最终方案中我不能使用fs在本地保存文件,但在这个示例中,我用fs验证了这段代码可以运行:它成功地保存了一个本地.mp3文件。
首先,我有几个功能:

/**
 * Requests `videoUrl` through axios in streaming mode and returns the body.
 *
 * @param {string} videoUrl - Address of the video to download.
 * @returns {Promise<*>} The `data` of the axios response, requested with
 * `responseType: 'stream'`.
 * @throws {Error} Prefixed with "Error fetching the video: " when the request
 * fails or the response status is not 200.
 */
async function downloadVideo(videoUrl) {
  const requestOptions = { responseType: 'stream' };

  try {
    const reply = await axios.get(videoUrl, requestOptions);

    // Guard clause: anything but a plain 200 counts as a failed download.
    // It is raised inside the try so it receives the same prefix as any
    // other failure below.
    if (reply.status !== 200) {
      throw new Error('Failed to fetch the video');
    }

    return reply.data;
  } catch (failure) {
    throw new Error(`Error fetching the video: ${failure.message}`);
  }
}
/**
 * Downloads the video at `videoUrl`, feeds it to ffmpeg and resolves with a
 * Buffer once ffmpeg reports 'end'.
 *
 * NOTE(review): the resolved Buffer and the local 'output.mp3' file are both
 * filled from the `passThrough` 'data' listener, i.e. from the bytes piped in
 * from `videoStream`. ffmpeg's own output is directed to '/dev/null', so
 * nothing produced by the conversion reaches `audioBuffers` or the file.
 *
 * @param {string} videoUrl - URL handed to `downloadVideo`.
 * @returns {Promise<Buffer>} Concatenation of every chunk seen on
 * `passThrough`; rejects with 'Empty audio buffer' when nothing was collected.
 * @throws {Error} 'Error extracting audio: …' when a statement inside the
 * `try` (such as the awaited `downloadVideo` call) throws. The promise below
 * is returned without `await`, so its rejections are not re-wrapped here.
 */
async function extractAudioFromVideo(videoUrl) {
    try {
      const videoStream = await downloadVideo(videoUrl);
  
      // Create a PassThrough stream to pipe the video data
      const passThrough = new PassThrough();
      videoStream.pipe(passThrough);

      // NOTE(review): no `outputStream.end()` call is visible in this function.
      const outputFile = 'output.mp3';
      const outputStream = fs.createWriteStream(outputFile);
  
        return new Promise((resolve, reject) => {
            // Despite the name, this collects the chunks flowing through
            // `passThrough` (the downloaded stream), not ffmpeg output.
            const audioBuffers = [];

            passThrough.on('data', chunk => {
                audioBuffers.push(chunk)
                outputStream.write(chunk); // Write chunks to a local file
              });
        
              passThrough.on('error', err => {
                reject(err);
              });
        

            // ffmpeg reads the same `passThrough` stream as its input.
            ffmpeg()
            .input(passThrough)
            .output('/dev/null') // Null output as a placeholder: the converted result is not kept
            .outputOptions('-vn') // Extract audio only
            .noVideo()
            .audioQuality(0)
            .audioCodec('libmp3lame') // Set audio codec
            .format('mp3')
            .on('end', () => {
                // Resolve with whatever the 'data' listener above gathered.
                const audioBuffer = Buffer.concat(audioBuffers)
                if (audioBuffer.length > 0) {
                    resolve(audioBuffer);
                  } else {
                    reject(new Error('Empty audio buffer'));
                  }
              })
            .on('error', err => reject(err))
            .run();
        })
    } catch (error) {
      throw new Error('Error extracting audio: ' + error.message);
    }
  }
/**
 * Uploads an audio buffer to the Firebase Storage bucket named by
 * `serviceAccount.storage_bucket_content`.
 *
 * The object is written directly under `fileName` with its extension replaced
 * by `.mp3` and with content type `audio/mpeg`. Writing straight to the final
 * name needs one request instead of upload + metadata update + move, and
 * cannot leave an object behind under the original name if a later step fails.
 *
 * Failures are logged and NOT re-thrown, so the returned promise always
 * resolves.
 *
 * @param {Buffer} audioBuffer - Bytes to store.
 * @param {string} fileName - Desired object name; a trailing extension is
 * swapped for `.mp3` (a name without an extension is used unchanged).
 * @returns {Promise<void>}
 */
async function saveAudioToFirebase(audioBuffer, fileName) {
  try {
    const bucket = admin.storage().bucket(serviceAccount.storage_bucket_content);

    // Change the file extension to .mp3
    const mp3FileName = fileName.replace(/\.[^/.]+$/, '.mp3');

    await bucket.file(mp3FileName).save(audioBuffer, {
      metadata: {
        contentType: 'audio/mpeg', // Adjust the content type as needed
      },
    });

    console.log('Audio saved to Firebase Storage.');
  } catch (error) {
    console.error('Error saving audio to Firebase Storage:', error);
  }
}

什么工作:

  • 通过Axios下载视频
  • 保存到Firebase存储(没有初始化或指向Firebase的指针问题)
  • 输出名为“output.mp3”的本地.mp3文件
  • 我能够记录extractAudioFromVideo的结果,并在我的终端中记录缓冲区

什么不起作用:

  • 将一个文件保存到Firebase Storage中,该文件是.mp3。它在URL中显示为“.mp3”,内容类型为“audio/mpeg”,但实际上它是.mp4。仍然有视频并在浏览器窗口中播放视频。

如果有可行的建议或解决方案,我也愿意使用其他库(例如tmp)。

laawzig2

laawzig21#

const ffmpeg = require('fluent-ffmpeg');
const { PassThrough } = require('stream');

/**
 * Downloads the video at `videoUrl` and converts its audio track to MP3
 * entirely in memory.
 *
 * The converted bytes are read from the stream returned by the ffmpeg
 * command's `pipe()`; the command object itself only reports lifecycle events
 * such as 'error', so listening for 'data' on it never yields output.
 *
 * @param {string} videoUrl - URL handed to `downloadVideo`.
 * @returns {Promise<Buffer>} The MP3-encoded audio.
 * @throws {Error} 'Error extracting audio: …' when the download or the
 * conversion fails, or when ffmpeg produced no output ('Empty audio buffer').
 * The underlying error is kept as `cause`.
 */
async function extractAudioFromVideo(videoUrl) {
  try {
    const videoStream = await downloadVideo(videoUrl);

    // Awaited (not merely returned) so conversion failures are caught below
    // and get the same message prefix as download failures.
    return await new Promise((resolve, reject) => {
      const audioBuffers = [];

      // With no destination argument, pipe() starts ffmpeg and returns a
      // stream carrying the converted output.
      const audioStream = ffmpeg(videoStream)
        .outputOptions('-vn') // Extract audio only
        .audioCodec('libmp3lame') // Set audio codec to MP3
        .format('mp3')
        .on('error', reject)
        .pipe();

      audioStream
        .on('data', (chunk) => {
          audioBuffers.push(chunk);
        })
        .on('error', reject)
        // Settle on the output stream's own 'end' so every chunk has been seen.
        .on('end', () => {
          const audioBuffer = Buffer.concat(audioBuffers);
          if (audioBuffer.length > 0) {
            resolve(audioBuffer);
          } else {
            reject(new Error('Empty audio buffer'));
          }
        });
    });
  } catch (error) {
    throw new Error('Error extracting audio: ' + error.message, { cause: error });
  }
}

字符串

4bbkushb

4bbkushb2#

前言

不要指望StackOverflow经常给出这样的答案,我只是喜欢解决这个问题,然后就忘乎所以了。

  • 注:下面的代码是徒手编写的,可能存在拼写错误,欢迎指正。

问题
看看你目前的方法,视频文件首先下载到内存中(在音频转换之前,作为audioBuffer),然后写出来作为output.mp3的文件(在音频转换之前)。这是由这些行引起的(为了清晰起见,重新排列):

// Excerpt of the question's code, rearranged for clarity (not runnable on its
// own: `outputStream`, `reject` and `videoStream` come from the original
// function). Every chunk handled here arrives from `videoStream`, which is
// piped into `passThrough` on the last line.
const audioBuffers = [];

const passThrough = new PassThrough()
  .on('data', chunk => {
    // Same chunk goes to the in-memory list and to the local file.
    audioBuffers.push(chunk)
    outputStream.write(chunk); // Write chunks to a local file
  })
  .on('error', err => {
    reject(err);
  });

videoStream.pipe(passThrough);

字符串
请注意,上面的行没有提到MP3文件转换。这就是为什么您上传的文件和本地文件都是具有.mp3文件扩展名的视频。在这些行下面,您将passThrough流的输出馈送到ffmpeg并丢弃结果(通过将其发送到/dev/null)。

// Excerpt of the question's code: `passThrough` is the input given to ffmpeg.
ffmpeg()
  .input(passThrough)
  .output('/dev/null') // the converted result is written to /dev/null, i.e. not kept
  /* ... other config */


完全不需要与文件系统交互,应该可以提取原始视频流,通过删除视频内容并根据需要转换音轨来转换流内容,然后将生成的音频流直接加载到Google Cloud Storage中。这被称为ETL管道(用于提取、转换、加载),并有助于最大限度地减少托管此云功能所需的资源。

潜在解决方案

在第一个代码块中,我已经将extractAudioFromVideo和saveAudioToFirebase两个辅助方法合并为一个streamAudioTrackToCloudStorage辅助方法。将这些组件放在一起有助于防止传递没有适当侦听器的流,也有助于将当前上下文绑定到转换和加载步骤中抛出的错误。这一点特别重要,因为如果FFMPEG不能正确处理传入的流,正在上传的文件可能不完整或为空。不完整的文件由错误处理代码负责处理。
streamAudioTrackToCloudStorage方法接受一个downloadable布尔参数,该参数可以在上传时生成Firebase Storage下载URL。这对于将文件的记录插入Cloud Firestore或实时数据库(如果是公共消费)非常有用。

import { randomUUID } from "crypto"; // Requires node v14.17+
import * as ffmpeg from "fluent-ffmpeg";

/**
 * Builds a fresh download token together with the Firebase Storage URL that
 * would serve the given GCS file when that token is presented.
 *
 * Nothing is written to Cloud Storage here: the caller still has to store the
 * token in the file's metadata for the URL to be of any use.
 *
 * @param {import('@google-cloud/storage').File} storageFile - The GCS File
 * whose bucket name and object name are embedded in the URL.
 * @returns {[token: string, url: string]} - Tuple of the generated token and
 * the assembled URL containing it.
 */
const generateDownloadURLParts = (storageFile) => {
  // A random UUID is sufficient; the token does not have to be globally unique.
  const token = randomUUID();

  const bucketName = storageFile.bucket.name;
  const objectPath = encodeURIComponent(storageFile.name);
  const url = `https://firebasestorage.googleapis.com/v0/b/${bucketName}/o/${objectPath}?alt=media&token=${token}`;

  return [token, url];
};

/**
 * Uploads the audio track of the provided stream to the given Google Cloud Storage
 * file reference as an MP3.
 *
 * FFMPEG's output is piped into the Cloud Storage upload stream. The upload is
 * only finalized once the conversion has ended successfully, all of its output
 * has been handed to the upload stream, and at least one byte was produced. On
 * any failure the upload stream is destroyed, and for storage failures the
 * FFMPEG process is killed as well.
 *
 * Errors passed to the rejection carry extra debugging context where possible:
 * `byteCount`, `file`, `source` ("convert" or "storage") and `downloadURL`.
 *
 * @param {import('@google-cloud/storage').File} storageFile - The GCS File object
 * to write the stream to.
 * @param {import('stream').Readable} sourceStreamWithAudio - A Node.js readable
 * stream that can be ingested by FFMPEG to produce the uploaded audio track.
 * @param {boolean} [downloadable] - Determines whether a download token is
 * attached to the uploaded file.
 * @returns {Promise<[ file: import('@google-cloud/storage').File, bytesUploaded: number, downloadURL: string ]>} - A
 * promise that resolves to a tuple containing the reference to the uploaded
 * GCS file, its size in bytes and its download URL if available.
 */
const streamAudioTrackToCloudStorage = (storageFile, sourceStreamWithAudio, downloadable = false) => {
  return new Promise((resolve, reject) => {
    let byteCount = 0;
    let settled = false; // the promise has been resolved or rejected
    let conversionEnded = false; // FFMPEG reported a successful 'end'
    let outputDrained = false; // every converted chunk reached the upload stream

    // Generate download token and URL if requested.
    // (using downloadURL as Firebase uses getDownloadURL in the client SDKs)
    const [firebaseStorageDownloadTokens, downloadURL] = downloadable
      ? generateDownloadURLParts(storageFile)
      : [null, null];

    // define Google Cloud Storage upload stream
    const uploadStream = storageFile.createWriteStream({
      metadata: {
        contentType: 'audio/mpeg',
        ...(downloadable ? { metadata: { firebaseStorageDownloadTokens } } : {})
      },
      resumable: false,
    });

    // define source to audio transform
    const command = ffmpeg(sourceStreamWithAudio)
      .noVideo() // Extract audio only
      .audioQuality(0)
      .audioCodec('libmp3lame') // Set audio codec
      .format('mp3');

    // before calling reject, try to bind some metadata to the error for debugging
    const onErrorCb = (error, source) => {
      if (settled) return;
      settled = true;

      const context = { byteCount, file: storageFile, source, downloadURL };

      try {
        Object.assign(error, context); // add context to error object
      } catch (_ignored) {
        console.error("Failed to bind context to thrown error.", { error, context });
      }

      // Abandon the upload so that it is never finalized with partial data.
      uploadStream.destroy();
      // Nothing will consume FFMPEG's output any more; stop it rather than
      // leaving it blocked on a stream that is no longer read.
      if (source === "storage") {
        command.kill();
      }

      reject(error);
    };

    // Finalize the upload only after a successful, non-empty conversion.
    const finalizeUpload = () => {
      if (settled || !conversionEnded || !outputDrained) return;

      if (byteCount === 0) {
        onErrorCb(new Error('Empty audio buffer'), "convert");
        return;
      }

      uploadStream.end();
    };

    uploadStream
      .on('finish', () => {
        if (settled) return;
        settled = true;
        resolve([storageFile, byteCount, downloadURL]);
      })
      .on('error', (err) => onErrorCb(err, "storage"));

    command
      .on('end', () => {
        conversionEnded = true;
        finalizeUpload();
      })
      .on('error', (err) => onErrorCb(err, "convert"));

    // With no destination argument, pipe() starts FFMPEG and returns a stream
    // carrying the converted audio.
    const audioStream = command.pipe();

    audioStream
      .on('data', (chunk) => {
        byteCount += chunk.length;
      })
      .on('end', () => {
        outputDrained = true;
        finalizeUpload();
      })
      .on('error', (err) => onErrorCb(err, "convert"));

    // `end: false` keeps the upload open until finalizeUpload() decides the
    // conversion really succeeded.
    audioStream.pipe(uploadStream, { end: false });
  });
};


现在我们已经有了transform和load流,我们需要获得extract流。
随着Node v18中原生Node Fetch API的引入,我已经删除了axios,因为它实际上并没有被用于本步骤的任何有用功能。如果您正在使用拦截器进行身份验证或类似的操作,您可以修改下面的脚本以将其重新添加进去。
在这个代码块中,我们定义了storeAudioFromRemoteVideo辅助方法,它接受要转换的视频URL以及转换后的mp3文件的最终上传路径。除非在选项参数中提供了另一个GCS bucket,否则该文件将被上传到您在共享代码中指定的默认存储桶。选项参数的其余属性会作为第二个参数传递给fetch(),以便您指定Authorization头、API密钥或请求体之类的内容。

import * as admin from "firebase-admin";

/**
 * Attempts to ingest the body of the provided response as if it
 * were a JSON-encoded string, falling back to plain text on failure.
 *
 * This allows for simple handling of HTML, plain text and JSON-encoded
 * bodies as part of error handling.
 *
 * `res.text()` is asynchronous, so the body has to be awaited before it can
 * be parsed; parsing the pending promise itself can never succeed.
 *
 * @param {Response} res - The response containing the body to consume.
 * @returns {Promise<unknown>} - Resolves to the parsed JSON value, or to the
 * raw body text when the body is not valid JSON.
 */
const attemptToParseResponseBodyAsJSON = async (res) => {
  const text = await res.text();
  try {
    return JSON.parse(text);
  } catch (_notJson) {
    return text;
  }
};

/**
 * Streams remote video's content through FFMPEG to extract the
 * audio track and immediately uploads it to Google Cloud Storage.
 *
 * This method assumes that the provided file path is safe to write content to.
 *
 * @param {string} storedFilePath - File path in Cloud Storage to upload the
 * stream to. Should include ".mp3" file extension.
 * @param {string} videoUrl - URL of the video to be converted.
 * @param {Object} [options] - Optional object to override target GCS bucket
 * (`bucket`); every other property is passed through to `fetch()`.
 * @returns {Promise<[ file: import('@google-cloud/storage').File, bytesUploaded: number, downloadURL: string ]>} - A
 * promise that resolves to a tuple containing the reference to the uploaded
 * GCS file, its size in bytes and its download URL if available.
 * @throws {Error} When the remote server answers with a non-OK status or a
 * Content-Type that is neither audio/* nor video/*. The error carries
 * `response`, `status`, `body` (parsed when possible, otherwise `null`) and
 * `source: "remote-http"`.
 */
const storeAudioFromRemoteVideo = async (storedFilePath, videoUrl, { bucket, ...fetchOptions } = {}) => {

  // custom bucket not provided? use default
  const targetBucket = bucket || admin.storage()
    .bucket(serviceAccount.storage_bucket_content);

  const response = await fetch(videoUrl, fetchOptions);
  const responseContentType = response.headers.get('Content-Type');

  if (!response.ok || !/^(?:audio|video)\//.test(responseContentType)) {
    // Reading the body is asynchronous and purely diagnostic: a failure to
    // read it must not hide the HTTP error being reported.
    let body = null;
    try {
      body = await attemptToParseResponseBodyAsJSON(response);
    } catch (_bodyUnavailable) {
      body = null;
    }

    const err = new Error(`Unexpected HTTP ${response.status} response with [Content-Type]="${responseContentType}" from remote server.`);
    Object.assign(err, { // add context to error object
      response,
      status: response.status,
      body,
      source: "remote-http"
    });
    throw err;
  }

  // The Content-Disposition header of the response may contain the
  // filename if you want to use it for the uploaded file.
  // It is assumed that the calling method has taken care to prevent
  // overwriting existing files.
  const file = targetBucket.file(storedFilePath);

  // The native fetch() exposes the body as a WHATWG ReadableStream, while
  // FFMPEG is fed from a Node.js stream. Convert when the body is not already
  // a Node.js stream (a fetch polyfill may return one directly).
  let sourceStream = response.body;
  if (typeof sourceStream.pipe !== "function") {
    const { Readable } = await import("stream");
    sourceStream = Readable.fromWeb(sourceStream);
  }

  return streamAudioTrackToCloudStorage(file, sourceStream);
};

用法

现在定义了上述方法,您的Cloud Function代码可能如下所示:

import * as functions from "firebase-functions";
import { randomUUID } from "crypto"; // Requires node v14.17+

const { HttpsError } = functions.https;

/**
 * Callable Cloud Function that converts a remote video into an MP3 stored in
 * Cloud Storage.
 *
 * Expects the source URL in `data.source`, `data.videoUrl` or `data.targetUrl`
 * and requires an authenticated caller. The conversion and upload are
 * delegated to `storeAudioFromRemoteVideo`.
 *
 * @returns {Promise<{bucket: string, name: string, size: number, downloadURL: string, jobDurationMS: number}>}
 * Details of the stored file. `bucket` is the bucket's name.
 * @throws {HttpsError} "unauthenticated" without an auth context,
 * "failed-precondition" without a source URL, and "internal" when ingestion
 * fails (after a best-effort deletion of any partially uploaded file).
 */
export const ingestVideoFile = functions.https.onCall(async (data, context) => {
  if (!context.auth) {
    throw new HttpsError("unauthenticated", "You must be logged in to continue.");
  }

  // TODO: Check if user has permission to call this endpoint (e.g. admin/maintainer/creator/etc.)

  const targetUrl = data?.source || data?.videoUrl || data?.targetUrl;
  if (!targetUrl) {
    throw new HttpsError("failed-precondition", "No source provided.");
  }

  // TODO: Implement API quota?

  // randomUUID() for demo only, use v4 from uuid or a Cloud Firestore
  // document ID for a more stable unique ID for production use
  const targetFilePath = "ingestedVideos/" + randomUUID() + ".mp3";
  const startTimeMS = Date.now();

  try {
    // Note the argument order: destination path first, then the source URL.
    const [file, size, downloadURL] = await storeAudioFromRemoteVideo(targetFilePath, targetUrl);

    return {
      bucket: file.bucket.name, // the bucket's name, not the Bucket object
      name: file.name,
      size,
      downloadURL,
      jobDurationMS: Date.now() - startTimeMS
    };
  } catch (err) {
    const { source, file } = err && typeof err === "object" ? err : {};
    console.error("Failed to ingest video file", err);

    if (file) {
      // Best effort: a failed cleanup is logged but must not replace the
      // original failure reported to the caller.
      try {
        await file.delete({ ignoreNotFound: true });
      } catch (cleanupErr) {
        console.error(
          "Failed to cleanup errored file. Manual cleanup required.",
          { bucket: file.bucket.name, name: file.name, error: cleanupErr }
        );
      }
    }

    throw new HttpsError(
      "internal",
      `Failed due to an error in the ${source ?? "unknown"} component`,
      {
        ...(file ? { bucket: file.bucket.name, name: file.name } : {}),
        source,
        jobDurationMS: Date.now() - startTimeMS
      }
    );
  }
});

可能的后续步骤:

  • 调查防病毒/反恶意软件保护。
  • 为调用此函数实现基于用户的API配额和/或速率限制。
  • 使用返回的jobDurationMS值和执行此函数的项目成本对性能进行基准测试。
  • 如果处理时间预计超过9分钟,则将Cloud Function部署为第二代Cloud Function。
  • 决定如何处理功能出错时上传的不完整/空文件。保留调查?错误时删除?等。
kg7wmglp

kg7wmglp3#

对于那些正在寻找这个问题的解决方案的人来说,我相信仍然有一种方法可以通过passthrough来解决这个问题,但是我将使用'os' Node.js包来提供我的解决方案。

const fs = require('fs');
const { tmpdir } = require('os');
const { join } = require('path');
const fetch = require("node-fetch");
const ffmpeg = require('fluent-ffmpeg')

/**
 * Creates (or truncates) a file called `name` inside the OS temporary
 * directory and returns its full path.
 *
 * @param {string|Buffer} [data] - Content to write. When omitted (or otherwise
 * falsy) an empty file is created.
 * @param {string} name - File name relative to the temporary directory.
 * @returns {string} Absolute path of the temporary file.
 */
function createTempFile(data, name) {
    const tmpFilePath = join(tmpdir(), name); // Define your temporary file path

    // writeFileSync opens, writes and closes in one call, so the "empty file"
    // case no longer leaves an open file descriptor behind.
    fs.writeFileSync(tmpFilePath, data || '');

    return tmpFilePath;
}

  /**
   * Downloads the video at `videoUrl` into a temporary file and converts it to
   * an MP3 file in the OS temporary directory.
   *
   * Each call uses its own uniquely prefixed temp files, so concurrent
   * conversions do not overwrite each other. The downloaded input file is
   * removed once ffmpeg finishes; on a conversion error any partial output is
   * removed too. The caller owns (and should eventually delete) the returned
   * MP3 file.
   *
   * @param {string} videoUrl - URL of the video to convert.
   * @returns {Promise<string>} Path of the generated MP3 file.
   * @throws {Error} 'Error extracting audio: …' when the download fails or the
   * server answers with a non-OK status; 'Error converting to MP3: …' when
   * ffmpeg fails.
   */
  async function videoToMp3(videoUrl){
    // Best-effort removal: cleanup problems are reported but never mask the
    // outcome of the conversion itself.
    const discard = (filePath) => {
        try{
            fs.rmSync(filePath, { force: true });
        }catch(cleanupError){
            console.warn(`Could not remove temporary file ${filePath}:`, cleanupError);
        }
    };

    try{
        const response = await fetch(videoUrl)
        if(!response.ok){
            // Otherwise the error page would be written to disk as the "video".
            throw new Error(`Unexpected HTTP ${response.status} response`);
        }
        const buffer = await response.buffer();

        // Replaces the fixed 'id' placeholder with a per-call unique prefix.
        const jobId = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2)}`;
        const temporaryFilePath = createTempFile(buffer, `${jobId}_input.mp4`);
        const temporaryFilePath2 = join(tmpdir(), `${jobId}_output.mp3`)

        return new Promise((resolve, reject) => {
            ffmpeg()
            .input(temporaryFilePath)
            .toFormat('mp3')
            .on('end', () => {
                discard(temporaryFilePath);
                resolve(temporaryFilePath2);
            })
            .on('error', (err) => {
                discard(temporaryFilePath);
                discard(temporaryFilePath2);
                reject(new Error(`Error converting to MP3: ${err}`, { cause: err }));
            })
            .save(temporaryFilePath2);
        })
    }catch(error){
        throw new Error('Error extracting audio: ' + error.message, { cause: error });
    }
  }

字符串

相关问题