javascript 使用 NodeJS 读取pdf文件

83qze16e  于 2023-02-02  发布在  Java
关注(0)|答案(1)|浏览(613)

我正在尝试从如下网址读取 PDF 文件，代码如下：

const axios = require("axios");
const jsdom = require("jsdom");
const PdfReader = require('pdfreader').PdfReader;
const { JSDOM } = jsdom;

// Download the PDF and print every text item that pdfreader emits.
// NOTE(review): the request is left exactly as posted. No responseType is set,
// so result.data is presumably not a binary Buffer — that is the failure this
// question asks about.
axios.get("https://url-to-pdf.pdf").then(function(result) {
    new PdfReader().parseBuffer(result.data, function(err, item) {
        if (err) {
            console.log(err);
        } else if (!item) {
            // pdfreader signals end of file with a falsy item; nothing to print.
            return;
        } else if (item.text) {
            console.log(item.text);
        }
    });
}).catch(function(err) {
    // Surface request failures instead of silently discarding them.
    console.error(err);
});

它显示

An error occurred while parsing the PDF: stream must have data
{
  parserError: 'An error occurred while parsing the PDF: stream must have data'
}

如何解决这个问题？

cczfrluj

cczfrluj1#

关键是把 responseType 设置为 arraybuffer，但随后必须将返回的数据转换为 Buffer：

// Fetch `url` as raw bytes and, when the server reports a PDF, extract its lines.
// Must run inside an async function (it uses await); `url`, `http`, `axios` and
// `extract_pdf` are expected to be in scope already.
try {
  const options = {
    method: 'get',
    url,
    headers: { 'User-Agent': 'PostmanRuntime/7.26.8' },
    timeout: 2000,
    maxRedirects: 5,
    httpAgent: new http.Agent({ keepAlive: true }),
    // Ask for the undecoded bytes rather than a text-decoded body.
    responseType: 'arraybuffer'
  };
  const response = await axios(options);
  console.log(response.status);
  console.log(response.headers['content-type']);

  // The header may be absent; fall back to '' so the check below cannot throw.
  const contentType = String(response.headers['content-type'] || '');
  if (contentType.includes('pdf')) {
    console.log("pdf");
    console.log(typeof response.data);
    // Hand the parser a Node Buffer; wrap the payload only if it is not one already.
    const buff = Buffer.isBuffer(response.data) ? response.data : Buffer.from(response.data);
    // A parser failure rejects here and is reported by the catch block below.
    const temp = await extract_pdf.readlines(buff);
    console.log(temp);
  }
} catch (error) {
  if (error.response) {
    // The server answered with an error status; report it instead of dropping it.
    console.log(error.response.status);
  } else {
    console.log(error);
  }
}

相关问题