使用NodeJs读取大文件的最后一行

bnl4lu3b  于 2023-06-29  发布在  Node.js
关注(0)|答案(6)|浏览(276)

我使用这个示例(this example)读取一个大文件:

// Core modules needed to read the file through a readline interface.
const fs = require('fs');
const readline = require('readline');
const stream = require('stream');

// Input comes from the file; a bare stream instance is passed as the output.
const instream = fs.createReadStream('your/file');
const outstream = new stream();
const rl = readline.createInterface(instream, outstream);

// Handler for the interface's 'line' event.
rl.on('line', (line) => {
  // process line here
});

// Handler for the interface's 'close' event.
rl.on('close', () => {
  // do something on finish here
});

我想知道如何判断当前的line是否是文件的最后一行。我读了文档(docs),但找不到解决方案。我已经试过了:

rl.on('line', (line) => {
    // Report end-of-file when the line is empty or equals the platform EOL.
    const isEmpty = line === '';
    if (isEmpty || line === require("os").EOL) {
        console.log('eof');
    }
});

但是没有用。
你有什么建议吗?感谢阅读。

cidc1ykv

cidc1ykv1#

Axel的答案的问题在于,它假设文件的最后一行是空行或者是EOL,但事实并非如此。

// fileTools.js
const fs = require('fs');
const readline = require('readline');
const Stream = require('stream');

exports.getLastLine = (fileName, minLength) => {
    let inStream = fs.createReadStream(fileName);
    let outStream = new Stream;
    return new Promise((resolve, reject)=> {
        let rl = readline.createInterface(inStream, outStream);

        let lastLine = '';
        rl.on('line', function (line) {
            if (line.length >= minLength) {
                lastLine = line;
            }
        });

        rl.on('error', reject)

        rl.on('close', function () {
            resolve(lastLine)
        });
    })
}

用法:

const { getLastLine } = require('./fileTools.js');

const fileName = 'C:\\someWinDir\\somelog.log';
// Minimum length passed to getLastLine as its second argument.
const minLineLength = 1;

// Print the last line, or the error if the lookup fails.
getLastLine(fileName, minLineLength)
    .then((lastLine) => {
        console.log(lastLine);
    })
    .catch((err) => {
        console.error(err);
    });
v8wbuo2f

v8wbuo2f2#

我在我的mocha测试套件中使用了exec(tail -n 1 <filepath>),因为我确切地知道它做什么,而且它用更少的代码行就可以完成。
不过,在构建应用程序时,它可能不太理想

const { exec } = require("child_process");

// Run `tail` to fetch the last line of the log and print it.
exec("tail -n 1 nginx_access.log", (error, stdout, stderr) => {
    if (error) {
        // Report the failure instead of silently printing an empty result.
        console.error(`tail failed: ${stderr || error.message}`);
        return;
    }
    console.log(stdout);
});
a11xaf1n

a11xaf1n3#

将接收到的行保存在全局变量中,然后在到达文件末尾时显示它。

// Most recent line that was neither empty nor equal to the platform EOL.
let lastLine = '';
rl.on('line', (line) => {
    const looksLikeEof = line === '' || line === require("os").EOL;
    if (!looksLikeEof) {
        lastLine = line;
        return;
    }

    console.log('eof, last line is', lastLine);
});
djp7away

djp7away4#

为了高效地读取大文件的最后N行,我们可以使用这个npm包read-last-lines
https://www.npmjs.com/package/read-last-lines
阅读文件最后50行的示例:

const readLastLines = require('read-last-lines');

// Print the last 50 lines, or the error if the read fails.
readLastLines.read('path/to/file', 50)
    .then((lines) => console.log(lines))
    .catch((err) => console.error(err));
e7arh2l6

e7arh2l65#

保存当前行,当你到达文件的末尾时,你就有了最后一行。当输入流的所有数据都被消耗时,输入流发出“end”事件。https://nodejs.org/api/stream.html#stream_event_end

const fs = require('fs');
const readline = require('readline');
const stream = require('stream');

const instream = fs.createReadStream('your/file');
const outstream = new stream();
const rl = readline.createInterface(instream, outstream);

// Overwritten on every 'line' event, so it always holds the latest line seen.
let currentLine;

rl.on('line', (line) => {
    currentLine = line;

    // process line here
});

// Handler for the input stream's 'end' event.
instream.on('end', () => {
    // currentLine is now the last line
    // use currentLine here
});

您也可以使用rl.on('close')事件,但它也会因其他原因而触发,例如中断或调用rl.close(),不过这些可能不会影响您。https://nodejs.org/api/readline.html#readline_event_close

ktca8awb

ktca8awb6#

为了一个函数而引入 https://github.com/alexbbt/read-last-lines 这个依赖项(并因此间接引入mz/fs这个依赖项),似乎有点多。
另外,read-last-lines的代码似乎有点过时。我采用了read-last-lines的代码,并对其进行了一点重构。所有的功劳都归于read-last-lines
下面的代码基本上是从后到前逐个字符地读取文件,并计算遇到了多少个换行符。
我没有测试这是否真的比通过一个巨大的CSV文件的所有行的流更快。

import * as fs from "fs";

const NEW_LINE_CHARACTERS = ["\n"];

/**
 * Read the single byte located `currentCharacterCount` bytes before the last
 * byte of the file and decode it with `encoding`.
 *
 * @param stat                  - stats of the file; only `size` is used.
 * @param file                  - file descriptor to read from.
 * @param currentCharacterCount - distance from the last byte (0 reads the last byte).
 * @param encoding              - encoding used to decode the byte, defaults to "utf-8".
 *
 * @return a promise resolved with the decoded byte or rejected with the read error.
 */
async function readPreviousChar(
    stat: fs.Stats,
    file: number,
    currentCharacterCount: number,
    encoding: BufferEncoding = "utf-8"
): Promise<string> {
    // Absolute offset of the requested byte, counted back from the end of the file.
    const offset = stat.size - 1 - currentCharacterCount;
    const scratch = Buffer.alloc(1);

    return new Promise((resolve, reject) => {
        fs.read(file, scratch, 0, 1, offset, (err, bytesRead, buffer) => {
            if (err) {
                reject(err);
                return;
            }
            resolve(buffer.toString(encoding));
        });
    });
}

/**
 * Read in the last `n` lines of a file.
 *
 * The file is walked backwards one byte at a time. The raw bytes are collected
 * and decoded once at the end, so multi-byte characters are not split. A
 * newline that is the very last byte of the file does not count as a line
 * boundary, and a single leading newline is removed from the result.
 *
 * NOTE(review): newlines are detected on single bytes, which presumably only
 * suits ASCII-compatible encodings (utf-8, latin1, ascii) - confirm before
 * using it with utf16le.
 *
 * @param  {string}   inputFilePath   - file (direct or relative path to file.)
 * @param  {int}      maxLineCount    - max number of lines to read in.
 * @param  {encoding} encoding        - character encoding used to decode the result. defaults to 'utf-8'.
 *
 * @return {promise}  a promise resolved with the lines or rejected with an error.
 */
export async function readLastLines(
    inputFilePath: string,
    maxLineCount: number,
    encoding: BufferEncoding = "utf-8"
): Promise<string> {
    if (!fs.existsSync(inputFilePath)) throw new Error(`File ${inputFilePath} does not exist.`);

    const handle = await fs.promises.open(inputFilePath, "r");
    try {
        const stat = await handle.stat();

        // Bytes in reverse file order, each kept as a one-character latin1
        // string so that its char code equals the raw byte value.
        const bytes: string[] = [];
        let lineCount = 0;

        while (bytes.length < stat.size && lineCount < maxLineCount) {
            const byte = await readPreviousChar(stat, handle.fd, bytes.length, "latin1");

            // The first byte read is the last byte of the file; a newline
            // there terminates the final line rather than starting a new one.
            if (NEW_LINE_CHARACTERS.includes(byte) && bytes.length > 0) {
                lineCount++;
            }
            bytes.push(byte);
        }

        // Drop the newline that separates the result from the preceding line.
        if (bytes.length > 0 && NEW_LINE_CHARACTERS.includes(bytes[bytes.length - 1])) {
            bytes.pop();
        }

        // Restore file order and decode all bytes in one go.
        return Buffer.from(bytes.reverse().join(""), "latin1").toString(encoding);
    } finally {
        // Always release the descriptor, even when a read fails.
        await handle.close();
    }
}

相关问题