#!/bin/bash
# Split a CSV into a user-chosen number of smaller CSVs with GoCSV, then
# print the row count of every resulting file as a sanity check.
#
# Requires: gocsv on PATH, and input.csv in the current directory.
fname=input.csv

# Get number of rows in CSV
nrows=$(gocsv nrow "$fname") || exit 1

# Get desired number of files
printf "%s" "Number of split CSVs: "
read -r nfiles || exit 1

# Only a positive integer is usable here: an empty answer, zero, or text
# would make the division below fail.
if ! [[ "$nfiles" =~ ^[0-9]+$ ]] || (( 10#$nfiles < 1 )); then
  echo "Number of split CSVs must be a positive integer, got '$nfiles'" >&2
  exit 1
fi
# Force base 10 so an answer with leading zeros (e.g. "08") is not
# interpreted as octal by shell arithmetic.
nfiles=$(( 10#$nfiles ))

# Calculate rows per file.
# NOTE: the "+ 1" rounds up when the division has a remainder; when nrows is
# an exact multiple of nfiles it makes each file one row larger than needed,
# so fewer than nfiles files may be produced.
nRowsPerFile=$(( (nrows / nfiles) + 1 ))
echo "Will split $fname with $nrows rows into $nfiles files, each with a max of $nRowsPerFile rows"

# Remove any previous splits (-f: stay quiet when there are none), then split
# to files starting with name "split"
rm -f -- split-*.csv
gocsv split -filename-base split -max-rows "$nRowsPerFile" "$fname" || exit 1

# Check work: iterate with a glob in the current shell (not `ls | while`) so
# a failing gocsv call really aborts the script.
echo ""
for CSV in split-*.csv; do
  [[ -e "$CSV" ]] || continue  # glob matched nothing
  split_rows=$(gocsv nrow "$CSV") || exit 1
  echo "$CSV $split_rows"
done
Number of split CSVs: 1
Will split input.csv with 1234567 rows into 1 files, each with a max of 1234568 rows
split-1.csv 1234567
型
3个分割文件:
Number of split CSVs: 3
Will split input.csv with 1234567 rows into 3 files, each with a max of 411523 rows
split-1.csv 411523
split-2.csv 411523
split-3.csv 411521
型
5个分割文件:
Number of split CSVs: 5
Will split input.csv with 1234567 rows into 5 files, each with a max of 246914 rows
split-1.csv 246914
split-2.csv 246914
split-3.csv 246914
split-4.csv 246914
split-5.csv 246911
1条答案
按热度按时间2admgd591#
我不认为您可以使用常见的Unix行处理工具来完成CSV的拆分,至少不容易。
我推荐使用一个支持CSV的工具,它可以让你指定要拆分成多少个文件,或者每个文件包含多少行:
我喜欢GoCSV,所以我将展示如何使用它的split命令来实现我认为你想要的:
字符串
我用随机数据生成了一个CSV,它有1_234_567行(大约46MB)。用下面这些输入运行脚本时,我得到的结果如下:
型
型
型