正在读取在2个不同位置以不同方式组织的csv文件

yzxexxkh  于 2023-06-19  发布在  其他
关注(0)|答案(3)|浏览(112)

我的csv文件看起来像

Metals:,E10
Al,0.1906
Ca,0.1132
Co,0.01951
Cu,0.5824
Cu,0.02383
Fe,0.03828
K,0.09577
Li,0.03024
Mg,0.007145
Na,0.1833
Ni,0.3236
Pb,0.0005787
Ti,0.4931
Tl,0.001887
Zn,0.07644

GLot,id,Slot,Scribe,Diameter,MPD,SResistivity,SThickness,TTV,LTV,Warp,Bow,S_U_A,Ep,Epi_L,Epi_Layer,Epi_Layer_2,EThick,E2thick,E2Dope,E2DopeT,E2DopeMax,E2DopeMin
31075046-001,XFB-LE00674.CP10023+001-12,1,22C1285,149.98,0,0.0217,334.71,1.91,1.03,5.35,-0.91,99.590582,1.0,1.0E18,9.8,1.12,9.9,9.6,9926193600000000,4.5574,10834500800000000,9551876800000000

我的代码看起来像这样:

// The asker's test program: reads a CSV file with CsvHelper as two-column (Type, Value) records.
namespace CsvHelperTest
{
    class CsvHelperTester
    {
        // Entry point: configures the reader, advances past one row, materializes the remaining
        // rows as EpiDataNames records, and opens a console CsvWriter whose body is commented out.
        static void Main(string[] args)
        {
            var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
            {
                // No header row is declared, so records are bound by column position ([Index]).
                HasHeaderRecord = false,
                HeaderValidated = null,
                // NOTE(review): with blank lines ignored, the empty line that separates the two
                // tables in the sample file is presumably not visible to the reader - confirm.
                IgnoreBlankLines = true,
                MissingFieldFound = null,
                AllowComments = true,
                Comment = ';',
                Delimiter = ",",
                TrimOptions = TrimOptions.Trim, // trim option as set by the asker
                // Replaces every "," inside a header name with a newline character.
                PrepareHeaderForMatch = header => Regex.Replace(header.Header, ",", "\n"),
            };

            using (var streamReader = new StreamReader("C:\\Users\\eyoung\\Desktop\\parse test files\\XFB-1C2002A_62152_CoA.csv"))
            {
                using (var csvReader = new CsvReader(streamReader, csvConfig))
                {
                    // The loop body runs exactly once (i < 1), i.e. a single row is read and discarded.
                    for (var i = 0; i < 1; i++)
                    {
                        csvReader.Read();
                    }

                    // Every remaining row of the file - including the rows of the second table -
                    // is materialized as an EpiDataNames record.
                    var records = csvReader.GetRecords<EpiDataNames>().ToList();

                    // Keep the first record aside, then drop it from the list.
                    var table = records[0];

                    records.RemoveAt(0);

                    // `columns` refers to the same list instance as `records` (no copy is made).
                    var columns = records;

                    // The writer targets the console; all write calls below are commented out,
                    // so nothing is emitted.
                    using (var writer = new CsvWriter(Console.Out, CultureInfo.InvariantCulture))
                    {
                        //writer.WriteField(records[0].Type);
                        //writer.NextRecord();

                        //records.RemoveAt(0);
                        //foreach (var item in records.Select(r => r.Type))
                        //{
                        //    writer.WriteField(item);
                        //}
                        //writer.NextRecord();
                        //foreach (var item in records.Select(r => r.Value))
                        //{
                        //    writer.WriteField(item);
                        //}
                        //writer.NextRecord(); 
                    }
                }
            }
        }

        // Two-column record bound by position: field 0 -> Type, field 1 -> Value.
        public class EpiDataNames
        {
            [Index(0)]
            public string Type { get; set; }
            [Index(1)]
            public string Value { get; set; }
        }
    }
}

这段代码效果很好:它读取第一组数据,并将其变成 'Type' 和 'Value' 两列。然而,读到第二组数据时问题就出现了。有没有办法让我只读取第一块数据?当我试图略过最后那些标题行时,它的行为很奇怪,反而把第一个数据块删掉了。

// Runs the body exactly once (i < 1), so a single row is read and discarded.
for (var i = 0; i < 1; i++)
{
    csvReader.Read(); //this skips the first line of data
}

// NOTE(review): the condition `i > 18` is already false on the first check (i starts at 0),
// so this body never executes and no row is read here.
for (var i = 0; i > 18; i++)
{
    csvReader.Read(); //I thought this would skip the last lines of data, but it doesn't.
}

第二个标题块的问题在于,标题行被读取成了这样:
| Type | Value |
| ------ | ------ |
| GLot | id |
而它应该是:
| Type | Value |
| ------ | ------ |
| GLot | 31075046-001 |
有什么想法吗?我对此很困惑。另外需要事先说明的是,我无权预先编辑这个 csv 文件。

cyej8jka

cyej8jka1#

如果你只想读取第一个数据块,你的代码可以非常简单。

// Every row is treated as data; no header row is declared for the first block.
var config = new CsvConfiguration(CultureInfo.InvariantCulture) { HasHeaderRecord = false };

using (var reader = new StreamReader("C:\\Users\\eyoung\\Desktop\\parse test files\\XFB-1C2002A_62152_CoA.csv"))
using (var csv = new CsvReader(reader, config))
{
    var metals = new List<EpiDataNames>();

    while (csv.Read())
    {
        // Stop as soon as the row looks like the second table's header
        // (first field "GLot" or second field "id").
        bool reachedSecondTable = csv.GetField(0) == "GLot" || csv.GetField(1) == "id";
        if (reachedSecondTable)
        {
            break;
        }

        metals.Add(csv.GetRecord<EpiDataNames>());
    }

    metals.Dump();
}

您还可以选择设置IgnoreBlankLines = false,然后使用空行作为断点。对于这两种解决方案,在读取第一个数据块之后,可以读取第二个数据块。
在读取第二个数据块时,我能想到的唯一潜在问题是:如果保留默认值 HasHeaderRecord = true,第一行数据 Metals:,E10 会被当作表头而丢失;但如果将其设置为 false,则无法读取第二个数据块的表头。

// Reads both tables from an in-memory sample: the two-column block first (up to the blank
// line), then the header and rows of the second table. Both result lists are dumped.
void Main()
{
    var data = @"Metals:,E10
Al,0.1906
Ca,0.1132
Co,0.01951
Cu,0.5824

GLot,id,Slot,Scribe
31075046-001,XFB-LE00674.CP10023+001-12,1,22C1285";

    // Blank lines must stay visible to the reader because one is used as the table separator.
    var config = new CsvConfiguration(CultureInfo.InvariantCulture) { IgnoreBlankLines = false };

    using (var reader = new StringReader(data))
    using (var csv = new CsvReader(reader, config))
    {
        var metals = new List<EpiDataNames>();

        while (csv.Read())
        {
            // A row whose fields are all empty marks the end of the first table.
            if (csv.Parser.Record.All(field => field == string.Empty))
            {
                break;
            }

            metals.Add(csv.GetRecord<EpiDataNames>());
        }

        csv.Read(); // Remove this line if using the first solution
        csv.ReadHeader();

        List<MyOtherClass> wafers = csv.GetRecords<MyOtherClass>().ToList();

        metals.Dump();
        wafers.Dump();
    }
}

/// <summary>
/// One row of the two-column block, bound by column position:
/// field 0 goes to <see cref="Type"/>, field 1 goes to <see cref="Value"/>.
/// </summary>
public class EpiDataNames
{
    /// <summary>First column of the row (index 0).</summary>
    [Index(0)]
    public string Type { get; set; }
    /// <summary>Second column of the row (index 1), kept as text.</summary>
    [Index(1)]
    public string Value { get; set; }
}

/// <summary>
/// One row of the second table (trimmed to four columns for the sample).
/// Only <see cref="Id"/> carries an explicit header name; the other properties
/// presumably bind to headers by property name - confirm against the CsvHelper configuration.
/// </summary>
public class MyOtherClass
{
    public string GLot { get; set; }
    /// <summary>Bound to the header named "id".</summary>
    [Name("id")]
    public string Id { get; set; }
    public string Slot { get; set; }
    public string Scribe { get; set; }
}
wsxa1bj1

wsxa1bj12#

您的文本文件由两个带表头的独立 CSV 表组成,两个表之间以空行分隔。可以使用 CsvHelper 读取这样的文件,但是您需要手动逐行读取,并跟踪一个表何时结束、新表何时开始。然后,当一个新表开始时,您需要引入一些启发式方法来确定它是哪种类型的表。
下面的方法 CsvExtensions.ReadTwoTableCsv() 是一种实现方式:

/// <summary>
/// Helpers for reading a text stream that contains several CSV tables, each introduced by its
/// own header row. A blank line or a change in the number of columns starts a new table.
/// </summary>
public static class CsvExtensions
{
    /// <summary>
    /// Reads a stream holding two tables and returns the rows of each table in its own list.
    /// </summary>
    /// <param name="reader">Source text; it is consumed by the CSV reader.</param>
    /// <param name="map1">Class map describing the first record type.</param>
    /// <param name="list1">Receives the records read with <paramref name="map1"/>.</param>
    /// <param name="map2">Class map describing the second record type.</param>
    /// <param name="list2">Receives the records read with <paramref name="map2"/>.</param>
    public static void ReadTwoTableCsv<TRecord1, TRecord2>(TextReader reader,
                                                           ClassMap<TRecord1> map1, out List<TRecord1> list1,
                                                           ClassMap<TRecord2> map2, out List<TRecord2> list2)
    {
        // out parameters cannot be captured by lambdas, so rows are collected in locals first.
        var firstTable = new List<TRecord1>();
        var secondTable = new List<TRecord2>();

        ReadMultiTableCsv(
            reader,
            (map1, HeaderMatchesFirstMember, (map, csv) => firstTable.Add(csv.GetRecord<TRecord1>())),
            (map2, HeaderMatchesFirstMember, (map, csv) => secondTable.Add(csv.GetRecord<TRecord2>())));

        list1 = firstTable;
        list2 = secondTable;
    }

    /// <summary>
    /// Returns true when the first explicitly set name of a non-ignored member of
    /// <paramref name="map"/> equals (ignoring case) any field of the reader's current row.
    /// </summary>
    static bool HeaderMatchesFirstMember(ClassMap map, CsvReader reader)
    {
        var namedMembers = map.MemberMaps.Where(member => !member.Data.Ignore && member.Data.IsNameSet);
        var firstName = namedMembers.SelectMany(member => member.Data.Names).FirstOrDefault();

        int fieldCount = reader.Parser.Count;
        for (int i = 0; i < fieldCount; i++)
        {
            if (string.Equals(firstName, reader.Parser[i], StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    // Parsing state of the row-by-row reader below.
    enum ReadState
    {
        Initial,
        Header,
        Data,
        UnknownData,
    }

    /// <summary>
    /// Reads a stream holding any number of tables. For each candidate header row the
    /// <c>isMatch</c> delegates are consulted; when one matches, its class map is registered,
    /// the row is taken as the header, and following rows are passed to its <c>readRecord</c>
    /// delegate until the table ends. Rows of tables that match no map are skipped.
    /// </summary>
    /// <param name="reader">Source text; it is consumed by the CSV reader.</param>
    /// <param name="maps">Class map, header matcher and row handler for each known table type.</param>
    /// <exception cref="InvalidOperationException">
    /// More than one map matches a header row, or the reader reaches an unexpected state.
    /// </exception>
    public static void ReadMultiTableCsv(TextReader reader,
                                         params (ClassMap map, Func<ClassMap, CsvReader, bool> isMatch, Action<ClassMap, CsvReader> readRecord) [] maps)
    {
        var config = new CsvConfiguration(CultureInfo.InvariantCulture)
        {
            // These options are required to make ReadMultiTableCsv() work correctly:
            HasHeaderRecord = true,   // Headers are required, and are used to determine the table type,
            IgnoreBlankLines = false, // A blank line is used to delimit CSV sections, so we can't ignore it.
            // Other options as required by your application:
            HeaderValidated = null,
            MissingFieldFound = null,
            AllowComments = true,
            Comment = ';',
            Delimiter = ",",
            TrimOptions = TrimOptions.Trim,
            PrepareHeaderForMatch = header => header.Header.ToLowerInvariant(),
        };

        using var csv = new CsvReader(reader, config);

        int activeMap = -1;
        ReadState state = ReadState.Initial;
        int previousCount = -1;

        while (csv.Read())
        {
            int fieldCount = csv.Parser.Count;

            // A blank line or a change in the number of columns ends the current table.
            if (fieldCount < 1 || fieldCount != previousCount)
            {
                if (activeMap != -1)
                {
                    csv.Context.UnregisterClassMap();
                }

                activeMap = -1;
                state = ReadState.Initial;
            }

            previousCount = fieldCount;
            if (fieldCount < 1)
            {
                continue;
            }

            switch (state)
            {
                case ReadState.UnknownData:
                    // Row of a table that no map recognised: skip it.
                    break;

                case ReadState.Initial:
                    // Index of the single map whose matcher accepts this row, or -1 if none does.
                    int matched = maps
                        .Select((entry, index) => entry.isMatch(entry.map, csv) ? index : -1)
                        .Where(index => index >= 0)
                        .SingleOrDefault(-1);

                    if (matched >= 0)
                    {
                        csv.Context.RegisterClassMap(maps[matched].map);
                        csv.ReadHeader();
                        activeMap = matched;
                        state = ReadState.Data;
                    }
                    else
                    {
                        activeMap = -1;
                        state = ReadState.UnknownData;
                    }
                    break;

                case ReadState.Data:
                    maps[activeMap].readRecord(maps[activeMap].map, csv);
                    break;

                default:
                    throw new InvalidOperationException("Unexpected state");
            }
        }
    }
}

然后,如果您的两个数据模型看起来像:

/// <summary>
/// One row of the first table, bound by header name: the column headed "Metals:" goes to
/// <see cref="Type"/> and the column headed "E10" goes to <see cref="Value"/>.
/// </summary>
public class EpiDataNames
{
    /// <summary>Value of the column whose header is "Metals:".</summary>
    [Name("Metals:")]
    public string Type { get; set; }
    /// <summary>Value of the column whose header is "E10", kept as text.</summary>
    [Name("E10")]
    public string Value { get; set; }
}

/// <summary>
/// Class map for <see cref="EpiDataNames"/> built by auto-mapping the type with a given configuration.
/// </summary>
class EpiDataNamesMap : ClassMap<EpiDataNames>
{
    /// <summary>Creates the map using an invariant-culture configuration.</summary>
    public EpiDataNamesMap()
        : this(new CsvConfiguration(CultureInfo.InvariantCulture))
    {
    }

    /// <summary>Creates the map by auto-mapping with <paramref name="config"/>.</summary>
    public EpiDataNamesMap(CsvConfiguration config)
    {
        AutoMap(config);
    }
}

/// <summary>
/// One row of the second table. Property-to-header binding is defined in Model2ClassMap;
/// several property names differ from the CSV headers (for example SUA, EpiL, EpiLayer2).
/// </summary>
public class Model2
{
    // Auto generated by https://toolslick.com/generation/code/class-from-csv
    public string GLot { get; set; }
    public string Id { get; set; }
    public int Slot { get; set; }
    public string Scribe { get; set; }
    public double Diameter { get; set; }
    public int MPD { get; set; }
    public double SResistivity { get; set; }
    public double SThickness { get; set; }
    public double TTV { get; set; }
    public double LTV { get; set; }
    public double Warp { get; set; }
    public double Bow { get; set; }
    public double SUA { get; set; }
    public double Ep { get; set; }
    // Declared as text although the sample value ("1.0E18") is numeric in exponent notation.
    public string EpiL { get; set; }
    public double EpiLayer { get; set; }
    public double EpiLayer2 { get; set; }
    public double EThick { get; set; }
    public double E2thick { get; set; }
    public long E2Dope { get; set; }
    public double E2DopeT { get; set; }
    public long E2DopeMax { get; set; }
    public long E2DopeMin { get; set; }
}

/// <summary>
/// Explicit header-name mapping for <see cref="Model2"/>: every property is bound to the
/// CSV column with the given header text.
/// </summary>
public class Model2ClassMap : ClassMap<Model2>
{
    // Mapping originally generated by https://toolslick.com/generation/code/class-from-csv
    public Model2ClassMap()
    {
        // Identification columns.
        Map(row => row.GLot).Name("GLot");
        Map(row => row.Id).Name("id");
        Map(row => row.Slot).Name("Slot");
        Map(row => row.Scribe).Name("Scribe");

        // Columns with a plain one-to-one header name.
        Map(row => row.Diameter).Name("Diameter");
        Map(row => row.MPD).Name("MPD");
        Map(row => row.SResistivity).Name("SResistivity");
        Map(row => row.SThickness).Name("SThickness");
        Map(row => row.TTV).Name("TTV");
        Map(row => row.LTV).Name("LTV");
        Map(row => row.Warp).Name("Warp");
        Map(row => row.Bow).Name("Bow");

        // Headers containing underscores map to properties without them.
        Map(row => row.SUA).Name("S_U_A");
        Map(row => row.Ep).Name("Ep");
        Map(row => row.EpiL).Name("Epi_L");
        Map(row => row.EpiLayer).Name("Epi_Layer");
        Map(row => row.EpiLayer2).Name("Epi_Layer_2");

        // Remaining columns.
        Map(row => row.EThick).Name("EThick");
        Map(row => row.E2thick).Name("E2thick");
        Map(row => row.E2Dope).Name("E2Dope");
        Map(row => row.E2DopeT).Name("E2DopeT");
        Map(row => row.E2DopeMax).Name("E2DopeMax");
        Map(row => row.E2DopeMin).Name("E2DopeMin");
    }
}

您将能够将 CSV 文件读入一个 List<EpiDataNames> 和一个 List<Model2>,如下所示:

// Open the file as UTF-8 text and read both tables in one pass:
// list1 receives the EpiDataNames rows, list2 receives the Model2 rows.
using var textReader = new StreamReader(fileName, Encoding.UTF8);
var firstTableMap = new EpiDataNamesMap();
var secondTableMap = new Model2ClassMap();
CsvExtensions.ReadTwoTableCsv(textReader, firstTableMap, out var list1, secondTableMap, out var list2);

注意事项:

  • 我用来根据当前表头确定正确模型(EpiDataNames 还是 Model2)的算法 HeaderMatchesFirstMember(ClassMap map, CsvReader reader) 相当粗糙。我检查模型中第一个已映射成员的列名——"Metals:" 和 "GLot"——是否出现在当前的表头列表中。这之所以可行,是因为这两个名字不同。如果名字相同,比如 "Id" 和 "Id",就需要使用更聪明的算法。
  • 我使用https://toolslick.com/generation/code/class-from-csv自动生成了第二个数据模型。

在线演示(fiddle)见 here

cqoc49vn

cqoc49vn3#

尝试以下操作:

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;

namespace ConsoleApplication52
{
    /// <summary>
    /// Console entry point: opens the CSV file once and lets the two parsers consume it in
    /// sequence - first the two-column block, then the parameter table that follows it.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.csv";

        /// <summary>Parses both blocks of <see cref="FILENAME"/>.</summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // The reader holds an open file handle, so dispose it deterministically.
            using (StreamReader reader = new StreamReader(FILENAME))
            {
                // Order matters: both constructors read from the same reader, and the second
                // one continues where the first one stopped.
                EpiDataNames dataNames = new EpiDataNames(reader);
                EpiDataParameters parameters = new EpiDataParameters(reader);
            }
        }
    }
    /// <summary>
    /// A (Type, Value) pair from the first block of the file. The reader-taking constructor
    /// parses the whole block and stores one instance per data row in <see cref="names"/>.
    /// </summary>
    public class EpiDataNames
    {
        /// <summary>Rows parsed by the reader-taking constructor; null on instances created otherwise.</summary>
        public List<EpiDataNames> names { get; set; }

        // Public so that callers can actually read the parsed values
        // (members without a modifier are private in C#).
        /// <summary>First field of the row, trimmed.</summary>
        public string Type { get; set; }
        /// <summary>Second field of the row, trimmed and kept as text.</summary>
        public string Value { get; set; }

        /// <summary>Creates an empty row.</summary>
        public EpiDataNames() { }

        /// <summary>
        /// Reads lines from <paramref name="reader"/> until a blank line or the end of the input.
        /// The first non-blank line is treated as the header and skipped; every following line is
        /// split on ',' and added to <see cref="names"/>. The terminating blank line is consumed,
        /// so the reader is left positioned at the start of the next block.
        /// </summary>
        /// <param name="reader">Open reader positioned at the first line of the block.</param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="IndexOutOfRangeException">A data line has fewer than two fields.</exception>
        public EpiDataNames(StreamReader reader)
        {
            if (reader == null) throw new ArgumentNullException("reader");

            names = new List<EpiDataNames>();
            int row = 0;
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                line = line.Trim();
                if (line.Length == 0) break;   // blank line ends the block

                row++;
                if (row == 1) continue;        // row 1 is the header line

                string[] splitLine = line.Split(',');
                EpiDataNames epi = new EpiDataNames();
                epi.Type = splitLine[0].Trim();
                epi.Value = splitLine[1].Trim();
                names.Add(epi);
            }
        }
    }
    /// <summary>
    /// One row of the parameter table (23 columns, GLot .. E2DopeMin). The reader-taking
    /// constructor parses the whole table and stores one instance per data row in
    /// <see cref="parameters"/>.
    /// </summary>
    public class EpiDataParameters
    {
        /// <summary>Rows parsed by the reader-taking constructor; null on instances created otherwise.</summary>
        public List<EpiDataParameters> parameters { get; set; }

        // Public so that callers can actually read the parsed values
        // (members without a modifier are private in C#). Listed in column order.
        public string GLot { get; set; }
        public string id { get; set; }
        public int Slot { get; set; }
        public string Scribe { get; set; }
        public decimal Diameter { get; set; }
        public decimal MPD { get; set; }
        public decimal SResistivity { get; set; }
        public decimal SThickness { get; set; }
        public decimal TTV { get; set; }
        public decimal LTV { get; set; }
        public decimal Warp { get; set; }
        public decimal Bow { get; set; }
        public decimal S_U_A { get; set; }
        public decimal Ep { get; set; }
        // double because the sample value uses exponent notation ("1.0E18").
        public double Epi_L { get; set; }
        public decimal Epi_Layer { get; set; }
        public decimal Epi_Layer_2 { get; set; }
        public decimal EThick { get; set; }
        public decimal E2thick { get; set; }
        public decimal E2Dope { get; set; }
        public decimal E2DopeT { get; set; }
        public decimal E2DopeMax { get; set; }
        public decimal E2DopeMin { get; set; }

        /// <summary>Creates an empty row.</summary>
        public EpiDataParameters() { }

        /// <summary>
        /// Reads lines from <paramref name="reader"/> until a blank line or the end of the input.
        /// The first non-blank line is treated as the header and skipped; every following line is
        /// split on ',' and its 23 fields are parsed, in column order, with the invariant culture.
        /// </summary>
        /// <param name="reader">Open reader positioned at the table's header line.</param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="IndexOutOfRangeException">A data line has fewer than 23 fields.</exception>
        /// <exception cref="FormatException">A numeric field cannot be parsed.</exception>
        public EpiDataParameters(StreamReader reader)
        {
            if (reader == null) throw new ArgumentNullException("reader");

            // The file uses '.' as decimal separator regardless of the machine's regional settings.
            System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;

            parameters = new List<EpiDataParameters>();
            int row = 0;
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                line = line.Trim();
                if (line.Length == 0) break;   // blank line ends the table

                row++;
                if (row == 1) continue;        // row 1 is the header line

                string[] splitLine = line.Split(',');
                EpiDataParameters epi = new EpiDataParameters();
                epi.GLot = splitLine[0].Trim();
                epi.id = splitLine[1].Trim();
                epi.Slot = int.Parse(splitLine[2], culture);
                epi.Scribe = splitLine[3].Trim();
                epi.Diameter = decimal.Parse(splitLine[4], culture);
                epi.MPD = decimal.Parse(splitLine[5], culture);
                epi.SResistivity = decimal.Parse(splitLine[6], culture);
                epi.SThickness = decimal.Parse(splitLine[7], culture);
                epi.TTV = decimal.Parse(splitLine[8], culture);
                epi.LTV = decimal.Parse(splitLine[9], culture);
                epi.Warp = decimal.Parse(splitLine[10], culture);
                epi.Bow = decimal.Parse(splitLine[11], culture);
                epi.S_U_A = decimal.Parse(splitLine[12], culture);
                epi.Ep = decimal.Parse(splitLine[13], culture);
                epi.Epi_L = double.Parse(splitLine[14], culture);
                epi.Epi_Layer = decimal.Parse(splitLine[15], culture);
                epi.Epi_Layer_2 = decimal.Parse(splitLine[16], culture);
                epi.EThick = decimal.Parse(splitLine[17], culture);
                // Column 18 is E2thick; the remaining columns follow it at 19..22.
                epi.E2thick = decimal.Parse(splitLine[18], culture);
                epi.E2Dope = decimal.Parse(splitLine[19], culture);
                epi.E2DopeT = decimal.Parse(splitLine[20], culture);
                epi.E2DopeMax = decimal.Parse(splitLine[21], culture);
                epi.E2DopeMin = decimal.Parse(splitLine[22], culture);

                parameters.Add(epi);
            }
        }
    }
   

}

相关问题