使用Python从.txt文件中提取数据并写入csv文件

我想把.txt文件中的数据提取到csv文件中。我的数据与交通工程有关，下面附上了数据结构的屏幕截图。我需要从txt文件中获取排队长度（Queue Length）以及其他一些数据项。
下面是数据源的屏幕截图：[在此处输入图像说明](https://i.stack.imgur.com/3q3Fd.png)。我目前的输出如下：[在此处输入图像说明](https://i.stack.imgur.com/qYJnF.png)

import csv
# Extract per-movement queue data from tab-separated "Queues" report files
# and write one CSV row per (time of day, scenario, intersection, movement).

# Values substituted into the report file name
# "Volume_<TOD>_<Year>_<BEBR>_Report.txt".
TOD_List = ['AM','PM']
# Only 2025 is processed. Other years listed in the original script:
# ['2025','2026','2027','2028','2029','2030','2031','2032','2033','2034',
#  '2035','2036','2037','2038','2039','2040','2041','2042','2043','2044','2045']
Years = 2025
BEBR_List = ['Design']
Folder = r"I:\TPA\PRJ\000011957\1.0_Task_Work_Orders_(TWOs)\1.5_TWO_5_081419\I75atSR52_Study\Traffic\B - Operational Analysis\test"
Output = r"I:\TPA\PRJ\000011957\1.0_Task_Work_Orders_(TWOs)\1.5_TWO_5_081419\I75atSR52_Study\Traffic\B - Operational Analysis\test\test.csv"

# Parser state for the intersection section currently being read.
Count = 0          # non-empty rows seen since the last 'Queues' row
Intersection = ''  # text before ':' on the row that follows 'Queues'
Num_Move = 0       # field count of the 'Lane Group' header row

Col = []  # movement names from the 'Lane Group' header row
TM = []   # 'Queue Length 95th (ft)' cells -> CSV column 'QL95'
QL = []   # 'Queue Length 50th (ft)' cells -> CSV column 'QL50'
VC = []   # 'Turn Bay Length (ft)' cells   -> CSV column 'Stor'

# The output file has to stay open for the whole run: every writerow() call
# below happens inside this `with` block.
with open(Output,'w',newline='') as OutCSV:
    fieldnames = ['TOD','Year','BEBR','Int','Mvmt','QL95','Stor','QL50']
    writer = csv.DictWriter(OutCSV, fieldnames=fieldnames)
    writer.writeheader()

    for TOD in TOD_List:
        Year = Years
        for BEBR in BEBR_List:
            Input_Report = "{}\\Volume_{}_{}_{}_Report.txt".format(Folder,TOD,Year,BEBR)

            with open(Input_Report) as infile:
                reader = csv.reader(infile, delimiter='\t')

                for row in reader:
                    if not row:
                        continue

                    # Labels are padded with spaces in the report (and a page
                    # may begin with a form feed), so compare the stripped text.
                    label = row[0].strip()

                    if label == 'Queues':
                        # A new section starts: forget anything left over from
                        # a section that never reached its summary row.
                        Count = 0
                        Col, TM, QL, VC = [], [], [], []
                    Count += 1

                    # The second non-empty row of a section is "<id>: <name>".
                    if Count == 2:
                        Intersection = row[0].split(':')[0]

                    # Data cells start at index 2; slicing (instead of
                    # indexing) tolerates rows shorter than the header.
                    if label == 'Lane Group':
                        Num_Move = len(row)
                        Col = row[2:]
                    elif label == 'Queue Length 95th (ft)':
                        TM = row[2:Num_Move]
                    elif label == 'Queue Length 50th (ft)':
                        QL = row[2:Num_Move]
                    elif label == 'Turn Bay Length (ft)':
                        VC = row[2:Num_Move]
                    elif label == 'Intersection Summary':
                        # All rows of this section have been collected:
                        # emit one CSV row per movement, then reset.
                        for i, movement in enumerate(Col):
                            writer.writerow({
                                'TOD': TOD,
                                'Year': Year,
                                'BEBR': BEBR,
                                'Int': Intersection,
                                'Mvmt': movement,
                                'QL95': TM[i] if i < len(TM) else '',
                                'Stor': VC[i] if i < len(VC) else '',
                                'QL50': QL[i] if i < len(QL) else '',
                            })
                        Col, TM, QL, VC = [], [], [], []

print("Complete")

这些列并不是按固定位置（定宽）排列的，而是用制表符分隔的，这让处理起来更容易一些。下面的代码会读取每个文件，并为每个文件生成一个dataframe。也许这可以给你一个起点。

import os, sys
import pandas as pd

# Report files to parse: one per time-of-day tag, same year and scenario.
names = [
    'Volume_{}_2025_Design_Report.txt'.format(tod)
    for tod in ('AM', 'PM')
]

def process(file):
    """Parse a tab-separated, multi-page report into a DataFrame.

    Each page is read in four steps: the intersection id (text before the
    first ':' on the first line that is not 'Queues'), the column names
    (fields from index 2 onward of the first line starting with
    'Lane Group'), the data lines up to the next blank line, and finally a
    scan for a line beginning with a form feed, which starts the next page.

    Args:
        file: Path of the report file to read.

    Returns:
        A pandas DataFrame with one row per data line. Every row has the
        keys 'intersection' and 'category' plus one key per column name
        that had a matching cell on that line.
    """
    records = []
    current_id = ''
    with open(file) as handle:
        more_pages = True
        while more_pages:
            # Step 1: the intersection id.
            for raw in handle:
                text = raw.strip()
                if text == 'Queues':
                    continue
                current_id = text.split(':')[0]
                break

            # Step 2: the column headers (first two fields are skipped).
            headers = []
            for raw in handle:
                text = raw.strip()
                if text.startswith('Lane Group'):
                    headers = text.split('\t')[2:]
                    break

            # Step 3: one record per data line, until a blank line.
            for raw in handle:
                text = raw.strip()
                if text == '':
                    break
                cells = text.split('\t')
                record = {
                    'intersection': current_id,
                    'category': cells[0].rstrip(),
                }
                # Leading/trailing '~', 'm' and '#' characters are removed
                # from each cell value.
                record.update(
                    (head, cell.strip('~m#'))
                    for head, cell in zip(headers, cells[2:])
                )
                records.append(record)

            # Step 4: skip ahead to the line that opens with a form feed;
            # reaching end of file instead means there are no more pages.
            more_pages = any(raw[0] == '\x0C' for raw in handle)

    return pd.DataFrame(records)

# Parse each report in `names` and print the full table followed by only the
# 'Queue Length 50th (ft)' rows.
# NOTE(review): BASE is not defined in this snippet; it is presumably the
# directory prefix of the report files (ending in a path separator) and must
# be defined before this loop runs.
for name in names:
    # Use the loop variable: indexing names[0] parsed the first file each time.
    df = process(BASE + name)
    print(df)
    print(df[df['category'] == 'Queue Length 50th (ft)'])

输出（删节）：

intersection                 category   EBL   EBT  ...   NBR   SBL  SBT   SBR
0             1    Lane Group Flow (vph)        1426  ...   137   NaN  NaN   NaN
1             1                v/c Ratio        0.85  ...  0.49   NaN  NaN   NaN
2             1            Control Delay        26.5  ...  13.7   NaN  NaN   NaN
3             1              Queue Delay         0.0  ...   0.0   NaN  NaN   NaN
4             1              Total Delay        26.5  ...  13.7   NaN  NaN   NaN
5             1   Queue Length 50th (ft)         377  ...     0   NaN  NaN   NaN
6             1   Queue Length 95th (ft)         546  ...    56   NaN  NaN   NaN
7             1  Internal Link Dist (ft)         836  ...   NaN   NaN  NaN   NaN
8             1     Turn Bay Length (ft)              ...   NaN   NaN  NaN   NaN
9             1      Base Capacity (vph)        1687  ...   539   NaN  NaN   NaN
10            1   Starvation Cap Reductn           0  ...     0   NaN  NaN   NaN
11            1    Spillback Cap Reductn           0  ...     0   NaN  NaN   NaN
12            1      Storage Cap Reductn           0  ...     0   NaN  NaN   NaN
13            1        Reduced v/c Ratio        0.85  ...  0.25   NaN  NaN   NaN
14            3    Lane Group Flow (vph)     0  1505  ...         179        195
15            3                v/c Ratio        0.44  ...        0.44       0.54
16            3            Control Delay         5.9  ...        44.1       12.2
17            3              Queue Delay         0.0  ...         0.0        0.0
18            3              Total Delay         5.9  ...        44.1       12.2
19            3   Queue Length 50th (ft)         120  ...          55          0
20            3   Queue Length 95th (ft)         144  ...          89         64
21            3  Internal Link Dist (ft)        1076  ...        1261  NaN   NaN
22            3     Turn Bay Length (ft)              ...                    650
23            3      Base Capacity (vph)        3455  ...         411        359
24            3   Starvation Cap Reductn           0  ...           0          0
25            3    Spillback Cap Reductn           0  ...           0          0
26            3      Storage Cap Reductn           0  ...           0          0
27            3        Reduced v/c Ratio        0.44  ...        0.44       0.54
28            4    Lane Group Flow (vph)   174   742  ...   495     0    0     0
29            4                v/c Ratio  0.84  0.27  ...  0.70   NaN  NaN   NaN
30            4            Control Delay  76.3   9.3  ...  38.6   NaN  NaN   NaN
31            4              Queue Delay   0.0   0.0  ...   0.0   NaN  NaN   NaN
32            4              Total Delay  76.3   9.3  ...  38.6   NaN  NaN   NaN
33            4   Queue Length 50th (ft)   115    71  ...   158   NaN  NaN   NaN
34            4   Queue Length 95th (ft)   234    90  ...   222   NaN  NaN   NaN
35            4  Internal Link Dist (ft)         638  ...              610   NaN
36            4     Turn Bay Length (ft)   725        ...   725   NaN  NaN   NaN
37            4      Base Capacity (vph)   208  2687  ...   735   NaN  NaN   NaN
38            4   Starvation Cap Reductn     0     0  ...     0   NaN  NaN   NaN
39            4    Spillback Cap Reductn     0     0  ...     0   NaN  NaN   NaN
40            4      Storage Cap Reductn     0     0  ...     0   NaN  NaN   NaN
41            4        Reduced v/c Ratio  0.84  0.28  ...  0.67   NaN  NaN   NaN

[42 rows x 14 columns]
   intersection                category  EBL  EBT EBR  ... NBT  NBR  SBL  SBT  SBR
5             1  Queue Length 50th (ft)       377      ...        0  NaN  NaN  NaN
19            3  Queue Length 50th (ft)       120      ...            55         0
33            4  Queue Length 50th (ft)  115   71      ...      158  NaN  NaN  NaN

[3 rows x 14 columns]

   intersection                 category   EBL   EBT  ...   NBR   SBL  SBT   SBR
0             1    Lane Group Flow (vph)        1426  ...   137   NaN  NaN   NaN
1             1                v/c Ratio        0.85  ...  0.49   NaN  NaN   NaN
2             1            Control Delay        26.5  ...  13.7   NaN  NaN   NaN
3             1              Queue Delay         0.0  ...   0.0   NaN  NaN   NaN
4             1              Total Delay        26.5  ...  13.7   NaN  NaN   NaN
5             1   Queue Length 50th (ft)         377  ...     0   NaN  NaN   NaN
6             1   Queue Length 95th (ft)         546  ...    56   NaN  NaN   NaN
7             1  Internal Link Dist (ft)         836  ...   NaN   NaN  NaN   NaN
8             1     Turn Bay Length (ft)              ...   NaN   NaN  NaN   NaN
9             1      Base Capacity (vph)        1687  ...   539   NaN  NaN   NaN
10            1   Starvation Cap Reductn           0  ...     0   NaN  NaN   NaN
11            1    Spillback Cap Reductn           0  ...     0   NaN  NaN   NaN
12            1      Storage Cap Reductn           0  ...     0   NaN  NaN   NaN
13            1        Reduced v/c Ratio        0.85  ...  0.25   NaN  NaN   NaN
14            3    Lane Group Flow (vph)     0  1505  ...         179        195
15            3                v/c Ratio        0.44  ...        0.44       0.54
16            3            Control Delay         5.9  ...        44.1       12.2
17            3              Queue Delay         0.0  ...         0.0        0.0
18            3              Total Delay         5.9  ...        44.1       12.2
19            3   Queue Length 50th (ft)         120  ...          55          0
20            3   Queue Length 95th (ft)         144  ...          89         64
21            3  Internal Link Dist (ft)        1076  ...        1261  NaN   NaN
22            3     Turn Bay Length (ft)              ...                    650
23            3      Base Capacity (vph)        3455  ...         411        359
24            3   Starvation Cap Reductn           0  ...           0          0
25            3    Spillback Cap Reductn           0  ...           0          0
26            3      Storage Cap Reductn           0  ...           0          0
27            3        Reduced v/c Ratio        0.44  ...        0.44       0.54
28            4    Lane Group Flow (vph)   174   742  ...   495     0    0     0
29            4                v/c Ratio  0.84  0.27  ...  0.70   NaN  NaN   NaN
30            4            Control Delay  76.3   9.3  ...  38.6   NaN  NaN   NaN
31            4              Queue Delay   0.0   0.0  ...   0.0   NaN  NaN   NaN
32            4              Total Delay  76.3   9.3  ...  38.6   NaN  NaN   NaN
33            4   Queue Length 50th (ft)   115    71  ...   158   NaN  NaN   NaN
34            4   Queue Length 95th (ft)   234    90  ...   222   NaN  NaN   NaN
35            4  Internal Link Dist (ft)         638  ...              610   NaN
36            4     Turn Bay Length (ft)   725        ...   725   NaN  NaN   NaN
37            4      Base Capacity (vph)   208  2687  ...   735   NaN  NaN   NaN
38            4   Starvation Cap Reductn     0     0  ...     0   NaN  NaN   NaN
39            4    Spillback Cap Reductn     0     0  ...     0   NaN  NaN   NaN
40            4      Storage Cap Reductn     0     0  ...     0   NaN  NaN   NaN
41            4        Reduced v/c Ratio  0.84  0.28  ...  0.67   NaN  NaN   NaN

[42 rows x 14 columns]
   intersection                category  EBL  EBT EBR  ... NBT  NBR  SBL  SBT  SBR
5             1  Queue Length 50th (ft)       377      ...        0  NaN  NaN  NaN
19            3  Queue Length 50th (ft)       120      ...            55         0
33            4  Queue Length 50th (ft)  115   71      ...      158  NaN  NaN  NaN

[3 rows x 14 columns]

使用Python文件从.txt文件到csv文件的数据提取

1条答案

相关问题

热门标签

最新问答