用 Python 解析数据并重组为所需格式时遇到困难

xvw2m8pv  于 2023-05-27  发布在  Python
关注(0)|答案(1)|浏览(149)

我从服务器获取到的数据如下:

dataProducer00 ==>  Collected:      37402  Answer (MQ):       6234  Text/s:    12467   Text/s[3s]:    12467  lined:       0/75     liveData:   n/a    diff:    n/a     neardiff:       n/a     diffSeq:      n/a
dataProducer01 ==>  Collected:      45697  Answer (MQ):       7617  Text/s:    15232   Text/s[3s]:    15232  lined:       0/85     liveData:   n/a    diff:    n/a     neardiff:       n/a     diffSeq:      n/a
dataProducer02 ==>  Collected:      55936  Answer (MQ):       9326  Text/s:    18645   Text/s[3s]:    18645  lined:       0/121    liveData:   n/a    diff:    n/a     neardiff:       n/a     diffSeq:      n/a
dataCleaner00 ==>  Collected:          0  Answer:          0  Ratio:      0  Text/s:        0   Text/s[2s]:        0  lined:       0/0      liveData:   48042  diff:    0       neardiff:       0       diffSeq:      0
dataCleaner01 ==>  Collected:      65214  Answer:      34567  Ratio:   0.53  Text/s:    17283   Text/s[2s]:    17283  lined:       7/15     liveData:   48042  diff:    0       neardiff:       0       diffSeq:      0\
dataCleaner02 ==>  Collected:       2175  Answer (MQ):        543  Ratio:   0.25  Text/s:      271   Text/s[2s]:      271  lined:       0/15     liveData:   48042  diff:    0       neardiff:       0       diffSeq:      0\
MACH0_CA_CFE_01_A ==>  Collected: 0           breaks:  0      lined:        0/0      obtained:       0/0
MACH0_CA_MEC_AR_01_A ==>  Collected: 8248        breaks:  0      lined:       10/16     obtained:       1/4
MACH0_CA_MEC_AR_02_A ==>  Collected: 648         breaks:  0      lined:        1/16     obtained:       1/3
MACH2_CA_MEC_ITC_01_A ==>  Collected: 0           breaks:  0      lined:        0/0      obtained:       0/0
MACH2_CA_TAP_01_A ==>  Collected: 0           breaks:  0      lined:        0/0      obtained:       0/0
MACH2_CA_TAP_AR_01_A ==>  Collected: 0           breaks:  0      lined:        0/0      obtained:       0/0
MACH3_FI_A ==>  Collected: 0           breaks:  0      lined:        0/0      obtained:       0/0

我想要的输出如下

{
  "Collected": {
    "dataProducer00" : 37402,
..
    "dataCleaner00" : 0,
  ..
  "MACH0_CA_CFE_01_A" : 0,
    "MACH0_CA_MEC_AR_01_A" : 8248,
    "MACH0_CA_MEC_AR_02_A" : 648,
..  },
  "Answer (MQ)": {
    "dataProducer00" : 6234,
    "dataProducer01" : 7617,
    "dataProducer02" : 9326,
    "dataCleaner02" : 543
  },
  "Answer": {
    "dataCleaner00": 0,
    "dataCleaner01": 34567
  },
  "Text/s": {
    "dataProducer00" : 12467,
    "dataProducer01" : 15232,
  ..
    "dataCleaner00" : 0,
    "dataCleaner01" : 17283,
    ..
  },
  "lined": {
    "dataProducer00" : 0,
    "dataProducer01" : 0,
    "dataProducer02" : 0,
..  },
  "lined_Top": {
    "dataProducer00" : 75,
    "dataProducer01" : 85,
    "dataProducer02" : 121,
    "dataCleaner00" : 0,
    "dataCleaner01" : 15,
    "dataCleaner02" : 15,
    "MACH0_CA_CFE_01_A" : 0,
    "MACH0_CA_MEC_AR_01_A" : 16,
    "MACH0_CA_MEC_AR_02_A" : 16,
    "MACH2_CA_MEC_ITC_01_A": 0,
    "MACH2_CA_TAP_01_A": 0,
    "MACH2_CA_TAP_AR_01_A": 0,
    "MACH3_FI_A": 0
  },
  "liveData": {
    "dataProducer00" : "n/a",
..
    "dataCleaner02" : 48042
  },
"breaks": {
  "MACH0_CA_MEC_AR_01_A": 0,
..
  "MACH2_CA_TAP_AR_01_A": 0
},
  "Ratio": {
    "dataCleaner00": 0,
    "dataCleaner01": 0.53,
    "dataCleaner02": 0.25
  },
  "obtained": {
    "MACH0_CA_MEC_AR_01_A": 0,
  "MACH0_CA_MEC_AR_02_A": 1,
  ..},
  "obtained_Top": {
    "MACH0_CA_MEC_AR_01_A": 0,
  "MACH0_CA_MEC_AR_02_A": 4,
  "MACH2_CA_MEC_ITC_01_A": 3,
..  "MACH3_FI_A": 0
  },
  "diff": {
    "dataProducer00": "n/a",
  ..,  "dataCleaner02" : 0
  },
  "neardiff": {
    "dataProducer00": "n/a",
..    "dataCleaner00" : 0,
  },
  "diffSeq": {
    "dataProducer00": "n/a",
..    "dataCleaner02" : 0
  }
}

我在循环上花了很多时间,但没有任何进展。得到的数据最好通过生成器(generator)返回,这样我最后处理起来会更容易。
能否用简单的正则表达式或多次 split 来实现?有没有一种通用的方法可以处理各种复杂的数据?
下面这段代码原本可以部分工作,但当我需要的消息发生变化、格式再次改变之后就不行了。

import re

# Break the raw server dump into one record per line.
lines = data.strip().split('\n')

# Result layout: {source name: {field name: value}}.
output = {}

# "<name> ==> <fields>"; the older "-->" arrow is still accepted.
line_pattern = re.compile(r'^(.*?)\s+(?:==>|-->)\s+(.*)')
# "<key>: <value>". The key may contain spaces, '/', '()' and '[]'
# (e.g. "Answer (MQ)", "Text/s[3s]"); the value is a number, "a/b" or "n/a".
# A stray trailing backslash after the value is deliberately not captured.
field_pattern = re.compile(r'(\S[^:]*?)\s*:\s*([\w./]+)')


def _convert(value):
    """Return *value* as int or float when purely numeric, else unchanged."""
    if re.fullmatch(r'\d+', value):
        return int(value)
    if re.fullmatch(r'\d+\.\d+', value):
        return float(value)
    # Things like "0/75" or "n/a" stay as strings.
    return value


for line in lines:
    match = line_pattern.match(line)
    if not match:
        # Ignore anything that is not a "<name> ==> ..." record.
        continue
    name, fields = match.groups()
    output[name] = {
        key: _convert(value) for key, value in field_pattern.findall(fields)
    }
print(output)
qacovj5a

qacovj5a1#

如果你愿意使用 pandas,可以这样做:

#pip install pandas
import pandas as pd

# One "key: value" field, optionally preceded by the "<name> ==>" line prefix.
# Every fragment is a raw string so that \w, \d, \s and \[ reach the regex
# engine untouched instead of being parsed as (invalid) string escapes.
pattern = (
    r"(?:(\w+)\s+==>\s+)?"
    r"(?P<inner_key>[\w\d./()\[\]\s]+):"
    r"\s+(?P<values>[\dn/a.]+)"
)


def _to_numeric_or_keep(values):
    """Convert *values* to numbers; return them unchanged if any cannot be parsed."""
    # Explicit replacement for the deprecated ``errors="ignore"`` option.
    try:
        return pd.to_numeric(values)
    except (ValueError, TypeError):
        return values


out = (
    # Take the first column of the file, read without a header row.
    # NOTE(review): assumes the log lines contain no commas -- confirm.
    pd.read_csv("file.txt", header=None)[0]
        # One row per "key: value" match; the unnamed group becomes column 0.
        .str.extractall(pattern).assign(
            # Column 0 is only filled for the first field of each line, so
            # forward-fill it to label every field with its source name.
            outer_key=lambda x: x.pop(0).ffill())
        # Drop fields whose value is the literal "n/a".
        .query("values != 'n/a'").reset_index(drop=True)
        # Group by the stripped field name, keeping first-seen order, and
        # build {source name: value} for each field.
        .pipe(lambda x: x.groupby(x["inner_key"].str.strip(), sort=False)
              .apply(lambda g: dict(zip(g["outer_key"],
                    _to_numeric_or_keep(g['values'])))).to_dict())
)

输出:

# Pretty-print the nested result as JSON with four-space indentation.
import json

print(json.dumps(out, indent=4))

{
    "Collected": {
        "dataProducer00": 37402,
        "dataProducer01": 45697,
        "dataProducer02": 55936,
        "dataCleaner00": 0,
        "dataCleaner01": 65214,
        "dataCleaner02": 2175,
        "MACH0_CA_CFE_01_A": 0,
        "MACH0_CA_MEC_AR_01_A": 8248,
        "MACH0_CA_MEC_AR_02_A": 648,
        "MACH2_CA_MEC_ITC_01_A": 0,
        "MACH2_CA_TAP_01_A": 0,
        "MACH2_CA_TAP_AR_01_A": 0,
        "MACH3_FI_A": 0
    },
    "Answer (MQ)": {
        "dataProducer00": 6234,
        "dataProducer01": 7617,
        "dataProducer02": 9326,
        "dataCleaner02": 543
    },
    "Text/s": {
        "dataProducer00": 12467,
        "dataProducer01": 15232,
        "dataProducer02": 18645,
        "dataCleaner00": 0,
        "dataCleaner01": 17283,
        "dataCleaner02": 271
    },
    "Text/s[3s]": {
        "dataProducer00": 12467,
        "dataProducer01": 15232,
        "dataProducer02": 18645
    },
    "lined": {
        "dataProducer00": "0/75",
        "dataProducer01": "0/85",
        "dataProducer02": "0/121",
        "dataCleaner00": "0/0",
        "dataCleaner01": "7/15",
        "dataCleaner02": "0/15",
        "MACH0_CA_CFE_01_A": "0/0",
        "MACH0_CA_MEC_AR_01_A": "10/16",
        "MACH0_CA_MEC_AR_02_A": "1/16",
        "MACH2_CA_MEC_ITC_01_A": "0/0",
        "MACH2_CA_TAP_01_A": "0/0",
        "MACH2_CA_TAP_AR_01_A": "0/0",
        "MACH3_FI_A": "0/0"
    },
    "Answer": {
        "dataCleaner00": 0,
        "dataCleaner01": 34567
    },
    "Ratio": {
        "dataCleaner00": 0.0,
        "dataCleaner01": 0.53,
        "dataCleaner02": 0.25
    },
    "Text/s[2s]": {
        "dataCleaner00": 0,
        "dataCleaner01": 17283,
        "dataCleaner02": 271
    },
    "liveData": {
        "dataCleaner00": 48042,
        "dataCleaner01": 48042,
        "dataCleaner02": 48042
    },
    "diff": {
        "dataCleaner00": 0,
        "dataCleaner01": 0,
        "dataCleaner02": 0
    },
    "neardiff": {
        "dataCleaner00": 0,
        "dataCleaner01": 0,
        "dataCleaner02": 0
    },
    "diffSeq": {
        "dataCleaner00": 0,
        "dataCleaner01": 0,
        "dataCleaner02": 0
    },
    "breaks": {
        "MACH0_CA_CFE_01_A": 0,
        "MACH0_CA_MEC_AR_01_A": 0,
        "MACH0_CA_MEC_AR_02_A": 0,
        "MACH2_CA_MEC_ITC_01_A": 0,
        "MACH2_CA_TAP_01_A": 0,
        "MACH2_CA_TAP_AR_01_A": 0,
        "MACH3_FI_A": 0
    },
    "obtained": {
        "MACH0_CA_CFE_01_A": "0/0",
        "MACH0_CA_MEC_AR_01_A": "1/4",
        "MACH0_CA_MEC_AR_02_A": "1/3",
        "MACH2_CA_MEC_ITC_01_A": "0/0",
        "MACH2_CA_TAP_01_A": "0/0",
        "MACH2_CA_TAP_AR_01_A": "0/0",
        "MACH3_FI_A": "0/0"
    }
}

相关问题