python-3.x 从文本文件中删除包含具有相同十六进制值的字符串的行

zqdjd7g9  于 2022-11-26  发布在  Python
关注(0)|答案(3)|浏览(111)

我有一个文件in1.txt

info="0x0000b573" data="0x7" id="sp. PCU(Si)"
info="0x0000b573" data="0x00000007" id="HI all. SHa"
info="0x00010AC3" data="0x00000003" id="abc_16. PS"
info="0x00010ac3" data="0x00000045" id="hB2_RC/BS (Spr)"
info="0x205" data="0x00000010" id="cgc_15. PK"
info="0x205" data="0x10" id="cgsd_GH/BS (Scd)"

预期输出:out.txt

info="0x00010AC3" data="0x00000003" id="abc_16. PS"
info="0x00010ac3" data="0x00000045" id="hB2_RC/BS (Spr)"

我只需要将具有相同info值和不同data值的行写入out.txt。
但当前代码删除了所有包含字符串 data 的行。

# Copy lines from in.txt to out.txt, stripping surrounding whitespace and
# terminating each written line with a single newline.
with open("in.txt", "r") as fin,open("out.txt", "w") as fout:
    for line in fin:
        # Only lines that do NOT contain the substring 'data' are written, so
        # every line carrying a data="..." field is dropped from the output.
        if 'data' not in line:
            fout.write(line.strip()+'\n')

我需要的是:如果第1行和第2行具有相同的 info="0x0000b573",并且 data 分别是 "0x7" 和 "0x00000007"(数值相同),则移除这些行。

mpgws1up

mpgws1up1#

您可以使用regex

import re
from collections import defaultdict

s = '''info="0x0000b573" data="0x7" id="sp. PCU(Si)"
info="0x0000b573" data="0x00000007" id="HI all. SHa"
info="0x00010AC3" data="0x00000003" id="abc_16. PS"
info="0x00010ac3" data="0x00000045" id="hB2_RC/BS (Spr)"
info="0x205" data="0x00000010" id="cgc_15. PK"
info="0x205" data="0x10" id="cgsd_GH/BS (Scd)"'''

# Captures the hexadecimal info and data fields of one record line.
line_pattern = re.compile(r'info="([^"]+)" data="([^"]+)" id="[^"]+"')

lines = s.split('\n')

# Parse line by line so that each stored index is the real line number, even
# when some lines do not match the pattern. Both fields are converted with
# int(x, 16) so that "0x7" and "0x00000007" (or "0x00010AC3" and "0x00010ac3")
# compare equal.
parsed_data = []
for index, line in enumerate(lines):
    match = line_pattern.search(line)
    if match is None:
        continue
    info_value, data_value = (int(field, 16) for field in match.groups())
    parsed_data.append([info_value, data_value, index])
parsed_data.sort()

# Collect the distinct numeric data values seen for each numeric info value.
# Grouping (instead of pairing neighbours) works for any number of lines per
# info value, including one or three and more.
data_by_info = defaultdict(set)
for info_value, data_value, _ in parsed_data:
    data_by_info[info_value].add(data_value)

# Keep the lines whose info value occurs with more than one distinct data value.
row_numbers = sorted(
    index
    for info_value, _, index in parsed_data
    if len(data_by_info[info_value]) > 1
)

kept_rows = set(row_numbers)
final_output = [line for index, line in enumerate(lines) if index in kept_rows]

final_out_text = '\n'.join(final_output)
print(final_out_text)

# info="0x00010AC3" data="0x00000003" id="abc_16. PS"
# info="0x00010ac3" data="0x00000045" id="hB2_RC/BS (Spr)"
a11xaf1n

a11xaf1n2#

我想你也可以试试

#!/usr/bin/python3

# Reads in.dat, counts how often each (info, data) combination occurs, and
# writes every combination that occurs exactly once to out.dat as
# "<info>=<data>", where <info> is the lower-cased info string and <data> is
# the data value converted from hexadecimal to decimal.

from collections import Counter, defaultdict

# records[info][data] -> number of input lines with that combination.
records = defaultdict(Counter)

with open("in.dat", "r") as fin:
    for line in fin:
        # Skip blank lines (e.g. a trailing empty line); they have no second
        # space-separated field and would otherwise raise IndexError.
        if not line.strip():
            continue
        items = line.split(' ')
        info = items[0].split('=')
        data = items[1].split('=')
        # Lower-case the info string so "0x00010AC3" and "0x00010ac3" share a key.
        key = info[1].strip('"').lower()
        # Parse as hex so "0x7" and "0x00000007" count as the same value.
        value = str(int(data[1].strip('"'), 16))
        records[key][value] += 1

# Keep, per info key, only the data values that were seen exactly once.
out = {}
for key, counts in records.items():
    unique_values = [value for value, count in counts.items() if count == 1]
    if unique_values:
        out[key] = unique_values

with open("out.dat", "w") as fout:
    for key, values in out.items():
        for value in values:
            fout.write(f"{key}={value}\n")
3lxsmp7m

3lxsmp7m3#

类似这样的方法可以奏效:

# info values that have already been seen; a set gives constant-time lookups.
found_info_values = set()

# Write each line of in.txt to out.txt only the first time its info value
# (the first double-quoted field on the line) is encountered.
with open("in.txt", "r") as fin, open("out.txt", "w") as fout:
    for line in fin:
        fields = line.split('"')
        if len(fields) < 2:
            # No quoted value on this line (e.g. a blank line): skip it
            # instead of failing with IndexError.
            continue
        info = fields[1]
        if info not in found_info_values:
            fout.write(line.strip() + '\n')
        # Store the whole string. Using `+=` with a list would extend it with
        # the string's individual characters, so the membership test above
        # would never match a multi-character info value.
        found_info_values.add(info)

相关问题