将CSV文件转换为JSON字典

pieyvz9o  于 2023-10-13  发布在  其他
关注(0)|答案(2)|浏览(111)

我试图使用Python将CSV文件转换为JSON字典,但我没有操作JSON文件的经验。
csv示例:

Picture         Class   Region_count    Coordinates
foto_1jpg.jpg   tennis        1         "all_points_x":[154,157,230,275,278,218,160,11...
foto_1jpg.jpg   soccer        2         "all_points_x":[446,557,685,795,826,815,738,62...
foto_1jpg.jpg   basket        3         "all_points_x":[941,1065,1161,1310,1438,1497,1...
foto_2jpg.jpg   soccer        1         "all_points_x":[331,403,518,626,688,734,758,681,...
foto_2jpg.jpg   basket        2         "all_points_x":[972,887,830,802,789,804,857,96...

我用下面的代码转换了csv文件:

import csv 
import json 
 
def csv_to_json(csvFilePath, jsonFilePath):
    """Convert a CSV file into a JSON file holding a list of row objects.

    The first CSV line supplies the keys; every following line becomes one
    dict mapping those keys to the cell texts of that line.

    Args:
        csvFilePath: Path of the UTF-8 encoded CSV file to read.
        jsonFilePath: Path of the JSON file to create or overwrite.
    """
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields that contain line breaks are read back unchanged.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        jsonArray = list(csv.DictReader(csvf))

    # Serialize straight into the file instead of building a string first.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonArray, jsonf, indent=4)
          
# Location of the input CSV and of the JSON file to produce.
csvFilePath = r'/content/test_set.csv'
jsonFilePath = r'train.json'

# Run the conversion with the paths above.
csv_to_json(csvFilePath=csvFilePath, jsonFilePath=jsonFilePath)

这个转换的结果是一个具有以下结构的json列表:

[
    {
        "Picture": "foto_1jpg.jpg",
        "Class": "tennis",
        "Region_count": "1",
        "Coordinates": "\"all_points_x\":[154,157,230,275,278,218,160,112,113,154],\"all_points_y\":[461,461,455,495,576,625,625,563,505,463]"
    },
    {
        "Picture": "foto_1jpg.jpg",
        "Class": "soccer",
        "Region_count": "2",
        "Coordinates": "\"all_points_x\":[446,557,685,795,826,815,738,628,505,422,346,331,354,443],\"all_points_y\":[230,186,212,321,411,538,641,687,684,632,525,426,331,224]"
    },
    {
        "Picture": "foto_2jpg.jpg",
        "Class": "soccer",
        "Region_count": "1",
        "Coordinates": "all_points_x:[331,403,518,626,688,734,758,681,594,484,369,314,282,274,329],\"all_points_y\":[399,340,316,342,380,463,607,736,787,796,745,683,592,503,405]"
    }
]

我的目标是获得一个具有以下结构的json字典:

{"foto_1jpg.jpg121349":
  {"filename":"foto_1jpg.jpg","regions":[
    {"shape_attributes":{"name":"polygon","all_points_x":[154,157,230,275,278,218,160,112,113,154],"all_points_y":[461,461,455,495,576,625,625,563,505,463]},"region_attributes":{"name":"tennis"}},
    {"shape_attributes":{"name":"polygon","all_points_x":[446,557,685,795,826,815,738,628,505,422,346,331,354,443],"all_points_y":[230,186,212,321,411,538,641,687,684,632,525,426,331,224]},"region_attributes":{"name":"soccer"}},
    {"shape_attributes":{"name":"polygon","all_points_x":[941,1065,1161,1310,1438,1497,1509,1471,1382,1279,1124,998,916,874,847,874,938],"all_points_y":[132,44,26,48,144,266,396,514,628,673,687,631,560,479,328,233,135]},"region_attributes":{"name":"basket"}}],"file_attributes":{}},
"foto_2.jpg325912":
 {"filename":"foto_2.jpg","regions":[
   {"shape_attributes":{"name":"polygon","all_points_x":[331,403,518,626,688,734,758,681,594,484,369,314,282,274,329],"all_points_y":[399,340,316,342,380,463,607,736,787,796,745,683,592,503,405]},"region_attributes":{"name":"soccer"}},
   {"shape_attributes":{"name":"polygon","all_points_x":[1186,1233,1273,1282,1267,1231,1178,1154,1135,1131,1142,1182],"all_points_y":[921,921,891,845,806,777,775,789,819,859,895,919]},"region_attributes":{"name":"tennis"}}],"file_attributes":{}}

我尝试用下面的代码将键'Region'插入到json文件中:

import csv
from collections import defaultdict
 
def ctree():
    """Create an empty tree whose missing keys turn into new subtrees.

    The returned ``defaultdict`` uses ``ctree`` itself as its factory, so
    indexing with an unknown key creates and stores a nested tree.
    """
    node = defaultdict(ctree)
    return node
 
def build_leaf(name, leaf):
    """Turn one tree node into a plain dict, recursing into its children.

    Args:
        name: Key under which this node is stored in its parent.
        leaf: Mapping of child keys to child nodes (may be empty).

    Returns:
        dict: ``{"Picture": name}``, plus a ``"Region"`` list holding the
        converted children when the node has any.
    """
    node = {"Picture": name}
    if not leaf:
        # No children: the node is just its own name.
        return node

    children = []
    for child_name, child_leaf in leaf.items():
        children.append(build_leaf(child_name, child_leaf))
    node["Region"] = children
    return node
 
def main():
    """Build a nested tree from '/content/templete.csv' and return it as JSON.

    The first CSV row is treated as a header and skipped. In every other row
    each cell becomes a child of the cell to its left, so rows that share
    leading cells share a branch of the tree. Blank lines are ignored.

    Returns:
        str: JSON text of the list obtained by calling ``build_leaf`` on
        every top-level entry of the tree.
    """
    import json

    tree = ctree()

    # newline='' hands line-ending handling to the csv module, so quoted
    # fields that contain line breaks are parsed correctly.
    with open('/content/templete.csv', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to insert.
                continue
            # Walk down the tree one cell at a time; missing levels are
            # created on access by the defaultdict-based tree.
            leaf = tree
            for cell in row:
                leaf = leaf[cell]

    return json.dumps([build_leaf(name, leaf) for name, leaf in tree.items()])
main()  # runs the conversion; the JSON string returned by main() is not stored or printed here

结果是一个巨大的失败:

[
{"Picture": "foto_1jpg.jpg", "Region": [
  {"Picture": "tennis", "Region": [{"Picture": "1", "Region": [{"Picture": "\"all_points_x\":[154,157,230,275,278,218,160,112,113,154],\"all_points_y\":[461,461,455,495,576,625,625,563,505,463]"}]}]},
  {"Picture": "soccer", "Region": [{"Picture": "2", "Region": [{"Picture": "\"all_points_x\":[446,557,685,795,826,815,738,628,505,422,346,331,354,443],\"all_points_y\":[230,186,212,321,411,538,641,687,684,632,525,426,331,224]"}]}]},
  {"Picture": "basket", "Region": [{"Picture": "3", "Region": [{"Picture": "\"all_points_x\":[941,1065,1161,1310,1438,1497,1509,1471,1382,1279,1124,998,916,874,847,874,938],\"all_points_y\":[132,44,26,48,144,266,396,514,628,673,687,631,560,479,328,233,135]"}]}]}]}

我如何才能达到我的目标结构?
谢谢你的关注

wa7juj8i

wa7juj8i1#

可以尝试用下面的代码转换为VIA格式的JSON:

import csv, json

# Build one record per CSV row and collect them in a list.
li = []
# newline='' hands line-ending handling to the csv module, so quoted
# fields that contain line breaks are parsed correctly.
with open('/content/file.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        li.append({
            "filename": row['Picture'],
            "region": {
                "shape_attributes": {
                    "name": "polygon",
                    # Stored as the raw CSV text; it is not parsed into lists.
                    "Coordinates": row['Coordinates'],
                    "region_attribute": {
                        "name": row['Class'],
                    },
                },
            },
        })

# Write the collected records as indented JSON.
with open("file.json", "w") as f:
    json.dump(li, f, indent=4)
aij0ehis

aij0ehis2#

我遇到了同样的问题,因为我想生成一个JSON来序列化并由REST API使用,它需要一个列表-而不是包含列表的字符串。
我找到了一个不依赖于CSV文件结构和列的解决方案。它分三个步骤完成:读取CSV,对每行中包含的表达式求值,然后导出为JSON。
首先,使用DictReader读取CSV:

# `file` is assumed to be an open CSV file object (not shown in this snippet);
# cells are separated by ';'.
for row in csv.DictReader(file, delimiter=";"):
  ...

要对这些行求值,可以使用ast模块(Abstract Syntax Trees,抽象语法树)。下面的代码改编自https://stackoverflow.com/a/71137896/10966677中的代码片段,并针对JSON格式的用例做了简化:

import ast

def literal_eval(source):
    """Convert the text of one CSV cell into a JSON-compatible Python value.

    Adapted from ``ast.literal_eval``. Literals (numbers, quoted strings,
    ``True``/``False``/``None``), lists and dicts are evaluated, including
    nested ones and signed numbers such as ``-5``. A bare name such as
    ``sim`` is returned as the string ``'sim'``. Text that is not a valid
    Python expression at all (an empty cell, free text containing spaces)
    is returned unchanged.

    Args:
        source: The cell text to evaluate.

    Returns:
        The evaluated value, or a string for bare names and for text that
        is not a Python expression.

    Raises:
        ValueError: If ``source`` parses as an expression but contains a
            construct that is not supported (e.g. a call or attribute access).
    """
    def _convert(node):
        if isinstance(node, ast.Expression):
            return _convert(node.body)
        if isinstance(node, ast.Constant):
            return node.value
        if isinstance(node, ast.List):
            return [_convert(element) for element in node.elts]
        if isinstance(node, ast.Dict):
            return dict(zip(map(_convert, node.keys), map(_convert, node.values)))
        if isinstance(node, ast.Name):
            # Unquoted words are kept as plain text.
            return ast.get_source_segment(source, node)
        if isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)):
            # '-5' parses as a unary minus applied to the constant 5, not as
            # a single constant, so signed numbers are handled here.
            operand = _convert(node.operand)
            if isinstance(operand, (int, float)) and not isinstance(operand, bool):
                return -operand if isinstance(node.op, ast.USub) else +operand
        raise ValueError('element cannot be parsed for a valid json')

    try:
        tree = ast.parse(source, mode='eval')
    except SyntaxError:
        # Not a Python expression (empty cell, free text): keep the raw text.
        return source
    return _convert(tree)

这个函数可以正确地对列表或字典求值。例如,从DictReader读取的行是:

# Example row in the form csv.DictReader yields it: every value is text.
row = {
    'nb_simulations': '10000',
    'units': '5000.0',
    'alpha': '[0.5, 0.95, 0.99, 0.999]',
    'method': 'sim',
}

要将字符串'[0.5, 0.95, 0.99, 0.999]'求值为列表:

# Evaluate every cell of the row; the resulting dict is:
#   {'nb_simulations': 10000, 'units': 5000.0,
#    'alpha': [0.5, 0.95, 0.99, 0.999], 'method': 'sim'}
{k: literal_eval(v) for k, v in row.items()}

在代码中使用所有这些:

import csv, json

result = []
# Read the ';'-separated CSV and evaluate every cell into a Python value.
# newline='' hands line-ending handling to the csv module, so quoted
# fields that contain line breaks are parsed correctly.
with open(csv_file, encoding='utf-8', newline='') as file:
    for row in csv.DictReader(file, delimiter=";"):
        result.append({k: literal_eval(v) for k, v in row.items()})

# Write the evaluated rows to the JSON file.
with open(json_file, 'w', encoding='utf-8') as file:
    json.dump(result, file, indent=4)

这也适用于相互嵌套的列表和字典。
CSV示例:

nb_simulations;units;alpha;method;rating
10000;1000.0;[0.5, 0.95, 0.99, 0.999];sim;{'type': 'A', 'scores': [1, 10]}
10000;2000.0;[0.5, 0.95, 0.99, 0.999];calc;{'type': 'A', 'scores': [2, 20]}
10000;3000.0;[0.5, 0.95, 0.99, 0.999];calc;{'type': 'B', 'scores': [3, 30]}

JSON输出示例:

[
    {
        "nb_simulations": 10000,
        "units": 1000.0,
        "alpha": [
            0.5,
            0.95,
            0.99,
            0.999
        ],
        "method": "sim",
        "rating": {
            "type": "A",
            "scores": [
                1,
                10
            ]
        }
    },
...

它与输入的列或结构无关。

相关问题