pandas 如何将JSON文件中的字典处理为txt文件

xjreopfe  于 2022-11-05  发布在  其他
关注(0)|答案(1)|浏览(142)

我有一个 sample.json 文件,其中以字典形式保存了一个 C 语言数据集中的代码片段(键为样本编号,值为代码文本):

{
  "0_0": 
"int curl_mvsprintf ( char * buffer , const char * format , va_list ap_save ) {\n int retcode ;\n retcode = dprintf_formatf ( & buffer , storebuffer , format , ap_save ) ;\n * buffer = 0 ;\n return retcode ;\n }",

  "0_1": 
"static int alloc_addbyter ( int output , FILE * data ) {\n struct asprintf * infop = ( struct asprintf * ) data ;\n unsigned char outc = ( unsigned char ) output ;\n if ( ! infop -> buffer ) {\n infop -> buffer = malloc ( 32 ) ;\n if ( ! infop -> buffer ) {\n infop -> fail = 1 ;\n return - 1 ;\n }\n infop -> alloc = 32 ;\n infop -> len = 0 ;\n }\n else if ( infop -> len + 1 >= infop -> alloc ) {\n char * newptr ;\n newptr = realloc ( infop -> buffer , infop -> alloc * 2 ) ;\n if ( ! newptr ) {\n infop -> fail = 1 ;\n return - 1 ;\n }\n infop -> buffer = newptr ;\n infop -> alloc *= 2 ;\n }\n infop -> buffer [ infop -> len ] = outc ;\n infop -> len ++ ;\n return outc ;\n }"
}

我希望使用 pandas 或纯 Python 处理它,得到下面的文本,用于 Python 中的分类任务;标签与代码之间的分隔符应为制表符 '\t',结果保存到 sample.txt。
如果键名以 0 结尾(如 0_0、1_0),则使用标签 label1;否则(即以 _1 结尾,如 0_1)使用标签 label2。

label1  int curl_mvsprintf ( char * buffer , const char * format , va_list ap_save ) {\n int retcode ;\n retcode = dprintf_formatf ( & buffer , storebuffer , format , ap_save ) ;\n * buffer = 0 ;\n return retcode ;\n }
label2  static int alloc_addbyter ( int output , FILE * data ) {\n struct asprintf * infop = ( struct asprintf * ) data ;\n unsigned char outc = ( unsigned char ) output ;\n if ( ! infop -> buffer ) {\n infop -> buffer = malloc ( 32 ) ;\n if ( ! infop -> buffer ) {\n infop -> fail = 1 ;\n return - 1 ;\n }\n infop -> alloc = 32 ;\n infop -> len = 0 ;\n }\n else if ( infop -> len + 1 >= infop -> alloc ) {\n char * newptr ;\n newptr = realloc ( infop -> buffer , infop -> alloc * 2 ) ;\n if ( ! newptr ) {\n infop -> fail = 1 ;\n return - 1 ;\n }\n infop -> buffer = newptr ;\n infop -> alloc *= 2 ;\n }\n infop -> buffer [ infop -> len ] = outc ;\n infop -> len ++ ;\n return outc ;\n }

使用 pandas

读取txt文件后的预期输出

j13ufse2

j13ufse21#

像这样吗?

import csv
import json

import pandas as pd

# Load the mapping of sample id (e.g. "0_0") -> source-code text.
with open("sample.json", "r", encoding="utf-8") as f:
    sample_dict = json.load(f)

# Build one "<label>\t<text>" record per entry.
records = []
for key, val in sample_dict.items():
    # Ids ending in "0" are labelled "label1"; everything else "label2".
    label = "label1" if key.endswith("0") else "label2"
    # json.load decodes the JSON "\n" escapes into real newline characters.
    # Write them back as the two characters backslash + n so that every
    # record occupies exactly one line of the output file.
    records.append(label + "\t" + val.replace("\n", "\\n"))

output_str = "".join(record + "\n" for record in records)

with open("sample.txt", "w", encoding="utf-8") as f:
    f.write(output_str)

# The file is tab-separated and has no header row, so both must be stated
# explicitly; the default (comma separator, first line as header) would
# split the C code on its commas and swallow the first record.
# QUOTE_NONE keeps any double quotes in the code text as ordinary characters.
df = pd.read_csv(
    "sample.txt",
    sep="\t",
    header=None,
    names=["label", "text"],
    quoting=csv.QUOTE_NONE,
    encoding="utf-8",
)

注意:您也可以跳过 txt 文件,直接从 JSON 构建 pandas 的 DataFrame:

import json
import pandas as pd

# Load the mapping of sample id -> text from the JSON file.
with open("sample.json", "r") as json_file:
    sample_dict = json.load(json_file)

# Ids ending in "0" get "label1", all others "label2"; the text column
# holds the dictionary values in the same (insertion) order.
df_dict = {
    "label": [
        "label1" if name.endswith("0") else "label2" for name in sample_dict
    ],
    "text": list(sample_dict.values()),
}

df = pd.DataFrame(df_dict)
df

相关问题