在Golang中解析包含JSON数据的CSV文件

qhhrdooz  于 2023-10-21  发布在  Go
关注(0)|答案(4)|浏览(144)

我有一个CSV文件,其中的数据格式如下:

Date,RestaurantId,ItemRatings
2023-10-08,232,[{"item_id":8215117,"item_name":"The Farmers Breakfast","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0},{"item_id":8215132,"item_name":"The Great White","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0}]

我想解析这个CSV文件,并把数据存储到结构体中

// ItemRatings is one parsed row of the CSV file: the restaurant, the date of
// the row, and the items decoded from the JSON in the ItemRatings column.
//
// Struct tags must be written with ASCII double quotes; tags written with
// typographic quotes are malformed and encoding/json ignores them.
type ItemRatings struct {
    RestaurantId int        `json:"restaurant_id"`
    Date         string     `json:"date"`
    ItemData     []ItemData `json:"item_data"`
}

// ItemData is one element of the JSON array held in the ItemRatings column of
// the CSV file; each field is filled from the JSON key named in its tag.
//
// The tags use ASCII double quotes. With typographic quotes the tags are
// malformed, encoding/json falls back to the Go field names, and keys such as
// "item_id" no longer match any field.
type ItemData struct {
    ItemID          int    `json:"item_id"`
    ItemName        string `json:"item_name"`
    CurrentDayCount int    `json:"current_day_count"`
    CurrentDaySum   int    `json:"current_day_sum"`
    MTDCount        int    `json:"mtd_count"`
    MTDSum          int    `json:"mtd_sum"`
    WTDCount        int    `json:"wtd_count"`
    WTDSum          int    `json:"wtd_sum"`
}

POC代码:

// Read the file as CSV. Per the encoding/csv documentation, LazyQuotes lets a
// quotation mark appear in an unquoted field instead of being rejected.
// NOTE(review): file and t are defined outside this fragment.
reader := csv.NewReader(file)
reader.LazyQuotes = true

// Process records one at a time. The loop has no break; it only ends through
// one of the t.Fatalf calls below.
for {
   record, err := reader.Read()
   if err != nil {
      // Every Read error is fatal here.
      // NOTE(review): encoding/csv reports end of input as io.EOF through
      // this same err, so a fully read file also lands here — confirm.
      t.Fatalf("%v", err)
   }
   // The third field is expected to hold the JSON array of items.
   // NOTE(review): no length check is made before indexing record[2].
   itemDetailsJson := record[2]
   var itemDetails []ItemData

   // Decode the field into a slice of ItemData; a decode failure is fatal.
   err = json.Unmarshal([]byte(itemDetailsJson), &itemDetails)
   if err != nil {
      t.Fatalf("Error unmarshalling: %v", err)
   }
   fmt.Printf("Unmarshalled Array: %+v\n", itemDetails)
}

请建议一种在Golang中实现的方法。我遇到的问题是:JSON数据列表中使用了双引号和逗号。请建议代码上的修改,或者可以替代的CSV格式,以便在CSV中按restaurantId字段保存这些汇总数据。
编辑1 - item_name甚至可能包含特殊字符,例如 " 或 + 或 / 或 [],比如:6" 披萨

h6my8fg2

h6my8fg21#

由于您的数据既不是CSV也不是JSON,因此需要换一种方式来处理。依我之见,与其尝试把它当作CSV解析,不如先逐行读取,再把每一行拆分成各个部分。例如:

package main

import (
    "bufio"
    "encoding/json"
    "fmt"
    "log"
    "os"
    "strconv"
    "strings"
)

// ItemRatings is one parsed data line of the input file: the restaurant, the
// date of the line, and the items decoded from the JSON in the third column.
type ItemRatings struct {
    // RestaurantId is tagged "restaurant_id"; the tag "item_id" belongs to
    // ItemData.ItemID and would mislabel this field when marshalled.
    RestaurantId int        `json:"restaurant_id"`
    Date         string     `json:"date"`
    ItemData     []ItemData `json:"item_data"`
}

// ItemData is one element of the JSON array stored in the third column of a
// data line. Each field is filled from the JSON key named in its struct tag.
type ItemData struct {
    ItemID          int    `json:"item_id"`
    ItemName        string `json:"item_name"`
    CurrentDayCount int    `json:"current_day_count"`
    CurrentDaySum   int    `json:"current_day_sum"`
    // NOTE(review): mtd/wtd presumably stand for month-to-date and
    // week-to-date — confirm with the data producer.
    MTDCount        int    `json:"mtd_count"`
    MTDSum          int    `json:"mtd_sum"`
    WTDCount        int    `json:"wtd_count"`
    WTDSum          int    `json:"wtd_sum"`
}

// main reads mydata.csv line by line, skips the header, parses every data
// line into an ItemRatings value, and prints the collected ratings.
func main() {
    file, err := os.Open("mydata.csv")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    var itemRatings []ItemRatings

    scanner := bufio.NewScanner(file)
    // A line carries a whole JSON array, so allow lines far beyond the
    // scanner's default 64 KiB token limit.
    scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)

    lineNo := 0
    for scanner.Scan() {
        lineNo++
        line := scanner.Text()
        // Line 1 is the header; blank lines (e.g. a trailing newline) hold no data.
        if lineNo == 1 || strings.TrimSpace(line) == "" {
            continue
        }

        rating, err := parseRatingLine(line)
        if err != nil {
            log.Fatalf("line %d: %v", lineNo, err)
        }
        itemRatings = append(itemRatings, rating)
    }
    // Scan returns false for both end of input and read errors; tell them apart.
    if err := scanner.Err(); err != nil {
        log.Fatalf("reading mydata.csv: %v", err)
    }

    for _, rating := range itemRatings {
        fmt.Printf("RestaurantID: %d, Date: %s\n", rating.RestaurantId, rating.Date)
        for _, item := range rating.ItemData {
            fmt.Printf("   ItemID: %d, ItemName: %s, CurrentDayCount: %d, CurrentDaySum: %d, MTDCount: %d, MTDSum: %d, WTDCount: %d, WTDSum: %d\n", item.ItemID, item.ItemName, item.CurrentDayCount, item.CurrentDaySum, item.MTDCount, item.MTDSum, item.WTDCount, item.WTDSum)
        }
    }
}

// parseRatingLine parses one data line of the form "date,restaurantId,json".
// Only the first two commas separate fields; everything after the second comma
// is treated as the JSON array, so commas and quotes inside it are preserved.
// It returns an error when the line has fewer than three fields, when the
// restaurant id is not an integer, or when the JSON cannot be decoded.
func parseRatingLine(line string) (ItemRatings, error) {
    fields := strings.SplitN(line, ",", 3)
    if len(fields) != 3 {
        return ItemRatings{}, fmt.Errorf("expected 3 comma-separated fields, got %d", len(fields))
    }

    restaurantID, err := strconv.Atoi(strings.TrimSpace(fields[1]))
    if err != nil {
        return ItemRatings{}, fmt.Errorf("invalid restaurant id %q: %w", fields[1], err)
    }

    var itemDetails []ItemData
    if err := json.Unmarshal([]byte(fields[2]), &itemDetails); err != nil {
        return ItemRatings{}, fmt.Errorf("unmarshalling item ratings: %w", err)
    }

    return ItemRatings{
        RestaurantId: restaurantID,
        Date:         fields[0],
        ItemData:     itemDetails,
    }, nil
}

以下输入的示例输出:

Date,RestaurantId,ItemRatings
2023-10-08,232,[{"item_id":8215117,"item_name":"The Farmers Breakfast","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0},{"item_id":8215132,"item_name":"The Great White","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0}]
2023-10-09,235,[{"item_id":8215117,"item_name":"The Farmers Breakfast","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0},{"item_id":8215132,"item_name":"The Great White","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0}]

如下所示:

RestaurantID: 232, Date: 2023-10-08
   ItemID: 8215117, ItemName: The Farmers Breakfast, CurrentDayCount: 0, CurrentDaySum: 0, MTDCount: 1, MTDSum: 5, WTDCount: 0, WTDSum: 0
   ItemID: 8215132, ItemName: The Great White, CurrentDayCount: 0, CurrentDaySum: 0, MTDCount: 1, MTDSum: 5, WTDCount: 0, WTDSum: 0
RestaurantID: 235, Date: 2023-10-09
   ItemID: 8215117, ItemName: The Farmers Breakfast, CurrentDayCount: 0, CurrentDaySum: 0, MTDCount: 1, MTDSum: 5, WTDCount: 0, WTDSum: 0
   ItemID: 8215132, ItemName: The Great White, CurrentDayCount: 0, CurrentDaySum: 0, MTDCount: 1, MTDSum: 5, WTDCount: 0, WTDSum: 0
nc1teljy

nc1teljy2#

你可以在Go中使用encoding/csv和encoding/json包来解析CSV数据,并将其反序列化到你的结构体中。你需要先从CSV中取出JSON数据,然后将其反序列化到ItemData结构体中。

package main

import (
    "encoding/csv"
    "encoding/json"
    "fmt"
    "io"
    "log"
    "os"
    "strconv"
    "strings"
)

// ItemRatings is one parsed CSV record: the restaurant, the date of the
// record, and the items decoded from the JSON in the third column.
type ItemRatings struct {
    // RestaurantId is tagged "restaurant_id"; the tag "item_id" belongs to
    // ItemData.ItemID and would mislabel this field when marshalled.
    RestaurantId int        `json:"restaurant_id"`
    Date         string     `json:"date"`
    ItemData     []ItemData `json:"item_data"`
}

// ItemData is one element of the JSON array stored in the third CSV column.
// Each field is filled from the JSON key named in its struct tag.
type ItemData struct {
    ItemID          int    `json:"item_id"`
    ItemName        string `json:"item_name"`
    CurrentDayCount int    `json:"current_day_count"`
    CurrentDaySum   int    `json:"current_day_sum"`
    // NOTE(review): mtd/wtd presumably stand for month-to-date and
    // week-to-date — confirm with the data producer.
    MTDCount        int    `json:"mtd_count"`
    MTDSum          int    `json:"mtd_sum"`
    WTDCount        int    `json:"wtd_count"`
    WTDSum          int    `json:"wtd_sum"`
}

// main reads data.csv with encoding/csv, decodes the JSON held in the third
// column of every data record, and prints the restaurant, date and items.
//
// The JSON column has to be a properly quoted CSV field (wrapped in double
// quotes, inner quotes doubled); otherwise encoding/csv cannot return it as a
// single field.
func main() {
    file, err := os.Open("data.csv")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    reader := csv.NewReader(file)

    // The first record is the header row, not data, so it is read and dropped.
    if _, err := reader.Read(); err != nil {
        if err == io.EOF {
            return // empty file: nothing to print
        }
        log.Fatalf("reading header: %v", err)
    }

    var itemRatingsList []ItemRatings

    recordNo := 1 // the header was record 1
    for {
        record, err := reader.Read()
        if err == io.EOF {
            break
        }
        // Anything other than end of input is a real read or format error.
        if err != nil {
            log.Fatalf("reading CSV: %v", err)
        }
        recordNo++

        itemRating, err := parseRecord(record)
        if err != nil {
            log.Fatalf("record %d: %v", recordNo, err)
        }
        itemRatingsList = append(itemRatingsList, itemRating)
    }

    for _, itemRating := range itemRatingsList {
        fmt.Printf("RestaurantId: %d, Date: %s\n", itemRating.RestaurantId, itemRating.Date)
        for _, itemData := range itemRating.ItemData {
            fmt.Printf("ItemID: %d, ItemName: %s\n", itemData.ItemID, itemData.ItemName)
        }
    }
}

// parseRecord converts one CSV record (date, restaurant id, JSON array) into
// an ItemRatings value. The restaurant id is taken from the record itself.
// It returns an error when the record has fewer than three fields, when the
// restaurant id is not an integer, or when the JSON cannot be decoded.
func parseRecord(record []string) (ItemRatings, error) {
    if len(record) < 3 {
        return ItemRatings{}, fmt.Errorf("expected 3 fields, got %d", len(record))
    }

    restaurantID, err := strconv.Atoi(strings.TrimSpace(record[1]))
    if err != nil {
        return ItemRatings{}, fmt.Errorf("invalid restaurant id %q: %w", record[1], err)
    }

    var itemDataList []ItemData
    if err := json.Unmarshal([]byte(record[2]), &itemDataList); err != nil {
        return ItemRatings{}, fmt.Errorf("unmarshalling JSON: %w", err)
    }

    return ItemRatings{
        RestaurantId: restaurantID,
        Date:         record[0],
        ItemData:     itemDataList,
    }, nil
}
wkyowqbh

wkyowqbh3#

由于使用了encoding/csv,我把csv文件改成了用双引号包裹的形式。这花了我一些时间,所以我把它作为一种替代方法补充在这里。

CSV(test.csv):

Date,RestaurantId,ItemRatings
2023-10-08,232,"[{""item_id"":8215117,""item_name"":""The Farmers Breakfast"",""current_day_count"":0,""current_day_sum"":0,""mtd_count"":1,""mtd_sum"":5,""wtd_count"":0,""wtd_sum"":0},{""item_id"":8215132,""item_name"":""The Great White"",""current_day_count"":0,""current_day_sum"":0,""mtd_count"":1,""mtd_sum"":5,""wtd_count"":0,""wtd_sum"":0}]"

代码:

package main

import (
    "encoding/csv"
    "fmt"
    "os"
    "strings"
)

// main reads test.csv record by record with encoding/csv and prints, for each
// three-field record, the date, the restaurant ID and the pieces of the third
// field obtained by plain string trimming and splitting.
func main() {
    f, err := os.Open("test.csv")
    if err != nil {
        fmt.Println("Error:", err)
        return
    }
    defer f.Close()

    r := csv.NewReader(f)

    // The loop ends on the first error returned by Read, whatever it is.
    for row, readErr := r.Read(); readErr == nil; row, readErr = r.Read() {
        if len(row) != 3 {
            fmt.Println("Invalid:", row)
            continue
        }

        fmt.Println("Date:", row[0])
        fmt.Println("Restaurant ID:", row[1])
        fmt.Println("Item Ratings (CSV):")

        // Drop the surrounding brackets, cut the text at every "},{" and
        // strip the remaining braces from each piece before printing it.
        body := strings.Trim(row[2], "[]")
        for _, piece := range strings.Split(body, "},{") {
            fmt.Println(strings.Trim(piece, "{}"))
        }

        fmt.Println()
    }
}

输出:

v64noz0r

v64noz0r4#

这是你想要的吗?

data.csv

Date,RestaurantId,ItemRatings
2023-10-08,232,[{"item_id":8215117,"item_name":"The Farmers Breakfast","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0},{"item_id":8215132,"item_name":"The Great White","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0}]

script.go

package main

import (
    "bufio"
    "encoding/json"
    "fmt"
    "os"
    "strings"
)

// ItemRating is one element of the JSON array stored in the ItemRatings
// column. Each field is filled from the JSON key named in its struct tag.
type ItemRating struct {
    ItemID          int    `json:"item_id"`
    ItemName        string `json:"item_name"`
    CurrentDayCount int    `json:"current_day_count"`
    CurrentDaySum   int    `json:"current_day_sum"`
    // NOTE(review): mtd/wtd presumably stand for month-to-date and
    // week-to-date — confirm with the data producer.
    MTDCount        int    `json:"mtd_count"`
    MTDSum          int    `json:"mtd_sum"`
    WTDCount        int    `json:"wtd_count"`
    WTDSum          int    `json:"wtd_sum"`
}

// Data groups the three columns of one row: the date, the restaurant ID and
// the decoded item ratings.
// NOTE(review): the main function in this snippet does not reference Data.
type Data struct {
    Date         string        `json:"date"`
    RestaurantID int           `json:"restaurant_id"`
    ItemRatings  []ItemRating `json:"item_ratings"`
}

// main reads data.csv line by line, skips the header and, for every data
// line, decodes the JSON array in the third column and prints its items.
func main() {
    file, err := os.Open("data.csv")
    if err != nil {
        fmt.Println("Error opening file:", err)
        return
    }
    defer file.Close()

    scanner := bufio.NewScanner(file)
    // A line carries a whole JSON array, so allow lines far beyond the
    // scanner's default 64 KiB token limit.
    scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)

    // Skip the header line. A false result means the file is empty or could
    // not be read; only the latter is an error.
    if !scanner.Scan() {
        if err := scanner.Err(); err != nil {
            fmt.Println("Error reading file:", err)
        }
        return
    }

    // Read and parse each line
    for scanner.Scan() {
        line := scanner.Text()
        // Blank lines (e.g. a trailing newline) hold no data.
        if strings.TrimSpace(line) == "" {
            continue
        }

        date, restaurantID, itemRatingsJSON, ok := splitRecord(line)
        if !ok {
            fmt.Println("Error parsing line: expected 3 comma-separated fields:", line)
            continue
        }

        var itemRatings []ItemRating
        if err := json.Unmarshal([]byte(itemRatingsJSON), &itemRatings); err != nil {
            fmt.Println("Error parsing item ratings:", err)
            continue
        }

        // Process the data as needed
        fmt.Println("Date:", date)
        fmt.Println("Restaurant ID:", restaurantID)
        fmt.Println("Item Ratings:")
        for _, item := range itemRatings {
            fmt.Println("  Item ID:", item.ItemID)
            fmt.Println("  Item Name:", item.ItemName)
            fmt.Println("  Current Day Count:", item.CurrentDayCount)
            fmt.Println("  Current Day Sum:", item.CurrentDaySum)
            fmt.Println("  MTD Count:", item.MTDCount)
            fmt.Println("  MTD Sum:", item.MTDSum)
            fmt.Println("  WTD Count:", item.WTDCount)
            fmt.Println("  WTD Sum:", item.WTDSum)
        }
        fmt.Println()
    }

    if err := scanner.Err(); err != nil {
        fmt.Println("Error reading file:", err)
    }
}

// splitRecord cuts a data line into date, restaurant ID and the raw JSON text.
// Only the first two commas act as separators, so commas inside the JSON are
// left untouched. ok is false when the line has fewer than three fields.
func splitRecord(line string) (date, restaurantID, itemRatingsJSON string, ok bool) {
    parts := strings.SplitN(line, ",", 3)
    if len(parts) != 3 {
        return "", "", "", false
    }
    return parts[0], parts[1], parts[2], true
}

输出

Date: 2023-10-08
Restaurant ID: 232
Item Ratings:
  Item ID: 8215117
  Item Name: The Farmers Breakfast
  Current Day Count: 0
  Current Day Sum: 0
  MTD Count: 1
  MTD Sum: 5
  WTD Count: 0
  WTD Sum: 0
  Item ID: 8215132
  Item Name: The Great White
  Current Day Count: 0
  Current Day Sum: 0
  MTD Count: 1
  MTD Sum: 5
  WTD Count: 0
  WTD Sum: 0

相关问题