NodeJS:抓取带有合并标题行的表格

kgqe7b3p  于 2023-01-08  发布在  Node.js
关注(0)|答案(1)|浏览(98)

我现在遇到一个问题:在 Node.js 中使用 cheerio 抓取带有合并表头的表格,这个合并的表头是用来分组的。不带表头的数据我已经能够抓取到。这里有一张表格的截图:Screenshot Table
以及该表格的 HTML 代码(也可以在这个 fiddle here 中查看):

<div class="wrap">
   <table class="tbl">
     <tr class="head">
       <td colspan="6" style="background-color:#656968">Monday</td>
     </tr>
     <tr class="head">
       <td class="center" width="20%">Code</td>
       <td class="center" width="40%">Title</td>
       <td class="center" width="20%">Price</td>
       <td class="center last" width="20%">Status</td>
     </tr>
     <tr class="td1">
       <td class="center">Code 1</td>
       <td class="center">Name 1</td>
       <td class="center">1.234</td>
       <td class="center last">
         <span class="green">Closed</span>
       </td>
     </tr>
   </table>
   <table class="tbl">
     <tr class="head">
       <td colspan="6" style="background-color:#656968">Tuesday</td>
     </tr>
     <tr class="head">
       <td class="center" width="20%">Code</td>
       <td class="center" width="40%">Title</td>
       <td class="center" width="20%">Price</td>
       <td class="center last" width="20%">Status</td>
     </tr>
     <tr class="td1">
       <td class="center">Code 1</td>
       <td class="center">Name 1</td>
       <td class="center">1.234</td>
       <td class="center last">
         <span class="green">Closed</span>
       </td>
     </tr>
   </table>
   <table class="tbl">
     <tr class="head">
       <td colspan="6" style="background-color:#656968">Wednesday</td>
     </tr>
     <tr class="head">
       <td class="center" width="20%">Code</td>
       <td class="center" width="40%">Title</td>
       <td class="center" width="20%">Price</td>
       <td class="center last" width="20%">Status</td>
     </tr>
     <tr class="td1">
       <td class="center">Code 1</td>
       <td class="center">Name 1</td>
       <td class="center">1.234</td>
       <td class="center last">
         <span class="green">Closed</span>
       </td>
     </tr>
     <tr class="td2">
       <td class="center">Code 1</td>
       <td class="center">Name 1</td>
       <td class="center">1.234</td>
       <td class="center last">
         <span class="green">Closed</span>
       </td>
     </tr>
     <tr class="td1">
       <td class="center">Code 1</td>
       <td class="center">Name 1</td>
       <td class="center">1.234</td>
       <td class="center last">
         <span class="green">Closed</span>
       </td>
     </tr>
   </table>
   <table class="tbl">
     <tr class="head">
       <td colspan="6" style="background-color:#656968">Thursday</td>
     </tr>
     <tr class="head">
       <td class="center" width="20%">Code</td>
       <td class="center" width="40%">Title</td>
       <td class="center" width="20%">Price</td>
       <td class="center last" width="20%">Status</td>
     </tr>
     <tr class="td1">
       <td class="center">Code 1</td>
       <td class="center">Name 1</td>
       <td class="center">1.234</td>
       <td class="center last">
         <span class="green">Closed</span>
       </td>
     </tr>
   </table>
 </div>

这是我的代码:

// Scrapes every data row (tr.td1 / tr.td2) into four parallel arrays and then
// zips them, by index, into `merged`: one { code, title, price, status } object
// per row.
// NOTE(review): `$` (a loaded cheerio instance) and the arrays `code`, `title`,
// `price` and `status` are not declared in this snippet; they are assumed to be
// declared (as empty arrays) before this point — confirm.
const sel = "tr.td1, tr.td2";
$(sel).each(function () {
  // All cells of the current row, in document order.
  const cells = $(this).find("td");
  // Trimmed text of the cell at `index`; yields "" when the row has no such
  // cell, so every row still contributes exactly one entry to each array and
  // the index-based merge below stays aligned.
  const cellText = (index) => cells.eq(index).text().trim();

  code.push({ code: cellText(0) });
  title.push({ title: cellText(1) });
  price.push({ price: cellText(2) });
  status.push({ status: cellText(3) });
}); // closes the .each() callback, which the original snippet left open

// Combine the i-th entry of each array into a single row object.
const merged = code.map((entry, i) => ({
  ...entry,
  ...title[i],
  ...price[i],
  ...status[i],
}));

是的,我能得到想要的数组,看起来像这样:

[
  {
    "code": "Code 1",
    "title": "Name 1",
    "price": "1.234",
    "status": "Closed",
  },
 {
    "code": "Code 1",
    "title": "Name 1",
    "price": "1.234",
    "status": "Closed",
  },
 {
    "code": "Code 1",
    "title": "Name 1",
    "price": "1.234",
    "status": "Closed",
  }
]

我需要的是:在 JSON 的每个对象里再加上一个星期(日期)值,也就是合并表头单元格里的内容。我希望得到的最终结果是这样的:

[
  {
    "code": "Code 1",
    "title": "Name 1",
    "price": "1.234",
    "status": "Closed",
    "group": "Monday"

  },
 {
    "code": "Code 1",
    "title": "Name 1",
    "price": "1.234",
    "status": "Closed",
    "group": "Monday"
  },
 {
    "code": "Code 1",
    "title": "Name 1",
    "price": "1.234",
    "status": "Closed",
    "group": "Monday"
  },
      {
    "code": "Code 1",
    "title": "Name 1",
    "price": "1.234",
    "status": "Closed",
    "group": "Tuesday"

  },
 {
    "code": "Code 1",
    "title": "Name 1",
    "price": "1.234",
    "status": "Closed",
    "group": "Tuesday"
  },
 {
    "code": "Code 1",
    "title": "Name 1",
    "price": "1.234",
    "status": "Closed",
    "group": "Tuesday"
  }
]
bis0qfac

bis0qfac1#

与其从最底层的数据行开始再回溯到父元素,我更倾向于先遍历父元素(每个表格),再获取它们的子行,这样就能顺带拿到所需的分组信息。之后既可以构建一个按组组织的嵌套结构,也可以将其展平为预期的结果:

const cheerio = require("cheerio"); // 1.0.0-rc.12

const html = `<Your HTML from the question>`;
// Output keys, listed in the same order as the <td> cells of a data row.
const headers = ["code", "title", "price", "status"];
const $ = cheerio.load(html);

// Walk the tables rather than the rows, so every row can be tagged with the
// label taken from its own table's merged header. flatMap flattens the
// per-table row lists into one array.
const data = [...$(".tbl")].flatMap(table => {
  const $table = $(table);
  // The first ".head" row holds the single colspan cell (e.g. "Monday").
  // Looked up once per table instead of once per row.
  const group = $table.find(".head").first().text().trim();

  // Data rows carry either the "td1" or the "td2" class. Both are class
  // selectors, so both need the leading dot; a bare "td2" would be a tag
  // selector that matches nothing and silently drops those rows.
  return [...$table.find(".td1, .td2")].map(row => ({
    // Pair each cell's trimmed text with the header at the same position.
    ...Object.fromEntries([...$(row).find("td")].map((e, i) =>
      [headers[i], $(e).text().trim()]
    )),
    group,
  }));
});
console.log(data);

相关问题