我有一个scraper,我试图通过POST请求来获取数据,由于这个特定post数据分页的性质,我想在parse_links
方法中使用来自start_requests
的post_data,这样我就可以实现正确的分页。问题是当我试图打印response.meta.get('data_post')
时,它可以工作,但当将它传递给POST请求本身时,它会给出AttributeError: 'NoneType' object has no attribute 'format'
。如何在parse_links
方法中访问data_post
?
下面是我的代码:
import scrapy
import requests
from olx_egypt.items import OlxEgyptItem
class OlxScraper(scrapy.Spider):
name = "olx-eg-scraper"
custom_settings = {
"LOG_FILE": "olx_eg.log",
"ITEM_PIPELINES": {"olx_egypt.pipelines.OlxEgPipeline": 300},
}
listing_endpoint = "https://search.olx.com.eg/_msearch?filter_path=took%2C*.took%2C*.suggest.*.options.text%2C*.suggest.*.options._source.*%2C*.hits.total.*%2C*.hits.hits._source.*%2C*.hits.hits.highlight.*%2C*.error%2C*.aggregations.*.buckets.key%2C*.aggregations.*.buckets.doc_count%2C*.aggregations.*.buckets.complex_value.hits.hits._source%2C*.aggregations.*.filtered_agg.facet.buckets.key%2C*.aggregations.*.filtered_agg.facet.buckets.doc_count%2C*.aggregations.*.filtered_agg.facet.buckets.complex_value.hits.hits._source"
headers = {
"authority": "search.olx.com.eg",
"accept": "*/*",
"accept-language": "en,ru;q=0.9",
"authorization": "Basic b2x4LWVnLXByb2R1Y3Rpb24tc2VhcmNoOn1nNDM2Q0R5QDJmWXs2alpHVGhGX0dEZjxJVSZKbnhL",
"content-type": "application/x-ndjson",
"origin": "https://www.olx.com.eg",
"referer": "https://www.olx.com.eg/",
"sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="100", "Yandex";v="22"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Linux"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-site",
"user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.143 YaBrowser/22.5.0.1879 (beta) Yowser/2.5 Safari/537.36",
}
cities = [
"1-5",
"1-68",
"1-6",
"1-71",
"1-65",
"1-57",
"1-60",
"1-66",
"1-59",
"1-70",
"1-67",
"1-50",
"1-73",
"1-69",
"1-62",
"1-58",
"1-55",
"1-61",
"1-64",
"1-72",
"1-63",
"1-77",
"1-56",
"1-76",
"1-54",
"1-74",
]
city_ids = [
"2-288",
"2-381",
"2-408",
"2-332",
"2-287",
"2-320",
"2-322",
"2-263",
"2-382",
"2-319",
"2-380",
"2-269",
"2-398",
"2-359",
"2-424",
"2-385",
"2-286",
"2-321",
"2-140",
"2-295",
"2-134",
"2-139",
"2-133",
"2-131",
"2-333",
"2-393",
"2-391",
"2-132",
"2-383",
"2-144",
"2-401",
"2-289",
"2-389",
"2-386",
"2-292",
"2-136",
"2-143",
"2-150",
"2-397",
"2-142",
"2-217",
"2-205",
"2-212",
"2-213",
"2-417",
"2-230",
"2-194",
"2-210",
"2-211",
"2-413",
"2-384",
"2-222",
"2-197",
"2-198",
"2-208",
"2-195",
"2-414",
"2-221",
"2-233",
"2-207",
"2-204",
"2-199",
"2-219",
"2-201",
"2-224",
"2-229",
"2-200",
"2-202",
"2-227",
"2-196",
"2-411",
"2-206",
"2-422",
"2-203",
"2-228",
"2-317",
"2-309",
"2-301",
"2-303",
"2-113",
"2-318",
"2-302",
"2-312",
"2-119",
"2-304",
"2-299",
"2-100",
"2-390",
"2-308",
"2-412",
"2-307",
"2-109",
"2-297",
"2-416",
"2-124",
"2-106",
"2-99",
"2-300",
"2-306",
"2-305",
"2-115",
"2-116",
"2-121",
"2-311",
"2-112",
"2-362",
"2-104",
"2-125",
"2-105",
"2-107",
"2-111",
"2-96",
"2-114",
"2-117",
"2-95",
"2-387",
"2-172",
"2-174",
"2-171",
"2-173",
"2-178",
"2-175",
"2-177",
"2-79",
"2-165",
"2-77",
"2-94",
"2-166",
"2-78",
"2-365",
"2-159",
"2-377",
"2-373",
"2-420",
"2-376",
"2-370",
"2-368",
"2-371",
"2-363",
"2-372",
"2-375",
"2-367",
"2-366",
"2-369",
"2-157",
"2-158",
"2-364",
"2-374",
"2-419",
"2-40",
"2-34",
"2-35",
"2-37",
"2-42",
"2-32",
"2-27",
"2-26",
"2-39",
"2-36",
"2-38",
"2-28",
"2-33",
"2-30",
"2-41",
"2-29",
"2-31",
"2-80",
"2-87",
"2-86",
"2-84",
"2-83",
"2-82",
"2-85",
"2-81",
"2-23",
"2-19",
"2-16",
"2-15",
"2-164",
"2-415",
"2-22",
"2-17",
"2-24",
"2-25",
"2-20",
"2-21",
"2-179",
"2-409",
"2-183",
"2-399",
"2-182",
"2-180",
"2-181",
"2-184",
"2-162",
"2-89",
"2-92",
"2-88",
"2-90",
"2-91",
"2-93",
"2-326",
"2-328",
"2-327",
"2-331",
"2-329",
"2-330",
"2-325",
"2-245",
"2-242",
"2-243",
"2-247",
"2-246",
"2-249",
"2-251",
"2-250",
"2-244",
"2-248",
"2-252",
"2-396",
"2-189",
"2-187",
"2-191",
"2-193",
"2-185",
"2-192",
"2-61",
"2-57",
"2-62",
"2-56",
"2-54",
"2-55",
"2-53",
"2-60",
"2-59",
"2-58",
"2-6",
"2-10",
"2-14",
"2-127",
"2-5",
"2-9",
"2-378",
"2-11",
"2-2",
"2-12",
"2-3",
"2-1",
"2-8",
"2-7",
"2-13",
"2-4",
"2-126",
"2-404",
"2-339",
"2-341",
"2-344",
"2-337",
"2-343",
"2-342",
"2-346",
"2-340",
"2-345",
"2-338",
"2-50",
"2-51",
"2-43",
"2-49",
"2-45",
"2-52",
"2-48",
"2-47",
"2-46",
"2-44",
"2-74",
"2-76",
"2-160",
"2-161",
"2-73",
"2-75",
"2-72",
"2-235",
"2-241",
"2-234",
"2-239",
"2-240",
"2-238",
"2-236",
"2-237",
"2-163",
"2-392",
"2-63",
"2-67",
"2-64",
"2-66",
"2-69",
"2-71",
"2-65",
"2-68",
"2-70",
"2-274",
"2-277",
"2-418",
"2-316",
"2-282",
"2-281",
"2-276",
"2-283",
"2-284",
"2-278",
"2-279",
"2-280",
"2-285",
"2-350",
"2-356",
"2-347",
"2-351",
"2-357",
"2-349",
"2-353",
"2-348",
"2-354",
"2-355",
"2-265",
"2-268",
"2-271",
"2-272",
"2-270",
"2-315",
"2-266",
"2-167",
"2-334",
"2-170",
"2-336",
"2-169",
"2-335",
"2-168",
"2-258",
"2-253",
"2-254",
"2-256",
]
post_data = [
'{{"index":"olx-eg-production-ads-ar"}}\n{{"from":0,"size":0,"track_total_hits":false,"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"aggs":{{"category.lvl1.externalID":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.lvl0.externalID":"138"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"category.lvl1.externalID","size":20}}}}}}}}}}}},"location.lvl2":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.lvl1.externalID":"{city}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"location.lvl2.externalID","size":40}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["location.lvl2"]}}}}}}}}}}}}}}}}}},"location.lvl3":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.lvl2.externalID":"{_id}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"location.lvl3.externalID","size":40}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["location.lvl3"]}}}}}}}}}}}}}}}}}},"product":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}},{{"term":{{"product":"featured"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"product","size":20}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["product"]}}}}}}}}}}}}}}}}}},"totalProductCount":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"product":"featured"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"product","size":20}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["totalProductCount"]}}}}}}}}}}}}}}}}}}}}}}\n{{"index":"olx-eg-production-ads-ar"}}\n{{"from":0,"size":45,"track_total_hits":200000,"query":{{"function_score":{{"random_score":{{"seed":581}},"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"product":"featured"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}}}}}},"sort":["_score"]}}\n{{"index":"olx-eg-production-ads-ar"}}\n{{"from":{page},"size":{size},"track_total_hits":200000,"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"sort":[{{"timestamp":{{"order":"desc"}}}},{{"id":{{"order":"desc"}}}}]}}\n',
'{{"index":"olx-eg-production-ads-ar"}}\n{{"from":0,"size":0,"track_total_hits":false,"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"aggs":{{"category.lvl1.externalID":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.lvl0.externalID":"138"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"category.lvl1.externalID","size":20}}}}}}}}}}}},"location.lvl2":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.lvl1.externalID":"{city}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"location.lvl2.externalID","size":40}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["location.lvl2"]}}}}}}}}}}}}}}}}}},"location.lvl3":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.lvl2.externalID":"{_id}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"location.lvl3.externalID","size":40}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["location.lvl3"]}}}}}}}}}}}}}}}}}},"product":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}},{{"term":{{"product":"featured"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"product","size":20}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["product"]}}}}}}}}}}}}}}}}}},"totalProductCount":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"product":"featured"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"product","size":20}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["totalProductCount"]}}}}}}}}}}}}}}}}}}}}}}\n{{"index":"olx-eg-production-ads-ar"}}\n{{"from":0,"size":45,"track_total_hits":200000,"query":{{"function_score":{{"random_score":{{"seed":234}},"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"product":"featured"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}}}}}},"sort":["_score"]}}\n{{"index":"olx-eg-production-ads-ar"}}\n{{"from":{page},"size":{size},"track_total_hits":200000,"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"sort":[{{"_score":{{"order":"desc"}}}}]}}\n',
'{{"index":"olx-eg-production-ads-ar"}}\n{{"from":0,"size":0,"track_total_hits":false,"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"aggs":{{"category.lvl1.externalID":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.lvl0.externalID":"138"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"category.lvl1.externalID","size":20}}}}}}}}}}}},"location.lvl2":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.lvl1.externalID":"{city}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"location.lvl2.externalID","size":40}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["location.lvl2"]}}}}}}}}}}}}}}}}}},"location.lvl3":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.lvl2.externalID":"{_id}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"location.lvl3.externalID","size":40}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["location.lvl3"]}}}}}}}}}}}}}}}}}},"product":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}},{{"term":{{"product":"featured"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"product","size":20}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["product"]}}}}}}}}}}}}}}}}}},"totalProductCount":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"product":"featured"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"product","size":20}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["totalProductCount"]}}}}}}}}}}}}}}}}}}}}}}\n{{"index":"olx-eg-production-ads-ar"}}\n{{"from":0,"size":45,"track_total_hits":200000,"query":{{"function_score":{{"random_score":{{"seed":936}},"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"product":"featured"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}}}}}},"sort":["_score"]}}\n{{"index":"olx-eg-production-ads-ar"}}\n{{"from":{page},"size":{size},"track_total_hits":200000,"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"sort":[{{"extraFields.price":{{"order":"asc"}}}},{{"extraFields.salary_to":{{"order":"asc"}}}},{{"id":{{"order":"desc"}}}}]}}\n',
'{{"index":"olx-eg-production-ads-ar"}}\n{{"from":0,"size":0,"track_total_hits":false,"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"aggs":{{"category.lvl1.externalID":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.lvl0.externalID":"138"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"category.lvl1.externalID","size":20}}}}}}}}}}}},"location.lvl2":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.lvl1.externalID":"{city}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"location.lvl2.externalID","size":40}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["location.lvl2"]}}}}}}}}}}}}}}}}}},"location.lvl3":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.lvl2.externalID":"{_id}"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"location.lvl3.externalID","size":40}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["location.lvl3"]}}}}}}}}}}}}}}}}}},"product":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}},{{"term":{{"product":"featured"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"product","size":20}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["product"]}}}}}}}}}}}}}}}}}},"totalProductCount":{{"global":{{}},"aggs":{{"filtered_agg":{{"filter":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"product":"featured"}}}}]}}}},"aggs":{{"facet":{{"terms":{{"field":"product","size":20}},"aggs":{{"complex_value":{{"top_hits":{{"size":1,"_source":{{"include":["totalProductCount"]}}}}}}}}}}}}}}}}}}}}}}\n{{"index":"olx-eg-production-ads-ar"}}\n{{"from":0,"size":45,"track_total_hits":200000,"query":{{"function_score":{{"random_score":{{"seed":99}},"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"product":"featured"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}}}}}},"sort":["_score"]}}\n{{"index":"olx-eg-production-ads-ar"}}\n{{"from":{page},"size":{size},"track_total_hits":200000,"query":{{"bool":{{"must":[{{"term":{{"category.slug":"properties"}}}},{{"term":{{"location.externalID":"{_id}"}}}}]}}}},"sort":[{{"extraFields.price":{{"order":"desc"}}}},{{"extraFields.salary_to":{{"order":"desc"}}}},{{"id":{{"order":"desc"}}}}]}}\n',
]
def start_requests(self):
page = 0
size = 0
for data in self.post_data:
for city in self.cities:
for _id in self.city_ids:
meta = dict(data_post=data, main_city=city, sub_city=_id)
yield scrapy.Request(
url=self.listing_endpoint,
method="POST",
headers=self.headers,
body=data.format(city=city, _id=_id, page=page, size=size),
callback=self.parse_links,
meta=meta,
)
def parse_links(self, response):
size = 45
page = 45
# dp = response.meta["data_post"]
# mc = response.meta["main_city"]
# sc = response.meta["sub_city"]
meta = {
"data": response.meta["data_post"],
"main_city": response.meta["main_city"],
"sub_city": response.meta["sub_city"],
}
total = response.json()["responses"][2]["hits"]["total"]["value"]
# print("data-post", dp.format(city=mc, _id=sc, page=page, size=size))
if total <= 100000 or total >= 10000:
for i in range(0, 9955):
page = i + 1
yield scrapy.Request(
url=self.listing_endpoint,
method="POST",
headers=self.headers,
body=meta["data"].format(
city=meta["main_city"],
_id=meta["sub_city"],
page=page,
size=size,
),
callback=self.parse_links,
meta=meta,
)
if total < 10000:
for i in range(0, total):
page = i + 1
yield scrapy.Request(
url=self.listing_endpoint,
method="POST",
headers=self.headers,
body=meta["data"].format(
city=meta["main_city"],
_id=meta["sub_city"],
page=page,
size=size,
),
callback=self.parse_links,
meta=meta,
)
if "hits" in response.json()["responses"][1]["hits"]:
try:
listing_data = response.json()["responses"][2]["hits"]["hits"]
except:
listing_data = response.json()["responses"][1]["hits"]["hits"]
for listing in listing_data:
listing_id = listing["_source"]["externalID"]
listing_url = "https://www.olx.com.eg/en/ad/" + listing_id
yield scrapy.Request(
url=listing_url,
headers=self.headers,
callback=self.parse_details,
meta={"listing_url": listing_url},
)
def parse_details(self, response):
item = OlxEgyptItem()
reference_id = response.css("div._171225da::text").get().replace("Ad id ", "")
sub_detail_list = response.css("div._676a547f ::text").extract()
item["URL"] = response.meta.get("listing_url")
try:
item["Breadcrumb"] = (
response.css("li._8c543153 ::text")[4].get()
+ "/"
+ response.css("li._8c543153 ::text")[3].get()
+ "/"
+ response.css("li._8c543153 ::text")[2].get()
+ "/"
+ response.css("li._8c543153 ::text")[1].get()
+ "/"
+ response.css("li._8c543153 ::text").get()
)
except:
item["Breadcrumb"] = (
+response.css("li._8c543153 ::text")[3].get()
+ "/"
+ response.css("li._8c543153 ::text")[2].get()
+ "/"
+ response.css("li._8c543153 ::text")[1].get()
+ "/"
+ response.css("li._8c543153 ::text").get()
)
item["Price"] = response.css("span._56dab877 ::text").get()
item["Title"] = response.css("h1.a38b8112::text").get()
item["Type"] = response.css("div.b44ca0b3 ::text")[1].get()
item["Bedrooms"] = response.css("span.c47715cd::text").get()
try:
item["Bathrooms"] = response.css("span.c47715cd::text")[1].get()
except:
item["Bathrooms"] = ""
try:
item["Area"] = response.css("span.c47715cd::text")[2].get()
except:
for sub in sub_detail_list:
if "Area (m²)" in sub_detail_list:
item["Area"] = sub_detail_list[
sub_detail_list.index("Area (m²)") + 1
]
else:
item["Area"] = ""
item["Location"] = response.css("span._8918c0a8::text").get()
try:
if response.css("div.b44ca0b3 ::text")[18].get() == "Compound":
item["Compound"] = response.css("div.b44ca0b3 ::text")[19].get()
elif response.css("div.b44ca0b3 ::text")[16].get() == "Compound":
item["Compound"] = response.css("div.b44ca0b3 ::text")[17].get()
except:
item["Compound"] = ""
item["seller"] = response.css("span._261203a9._2e82a662::text").getall()[1]
member_since = response.css("span._34a7409b ::text")[1].get()
if member_since == "Cars for Sale":
item["Seller_member_since"] = response.css("span._34a7409b ::text").get()
if "Commercial ID: " in member_since:
item["Seller_member_since"] = response.css("span._34a7409b ::text")[2].get()
else:
item["Seller_member_since"] = member_since
res = requests.get(
f"https://www.olx.com.eg/api/listing/{reference_id}/contactInfo/"
)
item["Seller_phone_number"] = res.json()["mobile"]
item["Description"] = (
response.css("div._0f86855a ::text").get().replace("\n", "")
)
item["Amenities"] = ",".join(response.css("div._27f9c8ac ::text").extract())
item["Reference"] = reference_id
item["Listed_date"] = response.css("span._8918c0a8 ::text")[1].get()
item["Level"] = ""
item["Payment_option"] = ""
item["Delivery_term"] = ""
item["Furnished"] = ""
item["Delivery_date"] = ""
item["Down_payment"] = ""
for sub_detail in sub_detail_list:
if "Level" in sub_detail_list:
item["Level"] = sub_detail_list[sub_detail_list.index("Level") + 1]
if "Payment Option" in sub_detail_list:
item["Payment_option"] = sub_detail_list[
sub_detail_list.index("Payment Option") + 1
]
if "Delivery Term" in sub_detail_list:
item["Delivery_term"] = sub_detail_list[
sub_detail_list.index("Delivery Term") + 1
]
if "Furnished" in sub_detail_list:
item["Furnished"] = sub_detail_list[
sub_detail_list.index("Furnished") + 1
]
if "Delivery Date" in sub_detail_list:
item["Delivery_date"] = sub_detail_list[
sub_detail_list.index("Delivery Date") + 1
]
if "Down Payment" in sub_detail_list:
item["Down_payment"] = sub_detail_list[
sub_detail_list.index("Down Payment") + 1
]
item["Image_url"] = response.css("picture._219b7e0a ::attr(srcset)")[1].get()
yield item
1条答案
按热度按时间frebpwbc1#
您应该考虑在
parse_links
方法的两个回调中添加 meta参数。主要的想法是将它们设置回原来的位置,因为随着递归的深入,你会一次又一次地得到它们。