Skip to content

Commit 1d2659f

Browse files
authored
[FIX] Add cookie in Collector, improve error handling and CI (#29)
* fix: add cookie in `Collector`
* ci: use python 3.11 and 3.13
* fix: improve error handling in collect and downloadImage functions
* fix: standardize header key for cookies to "Cookie"
1 parent 43c690f commit 1d2659f

9 files changed

Lines changed: 18 additions & 8 deletions

File tree

.github/workflows/test_on_schedule.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
fail-fast: false
1515
matrix:
1616
os: [ubuntu-latest, windows-latest, macos-latest]
17-
python-version: ["3.9", "3.10", "3.11"]
17+
python-version: ["3.9", "3.11", "3.13"]
1818

1919
steps:
2020
- uses: actions/checkout@v4

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
![](https://img.shields.io/pypi/v/pixiv-utils)
44
[![Daily test](https://github.com/CWHer/PixivCrawler/actions/workflows/test_on_schedule.yml/badge.svg)](https://github.com/CWHer/PixivCrawler/actions/workflows/test_on_schedule.yml)
5-
![](https://img.shields.io/badge/Python-3.9%20%7C%203.10%20%7C%203.11-green)
5+
![](https://img.shields.io/badge/Python-3.9%20%7C%203.11%20%7C%203.13-green)
66
![](https://img.shields.io/badge/Platform-Windows%20%7C%20Linux%20%7C%20MacOS-blue)
77

88
The Chinese version of README can be found [here](./README_CN.md).

pixiv_utils/pixiv_crawler/collector/collector.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,10 @@ def collect(self):
7878
f"https://www.pixiv.net/ajax/illust/{illust_id}/pages?lang=zh"
7979
for illust_id in self.id_group
8080
]
81+
# NOTE: Add COOKIE to collect R18 content
8182
additional_headers = [
8283
{
84+
"Cookie": user_config.cookie,
8385
"Referer": f"https://www.pixiv.net/artworks/{illust_id}",
8486
"x-user-id": user_config.user_id,
8587
}

pixiv_utils/pixiv_crawler/collector/collector_unit.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ def collect(
3939
printInfo(f"{url} complete")
4040
return id_group
4141

42+
raise RuntimeError(f"Request failed with status code {response.status_code} for {url}")
43+
4244
except Exception as e:
4345
assertWarn(not debug_config.show_error, e)
4446
assertWarn(not debug_config.show_error, f"This is {i} attempt to collect {url}")

pixiv_utils/pixiv_crawler/crawlers/bookmark_crawler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def _requestCount(self):
4444
url = self.user_url + "/bookmark/tags?lang=zh"
4545
printInfo("===== Requesting bookmark count =====")
4646

47-
headers = {"COOKIE": user_config.cookie}
47+
headers = {"Cookie": user_config.cookie}
4848
headers.update(network_config.headers)
4949
for i in range(download_config.retry_times):
5050
try:
@@ -104,7 +104,7 @@ def collect(self, artworks_per_json: int = 48):
104104
)
105105
)
106106

107-
additional_headers = {"COOKIE": user_config.cookie}
107+
additional_headers = {"Cookie": user_config.cookie}
108108
collect_bookmark_fn = functools.partial(
109109
collect, selector=selectBookmark, additional_headers=additional_headers
110110
)

pixiv_utils/pixiv_crawler/crawlers/keyword_crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def collect(self, artworks_per_json: int = 60):
7575
for i in range(n_page):
7676
urls.add(url.format(i + 1))
7777

78-
additional_headers = {"COOKIE": user_config.cookie}
78+
additional_headers = {"Cookie": user_config.cookie}
7979
collect_keyword_fn = functools.partial(
8080
collect, selector=selectKeyword, additional_headers=additional_headers
8181
)

pixiv_utils/pixiv_crawler/crawlers/ranking_crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def addDate(current: datetime.date, days):
7676
{
7777
"Referer": re.search("(.*)&p", url).group(1),
7878
"x-requested-with": "XMLHttpRequest",
79-
"COOKIE": user_config.cookie,
79+
"Cookie": user_config.cookie,
8080
}
8181
for url in urls
8282
]

pixiv_utils/pixiv_crawler/crawlers/users_crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def collect(self):
2929
additional_headers = {
3030
"Referer": f"https://www.pixiv.net/users/{self.artist_id}/illustrations",
3131
"x-user-id": user_config.user_id,
32-
"COOKIE": user_config.cookie,
32+
"Cookie": user_config.cookie,
3333
}
3434
image_ids = collect(url, selectUser, additional_headers)
3535
if image_ids is not None:

pixiv_utils/pixiv_crawler/downloader/download_image.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,20 @@ def downloadImage(url: str, download_time: float = 10) -> float:
5353
if len(response.content) != image_size:
5454
time.sleep(download_config.fail_delay)
5555
download_time += 2
56-
continue
56+
raise RuntimeError(
57+
f"Image size mismatch: expected {image_size}, got {len(response.content)}"
58+
)
5759

5860
with open(image_path, "wb") as f:
5961
f.write(response.content)
6062
if debug_config.verbose:
6163
printInfo(f"{image_name} complete")
6264
return image_size / 2**20
6365

66+
raise RuntimeError(
67+
f"Request failed with status code {response.status_code} for {image_name}"
68+
)
69+
6470
except Exception as e:
6571
assertWarn(not debug_config.show_error, e)
6672
assertWarn(not debug_config.show_error, f"This is {i} attempt to download {image_name}")

0 commit comments

Comments
 (0)