Skip to content

Commit dc74f2f

Browse files
committed
shrink text: make epub final output instead of OEB folder
1 parent 03cb5ee commit dc74f2f

1 file changed

Lines changed: 116 additions & 7 deletions

File tree

library/mediafiles/process_text.py

Lines changed: 116 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,77 @@ def update_references(path, replacements):
6161
log.exception("Error occurred while updating references %s", path)
6262

6363

64+
def get_media_type(ext):
    """Look up the image MIME type that corresponds to a file extension.

    ``ext`` must include its leading dot (for example ``".png"``); the
    comparison ignores case. Returns the MIME type string, or ``None`` when
    the extension is not one of the known image types.
    """
    # Grouped by MIME type so aliases such as .jpg/.jpeg sit side by side.
    extensions_by_type = {
        "image/avif": (".avif",),
        "image/jpeg": (".jpg", ".jpeg"),
        "image/png": (".png",),
        "image/gif": (".gif",),
        "image/svg+xml": (".svg",),
        "image/webp": (".webp",),
        "image/bmp": (".bmp",),
    }
    wanted = ext.lower()
    for mime_type, extensions in extensions_by_type.items():
        if wanted in extensions:
            return mime_type
    return None
78+
79+
80+
def update_manifest(output_path, avif_files):
    """Point the content.opf manifest at the converted image files.

    Every manifest ``<item>`` tag whose ``href`` names a converted file is
    rewritten as a unit: the file-name part of the ``href`` is replaced with
    the new name (any directory prefix such as ``images/`` is kept) and, when
    the new extension has a known MIME type, the tag's ``media-type``
    attribute is set to it. Handling both attributes together keeps them
    consistent with each other.

    Args:
        output_path: Directory that contains ``content.opf``. It is combined
            with the file name using the ``/`` operator.
        avif_files: Mapping of original image path -> converted image path.
            Entries with a falsy value (nothing was converted) are ignored.

    Returns:
        None. Failures to read or write the manifest are logged as warnings,
        and the file is only rewritten when something actually changed.

    Note:
        Items are matched by file name only, so identically named images in
        different folders are all rewritten.
    """
    import re
    from urllib.parse import quote, unquote

    manifest_path = output_path / "content.opf"

    try:
        with open(manifest_path, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        log.warning("Failed to read content.opf: %s", e)
        return

    # old basename -> new basename, for files that were really converted
    renames = {}
    for old_path, new_path in avif_files.items():
        if not new_path:
            continue
        old_name = os.path.basename(old_path)
        new_name = os.path.basename(new_path)
        if old_name and old_name != new_name:
            renames[old_name] = new_name
    if not renames:
        return

    # "\s" after "item" keeps <itemref> (spine entries) out of the match;
    # "[^>]*" lets a tag span several lines.
    item_re = re.compile(r"<(?:\w+:)?item\s[^>]*>")
    href_re = re.compile(r"""\shref\s*=\s*(["'])(.*?)\1""", re.DOTALL)
    media_type_re = re.compile(r"""(\smedia-type\s*=\s*)(["'])(.*?)\2""", re.DOTALL)

    def rewrite_item(match):
        tag = match.group(0)
        href = href_re.search(tag)
        if href is None:
            return tag

        # Compare only the file-name part so "images/old.jpg" still matches,
        # and decode percent-escapes (e.g. "%20") before the lookup.
        directory, slash, filename = href.group(2).rpartition("/")
        decoded = unquote(filename)
        new_name = renames.get(decoded)
        if new_name is None:
            return tag

        # Keep the href percent-encoded if it was encoded to begin with.
        new_filename = quote(new_name) if decoded != filename else new_name
        tag = tag[: href.start(2)] + directory + slash + new_filename + tag[href.end(2) :]

        new_media_type = get_media_type(os.path.splitext(new_name)[1])
        if new_media_type:
            tag = media_type_re.sub(
                lambda m: f"{m.group(1)}{m.group(2)}{new_media_type}{m.group(2)}",
                tag,
                count=1,
            )
        return tag

    updated = item_re.sub(rewrite_item, content)
    if updated == content:
        return

    try:
        with open(manifest_path, "w", encoding="utf-8") as f:
            f.write(updated)
    except (OSError, UnicodeError) as e:
        log.warning("Failed to write content.opf: %s", e)
133+
134+
64135
def convert_to_text_pdf(args, path):
65136
import ocrmypdf, ocrmypdf.exceptions
66137

@@ -200,11 +271,14 @@ def process_path(args, path) -> str | None:
200271
processes.cmd(*command, limit_ram=True, nice=6)
201272
except subprocess.CalledProcessError:
202273
log.exception("[%s]: Calibre failed to process book. Skipping...", str(path))
274+
if output_path.exists():
275+
devices.rmtree(args, output_path) # Remove transcode
203276
return str(path)
204277

205278
if not output_path.exists() or path_utils.is_empty_folder(output_path):
206-
output_path.unlink() # Remove transcode
207279
log.error("Could not transcode %s", path)
280+
if output_path.exists():
281+
output_path.rmdir()
208282
return str(path)
209283

210284
# replace CSS
@@ -231,16 +305,51 @@ def process_path(args, path) -> str | None:
231305
for text_path in text_paths:
232306
update_references(text_path, replacements)
233307

234-
# compare final output size
235-
if args.delete_larger and path_utils.folder_size(output_path) > original_stats.st_size:
236-
devices.rmtree(args, output_path) # Remove transcode
308+
# Update content.opf manifest with converted image references
309+
if any(avif_files.values()):
310+
update_manifest(output_path, avif_files)
311+
312+
# Repackage to EPUB using content.opf as input
313+
opf_path = output_path / "content.opf"
314+
epub_path = output_path.with_suffix(".OEB.epub")
315+
epub_path = Path(devices.clobber_new_file(args, str(epub_path)))
316+
317+
epub_command = [
318+
"ebook-convert",
319+
str(opf_path),
320+
str(epub_path),
321+
"--no-default-epub-cover",
322+
"--epub-inline-toc",
323+
"--dont-split-on-page-breaks",
324+
]
325+
326+
if args.simulate:
327+
print(shlex.join(epub_command))
328+
return str(path)
329+
330+
try:
331+
processes.cmd(*epub_command, limit_ram=True, nice=6)
332+
except subprocess.CalledProcessError:
333+
log.exception("[%s]: Calibre failed to package EPUB", path)
334+
epub_path.unlink(missing_ok=True) # Remove transcode
335+
return str(path)
336+
337+
if not epub_path.exists():
338+
log.error("Could not create EPUB %s", path)
237339
return str(path)
238340

239-
if args.delete_larger:
341+
# Clean up .OEB folder
342+
devices.rmtree(args, output_path)
343+
344+
# compare final output size
345+
epub_size = os.path.getsize(epub_path)
346+
if args.delete_larger and epub_size > original_stats.st_size:
347+
epub_path.unlink(missing_ok=True) # Remove transcode
348+
return str(path)
349+
elif args.delete_larger:
240350
path.unlink() # Remove original
241-
path_utils.folder_utime(output_path, (original_stats.st_atime, original_stats.st_mtime))
242351

243-
return str(output_path)
352+
return str(epub_path)
244353

245354

246355
def process_text():

0 commit comments

Comments
 (0)