@@ -61,6 +61,77 @@ def update_references(path, replacements):
6161 log .exception ("Error occurred while updating references %s" , path )
6262
6363
def get_media_type(ext):
    """Return the MIME type for an image file extension.

    Args:
        ext: File extension such as ``".jpg"``. Matching is case-insensitive,
            surrounding whitespace is ignored and the leading dot is
            optional (``"JPG"`` and ``".jpg"`` are equivalent).

    Returns:
        The MIME type string (for example ``"image/jpeg"``), or ``None`` when
        ``ext`` is empty, ``None`` or not one of the known image extensions.
    """
    if not ext:
        return None

    ext = ext.strip().lower()
    # os.path.splitext() yields ".jpg", but accept a bare "jpg" as well so the
    # lookup does not silently miss on an otherwise valid extension.
    if ext and not ext.startswith("."):
        ext = "." + ext

    media_types = {
        ".avif": "image/avif",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".svg": "image/svg+xml",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }
    return media_types.get(ext)
78+
79+
def update_manifest(output_path, avif_files):
    """Update ``content.opf`` so it references the converted image files.

    Every element in ``content.opf`` whose ``href`` points at a converted
    image (matched by file name, with or without a directory prefix such as
    ``images/``) gets its ``href`` rewritten to the new file name. If the same
    element carries a ``media-type`` attribute and the new extension is known
    to ``get_media_type``, that attribute is updated in the same step so the
    two never disagree.

    Args:
        output_path: Directory containing ``content.opf``; must support the
            ``/`` operator (e.g. ``pathlib.Path``).
        avif_files: Mapping of original image path -> converted image path.
            Entries with a falsy converted path are skipped.

    Returns:
        None. The file is rewritten in place only when something changed.
        Read/write failures are logged as warnings, not raised.

    Note:
        Files are matched by base name only, so two images with the same file
        name in different directories cannot be told apart.
    """
    import re
    from urllib.parse import quote, unquote

    manifest_path = output_path / "content.opf"

    try:
        with open(manifest_path, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        log.warning("Failed to read content.opf: %s", e)
        return

    # old basename -> (new basename, new media type or None)
    renames = {}
    for old_path, new_path in avif_files.items():
        if not new_path:
            continue
        old_name = os.path.basename(old_path)
        new_name = os.path.basename(new_path)
        if not old_name or not new_name:
            continue
        new_ext = os.path.splitext(new_name)[1]
        renames[old_name] = (new_name, get_media_type(new_ext))

    if not renames:
        return

    # Whole start tags, so attributes split across several lines stay together.
    tag_re = re.compile(r"<[A-Za-z_][^<>]*>")
    href_re = re.compile(r"""(\shref\s*=\s*)(["'])(.*?)\2""", re.DOTALL)
    media_type_re = re.compile(r"""(\smedia-type\s*=\s*)(["'])(.*?)\2""", re.DOTALL)

    def rewrite_tag(tag_match):
        tag = tag_match.group(0)
        href_match = href_re.search(tag)
        if href_match is None:
            return tag

        # Compare only the final path component so "images/old.jpg" matches
        # "old.jpg", while "bold.jpg" does not.
        directory, sep, base = href_match.group(3).rpartition("/")
        if base in renames:
            new_name, new_media_type = renames[base]
            new_base = new_name
        else:
            # hrefs may be percent-encoded (e.g. "my%20image.jpg")
            decoded = unquote(base)
            if decoded not in renames:
                return tag
            new_name, new_media_type = renames[decoded]
            new_base = quote(new_name)

        new_href = f"{directory}{sep}{new_base}"
        # Function replacements avoid backslash/group-reference interpretation
        # of file names by re.sub.
        tag = href_re.sub(
            lambda m: f"{m.group(1)}{m.group(2)}{new_href}{m.group(2)}", tag, count=1
        )
        if new_media_type:
            tag = media_type_re.sub(
                lambda m: f"{m.group(1)}{m.group(2)}{new_media_type}{m.group(2)}", tag, count=1
            )
        return tag

    # Single pass over the document: renames cannot chain into each other
    # (a.jpg -> a.png -> a.avif) the way sequential str.replace calls could.
    updated = tag_re.sub(rewrite_tag, content)
    if updated == content:
        return

    try:
        with open(manifest_path, "w", encoding="utf-8") as f:
            f.write(updated)
    except (OSError, UnicodeError) as e:
        log.warning("Failed to write content.opf: %s", e)
133+
134+
64135def convert_to_text_pdf (args , path ):
65136 import ocrmypdf , ocrmypdf .exceptions
66137
@@ -200,11 +271,14 @@ def process_path(args, path) -> str | None:
200271 processes .cmd (* command , limit_ram = True , nice = 6 )
201272 except subprocess .CalledProcessError :
202273 log .exception ("[%s]: Calibre failed to process book. Skipping..." , str (path ))
274+ if output_path .exists ():
275+ devices .rmtree (args , output_path ) # Remove transcode
203276 return str (path )
204277
205278 if not output_path .exists () or path_utils .is_empty_folder (output_path ):
206- output_path .unlink () # Remove transcode
207279 log .error ("Could not transcode %s" , path )
280+ if output_path .exists ():
281+ output_path .rmdir ()
208282 return str (path )
209283
210284 # replace CSS
@@ -231,16 +305,51 @@ def process_path(args, path) -> str | None:
231305 for text_path in text_paths :
232306 update_references (text_path , replacements )
233307
234- # compare final output size
235- if args .delete_larger and path_utils .folder_size (output_path ) > original_stats .st_size :
236- devices .rmtree (args , output_path ) # Remove transcode
308+ # Update content.opf manifest with converted image references
309+ if any (avif_files .values ()):
310+ update_manifest (output_path , avif_files )
311+
312+ # Repackage to EPUB using content.opf as input
313+ opf_path = output_path / "content.opf"
314+ epub_path = output_path .with_suffix (".OEB.epub" )
315+ epub_path = Path (devices .clobber_new_file (args , str (epub_path )))
316+
317+ epub_command = [
318+ "ebook-convert" ,
319+ str (opf_path ),
320+ str (epub_path ),
321+ "--no-default-epub-cover" ,
322+ "--epub-inline-toc" ,
323+ "--dont-split-on-page-breaks" ,
324+ ]
325+
326+ if args .simulate :
327+ print (shlex .join (epub_command ))
328+ return str (path )
329+
330+ try :
331+ processes .cmd (* epub_command , limit_ram = True , nice = 6 )
332+ except subprocess .CalledProcessError :
333+ log .exception ("[%s]: Calibre failed to package EPUB" , path )
334+ epub_path .unlink (missing_ok = True ) # Remove transcode
335+ return str (path )
336+
337+ if not epub_path .exists ():
338+ log .error ("Could not create EPUB %s" , path )
237339 return str (path )
238340
239- if args .delete_larger :
341+ # Clean up .OEB folder
342+ devices .rmtree (args , output_path )
343+
344+ # compare final output size
345+ epub_size = os .path .getsize (epub_path )
346+ if args .delete_larger and epub_size > original_stats .st_size :
347+ epub_path .unlink (missing_ok = True ) # Remove transcode
348+ return str (path )
349+ elif args .delete_larger :
240350 path .unlink () # Remove original
241- path_utils .folder_utime (output_path , (original_stats .st_atime , original_stats .st_mtime ))
242351
243- return str (output_path )
352+ return str (epub_path )
244353
245354
246355def process_text ():
0 commit comments