@@ -86,6 +86,16 @@ def __init__(self, pipeline_options: PdfPipelineOptions) -> None:
8686 self .model = getattr (self .dotsocr_options , "model" , "dots-mocr" )
8787 self .max_completion_tokens = self .dotsocr_options .max_completion_tokens
8888 self .timeout = self .dotsocr_options .timeout
89+ retry_count = getattr (self .dotsocr_options , "retry_count" , 2 )
90+ try :
91+ retry_count = int (retry_count )
92+ except (TypeError , ValueError ):
93+ _log .warning (
94+ "Invalid genos_layout_options.retry_count=%r. Falling back to 2." ,
95+ retry_count ,
96+ )
97+ retry_count = 2
98+ self .retry_count = max (0 , retry_count )
8999
90100 def _use_dotsocr_table_structure (self ) -> bool :
91101 return (
@@ -585,47 +595,68 @@ def _process_page(self, conv_res: ConversionResult, page: Page) -> Page:
585595 # 바이트 스트림을 base64로 인코딩
586596 base64_image = base64 .b64encode (buffer .getvalue ()).decode ("utf-8" )
587597
588- response_text = call_vlm_server (
589- prompt = prompt ,
590- base64_image = base64_image ,
591- url = self .dotocr_endpoint ,
592- api_key = self .api_key ,
593- model = self .model ,
594- max_completion_tokens = self .max_completion_tokens ,
595- timeout = self .timeout ,
596- )
598+ total_attempts = self .retry_count + 1
599+ response = None
600+ result = None
601+ for attempt in range (1 , total_attempts + 1 ):
602+ try :
603+ response_text = call_vlm_server (
604+ prompt = prompt ,
605+ base64_image = base64_image ,
606+ url = self .dotocr_endpoint ,
607+ api_key = self .api_key ,
608+ model = self .model ,
609+ max_completion_tokens = self .max_completion_tokens ,
610+ timeout = self .timeout ,
611+ )
612+ if not isinstance (response_text , str ) or not response_text .strip ():
613+ raise ValueError ("Empty VLM response text" )
614+ response = _parse_vlm_json_response (response_text )
615+ except Exception :
616+ if attempt >= total_attempts :
617+ raise
618+ _log .warning (
619+ "DotsOCR layout request failed (page=%s, attempt=%d/%d). Retrying..." ,
620+ page .page_no ,
621+ attempt ,
622+ total_attempts ,
623+ exc_info = True ,
624+ )
625+ continue
626+
627+ result = _extract_layout_result_items (response )
628+ if isinstance (result , list ):
629+ if attempt > 1 :
630+ _log .info (
631+ "DotsOCR layout request recovered after retry (page=%s, attempt=%d/%d)." ,
632+ page .page_no ,
633+ attempt ,
634+ total_attempts ,
635+ )
636+ break
637+
638+ if attempt < total_attempts :
639+ _log .warning (
640+ "Unexpected VLM response schema (page=%s, attempt=%d/%d). Retrying. Parsed type=%s; value=%r" ,
641+ page .page_no ,
642+ attempt ,
643+ total_attempts ,
644+ type (response ).__name__ ,
645+ response ,
646+ )
647+ continue
597648
598- # 디버그용으로 response_text 화면에 출력
599- # print("VLM Response Data:", json.dumps(json.loads(response_text), indent=2, ensure_ascii=False))
600-
601- response = _parse_vlm_json_response (response_text )
602- if isinstance (response , dict ):
603- result = response .get ("result" )
604- if result is None :
605- # Fallback for providers that use a different list key.
606- result = response .get ("items" )
607- elif isinstance (response , list ):
608- result = response
609- else :
610- result = None
611-
612- if isinstance (result , str ):
613- nested = _parse_vlm_json_response (result )
614- if isinstance (nested , dict ):
615- result = nested .get ("result" )
616- if result is None :
617- result = nested .get ("items" )
618- elif isinstance (nested , list ):
619- result = nested
620-
621- if not isinstance (result , list ):
622649 _log .warning (
623- "Unexpected VLM response schema. Parsed type=%s; falling back to empty predictions. value=%r" ,
650+ "Unexpected VLM response schema after retries (page=%s, attempts=%d). Falling back to empty predictions. Parsed type=%s; value=%r" ,
651+ page .page_no ,
652+ total_attempts ,
624653 type (response ).__name__ ,
625654 response ,
626655 )
627656 result = []
628657
658+ assert isinstance (result , list )
659+
629660 clusters = []
630661 raw_table_html_by_cluster_id : dict [int , str ] = {}
631662 raw_formula_latex_by_cluster_id : dict [int , str ] = {}
@@ -905,6 +936,29 @@ def call_vlm_server(
905936 raise ValueError (f"응답 파싱 오류: { e } \n 응답 본문: { response .text } " ) from e
906937
907938
939+ def _extract_layout_result_items (response ):
940+ if isinstance (response , dict ):
941+ result = response .get ("result" )
942+ if result is None :
943+ # Fallback for providers that use a different list key.
944+ result = response .get ("items" )
945+ elif isinstance (response , list ):
946+ result = response
947+ else :
948+ result = None
949+
950+ if isinstance (result , str ):
951+ nested = _parse_vlm_json_response (result )
952+ if isinstance (nested , dict ):
953+ result = nested .get ("result" )
954+ if result is None :
955+ result = nested .get ("items" )
956+ elif isinstance (nested , list ):
957+ result = nested
958+
959+ return result
960+
961+
908962def _parse_vlm_json_response (response_text : str ):
909963 if not isinstance (response_text , str ):
910964 raise TypeError (
0 commit comments