@@ -183,71 +183,75 @@ def get_body(self):
183183 namespaces = NSPREFIXES )[0 ]
184184
def search(self, search, result_type='text', ignore_runs=True):
    '''Search the document for a regex and return the first matching
    element object, or None if nothing is found.

    search -- regular expression pattern (passed to re.compile).
    result_type -- 'text' (the default) returns the matching text
        element itself; 'paragraph' or 'run' (case-insensitive)
        returns the nearest enclosing paragraph or run element.
    ignore_runs -- if true, each paragraph is searched as a single
        string, so a match may span several text elements; the text
        element containing the start of the match is returned. If
        false, every text element is searched on its own.

    Raises ValueError if result_type is 'paragraph' or 'run' and the
    matching text element has no enclosing element of that kind.'''
    searchre = re.compile(search)
    paragraph_tag = '{' + NSPREFIXES['w'] + '}p'
    run_tag = '{' + NSPREFIXES['w'] + '}r'
    text_tag = '{' + NSPREFIXES['w'] + '}t'
    result = None
    if ignore_runs:
        for paragraph in self.document.iter(paragraph_tag):
            # NOTE(review): the offsets computed below assume that
            # get_text(paragraph) is the plain concatenation of the
            # paragraph's non-empty text elements -- confirm against
            # get_text's implementation.
            match = searchre.search(get_text(paragraph))
            if not match:
                continue
            # Find the text element whose character span contains the
            # start of the match.
            start = 0
            for text_element in paragraph.iter(text_tag):
                if not text_element.text:
                    continue
                end = start + len(text_element.text)
                if start <= match.start() < end:
                    result = text_element
                    break
                start = end
            # Only the first paragraph containing a match is examined.
            break
    else:
        for element in self.document.iter(text_tag):
            if element.text and searchre.search(element.text):
                result = element
                break
    if result is not None:
        wanted = result_type.lower()
        ancestor_tags = {'paragraph': paragraph_tag, 'run': run_tag}
        if wanted in ancestor_tags:
            # iterancestors() always returns an iterator (never None),
            # so the lookup has to be attempted to learn whether an
            # enclosing element actually exists.
            ancestor = next(result.iterancestors(ancestor_tags[wanted]),
                            None)
            if ancestor is None:
                raise ValueError('matching text element has no enclosing '
                                 '%s element' % wanted)
            result = ancestor
    return result
223232
224233 def replace (self , search , replace , ignore_runs = True ):
225234 '''Replace all occurrences of string with a different string.
226235 If ignore_runs is true, the function will ignore separate run
227- and text elements and instead search each raw paragraph text
236+ and text elements and instead search each paragraph text
228237 content as a single string. Note that this will also ignore
229238 formatting elements within a paragraph such as tabs, which
230239 may cause unexpected results. Set ignore_runs to false if you
231240 want a more conservative search.'''
232241 searchre = re .compile (search )
233242 if ignore_runs :
234- para_list = [child for child in
235- self .document .iter ('{' + NSPREFIXES ['w' ] + '}p' )]
236- for para in para_list :
237- paratext = []
243+ for paragraph_element in self .document .iter ('{' + NSPREFIXES ['w' ] +
244+ '}p' ):
238245 rundict = collections .OrderedDict ()
239246 start = 0
240- for element in para .iter ('{' + NSPREFIXES ['w' ] + '}r' ):
241- runtext = []
242- for subelement in element .iter ('{' + NSPREFIXES ['w' ] + '}t' ):
243- if subelement .text :
244- paratext .append (subelement .text )
245- runtext .append (subelement .text )
246- rundict [element ] = [start , start +
247- len (subelement .text ), '' .join (runtext )]
248- start += len (subelement .text )
247+ for run_element in paragraph_element .iter ('{' + NSPREFIXES ['w' ]
248+ + '}r' ):
249+ run_string = get_text (run_element )
250+ rundict [run_element ] = [start , start + len (run_string ),
251+ run_string ]
252+ start += len (run_string )
249253 match_slices = [match .span () for match in
250- re .finditer (searchre , '' . join ( paratext ))]
254+ re .finditer (searchre , get_text ( paragraph_element ))]
251255 preliminary_runs = collections .OrderedDict ()
252256 runs_to_exclude = set ()
253257 for run , text_info in rundict .items ():
@@ -275,7 +279,7 @@ def replace(self, search, replace, ignore_runs=True):
275279 previous_text .text += text_info [2 ]
276280 previous_text_info [1 ] += len (text_info [2 ])
277281 previous_text_info [2 ] += text_info [2 ]
278- para .remove (run )
282+ paragraph_element .remove (run )
279283 overflow = 0
280284 for index , (run , text_info ) in enumerate (
281285 runs_to_modify .items ()):
@@ -325,7 +329,7 @@ def clean(self):
325329 element .getparent ().remove (element )
326330
327331 def add_style (self , styleId , type , default = None , name = None ):
328- if default in ( 1 , '1' , True ) :
332+ if default :
329333 style = makeelement ('style' , attributes = {'styleId' : styleId ,
330334 'type' : type , 'default' : default })
331335 else :
@@ -428,10 +432,16 @@ def get_document_text(self):
428432 return paratextlist
429433
430434 def merge (self , docpath , page_break = True ):
435+ '''Appends a .docx to the end of this document. docpath can
436+ either be a Docx object or a file path. This method will likely
437+ break if both documents possess the same type of elements that
438+ require id mapping such as lists or comments. Pictures and other
439+ <w: drawing> elements, however, should work 100% of the time.'''
431440 if isinstance (docpath , Docx ):
432441 fromdoc = docpath
433442 else :
434443 fromdoc = Docx (docpath )
444+ # Update relationship Ids
435445 for relationship in fromdoc .relationships :
436446 old_rId = relationship .values ()[0 ]
437447 relationship_type = relationship .values ()[1 ]
@@ -460,8 +470,13 @@ def merge(self, docpath, page_break=True):
460470 head , tail = os .path .split (dirpath )
461471 if tail == 'media' :
462472 for file in filenames :
463- shutil .copyfile (os .path .join (fromdoc .media_dir , file ),
464- os .path .join (self .media_dir , file ))
473+ if os .path .join (self .media_dir , file ) not in tofiles :
474+ shutil .copyfile (os .path .join (fromdoc .media_dir , file ),
475+ os .path .join (self .media_dir , file ))
476+ else : # Account for duplicate picture names
477+ shutil .copyfile (os .path .join (fromdoc .media_dir ,
478+ 'new_' + file ),
479+ os .path .join (self .media_dir , file ))
465480 else :
466481 for file in filenames :
467482 if not os .path .isdir (os .path .join (self .write_dir , relpath )):
@@ -498,6 +513,7 @@ def merge(self, docpath, page_break=True):
498513 self .body .extend (fromdoc .body .iterchildren ())
499514
500515 def save (self , output ):
516+ '''Saves the Docx to the output path provided.'''
501517 docxfile = zipfile .ZipFile (output , mode = 'w' ,
502518 compression = zipfile .ZIP_DEFLATED )
503519 # Move to the template data path
@@ -506,7 +522,8 @@ def save(self, output):
506522 # Write changes made to xml files in write directory between __init__()
507523 # and save()
508524 for xmlfile , relpath in self .xmlfiles .items ():
509- absolutepath = os .path .split (os .path .join (self .write_dir , relpath ))[0 ]
525+ absolutepath = os .path .split (
526+ os .path .join (self .write_dir , relpath ))[0 ]
510527 if not os .path .isdir (absolutepath ):
511528 os .mkdir (absolutepath )
512529 newdoc = io .open (relpath , 'w' )
@@ -529,19 +546,14 @@ def save(self, output):
529546 pass
530547
def merge_text(run):
    '''Collapse a run's text into its first text element.

    The first text child of the run receives get_text(run) as its
    text; every other text child is removed from the run. A run with
    no text children is left untouched.'''
    text_children = list(run.iterchildren('{' + NSPREFIXES['w'] + '}t'))
    if not text_children:
        return
    keeper = text_children[0]
    # The combined text is read while all text children are still
    # attached to the run, before any of them is removed.
    keeper.text = get_text(run)
    for extra in text_children[1:]:
        run.remove(extra)
545557
546558def modify_font (elements , name = 'default' , size = 'default' , underline = 'default' ,
547559color = 'default' , highlight = 'default' , strikethrough = 'default' , bold = 'default' ,
0 commit comments