11from openpyxl import load_workbook
2+ from openpyxl .utils import get_column_letter , column_index_from_string
3+
24
35
46def pull_up_files (file_list :list ):
@@ -17,52 +19,51 @@ def find_main_data(book, sheet:str):
1719 if cell .value == 'date' :
1820 data_beginning_row = cell .row + 1
1921 if isinstance (page [f'A{ data_beginning_row } ' ].value , float ): # check of row follows next to data_beginng row
20- # print('begin:', data_beginning_row)
22+ ## print('begin:', data_beginning_row)
2123 continue
2224 else :
2325 raise ValueError ("Error: 'date'-row is found but float-type date-cell don't follow further" )
2426 if data_beginning_row is not None and type (cell .value ) == float :
2527 data_range += 1
26- # print('value:', cell.value, cell.row, 'count:', data_range)
28+ ## print('value:', cell.value, cell.row, 'count:', data_range)
2729 elif data_beginning_row is not None and data_range > 1 :
28- # print('end of range sirching wiht result:', data_range)
30+ ## print('end of range sirching wiht result:', data_range)
2931 break
3032 if data_beginning_row is None :
3133 print (f'no main data finded on "{ sheet } "' )
3234 return
33- # print(book, 'len =', data_range, f'range = A{data_beginning_row}:A{data_beginning_row+data_range-1}', sep=',')
35+ ## print(book, 'len =', data_range, f'range = A{data_beginning_row}:A{data_beginning_row+data_range-1}', sep=',')
3436 return data_beginning_row , data_beginning_row + data_range - 1
3537
3638
def find_quantity_columns(book, sheet: str, mainData_range):
    """Locate the run of quantity columns on a sheet and return their letters.

    The first row of the main data block (``mainData_range[0]``) is scanned
    left to right. The run starts at the first ``int`` cell and ends one
    column past the first following ``str`` cell that contains a comma (the
    amount); the extra column is the 'code' column that follows the amount.
    Any cell that is neither ``int`` nor ``str`` met in between discards the
    start found so far.

    Args:
        book: Workbook object; it is indexed by sheet name.
        sheet: Name of the worksheet to scan.
        mainData_range: Sequence whose first item is the first data row number.

    Returns:
        Tuple of six column letters, from the first quantity column through
        the 'code' column inclusive.

    Raises:
        ValueError: If no such run is found, or the run does not span
            exactly six columns.
    """
    # Workbook.get_sheet_by_name() is deprecated; indexing is the supported form.
    page = book[sheet]
    first_data_row = str(mainData_range[0])
    beginning = None
    end = None
    for cell in page[first_data_row]:
        value = cell.value
        if isinstance(value, int):
            # Only the first int of a run marks its beginning.
            if beginning is None:
                beginning = cell.column
            continue
        if beginning is None:
            # Nothing started yet; non-int cells before the run are ignored.
            continue
        if not isinstance(value, str):
            # A non-int, non-str cell breaks the run; start searching again.
            beginning = None
            continue
        if ',' in value:  # TODO: REG digit-digit-comma-digit-digit
            # +1 includes the 'code' column that directly follows the amount.
            end = cell.column + 1
            break
    # Guard against None before subtracting, so a missing run is reported
    # with the intended ValueError rather than a TypeError.
    if beginning is None or end is None or end - beginning != 5:
        raise ValueError("Error during defining range of columns containing quantity values")
    return tuple(get_column_letter(i) for i in range(beginning, end + 1))
6667
6768
6869def get_column (book , sheet :str , cells ):
@@ -85,10 +86,31 @@ def is_varieties_or_costumers(data_list:list):
8586 return True
8687
8788
88- def pars (book ):
def correct_priece_format(price: int, book, sheet: str):
    """Render an integer price as a string with three fractional digits.

    The last three digits of the number are treated as the fractional part
    and separated from the rest by a comma: ``123`` -> ``'0,123'``,
    ``12345`` -> ``'12,345'``. A negative value keeps its sign in front of
    the whole part: ``-123`` -> ``'-0,123'``.

    Args:
        price: Integer price value.
        book: Source workbook; used only in the error message.
        sheet: Source sheet name; used only in the error message.

    Returns:
        The formatted price string.

    Raises:
        ValueError: If ``price`` is not an int (bools are rejected as well)
            or its magnitude has fewer than three digits.
    """
    def _bad_price():
        # Built lazily so valid prices never pay for formatting the message.
        return ValueError(
            f'ValueError: while convert to right format value {price} '
            f'from column Prise in book {book}, page {sheet}'
        )

    # bool is a subclass of int, but True/False is never a valid price.
    if isinstance(price, bool) or not isinstance(price, int):
        raise _bad_price()

    # Format the magnitude and re-attach the sign, so '-' is never counted
    # as a digit or placed after the comma.
    sign = '-' if price < 0 else ''
    digits = str(abs(price))
    if len(digits) < 3:
        raise _bad_price()
    whole = digits[:-3] or '0'
    return f'{sign}{whole},{digits[-3:]}'
101+
102+
103+
104+ def parse (book ):
89105 sheets = book .get_sheet_names ()[1 :] #list of sheets without title-page
90106 varieties = []
91107 costumers = []
108+ numbers = []
109+ pieces = []
110+ totals = []
111+ prices = []
112+ amounts = []
113+ codes = []
92114
93115 for sh in sheets :
94116 mainData_range = find_main_data (book , sh )
@@ -104,14 +126,40 @@ def pars(book):
104126 raise ValueError (f"Error in column 'costumer' in { book } on page '{ sheet } '" )
105127 costumers += costumer
106128
107- return varieties , costumers
129+ quantity_colums = find_quantity_columns (book , sh , mainData_range )
130+
131+ number = get_range_from_column (book , sh , mainData_range , quantity_colums [0 ])
132+ numbers += number
133+
134+ piece = get_range_from_column (book , sh , mainData_range , quantity_colums [1 ])
135+ pieces += piece
136+
137+ total = get_range_from_column (book , sh , mainData_range , quantity_colums [2 ])
138+ totals += total
139+
140+ price = get_range_from_column (book , sh , mainData_range , quantity_colums [3 ])
141+ price = [correct_priece_format (i , book , sh ) for i in price ]
142+ prices += price
143+
144+ amount = get_range_from_column (book , sh , mainData_range , quantity_colums [4 ])
145+ amounts += amount
146+
147+ code = get_range_from_column (book , sh , mainData_range , quantity_colums [5 ])
148+ codes += code
149+
150+
151+ return varieties , costumers , numbers , pieces , totals , prices , amounts , codes
108152
109153
if __name__ == '__main__':
    # Ad-hoc manual check against a workbook on the local disk.
    file = '/home/emil/Загрузки/out/pdfFile.xlsx'
    wb = load_workbook(file)
    # Index the workbook by sheet title; Workbook.get_sheet_by_name() is deprecated.
    sheet = wb['Page 3']
160+
161+
162+
163+
116164
117165
0 commit comments