Skip to content

Commit cd2120f

Browse files
author
Emil Kadermetov
committed
function 'find_quantities_singleUse' and other little functions added
1 parent 6a5570f commit cd2120f

File tree

1 file changed

+80
-40
lines changed

1 file changed

+80
-40
lines changed

parser.py

Lines changed: 80 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,6 @@ def pull_up_files(file_list:list):
77
return [load_workbook(f) for f in file_list]
88

99

10-
def find_additional_data(book, sheet:str):
11-
pass
12-
13-
1410
def find_main_data(book, sheet:str):
1511
page = book.get_sheet_by_name(sheet)
1612
data_beginning_row = None
@@ -19,20 +15,16 @@ def find_main_data(book, sheet:str):
1915
if cell.value == 'date':
2016
data_beginning_row = cell.row + 1
2117
if isinstance(page[f'A{data_beginning_row}'].value, float): # check of row follows next to data_beginng row
22-
## print('begin:', data_beginning_row)
2318
continue
2419
else:
2520
raise ValueError("Error: 'date'-row is found but float-type date-cell don't follow further")
2621
if data_beginning_row is not None and type(cell.value) == float:
2722
data_range += 1
28-
## print('value:', cell.value, cell.row, 'count:', data_range)
2923
elif data_beginning_row is not None and data_range > 1:
30-
## print('end of range sirching wiht result:', data_range)
3124
break
3225
if data_beginning_row is None:
3326
print(f'no main data finded on "{sheet}"')
3427
return
35-
## print(book, 'len =', data_range, f'range = A{data_beginning_row}:A{data_beginning_row+data_range-1}', sep=',')
3628
return data_beginning_row, data_beginning_row + data_range - 1
3729

3830

@@ -77,16 +69,13 @@ def find_singleUse_data(book, sheet:str):
7769
data_range = 0
7870
c = 0
7971
for cell in page['A']:
80-
# c+=1
81-
# print(f'{c})' ,cell.value)
8272
if cell.value == 'Single use packaging':
8373
if not page[f'A{cell.row+1}'].value == 'Date':
8474
raise ValueError('error during searching singleUse_range: \
8575
"Date" row don`t follow after "Single use packaging" row')
8676
data_beginning_row = cell.row + 2
8777
continue
8878
if data_beginning_row is not None:
89-
# print(f'{c})' ,cell.value, '2if')
9079
if is_longFormat_date(cell.value):
9180
data_range += 1
9281
continue
@@ -95,10 +84,37 @@ def find_singleUse_data(book, sheet:str):
9584
if data_beginning_row is None:
9685
print(f'no "Single use packaging" data finded on "{sheet}"')
9786
return
98-
print('OUT:', (data_beginning_row, data_beginning_row + data_range - 1))
99-
data_beginning_row, data_beginning_row + data_range - 1
87+
return data_beginning_row, data_beginning_row + data_range - 1
88+
89+
90+
def find_quantities_singleUse(book, sheet:str, singleUse_range):
    """Return the letters of the quantity columns of a single-use table.

    The row directly above the first data row is treated as the headline
    row; every cell in it whose value is 'Number' or 'Rate' marks a
    quantity column.

    Args:
        book: workbook object exposing ``get_sheet_by_name``.
        sheet: name of the worksheet to inspect.
        singleUse_range: ``(first_row, last_row)`` of the data rows, or
            ``None`` when no single-use table was found on the sheet.

    Returns:
        Tuple of column letters, in the order the headline cells are
        iterated. Empty tuple when ``singleUse_range`` is ``None``.
    """
    # find_singleUse_data returns None for sheets without the table;
    # answer with "no columns" instead of failing on None[0].
    if singleUse_range is None:
        return ()

    page = book.get_sheet_by_name(sheet)
    # The headline row is addressed by its number given as a string.
    headline_row = str(singleUse_range[0] - 1)
    return tuple(
        get_column_letter(cell.column)
        for cell in page[headline_row]
        if cell.value in ('Number', 'Rate')
    )
97+
10098

99+
def is_rows_merged(rowData:list):
    """Tell whether the first value of ``rowData`` packs several rows into one.

    A value counts as merged when it is a string containing a newline.

    Args:
        rowData: list of cell values; only the first element is inspected.

    Returns:
        ``True`` if the first value is a string with a newline in it,
        ``False`` otherwise (including an empty list or a non-string value).
    """
    if not rowData:
        return False
    first = rowData[0]
    # Cell values may be numbers or None; only text can hold merged rows.
    return isinstance(first, str) and '\n' in first
101104

105+
106+
def check_codes(codes:list, book, sheet):
    """Validate that every entry of ``codes`` looks like a three-digit code.

    Each entry is converted with ``str`` and must then be exactly three
    characters long and consist only of digits.

    Args:
        codes: values to validate.
        book: workbook the values came from; used only in the error message.
        sheet: sheet name the values came from; used only in the error message.

    Returns:
        ``True`` when every entry passes (also for an empty list).

    Raises:
        ValueError: on the first entry that is not a three-digit string.
    """
    for code in codes:
        text = str(code)
        if len(text) != 3 or not text.isdigit():
            # Report the offending value itself, not the sheet name twice.
            raise ValueError(f'code value {text} in {book} on {sheet} does not look like code')
    return True
115+
116+
117+
102118
def get_column(book, sheet:str, cells):
    """Collect the ``value`` of every item found under ``cells`` on a sheet.

    Args:
        book: workbook object exposing ``get_sheet_by_name``.
        sheet: name of the worksheet to read.
        cells: key used to index the worksheet.

    Returns:
        List of the ``value`` attributes, in iteration order.
    """
    worksheet = book.get_sheet_by_name(sheet)
    values = []
    for item in worksheet[cells]:
        values.append(item.value)
    return values
@@ -122,67 +138,91 @@ def is_varieties_or_costumers(data_list:list):
122138
def correct_priece_format(price:int, book, sheet:str):
    """Format an integer price as text with a comma before the last three digits.

    Examples: ``123`` -> ``'0,123'``, ``1234`` -> ``'1,234'``.

    Args:
        price: integer value read from the price column.
        book: workbook the value came from; used only in the error message.
        sheet: sheet name the value came from; used only in the error message.

    Returns:
        The formatted string.

    Raises:
        ValueError: if ``price`` is not an ``int`` (``bool`` is rejected too)
            or has fewer than three digits.
    """
    # Built as one line so the message carries no run of indentation
    # whitespace from a backslash continuation.
    error_text = (f'ValueError: while convert to right format value {price} '
                  f'from column Prise in book {book}, page {sheet}')

    # bool is a subclass of int; True/False are never valid prices.
    if isinstance(price, bool) or not isinstance(price, int):
        raise ValueError(error_text)

    # Format the magnitude and re-attach the sign, so a minus sign is
    # never counted as one of the digits.
    sign = '-' if price < 0 else ''
    digits = str(abs(price))
    if len(digits) < 3:
        raise ValueError(error_text)

    whole = digits[:-3] or '0'
    return f'{sign}{whole},{digits[-3:]}'
134150

135151

136152
def parse(book):
    """Collect the main-table and single-use-table columns from a workbook.

    Every sheet except the first (the title page) is scanned. Values found
    on each sheet are appended to per-column lists that span all sheets.

    Args:
        book: workbook object exposing ``get_sheet_names`` and whatever the
            helper functions called here require.

    Returns:
        Tuple of nine lists: ``(varieties, costumers, numbers, pieces,
        totals, prices, amounts, codes, codes_singleUse)``.

    Raises:
        ValueError: if the variety or costumer column fails validation, or
            if a helper (price formatting, code check) rejects a value.
    """
    sheets = book.get_sheet_names()[1:]  # list of sheets without title-page
    varieties, costumers, numbers, pieces = [], [], [], []
    totals, prices, amounts, codes = [], [], [], []
    codes_singleUse = []

    for sh in sheets:
        mainData_range = find_main_data(book, sh)
        if mainData_range is not None:
            variety = get_range_from_column(book, sh, mainData_range, 'C')
            if not is_varieties_or_costumers(variety):
                # The loop variable is `sh`; the former `{sheet}` was undefined here.
                raise ValueError(f"Error in column 'variety' in {book} on page '{sh}'")
            varieties += variety

            costumer = get_range_from_column(book, sh, mainData_range, 'F')
            if not is_varieties_or_costumers(costumer):
                raise ValueError(f"Error in column 'costumer' in {book} on page '{sh}'")
            costumers += costumer

            quantity_colums = find_quantity_columns(book, sh, mainData_range)

            numbers += get_range_from_column(book, sh, mainData_range, quantity_colums[0])
            pieces += get_range_from_column(book, sh, mainData_range, quantity_colums[1])
            totals += get_range_from_column(book, sh, mainData_range, quantity_colums[2])

            price = get_range_from_column(book, sh, mainData_range, quantity_colums[3])
            prices += [correct_priece_format(i, book, sh) for i in price]

            amounts += get_range_from_column(book, sh, mainData_range, quantity_colums[4])
            codes += get_range_from_column(book, sh, mainData_range, quantity_colums[5])

        # NOTE(review): the single-use table is looked up for every sheet,
        # independent of whether main data was found - confirm this matches
        # the intended nesting.
        singleUse_range = find_singleUse_data(book, sh)
        if singleUse_range is not None:
            code_singleUse = get_range_from_column(book, sh, singleUse_range, 'B')
            # A single value holding newline-separated codes is split into
            # one entry per code.
            if len(code_singleUse) == 1 and is_rows_merged(code_singleUse):
                code_singleUse = code_singleUse[0].split('\n')
            check_codes(code_singleUse, book, sh)
            codes_singleUse += code_singleUse

    return varieties, costumers, numbers, pieces, totals, prices, amounts, codes, codes_singleUse
178208

179209

180210
if __name__ == '__main__':
    # Manual check against a local workbook: locate the single-use table
    # on 'Page 4' and print the letters of its quantity columns.
    file = '/home/emil/Загрузки/out/pdfFile.xlsx'
    wb = load_workbook(file)

    sr = find_singleUse_data(wb, 'Page 4')
    # find_singleUse_data returns None (after printing a notice) when the
    # table is absent; skip the column lookup in that case.
    if sr is not None:
        qc = find_quantities_singleUse(wb, 'Page 4', sr)
        print(qc)
222+
223+
224+
225+
186226

187227

188228

0 commit comments

Comments
 (0)