Skip to content

Commit f95caf4

Browse files
author
Emil Kadermetov
committed
'parser' function now able to retrieve all desired columns
1 parent 4981c6d commit f95caf4

File tree

1 file changed

+71
-23
lines changed

1 file changed

+71
-23
lines changed

parser.py

Lines changed: 71 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from openpyxl import load_workbook
2+
from openpyxl.utils import get_column_letter, column_index_from_string
3+
24

35

46
def pull_up_files(file_list:list):
@@ -17,52 +19,51 @@ def find_main_data(book, sheet:str):
1719
if cell.value == 'date':
1820
data_beginning_row = cell.row + 1
1921
if isinstance(page[f'A{data_beginning_row}'].value, float): # check of row follows next to data_beginng row
20-
# print('begin:', data_beginning_row)
22+
## print('begin:', data_beginning_row)
2123
continue
2224
else:
2325
raise ValueError("Error: 'date'-row is found but float-type date-cell don't follow further")
2426
if data_beginning_row is not None and type(cell.value) == float:
2527
data_range += 1
26-
# print('value:', cell.value, cell.row, 'count:', data_range)
28+
## print('value:', cell.value, cell.row, 'count:', data_range)
2729
elif data_beginning_row is not None and data_range > 1:
28-
# print('end of range sirching wiht result:', data_range)
30+
## print('end of range sirching wiht result:', data_range)
2931
break
3032
if data_beginning_row is None:
3133
print(f'no main data finded on "{sheet}"')
3234
return
33-
# print(book, 'len =', data_range, f'range = A{data_beginning_row}:A{data_beginning_row+data_range-1}', sep=',')
35+
## print(book, 'len =', data_range, f'range = A{data_beginning_row}:A{data_beginning_row+data_range-1}', sep=',')
3436
return data_beginning_row, data_beginning_row + data_range - 1
3537

3638

37-
def find_quantity_columns(book, sheet:str):
38-
page = book.get_sheet_by_name(sheet) #tmp
39-
mainData_range = find_main_data(book, sheet)
39+
def find_quantity_columns(book, sheet: str, mainData_range):
    """Locate the run of quantity columns on a sheet and return their letters.

    The first data row (``mainData_range[0]``) is scanned cell by cell.
    A run starts at an ``int`` cell and ends one column after the first
    following ``str`` cell that contains a comma; the extra column is the
    'code' column that follows the amount. A cell that is neither ``int``
    nor ``str`` discards the run found so far.

    Args:
        book: workbook object providing ``get_sheet_by_name``.
        sheet: name of the sheet to scan.
        mainData_range: sequence whose first item is the first data row.

    Returns:
        Tuple of six column letters, from the start of the run through the
        'code' column.

    Raises:
        ValueError: if no start or no end of the run was found, or the run
            does not span exactly six columns.
    """
    page = book.get_sheet_by_name(sheet)
    first_row = str(mainData_range[0])
    beginning = None
    end = None
    for cell in page[first_row]:
        value = cell.value
        if isinstance(value, int):
            if beginning is None:
                beginning = cell.column
            continue
        if beginning is None:
            # Nothing started yet: keep looking for the first int cell.
            continue
        if not isinstance(value, str):
            # A non-int, non-str cell interrupts the run: start over.
            beginning = None
        elif ',' in value:  # TODO: REG digit-digit-comma-digit-digit
            # +1 includes the 'code' column that follows the amount cell.
            end = cell.column + 1
            break
    # Check for missing bounds first, so an unmatched row raises the
    # intended ValueError rather than a TypeError from ``None - int``.
    if beginning is None or end is None or end - beginning != 5:
        raise ValueError("Error during defining range of columns containing quantity values")
    return tuple(get_column_letter(i) for i in range(beginning, end + 1))
6667

6768

6869
def get_column(book, sheet:str, cells):
@@ -85,10 +86,31 @@ def is_varieties_or_costumers(data_list:list):
8586
return True
8687

8788

88-
def pars(book):
89+
def correct_priece_format(price: int, book, sheet: str):
    """Format an integer price as text with a comma before the last three digits.

    ``1250`` becomes ``'1,250'`` and ``250`` becomes ``'0,250'``.
    NOTE(review): presumably the cell lost its decimal comma during
    extraction and this restores it; confirm against the source workbooks.

    Args:
        price: non-negative integer with at least three digits.
        book: workbook (or its name), used only in the error message.
        sheet: sheet name, used only in the error message.

    Returns:
        The formatted price string.

    Raises:
        ValueError: if ``price`` is not an ``int``, is a ``bool``, is
            negative, or has fewer than three digits.
    """
    # Build the message once as a single line; a backslash continuation
    # inside the literal would embed the source indentation in the text.
    message = (
        f"while converting to right format value {price!r} "
        f"from column Price in book {book}, page {sheet}"
    )
    # bool is a subclass of int, and a minus sign would be treated as a
    # digit by the slicing below, so both are rejected explicitly.
    if isinstance(price, bool) or not isinstance(price, int) or price < 0:
        raise ValueError(message)
    digits = str(price)
    if len(digits) < 3:
        raise ValueError(message)
    # Exactly three digits leave an empty integer part, shown as '0'.
    return f"{digits[:-3] or '0'},{digits[-3:]}"
101+
102+
103+
104+
def parse(book):
89105
sheets = book.get_sheet_names()[1:] #list of sheets without title-page
90106
varieties = []
91107
costumers = []
108+
numbers = []
109+
pieces = []
110+
totals = []
111+
prices = []
112+
amounts = []
113+
codes = []
92114

93115
for sh in sheets:
94116
mainData_range = find_main_data(book, sh)
@@ -104,14 +126,40 @@ def pars(book):
104126
raise ValueError(f"Error in column 'costumer' in {book} on page '{sheet}'")
105127
costumers += costumer
106128

107-
return varieties, costumers
129+
quantity_colums = find_quantity_columns(book, sh, mainData_range)
130+
131+
number = get_range_from_column(book, sh, mainData_range, quantity_colums[0])
132+
numbers += number
133+
134+
piece = get_range_from_column(book, sh, mainData_range, quantity_colums[1])
135+
pieces += piece
136+
137+
total = get_range_from_column(book, sh, mainData_range, quantity_colums[2])
138+
totals += total
139+
140+
price = get_range_from_column(book, sh, mainData_range, quantity_colums[3])
141+
price = [correct_priece_format(i, book, sh) for i in price]
142+
prices += price
143+
144+
amount = get_range_from_column(book, sh, mainData_range, quantity_colums[4])
145+
amounts += amount
146+
147+
code = get_range_from_column(book, sh, mainData_range, quantity_colums[5])
148+
codes += code
149+
150+
151+
return varieties, costumers, numbers, pieces, totals, prices, amounts, codes
108152

109153

110154
if __name__ == '__main__':
111-
file = '/home/emil/Загрузки/out/pdfFile4.xlsx'
155+
file = '/home/emil/Загрузки/out/pdfFile.xlsx'
112156
wb = load_workbook(file)
113-
sheet = 'Page 2'
157+
sheet = wb.get_sheet_by_name('Page 3')
158+
114159

115-
print(find_quantity_columns(wb, sheet))
160+
161+
162+
163+
116164

117165

0 commit comments

Comments
 (0)