-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparseOSfile.py
More file actions
66 lines (60 loc) · 3.46 KB
/
parseOSfile.py
File metadata and controls
66 lines (60 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from docx import *
import re
# Item number at the very start of a paragraph: an optional leading space,
# two or three digits, then a period.  Only the digits are captured.
_ITEM_NUMBER_RE = re.compile(r' ?([0-9][0-9][0-9]?)\.')


def _leading_item_number(text):
    """Return the item number that opens *text*, or None if there is none.

    The number is zero-padded to three characters and ASCII-encoded, e.g.
    ' 12. Do something' gives '012'.
    """
    found = _ITEM_NUMBER_RE.match(text)
    if found is None:
        return None
    return found.group(1).zfill(3).encode('ascii')


def _paths_for_header(colheader):
    """Decide which paths the items under a table column header belong to.

    Returns a list of keys ('weak + behind' and/or 'weak + ontime'), an empty
    list when the column is for 'average'/'strong' only and is ignored, or
    None when the header names neither a strength nor 'behind', in which case
    the column's items are recorded as an unspecified branch.
    """
    # NOTE(review): the nesting of these tests was reconstructed from a copy
    # of the file whose indentation had been lost - confirm against the
    # original if the branch classification looks wrong.
    header = colheader.lower()
    if 'weak' not in header:
        if 'average' in header or 'strong' in header:
            return []
        if 'behind' not in header:
            return None
    targets = []
    if not ('skip if behind' in header or 'not behind' in header):
        targets.append('weak + behind')
    if header != 'behind':
        targets.append('weak + ontime')
    return targets


def parseOSfile(osfn):
    """Parse the given OS file, extracting the paths of interest.

    osfn is passed straight to docx's Document(), and the document is read
    once for both its paragraphs and its tables.

    Returns a dict with three keys:

    'weak + behind', 'weak + ontime'
        Sorted, de-duplicated lists of item numbers (three characters,
        zero-padded, ASCII-encoded).  Numbered body paragraphs go to both
        lists, unless the paragraph mentions 'skip if behind', in which case
        it only goes to 'weak + ontime'.  Numbered paragraphs in the second
        row of a table are routed according to the column header in the
        first row.
    'branches'
        Different from the others: it holds all options for 'unspecified'
        branches.  There is one entry per table that has at least one such
        item; each entry is a list with one list of item numbers per column.
        These lists are neither sorted nor de-duplicated.
    """
    osfile = Document(osfn)
    truepaths = ['weak + behind', 'weak + ontime']
    paths = {'weak + behind': [],
             'weak + ontime': [],
             'branches': []}

    # First, go through the paragraphs and pull out any numbers starting lines.
    for par in osfile.paragraphs:
        itemno = _leading_item_number(par.text)
        if itemno is None:
            continue
        if 'skip if behind' in par.text.lower():
            paths['weak + ontime'].append(itemno)
        else:
            for path in truepaths:
                paths[path].append(itemno)

    # Next, go through the tables.
    for tab in osfile.tables:
        # Keep track of "undefined" branches - i.e. those not defined by
        # strength and speed.  One list per column, in column order.
        branchpaths = []
        for col in tab.columns:
            column_branch = []
            branchpaths.append(column_branch)
            try:
                cells = col.cells
                targets = _paths_for_header(cells[0].paragraphs[0].text)
                for par in cells[1].paragraphs:
                    itemno = _leading_item_number(par.text)
                    if itemno is None:
                        continue
                    if targets is None:
                        column_branch.append(itemno)
                    else:
                        for path in targets:
                            paths[path].append(itemno)
            except IndexError:
                # The column has no second row (or a cell without
                # paragraphs): there is nothing to extract from it.
                pass
            except Exception as e:
                # Best effort: report the problem and carry on with the
                # remaining columns rather than abandoning the whole file.
                print(e)
        if any(branchpaths):
            paths['branches'].append(branchpaths)

    for path in truepaths:
        paths[path] = sorted(set(paths[path]))  # Remove duplicates and sort
    return paths