|
12 | 12 |
|
# Hide the empty Tk root window; only the file-chooser dialog should appear.
Tk().withdraw()

# The OS file comes from the first command-line argument when given,
# otherwise from an interactive file dialog.
if len(sys.argv) > 1:
    filename = sys.argv[1]
else:
    filename = askopenfilename(title='Select the OS file')

# Slicing (rather than str.endswith) also copes with a cancelled dialog
# returning an empty, possibly non-string, value.
if not filename[-4:] == 'docx':
    sys.stderr.write('OS file must be of type *.docx' + '\n')
    sys.exit(3)

# The lesson identifier is the first run of three digits in the file name.
lessonmatch = re.search('[0-9][0-9][0-9]', filename)
if lessonmatch is None:
    # Previously this crashed with AttributeError on None.group().
    sys.stderr.write('OS file name must contain a three-digit lesson number' + '\n')
    sys.exit(3)
lesson = lessonmatch.group()

# Directory containing the OS file, with a trailing separator so later code
# can keep appending names to it. Going through os.path means a bare
# relative name (or a backslash-separated path) no longer collapses to '/',
# which would have pointed at the filesystem root.
filepath = os.path.join(os.path.dirname(os.path.abspath(filename)), '')
27 | 27 |
|
paths = parseOSfile(filename)

# Item names are gathered from three sources and then de-duplicated:
#   1. every file in Scripts/ whose name contains 'doc' (name up to '.doc'),
#   2. the two fixed lesson paths read from the OS file,
#   3. every item of every alternative of every branch point.
scriptfiles = [f for f in os.listdir(filepath + 'Scripts/') if 'doc' in f]
allitems = [fn.split('.doc')[0].encode('ascii') for fn in scriptfiles]

for path in ['weak + behind','weak + ontime']:
    # Path entries carry no lesson prefix; add it so they match script names.
    allitems.extend([lesson + '-' + p for p in paths[path]])

for branch in paths['branches']:
    for b in branch:
        allitems.extend([lesson + '-' + i for i in b])

allitems = list(set(allitems))
allitems.sort()

# Per-item statistics, keyed by item name without the lesson prefix;
# filled in while the timing report is written.
itemstats = {}
42 | 42 |
|
# Weights of the per-item timing model. predLength() multiplies each named
# statistic by its weight and adds 'y-intercept'; the result is passed to
# timeFormat(), which treats it as a number of seconds.
itemcoefficients = {
    'submit time': 0.302,
    'WTD count': 29.055,
    'next count': 5.602,
    'dialogue time (total)': 0.887,
    'dialogue time (main branch)': 0.443,
    'dialogue time (NR branch)': -0.344,
    'onscreen text word count': 0.114,
    'long submit time': -0.049,
    'corrects per branch': -3.133,
    'y-intercept': 20.293,
    'branch count': 1,
}
56 | 56 |
|
# Weights of the whole-lesson timing model, applied by predLength() to the
# aggregate produced by lessonStats().
# 'branch count' and 'total corrects' have zero weight but must stay listed:
# lessonStats() sums exactly the features named here and then reads those
# two totals back to derive 'corrects per branch'.
lessoncoefficients = {
    'WTD count': 32.97,
    'next count': 3.004,
    'dialogue time (total)': 1.213,
    'onscreen text word count': -0.092,
    'medium count': 6.307,
    'nonstandard submit time': 0.29,
    'long submit time': -0.234,
    'corrects per branch': -72.396,
    'branch count': 0.0,
    'total corrects': 0.0,
    'y-intercept': 640.44,
}
70 | 70 |
|
def timeFormat(time):
    '''Format a time in seconds as m:ss.

    The time is rounded to the nearest whole second first and only then
    split into minutes and seconds, so a value such as 59.6 becomes '1:00'
    (splitting before rounding used to produce '0:60').

    Minutes are not zero-padded and are not wrapped into hours. Negative
    times are rendered with a single leading '-', e.g. -30 -> '-0:30'.
    '''
    total = int(round(time))
    sign = '-' if total < 0 else ''
    minutes, seconds = divmod(abs(total), 60)
    return sign + str(minutes) + ':' + str(seconds).zfill(2)
76 | 76 |
|
def predLength(stats,coefs):
    '''Evaluate a linear model: intercept plus the weighted sum of features.

    stats -- mapping of feature name to value; it must contain every
             feature named in coefs (a missing one raises KeyError), and
             any extra entries are ignored.
    coefs -- mapping of feature name to weight, plus the constant term
             stored under 'y-intercept'.
    '''
    intercept = coefs['y-intercept']
    weighted = 0
    for feature in coefs:
        if feature == 'y-intercept':
            continue
        weighted += stats[feature]*coefs[feature]
    return intercept + weighted
82 | 82 |
|
def lessonStats(itemstats):
    '''Aggregate lesson item statistics for a given path through the lesson.

    itemstats -- iterable of per-item statistic dicts. Items may lack any
                 feature (skipped items are empty dicts); a missing feature
                 contributes nothing to its total.

    Returns a dict holding, for every feature named in lessoncoefficients,
    the sum of that feature over the items. 'corrects per branch' is then
    replaced by total corrects divided by total branch count, i.e. the
    average over all branches of the path rather than a sum of averages.
    It is 0.0 when the path has no branches at all.

    The dicts passed in are not modified.
    '''
    # Work on shallow copies so the derived 'total corrects' entry is not
    # written back into the caller's per-item statistics.
    items = [dict(i) for i in itemstats]
    for i in items:
        if 'corrects per branch' in i:
            i['total corrects'] = i['corrects per branch']*i['branch count']

    lessonstats = {}
    for feat in lessoncoefficients:
        lessonstats[feat] = sum([i[feat] for i in items if feat in i])

    branchcount = lessonstats['branch count']
    if branchcount:
        # float() forces true division even when both totals are ints.
        lessonstats['corrects per branch'] = float(lessonstats['total corrects'])/branchcount
    else:
        # No branches on this path: avoid dividing by zero.
        lessonstats['corrects per branch'] = 0.0

    return lessonstats
98 | 98 |
|
# Write the timing report next to the OS file: one row per lesson item,
# then one row per lesson path, echoing the same figures to stdout.
csvfilename = filepath + lesson + '_timing.csv'
warning = False
with open(csvfilename,'w') as csvfile:
    csvfile.write('item,time\n')
    for i in sorted(allitems):
        # i is '<lesson>-<item>'; itemstats is keyed by the bare item name.
        item = '-'.join(i.split('-')[1:])
        itemfile = filepath + 'Scripts/' + i + '.docx'
        # Built directly rather than with str.replace('docx','doc'), which
        # would also rewrite any 'docx' occurring in a directory name.
        docfile = filepath + 'Scripts/' + i + '.doc'

        if os.path.exists(itemfile):
            itemstats[item] = getlessonitemstats(itemfile)
            itemtime = timeFormat(predLength(itemstats[item],itemcoefficients))
            sys.stdout.write(i.ljust(15) + itemtime.rjust(10) + '\n')
            csvfile.write(i + ',' + itemtime + '\n')
        elif os.path.exists(docfile):
            sys.stderr.write('WARNING: script for item ' + item + ' is in *.doc format, not *.docx; skipping.' + '\n')
            itemstats[item] = {}
            csvfile.write(i + ',,(incorrect file format)\n')
        else:
            sys.stderr.write('WARNING: Scripts/' + lesson + '-' + item + '.docx not found; skipping.' + '\n')
            itemstats[item] = {}
            csvfile.write(i + ',,(file not found)\n')

    csvfile.write('\ndescription,time,path\n')

    branchpath = []
    for branch in paths['branches']:
        # Pick the alternative with the longest predicted item time. Skipped
        # items have empty stats and are left out of the sum; feeding them
        # to predLength would raise KeyError.
        branchpath += max(branch,key = lambda x: sum([predLength(itemstats[i],itemcoefficients) for i in x if itemstats[i]]))
        # This isn't strictly correct -- proper way would be to try all possible lesson paths for all
        # possible branch paths, since the lesson timing model is not the sum over items of the item
        # timing model. In practice, though, this should be more than good enough, and it's much simpler
        # if there are multiple branch points in paths['branches'].

    for path in ['weak + behind','weak + ontime']:
        pathitems = paths[path] + branchpath
        pathstats = [itemstats[i] for i in pathitems]
        pathtime = timeFormat(predLength(lessonStats(pathstats),lessoncoefficients))
        sys.stdout.write(path.ljust(15) + pathtime.rjust(10) + '\n')
        csvfile.write(path + ',' + pathtime + ',')
        csvfile.write('->'.join(sorted(pathitems)) + '\n')

# Hand the finished CSV to the shell so it opens in the associated program.
# NOTE(review): the path is passed unquoted with shell=True, so a path
# containing spaces or shell metacharacters may fail or be misinterpreted;
# confirm the intended platforms before changing this.
Popen(csvfilename, shell=True)
0 commit comments