Skip to content

Commit 352c8d6

Browse files
committed
fix asserts
1 parent 37e833b commit 352c8d6

File tree

1 file changed

+92
-58
lines changed

1 file changed

+92
-58
lines changed

Self_Tracking/play_books_greedy_optimal.py

Lines changed: 92 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@
6565

6666
# ---------------- Utility helpers ----------------
6767
def utility_value(r):
    """Transform rating (1-5) into cardinal utility.

    Make sure the minimum utility is <= 0; otherwise it benefits to quit
    as many books as fast as possible.

    Args:
        r: Rating on a 1-5 scale (scalar or numpy array).

    Returns:
        Cardinal utility: VALUE_BASE ** (r - 1) - 1, so a rating of 1 maps
        to exactly 0 utility and higher ratings grow exponentially.
    """
    # Exponential transform; the "- 1" anchors the minimum rating at 0 utility.
    return VALUE_BASE ** (r - 1) - 1
7072

7173

@@ -74,22 +76,22 @@ def inverse_utility_value(u):
7476
return np.log(u + 1) / np.log(VALUE_BASE) + 1
7577

7678

77-
def util_if_stop(u, f):
78-
"""Utility if I stop reading now, given that search costs are fixed.
79-
80-
But I haven't standardized on what hourly util means, if it includes search costs or not"""
81-
hourly_u = u / (READ_TIME_HOURS + SEARCH_COST_HOURS)
82-
hourly_u_while_reading = u / READ_TIME_HOURS
83-
return (1 - f) * READ_TIME_HOURS * hourly_u
84-
79+
# def util_if_stop(u, f):
80+
# """Utility if I stop reading now, given that search costs are fixed.
81+
#
82+
# But I haven't standardized on what hourly util means, if it includes search costs or not"""
83+
# hourly_u = u / (READ_TIME_HOURS + SEARCH_COST_HOURS)
84+
# hourly_u_while_reading = u / READ_TIME_HOURS
85+
# return (1 - f) * READ_TIME_HOURS * hourly_u
8586

86-
# Not sure which to go with
87-
def utility_value2(r):
88-
return r**VALUE_BASE
8987

90-
91-
def inverse_utility_value2(u):
92-
return u ** (1 / VALUE_BASE)
88+
# # Not sure which to go with
89+
# def utility_value2(r):
90+
# return r**VALUE_BASE
91+
#
92+
#
93+
# def inverse_utility_value2(u):
94+
# return u ** (1 / VALUE_BASE)
9395

9496

9597
# had 4/207 books I'd say were "average" at 1/3 (or 1/2 way thru?) and 5 at end;
@@ -161,9 +163,9 @@ def optimise_schedule_greedy(
161163
hourly_opportunity=utility_value(2) / (READ_TIME_HOURS + SEARCH_COST_HOURS),
162164
) -> Dict[str, np.ndarray]:
163165
"""
164-
Using greedy approach at each time step find the drop fraction of estimated books that maximize
166+
Using greedy approach at each time step of estimate of book marginal utils
167+
find the drop fraction of estimated books that maximize
165168
the true final utility.
166-
returns what fraction of books remaining to drop at each reading fraction f
167169
Args:
168170
true_ratings: Array of true ratings for each book
169171
hourly_opportunity: utility per hour of the marginal book (includes search costs and quit rate)
@@ -221,6 +223,8 @@ def optimise_schedule_greedy(
221223
finish_u = a_est_full
222224
est_marginal_hourly_u = (finish_u - current_u) / remaining_t
223225

226+
sort_idx = np.argsort(est_marginal_hourly_u) # ascending: worst first
227+
224228
# Old version where hourly opportunity represented average util of replacement book
225229
# not marginal util
226230
# u_continue_est = (
@@ -232,22 +236,14 @@ def optimise_schedule_greedy(
232236
# ) # hourly opportunity includes search costs
233237
# u_stop_est = (cum_value + new_value) / book_time
234238

235-
sort_idx = np.argsort(est_marginal_hourly_u) # ascending: worst first
236239
# print(
237240
# f"Sort idx: {sort_idx} , diff: {diff[sort_idx]} , u_continue_est:"
238241
# f" {a_est_full[sort_idx]} , u_stop_est: {est_now[active_mask][sort_idx]}"
239242
# )
240-
# True marginal Hourly utilities of remaining books
243+
# # True marginal Hourly utilities of remaining books # don't care, only true ending util
241244
active_true_full = val_full[active_mask]
242245
active_true_part = val_partial[active_mask]
243-
244246
current_u = f * active_true_part
245-
finish_u = active_true_full
246-
h_util_keep = (finish_u - current_u) / remaining_t
247-
248-
# util_after_drop = remaining_t * hourly_opportunity
249-
# h_util_drop = (current_u + util_after_drop) / book_time
250-
h_util_drop = hourly_opportunity
251247

252248
# values if no dropping
253249
best_drop = 0.0
@@ -260,17 +256,22 @@ def optimise_schedule_greedy(
260256
best_u = true_avg_utils[idx_f - 1]
261257
best_drop_u = dropped_books_utils[idx_f - 1]
262258

259+
# based on estimate of marginal util, which d maximizes estimated final util?
263260
for d in D_GRID:
264261
k_drop = int(np.floor(d * active_mask.sum())) # number of books to drop
265262

266-
# Get indices of books to keep and drop
267263
drop_set = sort_idx[:k_drop]
268264
keep_mask = np.ones(len(sort_idx), dtype=bool)
269265
keep_mask[drop_set] = False
270266
# include utility from previously dropped books
271-
272-
h_total_u = h_util_keep[keep_mask].sum() + h_util_drop * k_drop
273-
total_u = h_total_u * remaining_t + dropped_books_utils[idx_f - 1]
267+
total_util_from_kept = active_true_full[keep_mask].sum()
268+
total_util_from_dropped = ( # what have so far + replacement with baseline
269+
current_u[~keep_mask].sum() + hourly_opportunity * remaining_t * k_drop
270+
)
271+
total_util_from_dropped += dropped_books_utils[
272+
idx_f - 1
273+
] # accumulate from already dropped before now
274+
total_u = total_util_from_kept + total_util_from_dropped
274275
if total_u > best_u:
275276
best_u = total_u
276277
best_drop = d
@@ -279,7 +280,7 @@ def optimise_schedule_greedy(
279280
else:
280281
best_rating_cut = 5 # dropping more books than have left
281282
# util of now dropped books plus util of replacing them with hourly opportunity
282-
best_drop_u = current_u[~keep_mask].sum() + h_util_drop * remaining_t * k_drop
283+
best_drop_u = total_util_from_dropped
283284

284285
best_spot_drop[idx_f] = best_drop # of books that remain
285286
best_cutoffs[idx_f] = best_rating_cut
@@ -289,29 +290,51 @@ def optimise_schedule_greedy(
289290
# Update active mask: drop the chosen set
290291
k_drop = int(np.floor(best_drop * active_mask.sum()))
291292
if k_drop > 0:
292-
# print(f"Dropping {k_drop} books at f={f:.2f} , d={best_drop:.2f} , best_u={best_u:.2f}")
293293
drop_global_idx = np.where(active_mask)[0][sort_idx[:k_drop]]
294+
keep_global_idx = np.where(active_mask)[0][sort_idx[k_drop:]]
295+
294296
active_mask[drop_global_idx] = False
297+
# print(f"Dropping {k_drop} books at f={f:.2f} , d={best_drop:.2f} , best_u={best_u:.2f}")
298+
# print(val_full[active_mask].mean(), val_full.mean(), val_full[~drop_global_idx].mean())
299+
assert est_now[~active_mask].mean() <= est_now[active_mask].mean(), (
300+
"Total Mask is selecting wrong subset",
301+
est_now[~active_mask].mean(),
302+
est_now[active_mask].mean(),
303+
)
304+
assert est_now[drop_global_idx].mean() <= est_now[keep_global_idx].mean(), (
305+
"Newest changes",
306+
est_now[drop_global_idx].mean(),
307+
est_now[keep_global_idx].mean(),
308+
)
295309

296310
# print(best_cum_drop, best_cutoffs, true_avg_utils, dropped_books_utils, sep="\n")
297311
true_avg_utils /= len(true_ratings)
298312
assert true_avg_utils[-1] >= val_full.mean(), (
299313
"Optimal strategy is worse than just reading all books"
300314
f" {true_avg_utils[-1]} {val_full.mean()}"
301315
)
316+
assert np.allclose(
317+
true_avg_utils, np.maximum.accumulate(true_avg_utils)
318+
), f"getting worse over time, {true_avg_utils}"
319+
320+
final_hourly_u = true_avg_utils[-1] / book_time
321+
# assert final_hourly_u >= hourly_opportunity, (
322+
# "Doing worse than replacement util; is okay if bootstraping samples: could have samples that're all bad",
323+
# final_hourly_u,
324+
# hourly_opportunity,
325+
# )
302326
cum_drop = np.prod(1 - best_spot_drop)
303327
n_keep = math.ceil(len(val_full) * cum_drop)
304328
if n_keep > 0:
305329
u_if_drop_best = np.sort(val_full)[-n_keep:].mean()
306-
print(true_avg_utils)
307-
print(best_spot_drop)
330+
# print(true_avg_utils)
331+
# print(best_spot_drop)
308332
assert true_avg_utils[-1] <= u_if_drop_best + 1e-06, (
309333
"Optimal strategy is better than reading optimal number of books initially"
310-
f" {true_avg_utils[-1]} {u_if_drop_best} {n_keep}/{len(val_full)}"
334+
f" {true_avg_utils[-1]} {u_if_drop_best} {n_keep}/{len(val_full)} baseline:"
335+
f" {hourly_opportunity}; avg_true_utils: {true_avg_utils} \naverage_dropped_u:"
336+
f" {dropped_books_utils/len(true_ratings)}"
311337
)
312-
assert np.allclose(
313-
true_avg_utils, np.maximum.accumulate(true_avg_utils)
314-
), "getting worse over time"
315338
return {"cur_drop": best_spot_drop, "cutoffs": best_cutoffs, "true_avg_utils": true_avg_utils}
316339

317340

@@ -345,17 +368,17 @@ def quit_u_h(df_cat: pd.DataFrame, rating_col: str) -> float:
345368
I get way more utility from quitting many books faster
346369
so the utility is hourly rate of a book with that utility if read whole thing * hours actually read
347370
"""
348-
category_quit_counts = dict(df_cat["Bookshelf"].value_counts())
371+
category_ct = dict(df_cat["Bookshelf"].value_counts())
349372
expected_num_quit = sum(
350-
[(STARTED_TO_FINISHED_RATIO[k] - 1) * v for k, v in category_quit_counts.items()]
373+
[(STARTED_TO_FINISHED_RATIO[k] - 1) * v for k, v in category_ct.items()]
351374
)
352375
if rating_col == "Usefulness /5 to Me":
353376
quit_u_if_read = utility_value(QUIT_USEFULNESS)
354377
else:
355378
quit_u_if_read = utility_value(QUIT_ENJOYMENT)
356-
quit_hourly_rate = quit_u_if_read / (READ_TIME_HOURS + SEARCH_COST_HOURS)
379+
357380
quit_h = expected_num_quit * (QUIT_AT_FRACTION * READ_TIME_HOURS + SEARCH_COST_HOURS)
358-
quit_u = quit_hourly_rate * quit_h
381+
quit_u = QUIT_AT_FRACTION * quit_u_if_read
359382
return quit_u, quit_h
360383

361384

@@ -381,6 +404,11 @@ def current_hourly_u(df_cat: pd.DataFrame, rating_col: str, cur_drop=None) -> fl
381404
finished_h = len(true_ratings_original) * avg_hours_reading(cur_drop)
382405

383406
quit_u, quit_h = quit_u_h(df_cat, rating_col)
407+
assert finished_u / finished_h > quit_u / quit_h, (
408+
"Higher util rate from quit books",
409+
(finished_u, finished_h),
410+
(quit_u, quit_h),
411+
)
384412
hourly_u = (finished_u + quit_u) / (finished_h + quit_h)
385413
return hourly_u
386414

@@ -400,15 +428,15 @@ def simulate_category(df_cat: pd.DataFrame, rating_col: str) -> Dict[str, np.nda
400428

401429
for j in range(N_FOR_BASELINE_U):
402430
# Store individual simulation paths and their true utilities
403-
cur_drop_acc = np.zeros(len(F_GRID))
404-
cutoff_acc = np.zeros(len(F_GRID))
431+
drop_at_f = np.zeros(len(F_GRID))
432+
cutoff_at_f = np.zeros(len(F_GRID))
405433

406434
all_drop_paths = [] # of books remaining, drop fraction at each step
407435
all_cutoffs = []
408436
all_true_utils = []
409437

410438
for i in range(N_SIM):
411-
if False: # j == 0 and i == 0:
439+
if True: # j == 0 and i == 0:
412440
# on first run, use original ratings to match empirical utility from real number of books dropped
413441
# this prevents error correction?
414442
bootstrapped_ratings = true_ratings_original
@@ -419,31 +447,37 @@ def simulate_category(df_cat: pd.DataFrame, rating_col: str) -> Dict[str, np.nda
419447
res = optimise_schedule_greedy(
420448
bootstrapped_ratings, hourly_opportunity=baseline_hourly_u
421449
)
422-
cur_drop_acc += res["cur_drop"]
423-
cutoff_acc += res["cutoffs"]
450+
drop_at_f += res["cur_drop"]
451+
cutoff_at_f += res["cutoffs"]
424452
all_drop_paths.append(res["cur_drop"])
425453
all_cutoffs.append(res["cutoffs"])
426454
all_true_utils.append(res["true_avg_utils"])
427-
cur_drop_acc /= N_SIM
428-
cutoff_acc /= N_SIM
455+
drop_at_f /= N_SIM
456+
cutoff_at_f /= N_SIM
429457

430458
# really we'd set the baseline as the average expected utility of following current strategy
431459
# , adjusting for already quit books, but better to be conservative
432460
new_baseline_u = np.percentile([i[-1] for i in all_true_utils], 20)
433-
hourly_avg_u = (len(true_ratings_original) * new_baseline_u + quit_u) / (
434-
len(true_ratings_original) * (READ_TIME_HOURS + SEARCH_COST_HOURS) + quit_h
461+
finished_u = len(true_ratings_original) * new_baseline_u
462+
finished_h = len(true_ratings_original) * (READ_TIME_HOURS + SEARCH_COST_HOURS)
463+
hourly_avg_u = (finished_u + quit_u) / (finished_h + quit_h)
464+
print(
465+
"end",
466+
baseline_hourly_u,
467+
hourly_avg_u,
468+
# 1 - np.cumprod(1 - drop_at_f), # average at each time path
469+
np.mean(cutoff_at_f),
435470
)
436-
print("end", baseline_hourly_u, hourly_avg_u, np.mean(cur_drop_acc), np.mean(cutoff_acc))
437471
baseline_hourly_u = hourly_avg_u
438472

439-
if baseline_hourly_u >= np.mean(utility_value(true_ratings_original)) / (
473+
if baseline_hourly_u >= np.mean(utility_value(bootstrapped_ratings)) / (
440474
READ_TIME_HOURS + SEARCH_COST_HOURS
441475
):
442-
print("WARNING: hourly_opportunity is greater than the mean utility of completedbooks")
476+
print("WARNING: hourly_opportunity is greater than the mean utility of completed books")
443477

444478
return {
445-
"cur_drop": cur_drop_acc,
446-
"cutoffs": cutoff_acc,
479+
"cur_drop": drop_at_f, # taking mean in wrong order, TODO
480+
"cutoffs": cutoff_at_f,
447481
"cur_drop_path": np.array(all_drop_paths),
448482
"cutoffs_all": np.array(all_cutoffs),
449483
"true_avg_utils": np.array(all_true_utils),
@@ -638,8 +672,8 @@ def print_all_shelves_summary(
638672
print(f"{'Fraction Read':>12} {'Cumulative Drop %':>20}")
639673
print("-" * 50)
640674
for target, idx in zip(target_fractions, milestone_indices):
641-
print(f"{f_grid[idx]:>12.2f} {shelf_data['cumulative_drop'][idx]*100:>20.2f}")
642-
print(f"Final cumulative drop: {shelf_data['cumulative_drop'][-1]*100:.1f}%")
675+
print(f"{f_grid[idx]:>12.2f} {shelf_data['avg_cumulative_drop'][idx]*100:>20.2f}")
676+
print(f"Final cumulative drop: {shelf_data['avg_cumulative_drop'][-1]*100:.1f}%")
643677

644678
median_idx = shelf_data.get(
645679
"median_idx",

0 commit comments

Comments
 (0)